/**
 * Whether data should be committed by the job (as opposed to being committed by the tasks).
 * Data should be committed by the job if either {@link ConfigurationKeys#JOB_COMMIT_POLICY_KEY} is set to "full",
 * {@link ConfigurationKeys#PUBLISH_DATA_AT_JOB_LEVEL} is set to true, or a job-level data publisher is
 * specified via {@link ConfigurationKeys#JOB_DATA_PUBLISHER_TYPE}.
 */
private static boolean shouldCommitDataInJob(State state) {
  boolean jobCommitPolicyIsFull =
      JobCommitPolicy.getCommitPolicy(state.getProperties()) == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS;
  boolean publishDataAtJobLevel = state.getPropAsBoolean(ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL,
      ConfigurationKeys.DEFAULT_PUBLISH_DATA_AT_JOB_LEVEL);
  boolean jobDataPublisherSpecified =
      !Strings.isNullOrEmpty(state.getProp(ConfigurationKeys.JOB_DATA_PUBLISHER_TYPE));
  return jobCommitPolicyIsFull || publishDataAtJobLevel || jobDataPublisherSpecified;
}
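// Usage sketch (illustrative, not from the source): either of the two configuration knobs named in the
// Javadoc above is enough to make shouldCommitDataInJob() return true. Called here as it would be from
// within the enclosing class, since the method is private.
State jobState = new State();
jobState.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "full");
// alternatively: jobState.setProp(ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL, true);
boolean commitInJob = shouldCommitDataInJob(jobState);   // true for either setting above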
@Override
public State getExecutionMetadata() {
  return new State();
}
@SuppressWarnings("unchecked") public T withProps(State props) { this.props = new State(props.getProperties()); return (T) this; }
@SuppressWarnings({ "serial", "unchecked" }) protected static <T> Optional<T> populateField(State state, String key, TypeToken<T> token) { if (state.contains(key)) { Optional<T> fieldValue; if (new TypeToken<Boolean>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsBoolean(key)); } else if (new TypeToken<Integer>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsInt(key)); } else if (new TypeToken<Long>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsLong(key)); } else if (new TypeToken<List<String>>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsList(key)); } else { fieldValue = (Optional<T>) Optional.of(state.getProp(key)); } state.removeProp(key); return fieldValue; } return Optional.<T> absent(); }
/**
 * Create a KafkaWrapper based on the given type of Kafka API and list of Kafka brokers.
 *
 * @param state A {@link State} object that should contain a list of comma-separated Kafka brokers
 * in property "kafka.brokers". It may optionally specify whether to use the new Kafka API by setting
 * use.new.kafka.api=true.
 */
public static KafkaWrapper create(State state) {
  Preconditions.checkNotNull(state.getProp(ConfigurationKeys.KAFKA_BROKERS),
      "Need to specify at least one Kafka broker.");
  KafkaWrapper.Builder builder = new KafkaWrapper.Builder();
  if (state.getPropAsBoolean(USE_NEW_KAFKA_API, DEFAULT_USE_NEW_KAFKA_API)) {
    builder = builder.withNewKafkaAPI();
  }
  Config config = ConfigUtils.propertiesToConfig(state.getProperties());
  return builder.withBrokers(state.getPropAsList(ConfigurationKeys.KAFKA_BROKERS))
      .withConfig(config)
      .build();
}
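// Usage sketch (property names taken from the Javadoc above; broker addresses are hypothetical).
State state = new State();
state.setProp(ConfigurationKeys.KAFKA_BROKERS, "broker1:9092,broker2:9092");
state.setProp("use.new.kafka.api", true);   // optional, per the Javadoc above
KafkaWrapper kafkaWrapper = KafkaWrapper.create(state);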
/**
 * Starts metric reporting and appends the given metrics file suffix to the current value of
 * {@link ConfigurationKeys#METRICS_FILE_SUFFIX}.
 */
public void startMetricReportingWithFileSuffix(State state, String metricsFileSuffix) {
  Properties metricsReportingProps = new Properties();
  metricsReportingProps.putAll(state.getProperties());

  String oldMetricsFileSuffix =
      state.getProp(ConfigurationKeys.METRICS_FILE_SUFFIX, ConfigurationKeys.DEFAULT_METRICS_FILE_SUFFIX);
  if (Strings.isNullOrEmpty(oldMetricsFileSuffix)) {
    oldMetricsFileSuffix = metricsFileSuffix;
  } else {
    oldMetricsFileSuffix += "." + metricsFileSuffix;
  }
  metricsReportingProps.setProperty(ConfigurationKeys.METRICS_FILE_SUFFIX, oldMetricsFileSuffix);
  startMetricReporting(metricsReportingProps);
}
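// Illustration (hypothetical suffix values, invoked from within the class that declares the method above):
// an existing suffix and the argument are joined with a dot before reporting starts.
State state = new State();
state.setProp(ConfigurationKeys.METRICS_FILE_SUFFIX, "hourly");
startMetricReportingWithFileSuffix(state, "compaction");
// metric reporting now runs with METRICS_FILE_SUFFIX = "hourly.compaction";
// with no pre-existing suffix, the value would simply be "compaction"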
@Override
public Optional<CompactorListener> createCompactorListener(Properties properties)
    throws CompactorListenerCreationException {
  State state = new State(properties);

  if (Strings.isNullOrEmpty(state.getProp(COMPACTOR_LISTENERS))) {
    return Optional.absent();
  }

  List<CompactorListener> listeners = new ArrayList<>();
  for (String listenerClassName : state.getPropAsList(COMPACTOR_LISTENERS)) {
    try {
      listeners.add((CompactorListener) ConstructorUtils
          .invokeConstructor(Class.forName(listenerClassName), properties));
    } catch (ReflectiveOperationException e) {
      throw new CompactorListenerCreationException(String
          .format("Unable to create CompactorListeners from key \"%s\" with value \"%s\"",
              COMPACTOR_LISTENERS, properties.getProperty(COMPACTOR_LISTENERS)), e);
    }
  }
  return Optional.<CompactorListener>of(new SerialCompactorListener(listeners));
}
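// Usage sketch (listener class names are hypothetical): COMPACTOR_LISTENERS is read as a comma-separated
// list of CompactorListener implementations, each of which needs a constructor that accepts a Properties
// object. "listenerFactory" stands in for an instance of the class declaring createCompactorListener() above.
Properties props = new Properties();
props.setProperty(COMPACTOR_LISTENERS, "com.example.AuditingCompactorListener,com.example.MetricsCompactorListener");
Optional<CompactorListener> listener = listenerFactory.createCompactorListener(props);
// present: a SerialCompactorListener wrapping both instances; absent: no listeners configured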
/**
 * Creates a unique {@link String} representing this branch.
 */
private static String getForkMetricsId(State state, int index) {
  return state.getProp(ConfigurationKeys.FORK_BRANCH_NAME_KEY + "." + index,
      ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + index);
}
/**
 * Copy dependency jars from local fs to HDFS.
 */
private void copyDependencyJarsToHdfs() throws IOException {
  if (!this.state.contains(ConfigurationKeys.JOB_JAR_FILES_KEY)) {
    return;
  }
  LocalFileSystem lfs = FileSystem.getLocal(this.conf);
  Path tmpJarFileDir = new Path(this.tmpOutputDir, "_gobblin_compaction_jars");
  this.state.setProp(COMPACTION_JARS, tmpJarFileDir.toString());
  this.fs.delete(tmpJarFileDir, true);
  for (String jarFile : this.state.getPropAsList(ConfigurationKeys.JOB_JAR_FILES_KEY)) {
    for (FileStatus status : lfs.globStatus(new Path(jarFile))) {
      Path tmpJarFile = new Path(this.fs.makeQualified(tmpJarFileDir), status.getPath().getName());
      this.fs.copyFromLocalFile(status.getPath(), tmpJarFile);
      LOG.info(String.format("%s will be added to classpath", tmpJarFile));
    }
  }
}
public MRCompactor(Properties props, List<? extends Tag<?>> tags, Optional<CompactorListener> compactorListener)
    throws IOException {
  this.state = new State();
  this.state.addAll(props);
  this.initilizeTime = getCurrentTime();
  this.tags = tags;
  this.conf = HadoopUtils.getConfFromState(this.state);
  this.tmpOutputDir = getTmpOutputDir();
  this.fs = getFileSystem();
  this.datasets = getDatasetsFinder().findDistinctDatasets();
  this.jobExecutor = createJobExecutor();
  this.jobRunnables = Maps.newConcurrentMap();
  this.closer = Closer.create();
  this.stopwatch = Stopwatch.createStarted();
  this.gobblinMetrics = initializeMetrics();
  this.eventSubmitter = new EventSubmitter.Builder(
      GobblinMetrics.get(this.state.getProp(ConfigurationKeys.JOB_NAME_KEY)).getMetricContext(),
      MRCompactor.COMPACTION_TRACKING_EVENTS_NAMESPACE).build();
  this.compactorListener = compactorListener;
  this.dataVerifTimeoutMinutes = getDataVerifTimeoutMinutes();
  this.compactionTimeoutMinutes = getCompactionTimeoutMinutes();
  this.shouldVerifDataCompl = shouldVerifyDataCompleteness();
  this.compactionCompleteListener = getCompactionCompleteListener();
  this.verifier = this.shouldVerifDataCompl
      ? Optional.of(this.closer.register(new DataCompletenessVerifier(this.state)))
      : Optional.<DataCompletenessVerifier>absent();
  this.shouldPublishDataIfCannotVerifyCompl = shouldPublishDataIfCannotVerifyCompl();
}
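// Construction sketch (illustrative): only the job name is shown; the remaining compaction properties that
// the constructor's helper methods read (output dirs, dataset finder, timeouts, etc.) are not covered here.
Properties props = new Properties();
props.setProperty(ConfigurationKeys.JOB_NAME_KEY, "example-compaction-job");
MRCompactor compactor = new MRCompactor(
    props,
    ImmutableList.<Tag<?>>of(),                 // no extra metric tags
    Optional.<CompactorListener>absent());      // no compaction listener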
/**
 * Compare an existing {@link State} and a new {@link State} to ensure that the existing {@link State} contains
 * all entries in the new {@link State}, and update {@link #result} accordingly.
 */
protected void checkExistingIsSuperstate(State existingState, State newState) {
  checkExistingIsSuperset(existingState.getProperties().entrySet(), newState.getProperties().entrySet());
}
public HiveAvroSerDeManager(State props) throws IOException {
  super(props);
  this.fs = FileSystem.get(HadoopUtils.getConfFromState(props));
  this.useSchemaFile = props.getPropAsBoolean(USE_SCHEMA_FILE, DEFAULT_USE_SCHEMA_FILE);
  this.schemaFileName = props.getProp(SCHEMA_FILE_NAME, DEFAULT_SCHEMA_FILE_NAME);
  this.schemaLiteralLengthLimit = props.getPropAsInt(SCHEMA_LITERAL_LENGTH_LIMIT, DEFAULT_SCHEMA_LITERAL_LENGTH_LIMIT);
  this.metricContext = Instrumented.getMetricContext(props, HiveAvroSerDeManager.class);
}
/**
 * Return true if the current publisher can be skipped.
 *
 * <p>
 * A publisher that can be skipped must not have any effect on state persistence. It is skipped when a job is
 * cancelled and all finished tasks are configured to be committed.
 * </p>
 */
public boolean canBeSkipped() {
  return this.state.getPropAsBoolean(ConfigurationKeys.DATA_PUBLISHER_CAN_BE_SKIPPED,
      ConfigurationKeys.DEFAULT_DATA_PUBLISHER_CAN_BE_SKIPPED);
}
/**
 * Constructor.
 *
 * @param properties job configuration properties
 * @param previousDatasetStatesByUrns {@link SourceState}s of the previous job run, keyed by dataset URN
 * @param previousWorkUnitStates an {@link Iterable} of {@link WorkUnitState}s of the previous job run
 */
public SourceState(State properties, Map<String, ? extends SourceState> previousDatasetStatesByUrns,
    Iterable<WorkUnitState> previousWorkUnitStates) {
  super.addAll(properties.getProperties());
  this.previousDatasetStatesByUrns = ImmutableMap.copyOf(previousDatasetStatesByUrns);
  for (WorkUnitState workUnitState : previousWorkUnitStates) {
    this.previousWorkUnitStates.add(new ImmutableWorkUnitState(workUnitState));
  }
}
/**
 * Constructor
 */
public PinotAuditCountHttpClient(State state) {
  int maxTotal = state.getPropAsInt(CONNECTION_MAX_TOTAL, DEFAULT_CONNECTION_MAX_TOTAL);
  int maxPerRoute = state.getPropAsInt(MAX_PER_ROUTE, DEFAULT_MAX_PER_ROUTE);

  cm = new PoolingHttpClientConnectionManager();
  cm.setMaxTotal(maxTotal);
  cm.setDefaultMaxPerRoute(maxPerRoute);
  httpClient = HttpClients.custom()
      .setConnectionManager(cm)
      .build();

  String host = state.getProp(TARGET_HOST);
  int port = state.getPropAsInt(TARGET_PORT);
  targetUrl = host + ":" + port + "/pql?pql=";
}
/**
 * Serialize a {@link QueryBasedHivePublishEntity} into a {@link State} at {@link #SERIALIZED_PUBLISH_TABLE_COMMANDS}.
 *
 * @param state {@link State} to serialize the entity into.
 * @param queryBasedHivePublishEntity the {@link QueryBasedHivePublishEntity} to carry to the publisher.
 */
public static void serializePublishCommands(State state, QueryBasedHivePublishEntity queryBasedHivePublishEntity) {
  state.setProp(HiveAvroORCQueryGenerator.SERIALIZED_PUBLISH_TABLE_COMMANDS,
      GSON.toJson(queryBasedHivePublishEntity));
}
/**
 * @deprecated Use {@link #getEventSubmitterBuilder(Dataset, Optional, FileSystem)} instead.
 */
@Deprecated
public static void populateState(Dataset dataset, Optional<Job> job, FileSystem fs) {
  dataset.jobProps().setProp(SlaEventKeys.DATASET_URN_KEY, dataset.getUrn());
  dataset.jobProps().setProp(SlaEventKeys.PARTITION_KEY,
      dataset.jobProps().getProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION, ""));
  dataset.jobProps().setProp(SlaEventKeys.DEDUPE_STATUS_KEY, getOutputDedupeStatus(dataset.jobProps()));
  dataset.jobProps().setProp(SlaEventKeys.PREVIOUS_PUBLISH_TS_IN_MILLI_SECS_KEY, getPreviousPublishTime(dataset, fs));
  dataset.jobProps().setProp(SlaEventKeys.RECORD_COUNT_KEY, getRecordCount(job));
}