/**
 * A topic can be configured to move to the latest offset in {@link #TOPICS_MOVE_TO_LATEST_OFFSET}.
 *
 * Needs to be synchronized as it is accessed by multiple threads.
 */
private synchronized boolean shouldMoveToLatestOffset(KafkaPartition partition, SourceState state) {
  if (!state.contains(TOPICS_MOVE_TO_LATEST_OFFSET)) {
    return false;
  }
  if (this.moveToLatestTopics.isEmpty()) {
    this.moveToLatestTopics.addAll(
        Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(state.getProp(TOPICS_MOVE_TO_LATEST_OFFSET)));
  }
  return this.moveToLatestTopics.contains(partition.getTopicName())
      || this.moveToLatestTopics.contains(ALL_TOPICS);
}
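A minimal usage sketch (not from the original class; topic names are hypothetical) of opting topics into the move-to-latest behavior checked above. It assumes the same class scope, so TOPICS_MOVE_TO_LATEST_OFFSET and ALL_TOPICS refer to the constants used in the method.

// Hypothetical job setup: move two topics to the latest offset.
SourceState state = new SourceState();
state.setProp(TOPICS_MOVE_TO_LATEST_OFFSET, "clickstream, page_views");
// Or opt in every topic via the wildcard token the method compares against:
// state.setProp(TOPICS_MOVE_TO_LATEST_OFFSET, ALL_TOPICS);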
private static SourceState getTestState(String dbName) {
  SourceState testState = new SourceState();
  testState.setProp("hive.dataset.database", dbName);
  testState.setProp("hive.dataset.table.pattern", "*");
  testState.setProp(ConfigurationKeys.JOB_ID_KEY, "testJobId");
  return testState;
}
/**
 * Gobblin calls the {@link Source#getWorkunits(SourceState)} method after creating a
 * {@link Source} object with a blank constructor, so any custom initialization of the
 * object needs to be done here.
 */
protected void init(SourceState state) {
  retriever.init(state);
  try {
    initFileSystemHelper(state);
  } catch (FileBasedHelperException e) {
    Throwables.propagate(e);
  }

  AvroFsHelper fsHelper = (AvroFsHelper) this.fsHelper;
  this.fs = fsHelper.getFileSystem();
  this.sourceState = state;

  this.lowWaterMark = getLowWaterMark(state.getPreviousWorkUnitStates(),
      state.getProp(DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE,
          String.valueOf(DEFAULT_DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE)));

  this.maxFilesPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB,
      DEFAULT_DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB);
  this.maxWorkUnitsPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB,
      DEFAULT_DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB);

  this.tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase());
  this.fileCount = 0;
  this.sourceDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
}
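A configuration sketch with hypothetical values for the knobs read by init() above; the DATE_PARTITIONED_SOURCE_* constants are the same ones referenced in the method, assumed accessible in scope.

// Hypothetical job setup for a date-partitioned source.
SourceState state = new SourceState();
state.setProp(DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE, "20240101000000"); // hypothetical; format follows the source's watermark encoding
state.setProp(DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB, 500);                // cap files pulled per run
state.setProp(DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB, 50);             // cap work units per run
state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, "snapshot_only");     // upper-cased into TableType
state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, "/data/tracking/events");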
private static SourceState getCombinedState(SourceState state, State tableSpecificState) {
  if (tableSpecificState == null) {
    return state;
  }
  SourceState combinedState =
      new SourceState(state, state.getPreviousDatasetStatesByUrns(), state.getPreviousWorkUnitStates());
  combinedState.addAll(tableSpecificState);
  return combinedState;
}
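A behavior sketch (hypothetical keys and values, assuming the same class scope since the helper is private): per-table properties shadow the job-wide ones because the table-specific state is copied in last via addAll, overwriting duplicate keys.

SourceState jobState = new SourceState();
jobState.setProp("writer.partition.columns", "ds");        // job-wide default

State tableState = new State();
tableState.setProp("writer.partition.columns", "ds,hour"); // per-table override

SourceState combined = getCombinedState(jobState, tableState);
// combined.getProp("writer.partition.columns") now returns "ds,hour".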
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  if (!state.contains(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY)) {
    state.setProp(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY,
        KafkaGsonDeserializer.class.getName());
  }
  return super.getWorkunits(state);
}
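Since the default is applied only when the key is absent, a job can still supply its own value deserializer. A sketch with a hypothetical deserializer class name:

SourceState state = new SourceState();
// This pre-set value wins; the KafkaGsonDeserializer default above is then skipped.
state.setProp(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY,
    "com.example.kafka.MyAvroDeserializer");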
@Override
public void init(SourceState state) {
  String regexPattern = state.getProp(PartitionedFileSourceBase.DATE_PARTITIONED_SOURCE_PARTITION_PATTERN);
  Preconditions.checkNotNull(regexPattern,
      "Must specify a regex pattern in " + PartitionedFileSourceBase.DATE_PARTITIONED_SOURCE_PARTITION_PATTERN);

  this.leadTime = PartitionAwareFileRetrieverUtils.getLeadTimeDurationFromConfig(state);
  this.pattern = Pattern.compile(regexPattern);
  this.helper = new HadoopFsHelper(state);
  this.sourceDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));

  this.schemaInSourceDir = state.getPropAsBoolean(ConfigurationKeys.SCHEMA_IN_SOURCE_DIR,
      ConfigurationKeys.DEFAULT_SCHEMA_IN_SOURCE_DIR);
  this.schemaFile = this.schemaInSourceDir
      ? state.getProp(ConfigurationKeys.SCHEMA_FILENAME, ConfigurationKeys.DEFAULT_SCHEMA_FILENAME)
      : "";
}
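A configuration sketch (hypothetical path and pattern) for the regex-driven init() above; the capture group is assumed to isolate the date portion the retriever parses.

SourceState state = new SourceState();
state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, "/data/src");
// Matches partition directories such as /data/src/2024-01-15/...
state.setProp(PartitionedFileSourceBase.DATE_PARTITIONED_SOURCE_PARTITION_PATTERN, "(\\d{4}-\\d{2}-\\d{2})");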
// Excerpt from a distcp-style copy source's getWorkunits(); unrelated context elided.
this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());

state.setProp(SlaEventKeys.SOURCE_URI, sourceFs.getUri());
state.setProp(SlaEventKeys.DESTINATION_URI, targetFs.getUri());

// Bin-packing knobs: each bin holds at most maxSizePerBin bytes, and the weight floor
// ensures a bin never holds more than maxWorkUnitsPerMultiWorkUnit work units.
long maxSizePerBin = state.getPropAsLong(MAX_SIZE_MULTI_WORKUNITS, 0);
long maxWorkUnitsPerMultiWorkUnit = state.getPropAsLong(MAX_WORK_UNITS_PER_BIN, 50);
final long minWorkUnitWeight = Math.max(1, maxSizePerBin / maxWorkUnitsPerMultiWorkUnit);
final Optional<CopyableFileWatermarkGenerator> watermarkGenerator =
    CopyableFileWatermarkHelper.getCopyableFileWatermarkGenerator(state);
int maxThreads = state.getPropAsInt(MAX_CONCURRENT_LISTING_SERVICES, DEFAULT_MAX_CONCURRENT_LISTING_SERVICES);

final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties()).build();

// The assignment target was elided in the original excerpt; a DatasetUtils receiver
// is assumed here to keep the fragment parseable.
DatasetsFinder<CopyableDatasetBase> datasetFinder = DatasetUtils
    .instantiateDatasetFinder(state.getProperties(), sourceFs, DEFAULT_DATASET_PROFILE_CLASS_KEY,
        this.eventSubmitter, state);

String filesetWuGeneratorAlias = state.getProp(ConfigurationKeys.COPY_SOURCE_FILESET_WU_GENERATOR_CLASS,
    FileSetWorkUnitGenerator.class.getName());

Iterator<Callable<Void>> callableIterator =
    Iterators.transform(prioritizedFileSets, new Function<FileSet<CopyEntity>, Callable<Void>>() {
      // ... (per-fileset work-unit generation elided)
    });

if (state.contains(SIMULATE) && state.getPropAsBoolean(SIMULATE)) {
  log.info("Simulate mode enabled. Will not execute the copy.");
  for (Map.Entry<FileSet<CopyEntity>, Collection<WorkUnit>> entry : workUnitsMap.asMap().entrySet()) {
    // ... (logging of simulated work units elided)
  }
}
// Excerpt: selecting previous work units to retry, based on the configured retry policy.
if (Iterables.isEmpty(state.getPreviousWorkUnitStates())) {
  return ImmutableList.of();
}

// The declaration was elided in the original excerpt and is assumed here.
WorkUnitRetryPolicy workUnitRetryPolicy;
if (state.contains(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY)) {
  workUnitRetryPolicy = WorkUnitRetryPolicy.forName(state.getProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY));
} else {
  boolean retryFailedWorkUnits = state.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_RETRY_ENABLED_KEY, true);
  workUnitRetryPolicy = retryFailedWorkUnits ? WorkUnitRetryPolicy.ALWAYS : WorkUnitRetryPolicy.NEVER;
}

// Collect previous work units that did not commit successfully.
for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) {
  if (workUnitState.getWorkingState() != WorkUnitState.WorkingState.COMMITTED) {
    if (state.getPropAsBoolean(ConfigurationKeys.OVERWRITE_CONFIGS_IN_STATESTORE,
        ConfigurationKeys.DEFAULT_OVERWRITE_CONFIGS_IN_STATESTORE)) {
      // ... (refreshing the work unit with current job config elided)
    }
    // ... (accumulation of the retryable work unit elided)
  }
}

// Retry applies only when the retry policy is compatible with the job commit policy.
// The receiver of forName was elided in the original excerpt; JobCommitPolicy is
// assumed from the comparison below.
JobCommitPolicy jobCommitPolicy = JobCommitPolicy
    .forName(state.getProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, ConfigurationKeys.DEFAULT_JOB_COMMIT_POLICY));
if ((workUnitRetryPolicy == WorkUnitRetryPolicy.ON_COMMIT_ON_PARTIAL_SUCCESS
    && jobCommitPolicy == JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS)
    /* ... remaining policy combinations elided in the original excerpt ... */) {
  // ...
}
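A configuration sketch: disabling retries entirely, which the fallback branch above maps to WorkUnitRetryPolicy.NEVER when no explicit policy key is set.

SourceState state = new SourceState();
// No WORK_UNIT_RETRY_POLICY_KEY set, so the boolean fallback applies:
state.setProp(ConfigurationKeys.WORK_UNIT_RETRY_ENABLED_KEY, Boolean.toString(false));
// Uncommitted work units from previous runs are then not re-planned (policy NEVER).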
@Override
protected List<WorkUnit> generateWorkUnits(SourceEntity sourceEntity, SourceState state, long previousWatermark) {
  WatermarkType watermarkType = WatermarkType.valueOf(
      state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE)
          .toUpperCase());
  String watermarkColumn = state.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY);
  int maxPartitions = state.getPropAsInt(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS,
      ConfigurationKeys.DEFAULT_MAX_NUMBER_OF_PARTITIONS);
  int minTargetPartitionSize = state.getPropAsInt(MIN_TARGET_PARTITION_SIZE, DEFAULT_MIN_TARGET_PARTITION_SIZE);

  // Dynamic partitioning needs a time-based watermark column; otherwise fall back
  // to the default work-unit generation.
  if (watermarkType == WatermarkType.SIMPLE || Strings.isNullOrEmpty(watermarkColumn)
      || !state.getPropAsBoolean(ENABLE_DYNAMIC_PARTITIONING) || maxPartitions <= 1) {
    return super.generateWorkUnits(sourceEntity, state, previousWatermark);
  }

  // ... (histogram construction elided in the original excerpt; the enclosing loop
  // below is assumed so the add/break fragment parses)
  for (HistogramGroup group : histogram.getGroups()) {
    histogramAdjust.add(group);
    if (histogramAdjust.getTotalRecordCount() > state
        .getPropAsLong(EARLY_STOP_TOTAL_RECORDS_LIMIT, DEFAULT_EARLY_STOP_TOTAL_RECORDS_LIMIT)) {
      break;
    }
  }

  // The branch condition was elided in the original excerpt; early stop is assumed
  // to apply when the adjusted histogram was trimmed.
  if (histogramAdjust.getGroups().size() < histogram.getGroups().size()) {
    HistogramGroup lastPlusOne = histogram.get(histogramAdjust.getGroups().size());
    long earlyStopHighWatermark =
        Long.parseLong(Utils.toDateTimeFormat(lastPlusOne.getKey(), SECONDS_FORMAT, Partitioner.WATERMARKTIMEFORMAT));
    log.info("Job {} will be stopped earlier. [LW : {}, early-stop HW : {}, expected HW : {}]",
        state.getProp(ConfigurationKeys.JOB_NAME_KEY), partition.getLowWatermark(), earlyStopHighWatermark,
        expectedHighWatermark);
    this.isEarlyStopped = true;
    expectedHighWatermark = earlyStopHighWatermark;
  } else {
    log.info("Job {} will be finished in a single run. [LW : {}, expected HW : {}]",
        state.getProp(ConfigurationKeys.JOB_NAME_KEY), partition.getLowWatermark(), expectedHighWatermark);
  }

  // ... (partition point computation elided; specifiedPartitions comes from elided context)
  state.setProp(Partitioner.HAS_USER_SPECIFIED_PARTITIONS, true);
  state.setProp(Partitioner.USER_SPECIFIED_PARTITIONS, specifiedPartitions);
  // ... (remainder of the method elided)
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  this.metricContext = Instrumented.getMetricContext(state, KafkaSource.class);
  this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());

  if (state.getPropAsBoolean(KafkaSource.GOBBLIN_KAFKA_EXTRACT_ALLOW_TABLE_TYPE_NAMESPACE_CUSTOMIZATION)) {
    String tableTypeStr =
        state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, KafkaSource.DEFAULT_TABLE_TYPE.toString());
    tableType = Extract.TableType.valueOf(tableTypeStr);
    extractNamespace =
        state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, KafkaSource.DEFAULT_NAMESPACE_NAME);
  } else {
    // Else-body elided in the original excerpt; the defaults referenced above are
    // assumed here.
    tableType = KafkaSource.DEFAULT_TABLE_TYPE;
    extractNamespace = KafkaSource.DEFAULT_NAMESPACE_NAME;
  }
  isFullExtract = state.getPropAsBoolean(ConfigurationKeys.EXTRACT_IS_FULL_KEY);
  kafkaBrokers = state.getProp(ConfigurationKeys.KAFKA_BROKERS, "");
  this.shouldEnableDatasetStateStore = state.getPropAsBoolean(GOBBLIN_KAFKA_SHOULD_ENABLE_DATASET_STATESTORE,
      DEFAULT_GOBBLIN_KAFKA_SHOULD_ENABLE_DATASET_STATESTORE);

  Config config = ConfigUtils.propertiesToConfig(state.getProperties());
  GobblinKafkaConsumerClientFactory kafkaConsumerClientFactory = kafkaConsumerClientResolver
      .resolveClass(state.getProp(GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS,
          DEFAULT_GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS)).newInstance();

  int numOfThreads = state.getPropAsInt(ConfigurationKeys.KAFKA_SOURCE_WORK_UNITS_CREATION_THREADS,
      ConfigurationKeys.KAFKA_SOURCE_WORK_UNITS_CREATION_DEFAULT_THREAD_COUNT);
  ExecutorService threadPool =
      Executors.newFixedThreadPool(numOfThreads, ExecutorsUtils.newThreadFactory(Optional.of(LOG)));

  // The original excerpt ends mid-statement here:
  if (state.getPropAsBoolean(ConfigurationKeys.KAFKA_SOURCE_SHARE_CONSUMER_CLIENT,
      // ... (default value and the remainder of the method elided)
@VisibleForTesting
public void initialize(SourceState state) throws IOException {
  this.updateProvider = UpdateProviderFactory.create(state);
  this.metricContext = Instrumented.getMetricContext(state, HiveSource.class);
  this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, EventConstants.CONVERSION_NAMESPACE).build();
  this.avroSchemaManager = new AvroSchemaManager(getSourceFs(state), state);
  this.workunits = Lists.newArrayList();

  this.watermarker = GobblinConstructorUtils.invokeConstructor(HiveSourceWatermarkerFactory.class,
      state.getProp(HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, DEFAULT_HIVE_SOURCE_WATERMARKER_FACTORY_CLASS))
      .createFromState(state);

  EventSubmitter.submit(Optional.of(this.eventSubmitter), EventConstants.CONVERSION_SETUP_EVENT);

  this.datasetFinder = GobblinConstructorUtils.invokeConstructor(HiveDatasetFinder.class,
      state.getProp(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY, DEFAULT_HIVE_SOURCE_DATASET_FINDER_CLASS),
      getSourceFs(state), state.getProperties(), this.eventSubmitter);

  int maxLookBackDays = state.getPropAsInt(HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS_KEY,
      DEFAULT_HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS);
  this.maxLookBackTime = new DateTime().minusDays(maxLookBackDays).getMillis();

  this.ignoreDataPathIdentifierList = COMMA_BASED_SPLITTER.splitToList(
      state.getProp(HIVE_SOURCE_IGNORE_DATA_PATH_IDENTIFIER_KEY, DEFAULT_HIVE_SOURCE_IGNORE_DATA_PATH_IDENTIFIER));

  silenceHiveLoggers();
}
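A configuration sketch (hypothetical value) for the lookback cap computed above; data modified earlier than the resulting maxLookBackTime falls outside the window this source considers.

SourceState state = new SourceState();
// Consider only data modified within the last 3 days (hypothetical value).
state.setProp(HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS_KEY, 3);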
SourceState sourceState = new SourceState();
sourceState.setProp(LoopingDatasetFinderSource.MAX_WORK_UNITS_PER_RUN_KEY, 3);

// Spy construction was elided in the original excerpt; assumed here so the
// stubbing below has a target.
SourceState sourceStateSpy = Mockito.spy(sourceState);
Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates();
@Test
public void testSubmitUnfulfilledRequestEvents()
    throws IOException, NoSuchMethodException, InvocationTargetException, IllegalAccessException {
  SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "file:///");
  state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/target/dir");
  state.setProp(DatasetUtils.DATASET_PROFILE_CLASS_KEY,
      TestCopyablePartitionableDatasedFinder.class.getCanonicalName());
  state.setProp(CopySource.MAX_CONCURRENT_LISTING_SERVICES, 2);
  state.setProp(CopyConfiguration.MAX_COPY_PREFIX + ".size", "50");
  state.setProp(CopyConfiguration.MAX_COPY_PREFIX + ".copyEntities", 2);
  state.setProp(CopyConfiguration.STORE_REJECTED_REQUESTS_KEY,
      RequestAllocatorConfig.StoreRejectedRequestsConfig.ALL.name().toLowerCase());
  state.setProp(ConfigurationKeys.METRICS_CUSTOM_BUILDERS,
      "org.apache.gobblin.metrics.ConsoleEventReporterFactory");

  // ... (FileSystem and EventSubmitter setup elided in the original excerpt; the
  // assignment targets below are assumed to keep the fragments parseable)
  int maxThreads = state
      .getPropAsInt(CopySource.MAX_CONCURRENT_LISTING_SERVICES, CopySource.DEFAULT_MAX_CONCURRENT_LISTING_SERVICES);

  final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties()).build();

  DatasetsFinder<CopyableDatasetBase> datasetFinder = DatasetUtils
      .instantiateDatasetFinder(state.getProperties(), sourceFs, CopySource.DEFAULT_DATASET_PROFILE_CLASS_KEY,
          eventSubmitter, state);
  // ... (remainder of the test elided)
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  JobConf jobConf = new JobConf(new Configuration());
  for (String key : state.getPropertyNames()) {
    jobConf.set(key, state.getProp(key));
  }

  if (state.contains(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
    for (String inputPath : state.getPropAsList(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
      FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    }
  }

  // ... (construction of fileInputFormat elided in the original excerpt)
  InputSplit[] fileSplits = fileInputFormat.getSplits(jobConf, state.getPropAsInt(
      HadoopFileInputSource.FILE_SPLITS_DESIRED_KEY, HadoopFileInputSource.DEFAULT_FILE_SPLITS_DESIRED));
  if (fileSplits == null || fileSplits.length == 0) {
    // ... (early return elided)
  }

  Extract.TableType tableType = state.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)
      ? Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase())
      : null;
  String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
  String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);
  // ... (work-unit creation per split elided)
}
/**
 * Get low water mark.
 *
 * @param watermarkType watermark type
 * @param previousWatermark previous watermark
 * @param deltaForNextWatermark delta number for the next watermark
 * @return previous watermark (falls back to {@link ConfigurationKeys#SOURCE_QUERYBASED_START_VALUE}
 *         iff the previous watermark is unavailable)
 */
private long getSnapshotLowWatermark(WatermarkType watermarkType, long previousWatermark, int deltaForNextWatermark) {
  LOG.debug("Getting snapshot low water mark");
  String timeZone = this.state.getProp(ConfigurationKeys.SOURCE_TIMEZONE, ConfigurationKeys.DEFAULT_SOURCE_TIMEZONE);
  if (isPreviousWatermarkExists(previousWatermark)) {
    if (isSimpleWatermark(watermarkType)) {
      return previousWatermark + deltaForNextWatermark - this.state
          .getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, 0);
    }
    DateTime wm = Utils.toDateTime(previousWatermark, WATERMARKTIMEFORMAT, timeZone).plusSeconds(
        (deltaForNextWatermark - this.state
            .getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, 0)));
    return Long.parseLong(Utils.dateTimeToString(wm, WATERMARKTIMEFORMAT, timeZone));
  }

  // If the previous watermark is not found, override it with the start value
  // (irrespective of the source.is.watermark.override flag).
  long startValue =
      Utils.getLongWithCurrentDate(this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE), timeZone);
  LOG.info("Overriding low water mark with the given start value: " + startValue);
  return startValue;
}
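A worked example of the timestamp branch above, with hypothetical numbers; WATERMARKTIMEFORMAT is the same constant passed to Utils.toDateTime in the method, and the time zone is assumed to be UTC.

long previousWatermark = 20240101120000L; // 2024-01-01 12:00:00
int deltaForNextWatermark = 1;            // advance one second past the last pull
int backupSecs = 30;                      // SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS
DateTime wm = Utils.toDateTime(previousWatermark, WATERMARKTIMEFORMAT, "UTC")
    .plusSeconds(deltaForNextWatermark - backupSecs);
// wm is 2024-01-01T11:59:31, i.e. the next pull deliberately re-reads ~29 seconds
// of already-extracted data to catch late-arriving rows.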
/**
 * If full dump is true, the low watermark will be based on
 * {@link ConfigurationKeys#SOURCE_QUERYBASED_START_VALUE}; otherwise it will be based on the
 * previous watermark. Please refer to {@link Partitioner#getLowWatermark(ExtractType, WatermarkType, long, int)}.
 *
 * @return whether this run is a full dump
 */
public boolean isFullDump() {
  return Boolean.valueOf(this.state.getProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY));
}
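A usage sketch: the flag is an ordinary boolean job property, so a full dump can be forced per run.

SourceState state = new SourceState();
state.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, Boolean.toString(true));
// isFullDump() now returns true and the low watermark is taken from
// ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE instead of the previous run.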
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  try {
    FileSystem fs = HadoopUtils.getSourceFileSystem(state);
    Config config = ConfigUtils.propertiesToConfig(state.getProperties());

    if (state.contains(COPY_TABLE_KEY)) {
      HiveDataset dataset = getHiveDataset(state.getProp(COPY_TABLE_KEY), fs, state);
      WorkUnit workUnit = HiveMaterializer.tableCopyWorkUnit(dataset,
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null);
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    } else if (state.contains(MATERIALIZE_VIEW)) {
      HiveDataset dataset = getHiveDataset(state.getProp(MATERIALIZE_VIEW), fs, state);
      WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(dataset, getOutputStorageFormat(state),
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null);
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    } else if (state.contains(MATERIALIZE_QUERY)) {
      String query = state.getProp(MATERIALIZE_QUERY);
      WorkUnit workUnit = HiveMaterializer.queryResultMaterializationWorkUnit(query, getOutputStorageFormat(state),
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), null));
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    }
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
  throw new RuntimeException(
      String.format("Must specify either %s, %s, or %s.", COPY_TABLE_KEY, MATERIALIZE_QUERY, MATERIALIZE_VIEW));
}
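A job-setup sketch (hypothetical query) for the query-materialization branch above; it assumes the unqualified constants shown in the method are accessible in scope.

SourceState state = new SourceState();
state.setProp(MATERIALIZE_QUERY, "SELECT id, ts FROM db.events WHERE ds = '2024-01-01'");
// The destination table and location for StageableTableMetadata are read from the
// config subtree under HIVE_MATERIALIZER_SOURCE_PREFIX (exact keys depend on that class).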
// Excerpt from a compliance purge source's setup; preceding context elided.
submitCycleCompletionEvent();

this.maxWorkUnits = state
    .getPropAsInt(ComplianceConfigurationKeys.MAX_WORKUNITS_KEY, ComplianceConfigurationKeys.DEFAULT_MAX_WORKUNITS);
this.maxWorkUnitExecutionAttempts = state
    .getPropAsInt(ComplianceConfigurationKeys.MAX_WORKUNIT_EXECUTION_ATTEMPTS_KEY,
        ComplianceConfigurationKeys.DEFAULT_MAX_WORKUNIT_EXECUTION_ATTEMPTS);

String datasetFinderClass = state.getProp(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS,
    HivePartitionFinder.class.getName());
this.datasetFinder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, datasetFinderClass, state);
populateDatasets();

String policyClass =
    state.getProp(ComplianceConfigurationKeys.PURGE_POLICY_CLASS, HivePurgerPolicy.class.getName());
this.policy = GobblinConstructorUtils.invokeConstructor(PurgePolicy.class, policyClass, this.lowWatermark);

this.shouldProxy = state.getPropAsBoolean(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_SHOULD_PROXY,
    ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DEFAULT_SHOULD_PROXY);
if (!this.shouldProxy) {
  // ... (remainder elided in the original excerpt)
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  if (!state.contains(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY)) {
    state.setProp(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY, ConvertibleHiveDatasetFinder.class.getName());
  }
  if (!state.contains(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY)) {
    state.setProp(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, "hive.conversion.avro");
  }
  this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());

  List<WorkUnit> workunits = super.getWorkunits(state);
  for (WorkUnit workUnit : workunits) {
    if (LineageUtils.shouldSetLineageInfo(workUnit)) {
      setSourceLineageInfo(workUnit, this.lineageInfo);
    }
  }
  return workunits;
}
// Excerpt from an Extract constructor; surrounding context elided.
super.setProp(ConfigurationKeys.EXTRACT_EXTRACT_ID_KEY, extractId);

// Find previous extracts for the same namespace and table.
for (WorkUnitState pre : state.getPreviousWorkUnitStates()) {
  Extract previousExtract = pre.getWorkunit().getExtract();
  if (previousExtract.getNamespace().equals(namespace) && previousExtract.getTable().equals(table)) {
    // ... (handling of the matching previous extract elided)
  }
}

// Record the full-run time once, if this is a full extract and no time is set yet.
if (state.getPropAsBoolean(ConfigurationKeys.EXTRACT_IS_FULL_KEY)
    && !state.contains(ConfigurationKeys.EXTRACT_FULL_RUN_TIME_KEY)) {
  super.setProp(ConfigurationKeys.EXTRACT_FULL_RUN_TIME_KEY, System.currentTimeMillis());
}