/**
 * A topic can be configured to move to the latest offset in {@link #TOPICS_MOVE_TO_LATEST_OFFSET}.
 *
 * Needs to be synchronized as it is accessed by multiple threads.
 */
private synchronized boolean shouldMoveToLatestOffset(KafkaPartition partition, SourceState state) {
  if (!state.contains(TOPICS_MOVE_TO_LATEST_OFFSET)) {
    return false;
  }
  if (this.moveToLatestTopics.isEmpty()) {
    this.moveToLatestTopics.addAll(
        Splitter.on(',').trimResults().omitEmptyStrings().splitToList(state.getProp(TOPICS_MOVE_TO_LATEST_OFFSET)));
  }
  return this.moveToLatestTopics.contains(partition.getTopicName()) || this.moveToLatestTopics.contains(ALL_TOPICS);
}
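Below is a minimal, self-contained sketch (assuming only Guava on the classpath) of the same Splitter configuration used above, showing how a raw comma-separated property value is normalized into the topic whitelist; the sample value is hypothetical.

import java.util.List;
import com.google.common.base.Splitter;

public class TopicListParseSketch {
  public static void main(String[] args) {
    // Hypothetical raw value of TOPICS_MOVE_TO_LATEST_OFFSET, with stray whitespace and commas.
    String rawValue = " topicA , ,topicB,";
    List<String> topics =
        Splitter.on(',').trimResults().omitEmptyStrings().splitToList(rawValue);
    // Prints [topicA, topicB]: entries are trimmed and empty segments dropped,
    // so sloppy config values do not produce phantom topic names.
    System.out.println(topics);
  }
}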
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  if (!state.contains(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY)) {
    state.setProp(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY,
        KafkaGsonDeserializer.class.getName());
  }
  return super.getWorkunits(state);
}
public static KafkaWorkUnitPacker getInstance(AbstractSource<?, ?> source, SourceState state) {
  if (state.contains(KAFKA_WORKUNIT_PACKER_TYPE)) {
    String packerTypeStr = state.getProp(KAFKA_WORKUNIT_PACKER_TYPE);
    Optional<PackerType> packerType = Enums.getIfPresent(PackerType.class, packerTypeStr);
    if (packerType.isPresent()) {
      return getInstance(packerType.get(), source, state);
    }
    throw new IllegalArgumentException("WorkUnit packer type " + packerTypeStr + " not found");
  }
  return getInstance(DEFAULT_PACKER_TYPE, source, state);
}
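The Enums.getIfPresent call above is Guava's exception-free alternative to Enum.valueOf. A small standalone sketch of that lookup pattern (the enum constants here are illustrative stand-ins, not the real PackerType values):

import com.google.common.base.Enums;
import com.google.common.base.Optional;

public class EnumLookupSketch {
  // Illustrative stand-in for the real PackerType enum.
  enum PackerType { SINGLE_LEVEL, BI_LEVEL }

  public static void main(String[] args) {
    Optional<PackerType> valid = Enums.getIfPresent(PackerType.class, "BI_LEVEL");
    Optional<PackerType> invalid = Enums.getIfPresent(PackerType.class, "NO_SUCH_TYPE");
    System.out.println(valid.isPresent());   // true
    // false rather than the IllegalArgumentException Enum.valueOf would throw,
    // which lets the caller raise its own, more descriptive error.
    System.out.println(invalid.isPresent());
  }
}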
private KafkaWorkUnitSizeEstimator getWorkUnitSizeEstimator() {
  if (this.state.contains(KAFKA_WORKUNIT_SIZE_ESTIMATOR_TYPE)) {
    String sizeEstimatorTypeString = this.state.getProp(KAFKA_WORKUNIT_SIZE_ESTIMATOR_TYPE);
    Optional<SizeEstimatorType> sizeEstimatorType =
        Enums.getIfPresent(SizeEstimatorType.class, sizeEstimatorTypeString);
    if (sizeEstimatorType.isPresent()) {
      return getWorkUnitSizeEstimator(sizeEstimatorType.get());
    }
    // Report the raw config string, not the (absent) Optional, in the error message.
    throw new IllegalArgumentException("WorkUnit size estimator type " + sizeEstimatorTypeString + " not found");
  }
  return getWorkUnitSizeEstimator(DEFAULT_SIZE_ESTIMATOR_TYPE);
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<WorkUnit> workUnits = Lists.newArrayList();
  if (!state.contains(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL)) {
    return workUnits;
  }

  // Create a single snapshot-type extract for all files
  Extract extract = new Extract(Extract.TableType.SNAPSHOT_ONLY,
      state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, "ExampleNamespace"), "ExampleTable");

  String filesToPull = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL);
  for (String file : Splitter.on(',').omitEmptyStrings().split(filesToPull)) {
    // Create one work unit for each file to pull
    WorkUnit workUnit = WorkUnit.create(extract);
    workUnit.setProp(SOURCE_FILE_KEY, file);
    workUnits.add(workUnit);
  }
  return workUnits;
}
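To make the fan-out concrete, here is a minimal sketch (file paths hypothetical, Guava assumed) of how a comma-separated SOURCE_FILEBASED_FILES_TO_PULL value maps to one work unit per file:

import com.google.common.base.Splitter;

public class FilesToPullSketch {
  public static void main(String[] args) {
    // Hypothetical property value; note the trailing comma.
    String filesToPull = "/data/part-0.csv,/data/part-1.csv,";
    int count = 0;
    for (String file : Splitter.on(',').omitEmptyStrings().split(filesToPull)) {
      // In the source above, each iteration creates a WorkUnit carrying the
      // path under SOURCE_FILE_KEY; here we just show the fan-out.
      System.out.println("work unit " + count++ + " -> " + file);
    }
    // Prints exactly two lines: omitEmptyStrings() keeps the trailing comma
    // from producing an empty work unit.
  }
}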
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  if (!state.contains(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY)) {
    state.setProp(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY, ConvertibleHiveDatasetFinder.class.getName());
  }
  if (!state.contains(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY)) {
    state.setProp(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, "hive.conversion.avro");
  }
  this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());
  List<WorkUnit> workunits = super.getWorkunits(state);
  for (WorkUnit workUnit : workunits) {
    if (LineageUtils.shouldSetLineageInfo(workUnit)) {
      setSourceLineageInfo(workUnit, this.lineageInfo);
    }
  }
  return workunits;
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  if (!state.contains(HadoopFileInputSource.FILE_INPUT_FORMAT_CLASS_KEY)) {
    state.setProp(HadoopFileInputSource.FILE_INPUT_FORMAT_CLASS_KEY,
        HiveSerDeWrapper.getDeserializer(state).getInputFormatClassName());
  }
  return super.getWorkunits(state);
}
Job job = Job.getInstance(new Configuration());
if (state.contains(FILE_INPUT_PATHS_KEY)) {
  for (String inputPath : state.getPropAsList(FILE_INPUT_PATHS_KEY)) {
    FileInputFormat.addInputPath(job, new Path(inputPath));
  }
}

Extract.TableType tableType = state.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)
    ? Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase())
    : null;
String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
logFilesToPull(filesToPull);

int numPartitions = state.contains(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS)
    && state.getPropAsInt(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS) <= filesToPull.size()
    ? state.getPropAsInt(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS)
    : filesToPull.size();
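The ternary above effectively clamps the partition count to min(configured cap, number of files), with an absent cap treated as unbounded. A worked standalone sketch of the same arithmetic:

public class PartitionCountSketch {
  // Mirrors the calculation above; a null cap stands in for an unset
  // SOURCE_MAX_NUMBER_OF_PARTITIONS property.
  static int numPartitions(Integer configuredMax, int fileCount) {
    return (configuredMax != null && configuredMax <= fileCount) ? configuredMax : fileCount;
  }

  public static void main(String[] args) {
    System.out.println(numPartitions(4, 10));    // 4: the cap applies
    System.out.println(numPartitions(20, 10));   // 10: cap exceeds the file count
    System.out.println(numPartitions(null, 10)); // 10: no cap configured
  }
}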
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  try {
    FileSystem fs = HadoopUtils.getSourceFileSystem(state);
    Config config = ConfigUtils.propertiesToConfig(state.getProperties());
    if (state.contains(COPY_TABLE_KEY)) {
      // Copy an existing table as-is.
      HiveDataset dataset = getHiveDataset(state.getProp(COPY_TABLE_KEY), fs, state);
      WorkUnit workUnit = HiveMaterializer.tableCopyWorkUnit(dataset,
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null);
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    } else if (state.contains(MATERIALIZE_VIEW)) {
      // Materialize a view into a table.
      HiveDataset dataset = getHiveDataset(state.getProp(MATERIALIZE_VIEW), fs, state);
      WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(dataset, getOutputStorageFormat(state),
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null);
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    } else if (state.contains(MATERIALIZE_QUERY)) {
      // Materialize the result of an arbitrary query.
      String query = state.getProp(MATERIALIZE_QUERY);
      WorkUnit workUnit = HiveMaterializer.queryResultMaterializationWorkUnit(query, getOutputStorageFormat(state),
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), null));
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    }
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
  throw new RuntimeException(
      String.format("Must specify either %s, %s, or %s.", COPY_TABLE_KEY, MATERIALIZE_QUERY, MATERIALIZE_VIEW));
}
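A hedged sketch of selecting among the three mutually exclusive modes. The property keys below are illustrative placeholders for COPY_TABLE_KEY, MATERIALIZE_VIEW, and MATERIALIZE_QUERY (the real constants live in the source class); only the first matching key wins, mirroring the if/else-if chain above:

import java.util.Properties;

public class MaterializerModeSketch {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("hive.materializer.copy.table", "db.source_table"); // pick copy-table mode

    if (props.containsKey("hive.materializer.copy.table")) {
      System.out.println("copy-table mode: " + props.getProperty("hive.materializer.copy.table"));
    } else if (props.containsKey("hive.materializer.materialize.view")) {
      System.out.println("view-materialization mode");
    } else if (props.containsKey("hive.materializer.materialize.query")) {
      System.out.println("query-materialization mode");
    } else {
      // Same contract as the method above: one of the three must be set.
      throw new RuntimeException("Must specify exactly one materialization property.");
    }
  }
}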
if (state.contains(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
  for (String inputPath : state.getPropAsList(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
  }
}

Extract.TableType tableType = state.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)
    ? Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase())
    : null;
String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
public List<WorkUnit> getWorkunits(SourceState state) {
  List<WorkUnit> workUnits = new ArrayList<>();
  if (state.contains(TEST_WORKUNIT_PERSISTENCE)) {
    testSkipWorkUnitPersistence(state);
    return workUnits;
  }
  for (int i = 0; i < NUMBER_OF_WORKUNITS; i++) {
    WorkUnit workUnit = WorkUnit.createEmpty();
    if (i < NUMBER_OF_SKIP_WORKUNITS) {
      workUnit.skip();
    }
    workUnits.add(workUnit);
  }
  return workUnits;
}
    && !state.contains(ConfigurationKeys.EXTRACT_FULL_RUN_TIME_KEY)) {
  super.setProp(ConfigurationKeys.EXTRACT_FULL_RUN_TIME_KEY, System.currentTimeMillis());
/**
 * Creates the list of all work units needed for the current execution.
 * Fresh work units are created for each partition starting from the watermark, and failed
 * work units from the previous run are added to the list.
 */
protected void createWorkUnits(SourceState state) throws IOException {
  createWorkunitsFromPreviousState(state);
  if (this.datasets.isEmpty()) {
    return;
  }
  for (HivePartitionDataset dataset : this.datasets) {
    Optional<String> owner = dataset.getOwner();
    if (workUnitsExceeded()) {
      log.info("Workunits exceeded");
      // Record where this run stopped so the next run can resume from here.
      setJobWatermark(state, dataset.datasetURN());
      return;
    }
    if (!this.policy.shouldPurge(dataset)) {
      continue;
    }
    WorkUnit workUnit = createNewWorkUnit(dataset);
    log.info("Created new work unit with partition " + workUnit.getProp(ComplianceConfigurationKeys.PARTITION_NAME));
    this.workUnitMap.put(workUnit.getProp(ComplianceConfigurationKeys.PARTITION_NAME), workUnit);
    this.workUnitsCreatedCount++;
  }
  if (!state.contains(ComplianceConfigurationKeys.HIVE_PURGER_WATERMARK)) {
    this.setJobWatermark(state, ComplianceConfigurationKeys.NO_PREVIOUS_WATERMARK);
  }
}
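The bookkeeping above boils down to two rules: stop once the per-run work unit cap is hit, recording where the run stopped, and write an explicit sentinel when no watermark has ever been set, so later runs can distinguish "never ran" from "ran and found nothing to purge". A rough standalone approximation, with all names hypothetical:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class WatermarkCapSketch {
  static final String NO_PREVIOUS_WATERMARK = "NO_PREVIOUS_WATERMARK"; // hypothetical sentinel
  static final int MAX_WORKUNITS_PER_RUN = 2;                          // hypothetical cap

  public static void main(String[] args) {
    List<String> datasetUrns = Arrays.asList("db@table/p=1", "db@table/p=2", "db@table/p=3");
    List<String> created = new ArrayList<>();
    String watermark = null;
    for (String urn : datasetUrns) {
      if (created.size() >= MAX_WORKUNITS_PER_RUN) {
        watermark = urn; // record the stopping point so a later run resumes here
        break;
      }
      created.add(urn);
    }
    if (watermark == null) {
      watermark = NO_PREVIOUS_WATERMARK; // completed pass with no prior watermark: record the sentinel
    }
    System.out.println("created=" + created + ", watermark=" + watermark);
  }
}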
if (state.contains(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY)) {
if (state.contains(SIMULATE) && state.getPropAsBoolean(SIMULATE)) {
  log.info("Simulate mode enabled. Will not execute the copy.");
  for (Map.Entry<FileSet<CopyEntity>, Collection<WorkUnit>> entry : workUnitsMap.asMap().entrySet()) {
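The contains-then-get pattern above is worth noting: checking state.contains(SIMULATE) before reading the boolean distinguishes "flag absent" from "flag explicitly false". A minimal sketch of the same guard over plain java.util.Properties (the key name is hypothetical):

import java.util.Properties;

public class SimulateFlagSketch {
  static boolean isSimulate(Properties props, String key) {
    // Absent flag => false; present flag => its parsed boolean value.
    return props.containsKey(key) && Boolean.parseBoolean(props.getProperty(key));
  }

  public static void main(String[] args) {
    Properties props = new Properties();
    System.out.println(isSimulate(props, "copy.simulate")); // false: flag not set
    props.setProperty("copy.simulate", "true");
    System.out.println(isSimulate(props, "copy.simulate")); // true
  }
}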