/**
 * Takes in an input schema of type String; the schema must be in JSON format.
 * @return a JsonArray representation of the schema
 */
@Override
public JsonArray convertSchema(String inputSchema, WorkUnitState workUnit) throws SchemaConversionException {
  this.unpackComplexSchemas =
      workUnit.getPropAsBoolean(UNPACK_COMPLEX_SCHEMAS_KEY, DEFAULT_UNPACK_COMPLEX_SCHEMAS_KEY);
  JsonParser jsonParser = new JsonParser();
  log.info("Schema: " + inputSchema);
  JsonElement jsonSchema = jsonParser.parse(inputSchema);
  return jsonSchema.getAsJsonArray();
}
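// Usage sketch (illustrative, not from the source): assuming the input schema string is a JSON
// array of column definitions, the converter simply parses it into a Gson JsonArray. The example
// schema below is hypothetical.
String exampleSchema =
    "[{\"columnName\":\"id\",\"dataType\":{\"type\":\"int\"}},"
        + "{\"columnName\":\"name\",\"dataType\":{\"type\":\"string\"}}]";
JsonArray parsed = new JsonParser().parse(exampleSchema).getAsJsonArray();
// parsed.size() == 2; each element describes one column of the intermediate schema.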
/**
 * If the partition already exists then the new partition location will be a separate timestamp dir.
 * If the partition location is /a/b/c/<oldTimeStamp> then the new partition location is /a/b/c/<currentTimeStamp>.
 * If the partition location is /a/b/c/ then the new partition location is /a/b/c/<currentTimeStamp>.
 */
public static String updatePartitionLocation(String outputDataPartitionLocation, WorkUnitState workUnitState,
    Optional<Path> destPartitionLocation) throws DataConversionException {
  if (workUnitState.getPropAsBoolean(HIVE_DATASET_PARTITION_OVERWRITE, DEFAULT_HIVE_DATASET_PARTITION_OVERWRITE)) {
    return outputDataPartitionLocation;
  }
  if (!destPartitionLocation.isPresent()) {
    return outputDataPartitionLocation;
  }
  long timeStamp = System.currentTimeMillis();
  return StringUtils.join(Arrays.asList(outputDataPartitionLocation, timeStamp), '/');
}
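// Minimal behavior sketch (schematic; the state setup and the enclosing utility class name
// "HiveConverterUtils" are assumptions, and the literal paths are placeholders): with overwrite
// disabled and an existing destination partition, a current-timestamp suffix is appended.
WorkUnitState state = new WorkUnitState();
state.setProp(HIVE_DATASET_PARTITION_OVERWRITE, false);
String withTimestamp = HiveConverterUtils.updatePartitionLocation(
    "/a/b/c", state, Optional.of(new Path("/a/b/c/1500000000000")));
// withTimestamp looks like "/a/b/c/<currentTimeStamp>"
String unchanged = HiveConverterUtils.updatePartitionLocation(
    "/a/b/c", state, Optional.<Path>absent());
// unchanged == "/a/b/c" because no destination partition exists yet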
public SalesforceExtractor(WorkUnitState state) {
  super(state);
  this.sfConnector = (SalesforceConnector) this.connector;

  // Don't allow PK chunking if max partitions is too high or the user has specified partitions
  if (state.getPropAsBoolean(Partitioner.HAS_USER_SPECIFIED_PARTITIONS, false)
      || state.getPropAsInt(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS,
          ConfigurationKeys.DEFAULT_MAX_NUMBER_OF_PARTITIONS) > PK_CHUNKING_MAX_PARTITIONS_LIMIT) {
    if (state.getPropAsBoolean(ENABLE_PK_CHUNKING_KEY, false)) {
      log.warn("Max partitions too high, so PK chunking is not enabled");
    }
    this.pkChunking = false;
  } else {
    this.pkChunking = state.getPropAsBoolean(ENABLE_PK_CHUNKING_KEY, false);
  }

  this.pkChunkingSize = Math.max(MIN_PK_CHUNKING_SIZE,
      Math.min(MAX_PK_CHUNKING_SIZE, state.getPropAsInt(PK_CHUNKING_SIZE_KEY, DEFAULT_PK_CHUNKING_SIZE)));

  this.pkChunkingSkipCountCheck = state.getPropAsBoolean(PK_CHUNKING_SKIP_COUNT_CHECK, DEFAULT_PK_CHUNKING_SKIP_COUNT_CHECK);
  this.bulkApiUseQueryAll = state.getPropAsBoolean(BULK_API_USE_QUERY_ALL, DEFAULT_BULK_API_USE_QUERY_ALL);

  // Get batch size from the .pull file
  int tmpBatchSize = state.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_FETCH_SIZE,
      ConfigurationKeys.DEFAULT_SOURCE_FETCH_SIZE);
  this.batchSize = tmpBatchSize == 0 ? ConfigurationKeys.DEFAULT_SOURCE_FETCH_SIZE : tmpBatchSize;

  this.fetchRetryLimit = state.getPropAsInt(FETCH_RETRY_LIMIT_KEY, DEFAULT_FETCH_RETRY_LIMIT);
}
public TestExtractor(WorkUnitState workUnitState) {
  super(workUnitState);
  if (workUnitState.getPropAsBoolean(RAISE_ERROR, false)) {
    throw new RuntimeException(EXCEPTION_MESSAGE);
  }
}
@Override
public Converter<String, String, String, List<String>> init(WorkUnitState workUnit) {
  String stringSplitterDelimiterKey =
      ForkOperatorUtils.getPropertyNameForBranch(workUnit, ConfigurationKeys.CONVERTER_STRING_SPLITTER_DELIMITER);

  Preconditions.checkArgument(workUnit.contains(stringSplitterDelimiterKey),
      "Cannot use " + this.getClass().getName() + " without specifying "
          + ConfigurationKeys.CONVERTER_STRING_SPLITTER_DELIMITER);

  this.splitter = Splitter.on(workUnit.getProp(stringSplitterDelimiterKey));

  this.shouldTrimResults = workUnit.getPropAsBoolean(
      ConfigurationKeys.CONVERTER_STRING_SPLITTER_SHOULD_TRIM_RESULTS,
      ConfigurationKeys.DEFAULT_CONVERTER_STRING_SPLITTER_SHOULD_TRIM_RESULTS);

  return this;
}
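// Sketch of the underlying Guava Splitter behavior this converter builds on. The input string is
// illustrative only, and whether the converter also drops empty tokens when trimming is enabled is
// an assumption here, not something shown in the snippet above.
Splitter splitter = Splitter.on(",");
List<String> raw = Lists.newArrayList(splitter.split("a, b ,c"));
// raw == ["a", " b ", "c"]  (no trimming)
List<String> trimmed = Lists.newArrayList(splitter.trimResults().omitEmptyStrings().split("a, b ,c"));
// trimmed == ["a", "b", "c"]  (what enabling CONVERTER_STRING_SPLITTER_SHOULD_TRIM_RESULTS aims for)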
protected void addWriterOutputToExistingDir(Path writerOutputDir, Path publisherOutputDir,
    WorkUnitState workUnitState, int branchId, ParallelRunner parallelRunner) throws IOException {
  boolean preserveFileName = workUnitState.getPropAsBoolean(ForkOperatorUtils.getPropertyNameForBranch(
      ConfigurationKeys.SOURCE_FILEBASED_PRESERVE_FILE_NAME, this.numBranches, branchId), false);

  // Go through each file in writerOutputDir and move it into publisherOutputDir
  for (FileStatus status : this.writerFileSystemByBranches.get(branchId).listStatus(writerOutputDir)) {
    // Preserve the file name if configured, use specified name otherwise
    Path finalOutputPath = preserveFileName
        ? new Path(publisherOutputDir, workUnitState.getProp(ForkOperatorUtils.getPropertyNameForBranch(
            ConfigurationKeys.DATA_PUBLISHER_FINAL_NAME, this.numBranches, branchId)))
        : new Path(publisherOutputDir, status.getPath().getName());

    movePath(parallelRunner, workUnitState, status.getPath(), finalOutputPath, branchId);
  }
}
@Override
public List<Command> getSchemaMetadata(String schema, String entity) throws SchemaException {
  log.debug("Build query to get schema");
  List<Command> commands = new ArrayList<>();

  boolean promoteUnsignedInt = this.workUnitState.getPropAsBoolean(
      ConfigurationKeys.SOURCE_QUERYBASED_PROMOTE_UNSIGNED_INT_TO_BIGINT,
      ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_PROMOTE_UNSIGNED_INT_TO_BIGINT);
  String promoteUnsignedIntQueryParam = promoteUnsignedInt ? "% unsigned" : "dummy";
  List<String> queryParams = Arrays.asList(promoteUnsignedIntQueryParam, entity, schema);

  String metadataSql = "select "
      + " col.column_name, "
      + " case when col.column_type like (?) and col.data_type = 'int' then 'bigint' else col.data_type end"
      + " as data_type,"
      + " case when CHARACTER_OCTET_LENGTH is null then 0 else 0 end as length, "
      + " case when NUMERIC_PRECISION is null then 0 else NUMERIC_PRECISION end as precesion, "
      + " case when NUMERIC_SCALE is null then 0 else NUMERIC_SCALE end as scale, "
      + " case when is_nullable='NO' then 'false' else 'true' end as nullable, "
      + " '' as format, "
      + " case when col.column_comment is null then '' else col.column_comment end as comment "
      + " from information_schema.COLUMNS col "
      + " WHERE upper(col.table_name)=upper(?) AND upper(col.table_schema)=upper(?) "
      + " order by col.ORDINAL_POSITION ";

  commands.add(getCommand(metadataSql, JdbcCommand.JdbcCommandType.QUERY));
  commands.add(getCommand(queryParams, JdbcCommand.JdbcCommandType.QUERYPARAMS));

  return commands;
}
public HiveMetadataForCompactionExtractor(WorkUnitState state, FileSystem fs)
    throws IOException, TException, HiveException {
  super(state);

  if (Boolean.valueOf(state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY))) {
    log.info("Ignoring Watermark workunit for {}", state.getProp(ConfigurationKeys.DATASET_URN_KEY));
    return;
  }

  try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
    Table table = client.get().getTable(this.dbName, this.tableName);

    String primaryKeyString = table.getParameters().get(state.getProp(COMPACTION_PRIMARY_KEY));
    List<String> primaryKeyList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(primaryKeyString);

    String deltaString = table.getParameters().get(state.getProp(COMPACTION_DELTA));
    List<String> deltaList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(deltaString);

    Path dataFilesPath = new Path(table.getSd().getLocation());

    compactionEntity = new MRCompactionEntity(primaryKeyList, deltaList, dataFilesPath, state.getProperties());
  }
}
/**
 * Sets metadata to indicate whether this is the first time this table or partition is being published.
 * @param wus the WorkUnitState to set if this is the first publish for this table or partition
 */
public static void setIsFirstPublishMetadata(WorkUnitState wus) {
  if (!Boolean.valueOf(wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY))) {
    LongWatermark previousWatermark = wus.getWorkunit().getLowWatermark(LongWatermark.class);
    wus.setProp(SlaEventKeys.IS_FIRST_PUBLISH, (null == previousWatermark || previousWatermark.getValue() == 0));
  }
}
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
  if (!workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY)) {
    throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
  }

  JobConf jobConf = new JobConf(new Configuration());
  for (String key : workUnitState.getPropertyNames()) {
    jobConf.set(key, workUnitState.getProp(key));
  }

  String fileSplitBytesStr = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
  FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);

  FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, jobConf);
  RecordReader<K, V> recordReader = fileInputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);

  boolean readKeys = workUnitState.getPropAsBoolean(
      HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY, HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS);

  return getExtractor(workUnitState, recordReader, fileSplit, readKeys);
}
// Fragment: selecting the work unit that carries the global watermark (inside a loop over work unit states)
if (workUnitState.getPropAsBoolean(GLOBAL_WATERMARK_DATASET_KEY, false)) {
  maxWorkUnit = Optional.of(workUnitState);
  break;
}

// Fragment: reading the previous dataset/partition URNs when the selected work unit is not the end of datasets
if (maxWorkUnit.isPresent() && !maxWorkUnit.get().getPropAsBoolean(END_OF_DATASETS_KEY, false)) {
  previousDatasetUrnWatermark = maxWorkUnit.get().getProp(DATASET_URN);
  previousPartitionUrnWatermark = maxWorkUnit.get().getProp(PARTITION_URN);
}
/**
 * Check whether it's appropriate to remove the data pull upper bounds in the last work unit, so that as much data
 * as possible is fetched from the source. Between the time the data query was created and the time it is executed,
 * new data may have been generated in the source; removing the upper bounds helps pick up that new data.
 *
 * Note: some duplicate data between runs is expected as a result of removing the upper bounds.
 *
 * @return whether the upper bounds should be removed
 */
private boolean shouldRemoveDataPullUpperBounds() {
  if (!this.workUnitState.getPropAsBoolean(ConfigurationKeys.SOURCE_QUERYBASED_ALLOW_REMOVE_UPPER_BOUNDS, true)) {
    return false;
  }

  // Only consider the last work unit
  if (!partition.isLastPartition()) {
    return false;
  }

  // Don't remove the upper bounds if the user specified one or it was recorded in a previous run
  if (partition.getHasUserSpecifiedHighWatermark()
      || this.workUnitState.getProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY) != null) {
    return false;
  }

  return true;
}
/**
 * Sets the actual high watermark by reading the expected high watermark.
 * {@inheritDoc}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#setActualHighWatermark(org.apache.gobblin.configuration.WorkUnitState)
 */
@Override
public void setActualHighWatermark(WorkUnitState wus) {
  if (Boolean.valueOf(wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY))) {
    wus.setActualHighWatermark(wus.getWorkunit().getExpectedHighWatermark(MultiKeyValueLongWatermark.class));
  } else {
    wus.setActualHighWatermark(wus.getWorkunit().getExpectedHighWatermark(LongWatermark.class));
  }
}
public HiveConvertExtractor(WorkUnitState state, FileSystem fs) throws IOException, TException, HiveException {
  super(state);

  if (Boolean.valueOf(state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY))) {
    log.info("Ignoring Watermark workunit for {}", state.getProp(ConfigurationKeys.DATASET_URN_KEY));
    return;
  }

  if (!(this.hiveDataset instanceof ConvertibleHiveDataset)) {
    throw new IllegalStateException("HiveConvertExtractor is only compatible with ConvertibleHiveDataset");
  }

  ConvertibleHiveDataset convertibleHiveDataset = (ConvertibleHiveDataset) this.hiveDataset;

  try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
    Table table = client.get().getTable(this.dbName, this.tableName);
    SchemaAwareHiveTable schemaAwareHiveTable = new SchemaAwareHiveTable(table,
        AvroSchemaManager.getSchemaFromUrl(this.hiveWorkUnit.getTableSchemaUrl(), fs));

    SchemaAwareHivePartition schemaAwareHivePartition = null;
    if (this.hiveWorkUnit.getPartitionName().isPresent() && this.hiveWorkUnit.getPartitionSchemaUrl().isPresent()) {
      Partition partition = client.get().getPartition(this.dbName, this.tableName,
          this.hiveWorkUnit.getPartitionName().get());
      schemaAwareHivePartition = new SchemaAwareHivePartition(table, partition,
          AvroSchemaManager.getSchemaFromUrl(this.hiveWorkUnit.getPartitionSchemaUrl().get(), fs));
    }

    QueryBasedHiveConversionEntity entity = new QueryBasedHiveConversionEntity(convertibleHiveDataset,
        schemaAwareHiveTable, Optional.fromNullable(schemaAwareHivePartition));
    this.conversionEntities.add(entity);
  }
}
public HiveBaseExtractor(WorkUnitState state) throws IOException {
  if (Boolean.valueOf(state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY))) {
    return;
  }

  this.hiveWorkUnit = new HiveWorkUnit(state.getWorkunit());
  this.hiveDataset = hiveWorkUnit.getHiveDataset();
  this.dbName = hiveDataset.getDbAndTable().getDb();
  this.tableName = hiveDataset.getDbAndTable().getTable();
  this.pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(),
      Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
}
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
  if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) {
    throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
  }

  Configuration configuration = new Configuration();
  FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration);

  String fileSplitBytesStr = workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY);
  FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
  TaskAttemptContext taskAttemptContext =
      getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID());
  try {
    RecordReader<K, V> recordReader = fileInputFormat.createRecordReader(fileSplit, taskAttemptContext);
    recordReader.initialize(fileSplit, taskAttemptContext);
    boolean readKeys = workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, DEFAULT_FILE_INPUT_READ_KEYS);
    return getExtractor(workUnitState, recordReader, fileSplit, readKeys);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
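// Complementary sketch of how the corresponding Source side might stash the split in the work unit
// before this extractor runs. It assumes HadoopUtils exposes a serializeToString helper matching
// the deserializeFromString call above (treat that as an assumption, not a confirmed API), and the
// path and sizes are placeholders.
FileSplit fileSplit = new FileSplit(new Path("/data/input/part-00000"), 0L, 1024L, (String[]) null);
workUnitState.setProp(FILE_SPLIT_BYTES_STRING_KEY, HadoopUtils.serializeToString(fileSplit));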
this.statusCount = this.workUnit.getPropAsInt(ConfigurationKeys.FILEBASED_REPORT_STATUS_ON_COUNT,
    ConfigurationKeys.DEFAULT_FILEBASED_REPORT_STATUS_ON_COUNT);
this.shouldSkipFirstRecord = this.workUnitState.getPropAsBoolean(ConfigurationKeys.SOURCE_SKIP_FIRST_RECORD, false);
public AsyncHttpJoinConverter init(WorkUnitState workUnitState) {
  super.init(workUnitState);
  Config config = ConfigBuilder.create().loadProps(workUnitState.getProperties(), CONF_PREFIX).build();
  config = config.withFallback(DEFAULT_FALLBACK);

  skipFailedRecord = workUnitState.getPropAsBoolean(ConfigurationKeys.CONVERTER_SKIP_FAILED_RECORD, false);

  httpClient = createHttpClient(config, workUnitState.getTaskBroker());
  responseHandler = createResponseHandler(config);
  requestBuilder = createRequestBuilder(config);
  return this;
}
public SimpleJsonExtractor(WorkUnitState workUnitState) throws FileSystemException {
  this.workUnitState = workUnitState;

  // Resolve the file to pull
  if (workUnitState.getPropAsBoolean(ConfigurationKeys.SOURCE_CONN_USE_AUTHENTICATION, false)) {
    // Add authentication credential if authentication is needed
    UserAuthenticator auth = new StaticUserAuthenticator(
        workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_DOMAIN, ""),
        workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USERNAME),
        PasswordManager.getInstance(workUnitState)
            .readPassword(workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_PASSWORD)));
    FileSystemOptions opts = new FileSystemOptions();
    DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, auth);
    this.fileObject = VFS.getManager().resolveFile(workUnitState.getProp(SOURCE_FILE_KEY), opts);
  } else {
    this.fileObject = VFS.getManager().resolveFile(workUnitState.getProp(SOURCE_FILE_KEY));
  }

  // Open the file for reading
  LOGGER.info("Opening file " + this.fileObject.getURL().toString());
  this.bufferedReader = this.closer.register(new BufferedReader(
      new InputStreamReader(this.fileObject.getContent().getInputStream(),
          ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
}
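// Hypothetical configuration sketch: properties one might set on the WorkUnitState before
// constructing this extractor so the VFS file is resolved with static credentials. The key
// constants are the ones referenced above; the URL, domain, and credential values are placeholders.
workUnitState.setProp(SOURCE_FILE_KEY, "ftp://host/path/data.json");
workUnitState.setProp(ConfigurationKeys.SOURCE_CONN_USE_AUTHENTICATION, true);
workUnitState.setProp(ConfigurationKeys.SOURCE_CONN_DOMAIN, "example-domain");
workUnitState.setProp(ConfigurationKeys.SOURCE_CONN_USERNAME, "reader");
workUnitState.setProp(ConfigurationKeys.SOURCE_CONN_PASSWORD, "encrypted-or-plain-password");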
@Override
public Schema convertSchema(JsonArray schema, WorkUnitState workUnit) throws SchemaConversionException {
  try {
    JsonSchema jsonSchema = new JsonSchema(schema);
    jsonSchema.setColumnName(workUnit.getExtract().getTable());
    recordConverter = new RecordConverter(jsonSchema, workUnit, workUnit.getExtract().getNamespace());
  } catch (UnsupportedDateTypeException e) {
    throw new SchemaConversionException(e);
  }
  Schema recordSchema = recordConverter.schema();

  if (workUnit.getPropAsBoolean(CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED, DEFAULT_CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED)) {
    return this.generateSchemaWithNullifiedField(workUnit, recordSchema);
  }
  return recordSchema;
}