/**
 * Get the actual high {@link Watermark} as a {@link JsonElement}.
 *
 * @return a {@link JsonElement} representing the actual high {@link Watermark},
 *         or {@code null} if the actual high {@link Watermark} is not set.
 */
public JsonElement getActualHighWatermark() {
  if (!contains(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY)) {
    return null;
  }
  return JSON_PARSER.parse(getProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY));
}
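// A minimal usage sketch of the method above: round-trip the JSON form back into a
// typed Watermark. Assumes a LongWatermark was set earlier; WatermarkSerializerHelper
// is the same helper exercised in the serialization test further down.
WorkUnitState state = new WorkUnitState();
state.setActualHighWatermark(new LongWatermark(100L));

JsonElement json = state.getActualHighWatermark();
if (json != null) {
  LongWatermark restored =
      WatermarkSerializerHelper.convertJsonToWatermark(json, LongWatermark.class);
  // restored.getValue() == 100L
}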
/**
 * Copy the {@link WorkUnitState} so that the work unit also contains the job state.
 * FileBasedExtractor needs properties from the job state (mostly source.* properties),
 * which have already been removed from the work unit by the time execution reaches here.
 *
 * @param src the {@link WorkUnitState} to copy
 * @return a copy of {@code src} whose work unit also carries the job state properties
 */
private WorkUnitState copyOf(WorkUnitState src) {
  WorkUnit copiedWorkUnit = WorkUnit.copyOf(src.getWorkunit());
  copiedWorkUnit.addAllIfNotExist(src.getJobState());
  WorkUnitState workUnitState = new WorkUnitState(copiedWorkUnit, src.getJobState());
  workUnitState.addAll(src);
  return workUnitState;
}
/**
 * Sets metadata to indicate whether this is the first time this table or partition is being published.
 *
 * @param wus the {@link WorkUnitState} on which to set the first-publish flag for this table or partition
 */
public static void setIsFirstPublishMetadata(WorkUnitState wus) {
  if (!wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY)) {
    LongWatermark previousWatermark = wus.getWorkunit().getLowWatermark(LongWatermark.class);
    wus.setProp(SlaEventKeys.IS_FIRST_PUBLISH, (null == previousWatermark || previousWatermark.getValue() == 0));
  }
}
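// Hedged downstream sketch: how a publisher-side hook might consult the flag set above.
// The explicit "false" default is an assumption for illustration; the snippet above
// only shows the flag being written.
boolean isFirstPublish = wus.getPropAsBoolean(SlaEventKeys.IS_FIRST_PUBLISH, false);
if (isFirstPublish) {
  // e.g. emit a first-publish SLA event for this table or partition
}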
public ImmutableWorkUnitState(WorkUnitState workUnitState) {
  super(workUnitState.getWorkunit(), workUnitState.getJobState());
  super.addAll(workUnitState.getSpecProperties());
}
public TaskState(WorkUnitState workUnitState) {
  // Since getWorkunit() returns an immutable WorkUnit object,
  // the WorkUnit object in this object is also immutable.
  super(workUnitState.getWorkunit(), workUnitState.getJobState(), workUnitState.getTaskBrokerNullable());
  addAll(workUnitState);
  this.jobId = workUnitState.getProp(ConfigurationKeys.JOB_ID_KEY);
  this.taskId = workUnitState.getProp(ConfigurationKeys.TASK_ID_KEY);
  this.taskKey = workUnitState.getProp(ConfigurationKeys.TASK_KEY_KEY, "unknown_task_key");
  this.taskAttemptId = Optional.fromNullable(workUnitState.getProp(ConfigurationKeys.TASK_ATTEMPT_ID_KEY));
  this.setId(this.taskId);
}
public void testConversionWithJsonTemplate() throws DataConversionException, IOException, JSONException {
  JsonParser parser = new JsonParser();
  String expectedResourceKey = "/sobject/user/John";
  String expectedJsonStr = "{ \"name\" : \"John\", \"favoriteNumber\" : 9, \"city\" : \"Mountain view\" }";
  RestEntry<JsonObject> expected =
      new RestEntry<JsonObject>(expectedResourceKey, parser.parse(expectedJsonStr).getAsJsonObject());

  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(AvroToRestJsonEntryConverter.CONVERTER_AVRO_REST_ENTRY_RESOURCE_KEY, "/sobject/user/${name}");
  String template = "name=${name},favoriteNumber=${favorite_number},city=${address.city}";
  workUnitState.setProp(AvroToRestJsonEntryConverter.CONVERTER_AVRO_REST_JSON_ENTRY_TEMPLATE, template);

  testConversion(expected, workUnitState);
}
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
  if (!workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY)) {
    throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
  }

  JobConf jobConf = new JobConf(new Configuration());
  for (String key : workUnitState.getPropertyNames()) {
    jobConf.set(key, workUnitState.getProp(key));
  }

  String fileSplitBytesStr = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
  FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
  FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, jobConf);
  RecordReader<K, V> recordReader = fileInputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
  boolean readKeys = workUnitState.getPropAsBoolean(
      HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY, HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS);
  return getExtractor(workUnitState, recordReader, fileSplit, readKeys);
}
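// Source-side sketch: how the serialized split checked for above might be attached to a
// work unit. HadoopUtils.serializeToString is assumed to be the inverse of the
// deserializeFromString call used in the extractor; fileSplit stands for a FileSplit
// computed by the source when it partitions the input.
WorkUnit workUnit = WorkUnit.createEmpty();
workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY,
    HadoopUtils.serializeToString(fileSplit));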
@Override
public Converter<String, String, String, List<String>> init(WorkUnitState workUnit) {
  String stringSplitterDelimiterKey = ForkOperatorUtils.getPropertyNameForBranch(workUnit,
      ConfigurationKeys.CONVERTER_STRING_SPLITTER_DELIMITER);

  Preconditions.checkArgument(workUnit.contains(stringSplitterDelimiterKey),
      "Cannot use " + this.getClass().getName() + " without specifying "
          + ConfigurationKeys.CONVERTER_STRING_SPLITTER_DELIMITER);

  this.splitter = Splitter.on(workUnit.getProp(stringSplitterDelimiterKey));

  this.shouldTrimResults = workUnit.getPropAsBoolean(
      ConfigurationKeys.CONVERTER_STRING_SPLITTER_SHOULD_TRIM_RESULTS,
      ConfigurationKeys.DEFAULT_CONVERTER_STRING_SPLITTER_SHOULD_TRIM_RESULTS);

  return this;
}
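// Usage sketch for the init method above. The converter class name
// StringSplitterConverter is an assumption (the snippet does not show the enclosing
// class); in an unbranched job, ForkOperatorUtils resolves the branch-specific
// property name to the plain key set here.
WorkUnitState workUnitState = new WorkUnitState();
workUnitState.setProp(ConfigurationKeys.CONVERTER_STRING_SPLITTER_DELIMITER, "\t");
Converter<String, String, String, List<String>> converter =
    new StringSplitterConverter().init(workUnitState);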
private String readProp(String key, WorkUnitState workUnitState) {
  String value = workUnitState.getWorkunit().getProp(key);
  if (StringUtils.isBlank(value)) {
    value = workUnitState.getProp(key);
  }
  if (StringUtils.isBlank(value)) {
    value = workUnitState.getJobState().getProp(key);
  }
  return value;
}
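// Illustration of the lookup order above: the work unit wins, then the state itself,
// then the job state. WorkUnit.createEmpty() and the WorkUnitState(WorkUnit)
// constructor are assumed here purely to keep the sketch self-contained.
WorkUnit workUnit = WorkUnit.createEmpty();
workUnit.setProp("my.key", "fromWorkUnit");
WorkUnitState wus = new WorkUnitState(workUnit);
wus.setProp("other.key", "fromState");
// readProp("my.key", wus)    -> "fromWorkUnit" (found on the work unit first)
// readProp("other.key", wus) -> "fromState"    (falls through to the state itself)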
@Test
public void testAppendsMetadataWithNormalRecord() throws IOException {
  state = new WorkUnitState();
  dummyWriter = new MetadataDummyWriter();
  writer = new MetadataWriterWrapper<>(dummyWriter, byte[].class, 1, 0, state.getJobState());

  byte[] recordBytes = new byte[]{'a', 'b', 'c', 'd'};
  writer.write(recordBytes);
  writer.commit();

  String writerMetadata = state.getProp(ConfigurationKeys.WRITER_METADATA_KEY);
  Assert.assertNotNull(writerMetadata, "Expected there to be metadata");
  Assert.assertNotEquals(writerMetadata.indexOf("\"default-encoding\""), -1,
      "Expected to find default metadata in metadata");
}
public HiveMetadataForCompactionExtractor(WorkUnitState state, FileSystem fs)
    throws IOException, TException, HiveException {
  super(state);

  if (state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
    log.info("Ignoring Watermark workunit for {}", state.getProp(ConfigurationKeys.DATASET_URN_KEY));
    return;
  }

  try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
    Table table = client.get().getTable(this.dbName, this.tableName);

    String primaryKeyString = table.getParameters().get(state.getProp(COMPACTION_PRIMARY_KEY));
    List<String> primaryKeyList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(primaryKeyString);
    String deltaString = table.getParameters().get(state.getProp(COMPACTION_DELTA));
    List<String> deltaList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(deltaString);

    Path dataFilesPath = new Path(table.getSd().getLocation());
    compactionEntity = new MRCompactionEntity(primaryKeyList, deltaList, dataFilesPath, state.getProperties());
  }
}
@Override
public String getSchema() throws IOException {
  // The source is responsible for setting SOURCE_SCHEMA
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  IOUtils.copyBytes(fs.open(new Path(workUnitState.getProp(ConfigurationKeys.SOURCE_SCHEMA))),
      outputStream, 4096, false);
  String schema = new String(outputStream.toByteArray(), StandardCharsets.UTF_8);
  workUnitState.setProp(ConfigurationKeys.CONVERTER_AVRO_SCHEMA_KEY, schema);
  return schema;
}
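// Source-side sketch: SOURCE_SCHEMA is read above as a filesystem path, so the source
// would set something like this when building the work unit (the schema path is
// illustrative only):
workUnit.setProp(ConfigurationKeys.SOURCE_SCHEMA, "/data/schemas/user.avsc");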
@Test
public void testConvertRecord() throws DataConversionException {
  TextToStringConverter textToStringConverter =
      (TextToStringConverter) new TextToStringConverter().init(new WorkUnitState());
  Text text = new Text("test");

  Iterator<String> iterator = textToStringConverter.convertRecord(null, text, new WorkUnitState()).iterator();
  Assert.assertTrue(iterator.hasNext());

  String textString = iterator.next();
  Assert.assertEquals(textString, text.toString());
  Assert.assertFalse(iterator.hasNext());
}
/**
 * Gets an iterator from the protocol-specific API if is.specific.api.active is false,
 * or from the source-specific API if is.specific.api.active is true.
 *
 * @return iterator over the extracted records
 */
private Iterator<D> getIterator() throws DataRecordException, IOException {
  if (Boolean.valueOf(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_SPECIFIC_API_ACTIVE))) {
    return this.getRecordSetFromSourceApi(this.schema, this.entity, this.workUnit, this.predicateList);
  }
  return this.getRecordSet(this.schema, this.entity, this.workUnit, this.predicateList);
}
public QueryBasedExtractor(WorkUnitState workUnitState) {
  this.workUnitState = workUnitState;
  this.workUnit = this.workUnitState.getWorkunit();
  this.schema = this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA);
  this.entity = this.workUnitState.getProp(ConfigurationKeys.SOURCE_ENTITY);
  partition = Partition.deserialize(workUnit);
  MDC.put("tableName", getWorkUnitName());
}
/**
 * Sets the actual high watermark by reading the expected high watermark.
 *
 * {@inheritDoc}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#setActualHighWatermark(org.apache.gobblin.configuration.WorkUnitState)
 */
@Override
public void setActualHighWatermark(WorkUnitState wus) {
  if (wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY)) {
    wus.setActualHighWatermark(wus.getWorkunit().getExpectedHighWatermark(MultiKeyValueLongWatermark.class));
  } else {
    wus.setActualHighWatermark(wus.getWorkunit().getExpectedHighWatermark(LongWatermark.class));
  }
}
@Override
public Extractor<String, Object> getExtractor(WorkUnitState state) throws IOException {
  Config config = ConfigFactory.parseProperties(state.getProperties());
  configureIfNeeded(config);

  final LongWatermark lowWatermark = state.getWorkunit().getLowWatermark(LongWatermark.class);
  final WorkUnitState workUnitState = state;
  final int index = state.getPropAsInt(WORK_UNIT_INDEX);
  final TestBatchExtractor extractor =
      new TestBatchExtractor(index, lowWatermark, numRecordsPerExtract, sleepTimePerRecord, workUnitState);

  if (!streaming) {
    return extractor;
  }
  return (Extractor) new TestStreamingExtractor(extractor);
}
@Test
public void testWatermarkWorkUnitStateSerialization() {
  long actualHighWatermarkValue = 50;
  TestWatermark actualHighWatermark = new TestWatermark();
  actualHighWatermark.setLongWatermark(actualHighWatermarkValue);

  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setActualHighWatermark(actualHighWatermark);

  TestWatermark deserializedActualHighWatermark =
      WatermarkSerializerHelper.convertJsonToWatermark(workUnitState.getActualHighWatermark(), TestWatermark.class);
  Assert.assertEquals(deserializedActualHighWatermark.getLongWatermark(), actualHighWatermarkValue);
}
@BeforeMethod
public void setUp() {
  state = new WorkUnitState();
  dummyWriter = new DummyWriter();
  writer = new MetadataWriterWrapper<>(dummyWriter, byte[].class, 1, 0, state.getJobState());
}
protected void addWriterOutputToExistingDir(Path writerOutputDir, Path publisherOutputDir,
    WorkUnitState workUnitState, int branchId, ParallelRunner parallelRunner) throws IOException {
  boolean preserveFileName = workUnitState.getPropAsBoolean(ForkOperatorUtils.getPropertyNameForBranch(
      ConfigurationKeys.SOURCE_FILEBASED_PRESERVE_FILE_NAME, this.numBranches, branchId), false);

  // Go through each file in writerOutputDir and move it into publisherOutputDir
  for (FileStatus status : this.writerFileSystemByBranches.get(branchId).listStatus(writerOutputDir)) {
    // If preserving the original file name, use the name carried in DATA_PUBLISHER_FINAL_NAME;
    // otherwise keep the writer's output file name
    Path finalOutputPath = preserveFileName
        ? new Path(publisherOutputDir, workUnitState.getProp(ForkOperatorUtils.getPropertyNameForBranch(
            ConfigurationKeys.DATA_PUBLISHER_FINAL_NAME, this.numBranches, branchId)))
        : new Path(publisherOutputDir, status.getPath().getName());

    movePath(parallelRunner, workUnitState, status.getPath(), finalOutputPath, branchId);
  }
}
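// Configuration sketch for the method above: enabling original-file-name preservation
// in a single-branch job. With one branch, ForkOperatorUtils resolves the branch key to
// the plain key used here; the file name carried under DATA_PUBLISHER_FINAL_NAME is an
// illustrative assumption (it would normally be set by the source, not by hand).
state.setProp(ConfigurationKeys.SOURCE_FILEBASED_PRESERVE_FILE_NAME, "true");
state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_NAME, "events_2024-01-01.avro");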