/**
 * Returns the low {@link Watermark} as a {@link JsonElement}.
 *
 * <p>The value stored under {@link ConfigurationKeys#WATERMARK_INTERVAL_VALUE_KEY} is parsed as JSON, read as
 * a JSON object, and the member named {@link WatermarkInterval#LOW_WATERMARK_TO_JSON_KEY} is returned.
 *
 * @return a {@link JsonElement} representing the low {@link Watermark}, or {@code null} if the low
 *         {@link Watermark} is not set
 */
public JsonElement getLowWatermark() {
  if (contains(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY)) {
    String serializedInterval = getProp(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY);
    JsonElement parsedInterval = JSON_PARSER.parse(serializedInterval);
    return parsedInterval.getAsJsonObject().get(WatermarkInterval.LOW_WATERMARK_TO_JSON_KEY);
  }
  // No watermark interval has been recorded.
  return null;
}
/**
 * Reads the estimated size recorded on a work unit.
 *
 * @param workUnit the work unit to inspect; it must define the {@code ESTIMATED_WORKUNIT_SIZE} property
 * @return the value of the {@code ESTIMATED_WORKUNIT_SIZE} property as a {@code double}
 * @throws IllegalArgumentException if {@code workUnit} does not contain the property
 */
protected static double getWorkUnitEstSize(WorkUnit workUnit) {
  // Name the missing key so that a failure can be diagnosed from the exception message alone.
  Preconditions.checkArgument(workUnit.contains(ESTIMATED_WORKUNIT_SIZE),
      "WorkUnit does not contain required property %s", ESTIMATED_WORKUNIT_SIZE);
  return workUnit.getPropAsDouble(ESTIMATED_WORKUNIT_SIZE);
}
/**
 * Resolves the Avro {@link Schema} for this extractor.
 *
 * <p>A schema stored on the work unit under {@link ConfigurationKeys#SOURCE_SCHEMA} takes precedence.
 * Otherwise the schema is read from the first file to pull; the assumption is that all files in the input
 * directory have the same schema.
 *
 * @return the resolved schema, or {@code null} when no schema is configured and there are no files to pull
 */
@Override
public Schema getSchema() {
  if (this.workUnit.contains(ConfigurationKeys.SOURCE_SCHEMA)) {
    String configuredSchema = this.workUnit.getProp(ConfigurationKeys.SOURCE_SCHEMA);
    return new Schema.Parser().parse(configuredSchema);
  }

  // The cast happens before the emptiness check, so a helper of the wrong type fails even with no files.
  AvroFsHelper avroHelper = (AvroFsHelper) this.fsHelper;
  if (!this.filesToPull.isEmpty()) {
    try {
      return avroHelper.getAvroSchema(this.filesToPull.get(0));
    } catch (FileBasedHelperException e) {
      // propagate() always throws; using it in a throw statement makes that visible to the compiler.
      throw Throwables.propagate(e);
    }
  }
  return null;
}
}
/**
 * Reports whether {@code key} is defined by the superclass state, by the wrapped work unit, or by the job
 * state. The three sources are consulted in that order and the lookup stops at the first hit.
 *
 * @param key the property name to look up
 * @return {@code true} if any of the three sources contains {@code key}
 */
@Override
public boolean contains(String key) {
  if (super.contains(key)) {
    return true;
  }
  if (this.workUnit.contains(key)) {
    return true;
  }
  return this.jobState.contains(key);
}
/**
 * Filters out work units flagged with {@link ConfigurationKeys#WORK_UNIT_SKIP_KEY}.
 *
 * <p>A skipped work unit is recorded on the job state as a task state whose working state is
 * {@code SKIPPED}. A {@link MultiWorkUnit} may neither carry the skip key itself nor contain a work unit
 * that carries it.
 *
 * @param workUnit the work unit to test
 * @return {@code false} if the work unit is skipped, {@code true} otherwise
 * @throws IllegalArgumentException if a {@link MultiWorkUnit} or one of its members contains the skip key
 */
@Override
public boolean apply(WorkUnit workUnit) {
  if (workUnit instanceof MultiWorkUnit) {
    boolean multiHasSkipKey = workUnit.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY);
    Preconditions.checkArgument(!multiHasSkipKey, "Error: MultiWorkUnit cannot be skipped");

    for (WorkUnit member : ((MultiWorkUnit) workUnit).getWorkUnits()) {
      boolean memberHasSkipKey = member.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY);
      Preconditions.checkArgument(!memberHasSkipKey, "Error: MultiWorkUnit cannot contain skipped WorkUnit");
    }
  }

  if (!workUnit.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_SKIP_KEY, false)) {
    return true;
  }

  // Record the skipped work unit on the job state before filtering it out.
  WorkUnitState skippedState = new WorkUnitState(workUnit, this.jobState);
  skippedState.setWorkingState(WorkUnitState.WorkingState.SKIPPED);
  this.jobState.addSkippedTaskState(new TaskState(skippedState));
  return false;
}
}
/**
 * Copies topic-specific properties onto a work unit, descending into the members of a {@link MultiWorkUnit}.
 *
 * <p>A work unit without a {@code TOPIC_NAME} property is left untouched. Otherwise
 * {@code addDatasetUrnOptionally} is applied first, and then the state mapped to the work unit's topic,
 * if any, is added to the work unit.
 *
 * @param workUnit the work unit (or multi work unit) to enrich
 * @param topicSpecificStateMap topic name to topic-specific state; may be {@code null}
 */
private void addTopicSpecificPropsToWorkUnit(WorkUnit workUnit, Map<String, State> topicSpecificStateMap) {
  if (workUnit instanceof MultiWorkUnit) {
    for (WorkUnit member : ((MultiWorkUnit) workUnit).getWorkUnits()) {
      addTopicSpecificPropsToWorkUnit(member, topicSpecificStateMap);
    }
    return;
  }

  if (!workUnit.contains(TOPIC_NAME)) {
    return;
  }

  addDatasetUrnOptionally(workUnit);

  if (topicSpecificStateMap == null) {
    return;
  }

  String topic = workUnit.getProp(TOPIC_NAME);
  if (topicSpecificStateMap.containsKey(topic)) {
    workUnit.addAll(topicSpecificStateMap.get(topic));
  }
}
/**
 * Computes the guid of a work unit and stores it on that work unit.
 *
 * <p>The guid is built from the value of {@link ConfigurationKeys#CONVERTER_CLASSES_KEY} (or the empty
 * string when the key is absent) and then extended with the work unit's deserialized copy entity.
 *
 * @param workUnit the work unit to compute the guid for and to store it on
 * @throws IOException declared by this method; presumably raised while deserializing or appending the
 *         copy entity, which is not visible here
 */
private static void computeAndSetWorkUnitGuid(WorkUnit workUnit) throws IOException {
  String converterClasses = "";
  if (workUnit.contains(ConfigurationKeys.CONVERTER_CLASSES_KEY)) {
    converterClasses = workUnit.getProp(ConfigurationKeys.CONVERTER_CLASSES_KEY);
  }

  Guid converterGuid = Guid.fromStrings(converterClasses);
  setWorkUnitGuid(workUnit, converterGuid.append(deserializeCopyEntity(workUnit)));
}
Preconditions.checkArgument(workUnit.contains(ComplianceConfigurationKeys.PARTITION_NAME), "Older WorkUnit doesn't contain property partition name."); int executionAttempts = workUnit.getPropAsInt(ComplianceConfigurationKeys.EXECUTION_ATTEMPTS,
if (previousWorkunits.get(0).getWorkunit().contains(ConfigurationKeys.SOURCE_FILEBASED_FS_SNAPSHOT)) { prevFsSnapshot.addAll(previousWorkunits.get(0).getWorkunit().getPropAsSet(ConfigurationKeys.SOURCE_FILEBASED_FS_SNAPSHOT)); } else if (state.getPropAsBoolean(ConfigurationKeys.SOURCE_FILEBASED_FS_PRIOR_SNAPSHOT_REQUIRED,
@Test public void testGetWorkUnitsForPartitions() throws Exception { String dbName = "testdb3"; String tableName = "testtable3"; String tableSdLoc = "/tmp/testtable3"; this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true); SourceState testState = getTestState(dbName); Table tbl = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.of("field")); this.hiveMetastoreTestUtils.addTestPartition(tbl, ImmutableList.of("f1"), (int) System.currentTimeMillis()); List<WorkUnit> workUnits = this.hiveSource.getWorkunits(testState); // One workunit for the partition + 1 dummy watermark workunit Assert.assertEquals(workUnits.size(), 2); WorkUnit wu = workUnits.get(0); WorkUnit wu2 = workUnits.get(1); HiveWorkUnit hwu = null; if (!wu.contains(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) { hwu = new HiveWorkUnit(wu); } else { hwu = new HiveWorkUnit(wu2); } Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName); Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName); Assert.assertEquals(hwu.getPartitionName().get(), "field=f1"); }
/**
 * Verifies that a job-level property set on the {@link SourceState} does not show up on any work unit
 * produced by the source, including the members of a {@link MultiWorkUnit}.
 */
@Test
public void testJobStateNotCopiedToWorkUnit() {
  DatePartitionedAvroFileSource source = new DatePartitionedAvroFileSource();

  SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI);
  state.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, SOURCE_ENTITY);
  state.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, OUTPUT_DIR + Path.SEPARATOR + SOURCE_ENTITY);
  state.setProp(ConfigurationKeys.SOURCE_ENTITY, SOURCE_ENTITY);
  state.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 2);

  state.setProp("date.partitioned.source.partition.pattern", DATE_PATTERN);
  state.setProp("date.partitioned.source.min.watermark.value",
      DateTimeFormat.forPattern(DATE_PATTERN).print(this.startDateTime.minusMinutes(1)));
  state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, TableType.SNAPSHOT_ONLY);
  state.setProp("date.partitioned.source.partition.prefix", PREFIX);
  state.setProp("date.partitioned.source.partition.suffix", SUFFIX);

  // The job-level property that must not appear on any work unit.
  String dummyKey = "dummy.job.config";
  state.setProp(dummyKey, "dummy");

  List<WorkUnit> workunits = source.getWorkunits(state);
  Assert.assertEquals(workunits.size(), 4);

  for (WorkUnit candidate : workunits) {
    if (!(candidate instanceof MultiWorkUnit)) {
      Assert.assertFalse(candidate.contains(dummyKey));
      continue;
    }
    for (WorkUnit member : ((MultiWorkUnit) candidate).getWorkUnits()) {
      Assert.assertFalse(member.contains(dummyKey));
    }
  }
}
/**
 * Reads the estimated size recorded on a work unit.
 *
 * @param workUnit the work unit to inspect; it must define the {@code ESTIMATED_WORKUNIT_SIZE} property
 * @return the value of the {@code ESTIMATED_WORKUNIT_SIZE} property as a {@code double}
 * @throws IllegalArgumentException if {@code workUnit} does not contain the property
 */
protected static double getWorkUnitEstSize(WorkUnit workUnit) {
  // Name the missing key so that a failure can be diagnosed from the exception message alone.
  Preconditions.checkArgument(workUnit.contains(ESTIMATED_WORKUNIT_SIZE),
      "WorkUnit does not contain required property %s", ESTIMATED_WORKUNIT_SIZE);
  return workUnit.getPropAsDouble(ESTIMATED_WORKUNIT_SIZE);
}
/**
 * Returns the low {@link Watermark} as a {@link JsonElement}.
 *
 * <p>The value stored under {@link ConfigurationKeys#WATERMARK_INTERVAL_VALUE_KEY} is parsed as JSON, read as
 * a JSON object, and the member named {@link WatermarkInterval#LOW_WATERMARK_TO_JSON_KEY} is returned.
 *
 * @return a {@link JsonElement} representing the low {@link Watermark}, or {@code null} if the low
 *         {@link Watermark} is not set
 */
public JsonElement getLowWatermark() {
  if (contains(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY)) {
    String serializedInterval = getProp(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY);
    JsonElement parsedInterval = JSON_PARSER.parse(serializedInterval);
    return parsedInterval.getAsJsonObject().get(WatermarkInterval.LOW_WATERMARK_TO_JSON_KEY);
  }
  // No watermark interval has been recorded.
  return null;
}
/**
 * Resolves the Avro {@link Schema} for this extractor.
 *
 * <p>A schema stored on the work unit under {@link ConfigurationKeys#SOURCE_SCHEMA} takes precedence.
 * Otherwise the schema is read from the first file to pull; the assumption is that all files in the input
 * directory have the same schema.
 *
 * @return the resolved schema, or {@code null} when no schema is configured and there are no files to pull
 */
@Override
public Schema getSchema() {
  if (this.workUnit.contains(ConfigurationKeys.SOURCE_SCHEMA)) {
    String configuredSchema = this.workUnit.getProp(ConfigurationKeys.SOURCE_SCHEMA);
    return new Schema.Parser().parse(configuredSchema);
  }

  // The cast happens before the emptiness check, so a helper of the wrong type fails even with no files.
  AvroFsHelper avroHelper = (AvroFsHelper) this.fsHelper;
  if (!this.filesToPull.isEmpty()) {
    try {
      return avroHelper.getAvroSchema(this.filesToPull.get(0));
    } catch (FileBasedHelperException e) {
      // propagate() always throws; using it in a throw statement makes that visible to the compiler.
      throw Throwables.propagate(e);
    }
  }
  return null;
}
}
/**
 * Reports whether {@code key} is defined by the superclass state, by the wrapped work unit, or by the job
 * state. The three sources are consulted in that order and the lookup stops at the first hit.
 *
 * @param key the property name to look up
 * @return {@code true} if any of the three sources contains {@code key}
 */
@Override
public boolean contains(String key) {
  if (super.contains(key)) {
    return true;
  }
  if (this.workUnit.contains(key)) {
    return true;
  }
  return this.jobState.contains(key);
}
/**
 * Filters out work units flagged with {@link ConfigurationKeys#WORK_UNIT_SKIP_KEY}.
 *
 * <p>A skipped work unit is recorded on the job state as a task state whose working state is
 * {@code SKIPPED}. A {@link MultiWorkUnit} may neither carry the skip key itself nor contain a work unit
 * that carries it.
 *
 * @param workUnit the work unit to test
 * @return {@code false} if the work unit is skipped, {@code true} otherwise
 * @throws IllegalArgumentException if a {@link MultiWorkUnit} or one of its members contains the skip key
 */
@Override
public boolean apply(WorkUnit workUnit) {
  if (workUnit instanceof MultiWorkUnit) {
    boolean multiHasSkipKey = workUnit.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY);
    Preconditions.checkArgument(!multiHasSkipKey, "Error: MultiWorkUnit cannot be skipped");

    for (WorkUnit member : ((MultiWorkUnit) workUnit).getWorkUnits()) {
      boolean memberHasSkipKey = member.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY);
      Preconditions.checkArgument(!memberHasSkipKey, "Error: MultiWorkUnit cannot contain skipped WorkUnit");
    }
  }

  if (!workUnit.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_SKIP_KEY, false)) {
    return true;
  }

  // Record the skipped work unit on the job state before filtering it out.
  WorkUnitState skippedState = new WorkUnitState(workUnit, this.jobState);
  skippedState.setWorkingState(WorkUnitState.WorkingState.SKIPPED);
  this.jobState.addSkippedTaskState(new TaskState(skippedState));
  return false;
}
}
/**
 * Copies topic-specific properties onto a work unit, descending into the members of a {@link MultiWorkUnit}.
 *
 * <p>A work unit without a {@code TOPIC_NAME} property is left untouched. Otherwise
 * {@code addDatasetUrnOptionally} is applied first, and then the state mapped to the work unit's topic,
 * if any, is added to the work unit.
 *
 * @param workUnit the work unit (or multi work unit) to enrich
 * @param topicSpecificStateMap topic name to topic-specific state; may be {@code null}
 */
private void addTopicSpecificPropsToWorkUnit(WorkUnit workUnit, Map<String, State> topicSpecificStateMap) {
  if (workUnit instanceof MultiWorkUnit) {
    for (WorkUnit member : ((MultiWorkUnit) workUnit).getWorkUnits()) {
      addTopicSpecificPropsToWorkUnit(member, topicSpecificStateMap);
    }
    return;
  }

  if (!workUnit.contains(TOPIC_NAME)) {
    return;
  }

  addDatasetUrnOptionally(workUnit);

  if (topicSpecificStateMap == null) {
    return;
  }

  String topic = workUnit.getProp(TOPIC_NAME);
  if (topicSpecificStateMap.containsKey(topic)) {
    workUnit.addAll(topicSpecificStateMap.get(topic));
  }
}
/**
 * Computes the guid of a work unit and stores it on that work unit.
 *
 * <p>The guid is built from the value of {@link ConfigurationKeys#CONVERTER_CLASSES_KEY} (or the empty
 * string when the key is absent) and then extended with the work unit's deserialized copy entity.
 *
 * @param workUnit the work unit to compute the guid for and to store it on
 * @throws IOException declared by this method; presumably raised while deserializing or appending the
 *         copy entity, which is not visible here
 */
private static void computeAndSetWorkUnitGuid(WorkUnit workUnit) throws IOException {
  String converterClasses = "";
  if (workUnit.contains(ConfigurationKeys.CONVERTER_CLASSES_KEY)) {
    converterClasses = workUnit.getProp(ConfigurationKeys.CONVERTER_CLASSES_KEY);
  }

  Guid converterGuid = Guid.fromStrings(converterClasses);
  setWorkUnitGuid(workUnit, converterGuid.append(deserializeCopyEntity(workUnit)));
}
if (previousWorkunits.get(0).getWorkunit().contains(ConfigurationKeys.SOURCE_FILEBASED_FS_SNAPSHOT)) { prevFsSnapshot.addAll(previousWorkunits.get(0).getWorkunit().getPropAsSet(ConfigurationKeys.SOURCE_FILEBASED_FS_SNAPSHOT)); } else if (state.getPropAsBoolean(ConfigurationKeys.SOURCE_FILEBASED_FS_PRIOR_SNAPSHOT_REQUIRED,