/**
 * Fetches the value of a watermark given its key from the previous run.
 */
protected static String getWatermarkFromPreviousWorkUnits(SourceState state, String watermark) {
  // Fall back to the sentinel when there is no prior run; otherwise read the
  // watermark property off the first previous work-unit state.
  return state.getPreviousWorkUnitStates().isEmpty()
      ? ComplianceConfigurationKeys.NO_PREVIOUS_WATERMARK
      : state.getPreviousWorkUnitStates().get(0)
          .getProp(watermark, ComplianceConfigurationKeys.NO_PREVIOUS_WATERMARK);
} }
for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) { List<KafkaPartition> partitions = KafkaUtils.getPartitions(workUnitState); WorkUnit workUnit = workUnitState.getWorkunit();
/**
 * Rebuilds the per-partition average-record-size estimates from the previous
 * run's work-unit states, replacing any estimates currently held.
 */
private void readPreAvgRecordSizes(SourceState state) {
  // Start from a clean slate so stale partitions from an earlier call do not linger.
  this.estAvgSizes.clear();
  for (WorkUnitState prevState : state.getPreviousWorkUnitStates()) {
    for (KafkaPartition partition : KafkaUtils.getPartitions(prevState)) {
      // Only partitions that recorded an average size last run contribute an estimate.
      if (!KafkaUtils.containsPartitionAvgRecordSize(prevState, partition)) {
        continue;
      }
      this.estAvgSizes.put(partition, KafkaUtils.getPartitionAvgRecordSize(prevState, partition));
    }
  }
} }
/**
 * Merges table-specific configuration into a copy of the given source state.
 * Returns {@code state} unchanged when there is nothing table-specific to merge.
 */
private static SourceState getCombinedState(SourceState state, State tableSpecificState) {
  if (tableSpecificState != null) {
    // Copy the base state (including previous dataset/work-unit states) and
    // overlay the table-specific properties on top.
    SourceState merged =
        new SourceState(state, state.getPreviousDatasetStatesByUrns(), state.getPreviousWorkUnitStates());
    merged.addAll(tableSpecificState);
    return merged;
  }
  return state;
}
Map<String, List<Double>> prevAvgMillis = Maps.newHashMap(); for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) { List<KafkaPartition> partitions = KafkaUtils.getPartitions(workUnitState); for (KafkaPartition partition : partitions) {
boolean commitOnFullSuccess = JobCommitPolicy.getCommitPolicy(state) == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS; for (WorkUnitState previousWus : state.getPreviousWorkUnitStates()) { Optional<SourceEntity> sourceEntity = SourceEntity.fromState(previousWus); if (!sourceEntity.isPresent()) {
return; if (Iterables.isEmpty(state.getPreviousWorkUnitStates())) { return; for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) { if (workUnitState.getWorkingState() == WorkUnitState.WorkingState.COMMITTED) { continue;
.getPreviousWorkUnitStates(ConfigurationKeys.GLOBAL_WATERMARK_DATASET_URN) : state.getPreviousWorkUnitStates();
/**
 * Verifies that every skipped work unit from the previous run was persisted in
 * the state store. No-op unless the TEST_WORKUNIT_PERSISTENCE flag is set.
 */
public void testSkipWorkUnitPersistence(SourceState state) {
  if (!state.getPropAsBoolean(TEST_WORKUNIT_PERSISTENCE)) {
    return;
  }
  // Count how many previous work-unit states came back in the SKIPPED state.
  int skipped = 0;
  for (WorkUnitState wus : state.getPreviousWorkUnitStates()) {
    if (WorkUnitState.WorkingState.SKIPPED == wus.getWorkingState()) {
      skipped++;
    }
  }
  Assert.assertEquals(skipped, NUMBER_OF_SKIP_WORKUNITS,
      "All skipped work units are not persisted in the state store");
}
List<WorkUnitState> previousWorkunits = Lists.newArrayList(state.getPreviousWorkUnitStates()); Set<String> prevFsSnapshot = Sets.newHashSet();
if (Iterables.isEmpty(state.getPreviousWorkUnitStates())) { return ImmutableList.of(); for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) { if (workUnitState.getWorkingState() != WorkUnitState.WorkingState.COMMITTED) { if (state.getPropAsBoolean(ConfigurationKeys.OVERWRITE_CONFIGS_IN_STATESTORE,
/**
 * Gobblin calls the {@link Source#getWorkunits(SourceState)} method after creating a
 * {@link Source} object with a blank constructor, so any custom initialization of the
 * object needs to be done here.
 */
protected void init(SourceState state) {
  retriever.init(state);
  try {
    initFileSystemHelper(state);
  } catch (FileBasedHelperException e) {
    // Rethrows as unchecked; Guava's propagate wraps checked exceptions in RuntimeException.
    Throwables.propagate(e);
  }
  // NOTE(review): assumes initFileSystemHelper produced an AvroFsHelper here — the cast
  // will throw ClassCastException otherwise; confirm against initFileSystemHelper.
  AvroFsHelper fsHelper = (AvroFsHelper) this.fsHelper;
  this.fs = fsHelper.getFileSystem();
  this.sourceState = state;
  // Low watermark is derived from the previous run's work-unit states, falling back to
  // the configured (or default) minimum watermark value.
  this.lowWaterMark =
      getLowWaterMark(state.getPreviousWorkUnitStates(),
          state.getProp(DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE,
              String.valueOf(DEFAULT_DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE)));
  // Per-job throttles on how much work a single run may emit.
  this.maxFilesPerJob =
      state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB, DEFAULT_DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB);
  this.maxWorkUnitsPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB,
      DEFAULT_DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB);
  // NOTE(review): EXTRACT_TABLE_TYPE_KEY has no default here — a missing property would
  // NPE on toUpperCase(); presumably it is required configuration. Confirm upstream.
  this.tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase());
  this.fileCount = 0;
  this.sourceDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY));
}
super.setProp(ConfigurationKeys.EXTRACT_EXTRACT_ID_KEY, extractId); for (WorkUnitState pre : state.getPreviousWorkUnitStates()) { Extract previousExtract = pre.getWorkunit().getExtract(); if (previousExtract.getNamespace().equals(namespace) && previousExtract.getTable().equals(table)) {
Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates(); Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates();
/**
 * Creates the next round of work units. On the first run (no previous work-unit
 * states) delegates to {@link #initializeWorkUnits()}; on subsequent runs each new
 * work unit advances the previous one's watermarks by the total number of records
 * extracted across all work units, carrying the work-unit index forward.
 */
@Override
public List<WorkUnit> getWorkunits(SourceState sourceState) {
  sourceState.setProp(FOO, BAR);
  if (Iterables.isEmpty(sourceState.getPreviousWorkUnitStates())) {
    return initializeWorkUnits();
  }
  List<WorkUnit> result = Lists.newArrayList();
  for (WorkUnitState prev : sourceState.getPreviousWorkUnitStates()) {
    WorkUnit wu = WorkUnit.create(createExtract(Extract.TableType.SNAPSHOT_ONLY, NAMESPACE, TABLE));
    // Shift both watermarks by the total records extracted last run.
    wu.setLowWaterMark(prev.getPropAsInt(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY)
        + NUM_WORK_UNITS * NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR);
    wu.setHighWaterMark(prev.getPropAsInt(ConfigurationKeys.WORK_UNIT_HIGH_WATER_MARK_KEY)
        + NUM_WORK_UNITS * NUM_RECORDS_TO_EXTRACT_PER_EXTRACTOR);
    // Preserve each work unit's index across runs.
    wu.setProp(WORK_UNIT_INDEX_KEY, prev.getPropAsInt(WORK_UNIT_INDEX_KEY));
    result.add(wu);
  }
  return result;
}
Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates(); Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates(); Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates();
workUnits.subList(workUnitSize - 1, workUnitSize).stream().map(WorkUnitState::new).collect(Collectors.toList()); SourceState sourceStateSpy = Mockito.spy(sourceState); Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates(ConfigurationKeys.GLOBAL_WATERMARK_DATASET_URN); Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates(ConfigurationKeys.GLOBAL_WATERMARK_DATASET_URN);
@Override public List<WorkUnit> getWorkunits(SourceState state) { configureIfNeeded(ConfigFactory.parseProperties(state.getProperties())); final List<WorkUnitState> previousWorkUnitStates = state.getPreviousWorkUnitStates(); if (!previousWorkUnitStates.isEmpty())
Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates(ConfigurationKeys.GLOBAL_WATERMARK_DATASET_URN); workUnitStates = workUnits.subList(workUnitSize - 1, workUnitSize).stream().map(WorkUnitState::new).collect(Collectors.toList()); Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates(ConfigurationKeys.GLOBAL_WATERMARK_DATASET_URN); workUnitStates = workUnits.subList(workUnitSize - 1, workUnitSize).stream().map(WorkUnitState::new).collect(Collectors.toList()); Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates(ConfigurationKeys.GLOBAL_WATERMARK_DATASET_URN);
/**
 * Combines the job-level source state with table-specific overrides.
 * Returns the original {@code state} when {@code tableSpecificState} is null.
 */
private static SourceState getCombinedState(SourceState state, State tableSpecificState) {
  if (tableSpecificState == null) {
    return state;
  }
  // Copy the base state (with its previous dataset/work-unit states) and layer the
  // table-specific properties on top.
  SourceState combinedState =
      new SourceState(state, state.getPreviousDatasetStatesByUrns(), state.getPreviousWorkUnitStates());
  combinedState.addAll(tableSpecificState);
  return combinedState;
}