public TableLevelWatermarker(State state) { this.tableWatermarks = Maps.newHashMap(); // Load previous watermarks in case of sourceState if (state instanceof SourceState) { SourceState sourceState = (SourceState)state; for (Map.Entry<String, Iterable<WorkUnitState>> datasetWorkUnitStates : sourceState .getPreviousWorkUnitStatesByDatasetUrns().entrySet()) { // Use the minimum of all previous watermarks for this dataset List<LongWatermark> previousWatermarks = FluentIterable.from(datasetWorkUnitStates.getValue()) .filter(Predicates.not(PartitionLevelWatermarker.WATERMARK_WORKUNIT_PREDICATE)) .transform(new Function<WorkUnitState, LongWatermark>() { @Override public LongWatermark apply(WorkUnitState w) { return w.getActualHighWatermark(LongWatermark.class); } }).toList(); if (!previousWatermarks.isEmpty()) { this.tableWatermarks.put(datasetWorkUnitStates.getKey(), Collections.min(previousWatermarks)); } } log.debug("Loaded table watermarks from previous state " + this.tableWatermarks); } }
// NOTE(review): truncated fragment — the method body continues beyond this view;
// only the first two statements are visible here.
// Visible behavior: fetches the previous run's WorkUnitStates keyed by dataset URN,
// and splits the comma-separated SOURCE_PAGE_TITLES property into a mutable list
// (empty entries dropped). Presumably one workunit is later built per title —
// TODO confirm against the rest of the method.
@Override public List<WorkUnit> getWorkunits(SourceState state) { Map<String, Iterable<WorkUnitState>> previousWorkUnits = state.getPreviousWorkUnitStatesByDatasetUrns(); List<String> titles = new LinkedList<>(Splitter.on(",").omitEmptyStrings(). splitToList(state.getProp(WikipediaExtractor.SOURCE_PAGE_TITLES)));
// NOTE(review): statement fragment — enclosing method is outside this view.
// Resets both previous fetch-epoch-time maps, then reloads the previous run's
// WorkUnitStates keyed by dataset URN (presumably to repopulate those maps
// below — TODO confirm in the full method).
this.previousStartFetchEpochTimes.clear(); this.previousStopFetchEpochTimes.clear(); Map<String, Iterable<WorkUnitState>> workUnitStatesByDatasetUrns = state.getPreviousWorkUnitStatesByDatasetUrns();
public TableLevelWatermarker(State state) { this.tableWatermarks = Maps.newHashMap(); // Load previous watermarks in case of sourceState if (state instanceof SourceState) { SourceState sourceState = (SourceState)state; for (Map.Entry<String, Iterable<WorkUnitState>> datasetWorkUnitStates : sourceState .getPreviousWorkUnitStatesByDatasetUrns().entrySet()) { // Use the minimum of all previous watermarks for this dataset List<LongWatermark> previousWatermarks = FluentIterable.from(datasetWorkUnitStates.getValue()) .filter(Predicates.not(PartitionLevelWatermarker.WATERMARK_WORKUNIT_PREDICATE)) .transform(new Function<WorkUnitState, LongWatermark>() { @Override public LongWatermark apply(WorkUnitState w) { return w.getActualHighWatermark(LongWatermark.class); } }).toList(); if (!previousWatermarks.isEmpty()) { this.tableWatermarks.put(datasetWorkUnitStates.getKey(), Collections.min(previousWatermarks)); } } log.debug("Loaded table watermarks from previous state " + this.tableWatermarks); } }
// NOTE(review): truncated fragment — the method body continues beyond this view;
// only the first two statements are visible here.
// Visible behavior: fetches the previous run's WorkUnitStates keyed by dataset URN,
// and splits the comma-separated SOURCE_PAGE_TITLES property into a mutable list
// (empty entries dropped). Presumably one workunit is later built per title —
// TODO confirm against the rest of the method.
@Override public List<WorkUnit> getWorkunits(SourceState state) { Map<String, Iterable<WorkUnitState>> previousWorkUnits = state.getPreviousWorkUnitStatesByDatasetUrns(); List<String> titles = new LinkedList<>(Splitter.on(",").omitEmptyStrings(). splitToList(state.getProp(WikipediaExtractor.SOURCE_PAGE_TITLES)));
// NOTE(review): continuation line of a chained call whose receiver is on a line
// outside this view — it iterates the entries of the previous-run WorkUnitState
// map keyed by dataset URN.
.getPreviousWorkUnitStatesByDatasetUrns().entrySet()) {
// NOTE(review): statement fragment — enclosing method is outside this view.
// Resets both previous fetch-epoch-time maps, then reloads the previous run's
// WorkUnitStates keyed by dataset URN (presumably to repopulate those maps
// below — TODO confirm in the full method).
this.previousStartFetchEpochTimes.clear(); this.previousStopFetchEpochTimes.clear(); Map<String, Iterable<WorkUnitState>> workUnitStatesByDatasetUrns = state.getPreviousWorkUnitStatesByDatasetUrns();