/**
 * Maps a {@link WorkUnitState} to its actual high watermark, deserialized as a {@link LongWatermark}.
 *
 * <p>The result may be {@code null}: {@code getActualHighWatermark(Class)} is documented to return
 * {@code null} when the state has no actual high watermark and its work unit has no low watermark.</p>
 */
@Override public LongWatermark apply(WorkUnitState wus) { return wus.getActualHighWatermark(LongWatermark.class); } });
/**
 * Maps a {@link WorkUnitState} to its actual high watermark, deserialized as a {@link LongWatermark}.
 *
 * <p>The result may be {@code null}: {@code getActualHighWatermark(Class)} is documented to return
 * {@code null} when the state has no actual high watermark and its work unit has no low watermark.</p>
 */
@Override public LongWatermark apply(WorkUnitState w) { return w.getActualHighWatermark(LongWatermark.class); } }).toList();
/**
 * Returns the actual high {@link Watermark} of this {@code WorkUnitState}, deserialized with the
 * default {@link Gson} instance.
 *
 * <p>When no actual high watermark has been recorded (for example because the task failed), the low
 * watermark of the associated {@link WorkUnit} is used instead.</p>
 *
 * @param watermarkClass the concrete watermark type to deserialize into.
 * @param <T> the watermark type.
 * @return the actual high watermark, or the work unit's low watermark as a fallback; {@code null} when
 *         neither is available.
 */
public <T extends Watermark> T getActualHighWatermark(Class<T> watermarkClass) {
  final T watermark = this.getActualHighWatermark(watermarkClass, GSON);
  return watermark;
}
/**
 * Returns the actual high {@link Watermark} of this {@code WorkUnitState}, deserialized with the
 * supplied {@link Gson} instance.
 *
 * <p>When no actual high watermark has been recorded (for example because the task failed), the low
 * watermark of the associated {@link WorkUnit} is used instead.</p>
 *
 * @param watermarkClass the concrete watermark type to deserialize into.
 * @param gson the {@link Gson} object that performs the deserialization.
 * @param <T> the watermark type.
 * @return the actual high watermark, or the work unit's low watermark as a fallback; {@code null} when
 *         neither is available.
 */
public <T extends Watermark> T getActualHighWatermark(Class<T> watermarkClass, Gson gson) {
  JsonElement serialized = getActualHighWatermark();
  if (serialized == null) {
    // No actual high watermark recorded: fall back to the work unit's low watermark.
    serialized = this.workUnit.getLowWatermark();
  }
  return serialized == null ? null : gson.fromJson(serialized, watermarkClass);
}
waterMarkObj = previousWus.getActualHighWatermark(LongWatermark.class); if(waterMarkObj != null){ highWm = waterMarkObj.getValue();
} else { MultiKeyValueLongWatermark multiKeyValueLongWatermark = watermarkWorkUnits.get(0).getActualHighWatermark(MultiKeyValueLongWatermark.class); if (multiKeyValueLongWatermark != null) { this.previousWatermarks.setPartitionWatermarks(datasetWorkUnitStates.getKey(),
// Gather the watermarks for this work unit state, all deserialized as MultiLongWatermark:
//   - the actual high watermark, read from the state itself;
//   - the low and expected high watermarks, read from the underlying WorkUnit.
// NOTE(review): getActualHighWatermark(Class) is documented to fall back to the work unit's low
// watermark and may return null when neither exists -- confirm callers below handle that.
WorkUnit workUnit = workUnitState.getWorkunit(); MultiLongWatermark watermark = workUnitState.getActualHighWatermark(MultiLongWatermark.class); MultiLongWatermark previousLowWatermark = workUnit.getLowWatermark(MultiLongWatermark.class); MultiLongWatermark previousExpectedHighWatermark = workUnit.getExpectedHighWatermark(MultiLongWatermark.class);
/**
 * Deserializes the two sequence files under {@code outputPath} ("seq1" and "seq2") into a shared queue
 * of {@link WorkUnitState}s, then verifies that both files no longer exist, that exactly two states
 * were read, and that each state carries a watermark of either 10 or 100.
 *
 * @throws IOException if deserialization or a file system call fails.
 */
@Test(dependsOnMethods = "testSerializeToSequenceFile")
public void testDeserializeFromSequenceFile() throws IOException {
  Queue<WorkUnitState> deserializedStates = Queues.newConcurrentLinkedQueue();
  Path[] sequenceFiles = { new Path(this.outputPath, "seq1"), new Path(this.outputPath, "seq2") };

  try (ParallelRunner runner = new ParallelRunner(2, this.fs)) {
    for (Path sequenceFile : sequenceFiles) {
      runner.deserializeFromSequenceFile(Text.class, WorkUnitState.class, sequenceFile, deserializedStates, true);
    }
  }

  // Each input file is expected to be gone once the runner has been closed.
  for (Path sequenceFile : sequenceFiles) {
    Assert.assertFalse(this.fs.exists(sequenceFile));
  }
  Assert.assertEquals(deserializedStates.size(), 2);

  Gson gson = new Gson();
  for (WorkUnitState state : deserializedStates) {
    TestWatermark restored = gson.fromJson(state.getActualHighWatermark(), TestWatermark.class);
    long value = restored.getLongWatermark();
    Assert.assertTrue(value == 10L || value == 100L);
  }
}
/**
 * Stores a {@link TestWatermark} as the actual high watermark of a fresh {@link WorkUnitState} and
 * checks that converting the stored JSON back yields the same long value.
 */
@Test
public void testWatermarkWorkUnitStateSerialization() {
  final long expectedValue = 50;

  TestWatermark original = new TestWatermark();
  original.setLongWatermark(expectedValue);

  WorkUnitState state = new WorkUnitState();
  state.setActualHighWatermark(original);

  TestWatermark roundTripped =
      WatermarkSerializerHelper.convertJsonToWatermark(state.getActualHighWatermark(), TestWatermark.class);
  Assert.assertEquals(roundTripped.getLongWatermark(), expectedValue);
}
}
if (workUnitState.getWorkingState().equals(WorkUnitState.WorkingState.COMMITTED)) LongWatermark watermark = workUnitState.getActualHighWatermark(LongWatermark.class); LongWatermark expectedWatermark = new LongWatermark(watermark.getValue() + numRecordsPerExtract); WatermarkInterval watermarkInterval = new WatermarkInterval(watermark, expectedWatermark);
/**
 * Builds the {@link WorkUnit} for the given dataset.
 *
 * <p>When the dataset state store is enabled, the work unit gets a {@link WatermarkInterval} running
 * from the previous watermark to {@code LAST_PROCESSED_TS}. The previous watermark is the actual high
 * watermark of the first task state in the latest stored dataset state. It defaults to
 * {@code LongWatermark(0)} when there is no stored state, or when the stored state yields no
 * watermark. When the state store is disabled, a plain work unit without an interval is returned.</p>
 *
 * @param dataset the dataset to create a work unit for.
 * @return the work unit for {@code dataset}.
 * @throws RuntimeException wrapping the {@link IOException} if the latest dataset state cannot be read.
 */
@Override
protected WorkUnit workUnitForDataset(Dataset dataset) {
  WorkUnit workUnit = new WorkUnit();
  if (isDatasetStateStoreEnabled) {
    JobState.DatasetState datasetState;
    try {
      datasetState =
          (JobState.DatasetState) this.fsDatasetStateStore.getLatestDatasetState(this.jobName, dataset.getUrn());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    LongWatermark previousWatermark = null;
    if (datasetState != null) {
      previousWatermark =
          datasetState.getTaskStatesAsWorkUnitStates().get(0).getActualHighWatermark(LongWatermark.class);
    }
    // getActualHighWatermark may return null (no actual high watermark and no low watermark);
    // use the same default as the "no previous state" case rather than handing a null low
    // watermark to WatermarkInterval.
    if (previousWatermark == null) {
      previousWatermark = new LongWatermark(0);
    }

    workUnit.setWatermarkInterval(new WatermarkInterval(previousWatermark, new LongWatermark(LAST_PROCESSED_TS)));
  }
  return workUnit;
}
/**
 * Builds the {@link WorkUnit} for the given dataset partition.
 *
 * <p>When the dataset state store is enabled, the state is looked up under the URN
 * {@code <dataset urn>@<partition urn>} and the work unit gets a {@link WatermarkInterval} running
 * from the previous watermark to {@code LAST_PROCESSED_TS}. The previous watermark is the actual high
 * watermark of the first task state in the latest stored dataset state. It defaults to
 * {@code LongWatermark(0)} when there is no stored state, or when the stored state yields no
 * watermark. When the state store is disabled, a plain work unit without an interval is returned.</p>
 *
 * @param partition the dataset partition to create a work unit for.
 * @return the work unit for {@code partition}.
 * @throws RuntimeException wrapping the {@link IOException} if the latest dataset state cannot be read.
 */
@Override
protected WorkUnit workUnitForDatasetPartition(PartitionableDataset.DatasetPartition partition) {
  WorkUnit workUnit = new WorkUnit();
  if (isDatasetStateStoreEnabled) {
    String datasetUrn = partition.getDataset().getUrn() + "@" + partition.getUrn();
    JobState.DatasetState datasetState;
    try {
      datasetState =
          (JobState.DatasetState) this.fsDatasetStateStore.getLatestDatasetState(this.jobName, datasetUrn);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    LongWatermark previousWatermark = null;
    if (datasetState != null) {
      previousWatermark =
          datasetState.getTaskStatesAsWorkUnitStates().get(0).getActualHighWatermark(LongWatermark.class);
    }
    // getActualHighWatermark may return null (no actual high watermark and no low watermark);
    // use the same default as the "no previous state" case rather than handing a null low
    // watermark to WatermarkInterval.
    if (previousWatermark == null) {
      previousWatermark = new LongWatermark(0);
    }

    workUnit.setWatermarkInterval(new WatermarkInterval(previousWatermark, new LongWatermark(LAST_PROCESSED_TS)));
  }
  return workUnit;
}
/**
 * Maps a {@link WorkUnitState} to its actual high watermark, deserialized as a {@link LongWatermark}.
 *
 * <p>The result may be {@code null}: {@code getActualHighWatermark(Class)} is documented to return
 * {@code null} when the state has no actual high watermark and its work unit has no low watermark.</p>
 */
@Override public LongWatermark apply(WorkUnitState w) { return w.getActualHighWatermark(LongWatermark.class); } }).toList();
/**
 * Maps a {@link WorkUnitState} to its actual high watermark, deserialized as a {@link LongWatermark}.
 *
 * <p>The result may be {@code null}: {@code getActualHighWatermark(Class)} is documented to return
 * {@code null} when the state has no actual high watermark and its work unit has no low watermark.</p>
 */
@Override public LongWatermark apply(WorkUnitState wus) { return wus.getActualHighWatermark(LongWatermark.class); } });
/**
 * Returns the actual high {@link Watermark} of this {@code WorkUnitState}, deserialized with the
 * default {@link Gson} instance.
 *
 * <p>When no actual high watermark has been recorded (for example because the task failed), the low
 * watermark of the associated {@link WorkUnit} is used instead.</p>
 *
 * @param watermarkClass the concrete watermark type to deserialize into.
 * @param <T> the watermark type.
 * @return the actual high watermark, or the work unit's low watermark as a fallback; {@code null} when
 *         neither is available.
 */
public <T extends Watermark> T getActualHighWatermark(Class<T> watermarkClass) {
  final T watermark = this.getActualHighWatermark(watermarkClass, GSON);
  return watermark;
}
/**
 * Returns the actual high {@link Watermark} of this {@code WorkUnitState}, deserialized with the
 * supplied {@link Gson} instance.
 *
 * <p>When no actual high watermark has been recorded (for example because the task failed), the low
 * watermark of the associated {@link WorkUnit} is used instead.</p>
 *
 * @param watermarkClass the concrete watermark type to deserialize into.
 * @param gson the {@link Gson} object that performs the deserialization.
 * @param <T> the watermark type.
 * @return the actual high watermark, or the work unit's low watermark as a fallback; {@code null} when
 *         neither is available.
 */
public <T extends Watermark> T getActualHighWatermark(Class<T> watermarkClass, Gson gson) {
  JsonElement serialized = getActualHighWatermark();
  if (serialized == null) {
    // No actual high watermark recorded: fall back to the work unit's low watermark.
    serialized = this.workUnit.getLowWatermark();
  }
  return serialized == null ? null : gson.fromJson(serialized, watermarkClass);
}
waterMarkObj = previousWus.getActualHighWatermark(LongWatermark.class); if(waterMarkObj != null){ highWm = waterMarkObj.getValue();
// Gather the watermarks for this work unit state, all deserialized as MultiLongWatermark:
//   - the actual high watermark, read from the state itself;
//   - the low and expected high watermarks, read from the underlying WorkUnit.
// NOTE(review): getActualHighWatermark(Class) is documented to fall back to the work unit's low
// watermark and may return null when neither exists -- confirm callers below handle that.
WorkUnit workUnit = workUnitState.getWorkunit(); MultiLongWatermark watermark = workUnitState.getActualHighWatermark(MultiLongWatermark.class); MultiLongWatermark previousLowWatermark = workUnit.getLowWatermark(MultiLongWatermark.class); MultiLongWatermark previousExpectedHighWatermark = workUnit.getExpectedHighWatermark(MultiLongWatermark.class);
} else { MultiKeyValueLongWatermark multiKeyValueLongWatermark = watermarkWorkUnits.get(0).getActualHighWatermark(MultiKeyValueLongWatermark.class); if (multiKeyValueLongWatermark != null) { this.previousWatermarks.setPartitionWatermarks(datasetWorkUnitStates.getKey(),
if (workUnitState.getWorkingState().equals(WorkUnitState.WorkingState.COMMITTED)) LongWatermark watermark = workUnitState.getActualHighWatermark(LongWatermark.class); LongWatermark expectedWatermark = new LongWatermark(watermark.getValue() + numRecordsPerExtract); WatermarkInterval watermarkInterval = new WatermarkInterval(watermark, expectedWatermark);