/**
 * Returns the low {@link Watermark} of this {@code WorkUnit}, deserialized with the default
 * {@link Gson} object ({@code GSON}).
 *
 * @param watermarkClass the watermark class to deserialize into.
 * @return the low watermark in this {@code WorkUnit}, exactly as produced by the two-argument
 *         overload when it is given the default {@link Gson} object.
 */
public <T extends Watermark> T getLowWatermark(Class<T> watermarkClass) {
  final Gson defaultGson = GSON;
  return this.getLowWatermark(watermarkClass, defaultGson);
}
/**
 * Returns the low {@link Watermark} of this {@code WorkUnit}, deserialized with the supplied
 * {@link Gson} object.
 *
 * @param watermarkClass the watermark class to deserialize into.
 * @param gson the {@link Gson} object used to deserialize the watermark.
 * @return the deserialized low watermark, or {@code null} when the no-argument
 *         {@code getLowWatermark()} returns {@code null}.
 */
public <T extends Watermark> T getLowWatermark(Class<T> watermarkClass, Gson gson) {
  final JsonElement serialized = getLowWatermark();
  // Nothing stored: report absence instead of handing null to Gson.
  return serialized != null ? gson.fromJson(serialized, watermarkClass) : null;
}
/**
 * Resets the actual high watermark to the low watermark returned by
 * {@link WorkUnit#getLowWatermark()}.
 *
 * <p>When the work unit has no low watermark ({@code null}), this method does nothing and the
 * actual high watermark property is left untouched.
 */
public void backoffActualHighWatermark() {
  final JsonElement fallback = this.workUnit.getLowWatermark();
  if (fallback != null) {
    // The property is stored as the JSON text of the low watermark.
    setProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY, fallback.toString());
  }
}
/**
 * Returns the actual high {@link Watermark}, falling back to the low watermark of the
 * corresponding {@link WorkUnit} when this {@code WorkUnitState} does not contain an actual high
 * watermark (which may be caused by task failures).
 *
 * @param watermarkClass the watermark class to deserialize into.
 * @param gson the {@link Gson} object used to deserialize the watermark.
 * @return the deserialized actual high watermark; the deserialized low watermark when no actual
 *         high watermark is present; or {@code null} when neither is present.
 */
public <T extends Watermark> T getActualHighWatermark(Class<T> watermarkClass, Gson gson) {
  final JsonElement actual = getActualHighWatermark();
  // The low watermark is only consulted when there is no actual high watermark.
  final JsonElement effective = actual != null ? actual : this.workUnit.getLowWatermark();
  return effective == null ? null : gson.fromJson(effective, watermarkClass);
}
/**
 * Sets metadata to indicate whether this is the first time this table or partition is being
 * published.
 *
 * <p>Work unit states flagged with {@code IS_WATERMARK_WORKUNIT_KEY} are left untouched. For all
 * others, {@link SlaEventKeys#IS_FIRST_PUBLISH} is set to {@code true} when the work unit has no
 * low {@link LongWatermark} or its value is {@code 0}, and to {@code false} otherwise.
 *
 * @param wus the work unit state on which to record whether this is the first publish for this
 *        table or partition.
 */
public static void setIsFirstPublishMetadata(WorkUnitState wus) {
  // getPropAsBoolean already yields a boolean; wrapping it in Boolean.valueOf only boxes the
  // value to have it unboxed again by the negation, so the flag is tested directly.
  if (wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY)) {
    return;
  }

  LongWatermark previousWatermark = wus.getWorkunit().getLowWatermark(LongWatermark.class);
  boolean isFirstPublish = previousWatermark == null || previousWatermark.getValue() == 0;
  wus.setProp(SlaEventKeys.IS_FIRST_PUBLISH, isFirstPublish);
}
}
this.topicName = KafkaUtils.getTopicName(state); this.partitions = KafkaUtils.getPartitions(state); this.lowWatermark = state.getWorkunit().getLowWatermark(MultiLongWatermark.class); this.highWatermark = state.getWorkunit().getExpectedHighWatermark(MultiLongWatermark.class); this.nextWatermark = new MultiLongWatermark(this.lowWatermark);
LongWatermark waterMarkObj = previousWus.getWorkunit().getLowWatermark(LongWatermark.class);
MultiLongWatermark previousLowWatermark = workUnit.getLowWatermark(MultiLongWatermark.class); MultiLongWatermark previousExpectedHighWatermark = workUnit.getExpectedHighWatermark(MultiLongWatermark.class); Preconditions.checkArgument(partitions.size() == watermark.size(), String
long baseRevision = workUnitState.getWorkunit().getLowWatermark(LongWatermark.class, new Gson()).getValue(); if (baseRevision < 0) { try {
/**
 * Rebuilds a {@link Partition} from the properties of the given {@link WorkUnit}.
 *
 * <p>When {@code ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY} is set on the work unit, the low
 * and expected high {@link LongWatermark} values are read from it; otherwise both bounds are
 * {@code ConfigurationKeys.DEFAULT_WATERMARK_VALUE}.
 *
 * @param workUnit the work unit to read the partition information from.
 * @return a {@link Partition} carrying the watermark bounds plus the
 *         {@code Partition.IS_LAST_PARTIITON} and
 *         {@code Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK} flags read from the work unit.
 */
public static Partition deserialize(WorkUnit workUnit) {
  final boolean hasWatermarkInterval =
      workUnit.getProp(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY) != null;

  final long low;
  final long high;
  if (hasWatermarkInterval) {
    low = workUnit.getLowWatermark(LongWatermark.class).getValue();
    high = workUnit.getExpectedHighWatermark(LongWatermark.class).getValue();
  } else {
    low = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
    high = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
  }

  final boolean lastPartition = workUnit.getPropAsBoolean(Partition.IS_LAST_PARTIITON);
  final boolean userSpecifiedHigh =
      workUnit.getPropAsBoolean(Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK);
  return new Partition(low, high, lastPartition, userSpecifiedHigh);
}
}
new GoogleWebmasterExtractor(wuState, wuState.getWorkunit().getLowWatermark(LongWatermark.class).getValue(), wuState.getWorkunit().getExpectedHighWatermark(LongWatermark.class).getValue(), positionMap, dimensions, metrics, null, Arrays.asList(dataFetcher1, dataFetcher2));
/**
 * Builds the extractor for the given work unit state.
 *
 * <p>The state's properties are parsed into a {@code Config} and passed to
 * {@code configureIfNeeded} before the extractor is created. A {@code TestBatchExtractor} is
 * always constructed, starting from the work unit's low {@link LongWatermark}; when
 * {@code streaming} is set it is wrapped in a {@code TestStreamingExtractor}.
 *
 * @param state the work unit state to build the extractor for.
 * @return the batch extractor, or a streaming extractor wrapping it when {@code streaming} is set.
 * @throws IOException declared by the overridden method.
 */
@Override
public Extractor<String, Object> getExtractor(WorkUnitState state) throws IOException {
  configureIfNeeded(ConfigFactory.parseProperties(state.getProperties()));

  final LongWatermark startWatermark = state.getWorkunit().getLowWatermark(LongWatermark.class);
  final int workUnitIndex = state.getPropAsInt(WORK_UNIT_INDEX);
  final TestBatchExtractor batchExtractor = new TestBatchExtractor(workUnitIndex, startWatermark,
      numRecordsPerExtract, sleepTimePerRecord, state);

  if (streaming) {
    return (Extractor) new TestStreamingExtractor(batchExtractor);
  }
  return batchExtractor;
}
List<Dataset> datasets1 = new ArrayList<>(); Assert.assertEquals(workUnits.get(0).getProp(ConfigurationKeys.DATASET_URN_KEY), "dataset1"); Assert.assertEquals(workUnits.get(0).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks1.add(workUnits.get(0).getExpectedHighWatermark(LongWatermark.class)); datasets1.add(dataset1); Assert.assertEquals(workUnits.get(1).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks1.add(workUnits.get(1).getExpectedHighWatermark(LongWatermark.class)); datasets1.add(dataset2); Assert.assertEquals(workUnits.get(2).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks1.add(workUnits.get(2).getExpectedHighWatermark(LongWatermark.class)); datasets1.add(dataset3); Assert.assertEquals(workUnits.get(0).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks2.add(workUnits.get(0).getExpectedHighWatermark(LongWatermark.class)); datasets2.add(dataset4); Assert.assertEquals(workUnits.get(1).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks2.add(workUnits.get(1).getExpectedHighWatermark(LongWatermark.class)); datasets2.add(dataset5); Assert.assertEquals(workUnits.get(0).getLowWatermark(LongWatermark.class).getValue(), watermarks1.get(0).getValue()); watermarks3.add(workUnits.get(0).getExpectedHighWatermark(LongWatermark.class)); datasets3.add(dataset1); Assert.assertEquals(workUnits.get(1).getLowWatermark(LongWatermark.class).getValue(), watermarks1.get(1).getValue()); watermarks3.add(workUnits.get(1).getExpectedHighWatermark(LongWatermark.class)); datasets3.add(dataset2);
.setProfileId(Preconditions.checkNotNull(wuState.getProp(VIEW_ID), VIEW_ID + " is required")) .setTitle(Preconditions.checkNotNull(wuState.getProp(SOURCE_ENTITY), SOURCE_ENTITY + " is required.")) .setStartDate(convertFormat(wuState.getWorkunit().getLowWatermark(LongWatermark.class).getValue())) .setEndDate(convertFormat(wuState.getWorkunit().getExpectedHighWatermark(LongWatermark.class).getValue())) .setMetrics(Preconditions.checkNotNull(wuState.getProp(METRICS), METRICS + " is required."))
LongWatermark watermark = workUnitState.getWorkunit().getLowWatermark(LongWatermark.class); LongWatermark expectedWatermark = new LongWatermark(watermark.getValue() + numRecordsPerExtract); WatermarkInterval watermarkInterval = new WatermarkInterval(watermark, expectedWatermark);
Assert.assertEquals(workUnits.get(0).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks1.add(workUnits.get(0).getExpectedHighWatermark(LongWatermark.class)); datasets1.add(dataset1); Assert.assertEquals(workUnits.get(1).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks1.add(workUnits.get(1).getExpectedHighWatermark(LongWatermark.class)); datasets1.add(new SimpleDatasetForTesting("dataset2@p1")); Assert.assertEquals(workUnits.get(2).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks1.add(workUnits.get(2).getExpectedHighWatermark(LongWatermark.class)); datasets1.add(new SimpleDatasetForTesting("dataset2@p2")); Assert.assertEquals(workUnits.get(0).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks2.add(workUnits.get(0).getExpectedHighWatermark(LongWatermark.class)); datasets2.add(new SimpleDatasetForTesting("dataset2@p3")); Assert.assertEquals(workUnits.get(1).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks2.add(workUnits.get(1).getExpectedHighWatermark(LongWatermark.class)); datasets2.add(new SimpleDatasetForTesting("dataset3@p1")); Assert.assertEquals(workUnits.get(2).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks2.add(workUnits.get(2).getExpectedHighWatermark(LongWatermark.class)); datasets2.add(new SimpleDatasetForTesting("dataset3@p2")); Assert.assertEquals(workUnits.get(0).getLowWatermark(LongWatermark.class).getValue(), 0); watermarks3.add(workUnits.get(0).getExpectedHighWatermark(LongWatermark.class)); datasets3.add(new SimpleDatasetForTesting("dataset3@p3")); Assert.assertEquals(workUnits.get(0).getLowWatermark(LongWatermark.class).getValue(), watermarks1.get(0).getValue());
/**
 * Verifies that the low and expected high watermarks placed into a {@link WorkUnit} through a
 * {@link WatermarkInterval} can be read back as {@code TestWatermark} objects with the same
 * long values.
 */
@Test
public void testWatermarkWorkUnitSerialization() {
  final long expectedLow = 0;
  final long expectedHigh = 100;

  TestWatermark low = new TestWatermark();
  low.setLongWatermark(expectedLow);

  TestWatermark high = new TestWatermark();
  high.setLongWatermark(expectedHigh);

  WorkUnit workUnit = new WorkUnit(null, null, new WatermarkInterval(low, high));

  // Convert both watermarks back from their JSON form before asserting on either of them.
  TestWatermark roundTrippedLow =
      WatermarkSerializerHelper.convertJsonToWatermark(workUnit.getLowWatermark(), TestWatermark.class);
  TestWatermark roundTrippedHigh =
      WatermarkSerializerHelper.convertJsonToWatermark(workUnit.getExpectedHighWatermark(), TestWatermark.class);

  Assert.assertEquals(roundTrippedLow.getLongWatermark(), expectedLow);
  Assert.assertEquals(roundTrippedHigh.getLongWatermark(), expectedHigh);
}
/**
 * Returns the low {@link Watermark} of this {@code WorkUnit}, deserialized with the default
 * {@link Gson} object ({@code GSON}).
 *
 * @param watermarkClass the watermark class to deserialize into.
 * @return the low watermark in this {@code WorkUnit}, exactly as produced by the two-argument
 *         overload when it is given the default {@link Gson} object.
 */
public <T extends Watermark> T getLowWatermark(Class<T> watermarkClass) {
  final Gson defaultGson = GSON;
  return this.getLowWatermark(watermarkClass, defaultGson);
}
/**
 * Resets the actual high watermark to the low watermark returned by
 * {@link WorkUnit#getLowWatermark()}.
 *
 * <p>When the work unit has no low watermark ({@code null}), this method does nothing and the
 * actual high watermark property is left untouched.
 */
public void backoffActualHighWatermark() {
  final JsonElement fallback = this.workUnit.getLowWatermark();
  if (fallback != null) {
    // The property is stored as the JSON text of the low watermark.
    setProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY, fallback.toString());
  }
}
/**
 * Sets metadata to indicate whether this is the first time this table or partition is being
 * published.
 *
 * <p>Work unit states flagged with {@code IS_WATERMARK_WORKUNIT_KEY} are left untouched. For all
 * others, {@link SlaEventKeys#IS_FIRST_PUBLISH} is set to {@code true} when the work unit has no
 * low {@link LongWatermark} or its value is {@code 0}, and to {@code false} otherwise.
 *
 * @param wus the work unit state on which to record whether this is the first publish for this
 *        table or partition.
 */
public static void setIsFirstPublishMetadata(WorkUnitState wus) {
  // getPropAsBoolean already yields a boolean; wrapping it in Boolean.valueOf only boxes the
  // value to have it unboxed again by the negation, so the flag is tested directly.
  if (wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY)) {
    return;
  }

  LongWatermark previousWatermark = wus.getWorkunit().getLowWatermark(LongWatermark.class);
  boolean isFirstPublish = previousWatermark == null || previousWatermark.getValue() == 0;
  wus.setProp(SlaEventKeys.IS_FIRST_PUBLISH, isFirstPublish);
}
}