/**
 * Produces the expected high watermark for a table from the processing time alone.
 *
 * @param table the table being processed; not consulted by this implementation
 * @param tableProcessTime value wrapped as the expected high watermark
 * @return a new {@link LongWatermark} holding {@code tableProcessTime}
 */
@Override
public LongWatermark getExpectedHighWatermark(Table table, long tableProcessTime) {
  final LongWatermark processTimeWatermark = new LongWatermark(tableProcessTime);
  return processTimeWatermark;
}
long baseRevision = workUnitState.getWorkunit().getLowWatermark(LongWatermark.class, new Gson()).getValue(); if (baseRevision < 0) { try { workUnitState.setActualHighWatermark(new LongWatermark(this.lastRevisionId)); this.currentBatch = new LinkedList<>();
/**
 * Emits the next test record, or {@code null} once {@code numRecordsPerExtract} records
 * have been produced.
 *
 * <p>Each call sleeps for {@code sleepTimePerRecord} before building the record, then
 * increments the current watermark and the extracted-record counter.
 *
 * @param reuse unused
 * @return the next {@link TestRecord}, or {@code null} when the extract is exhausted
 * @throws RuntimeException wrapping the {@link InterruptedException} if the sleep is interrupted
 */
@Override
public Object readRecord(@Deprecated Object reuse) throws DataRecordException, IOException {
  if (recordsExtracted >= numRecordsPerExtract) {
    return null;
  }

  try {
    Thread.sleep(sleepTimePerRecord);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe the interruption.
    Thread.currentThread().interrupt();
    throw Throwables.propagate(e);
  }

  TestRecord record = new TestRecord(this.partition, this.currentWatermark.getValue(), null);
  log.debug("Extracted record -> {}", record);
  currentWatermark.increment();
  recordsExtracted++;
  return record;
}
/**
 * Decides whether a workunit needs to be created.
 *
 * <p>Returns <code>true</code> only when <code>updateTime</code> is not before
 * <code>maxLookBackTime</code> and is strictly after the <code>lowWatermark</code> value.
 * <code>createTime</code> is not used; it exists for backward compatibility.
 *
 * @param createTime ignored
 * @param updateTime update time, passed to the {@link DateTime} constructor
 * @param lowWatermark low watermark the update time must exceed
 * @return <code>true</code> if a workunit should be created
 */
protected boolean shouldCreateWorkunit(long createTime, long updateTime, LongWatermark lowWatermark) {
  final DateTime updatedAt = new DateTime(updateTime);
  return !updatedAt.isBefore(this.maxLookBackTime) && updatedAt.isAfter(lowWatermark.getValue());
}
/**
 * Orders two Kafka watermarks of the same topic-partition by their low watermark.
 *
 * @param o watermark to compare against; must be a {@link KafkaWatermark} whose
 *          topic-partition equals this one's
 * @return result of comparing this low watermark with the other's
 * @throws IllegalArgumentException if {@code o} is not a {@link KafkaWatermark} or belongs
 *         to a different topic-partition
 */
@Override
public int compareTo(CheckpointableWatermark o) {
  final boolean isKafkaWatermark = o instanceof KafkaWatermark;
  Preconditions.checkArgument(isKafkaWatermark);

  final KafkaWatermark other = (KafkaWatermark) o;
  final boolean samePartition = _topicPartition.equals(other._topicPartition);
  Preconditions.checkArgument(samePartition);

  return _lwm.compareTo(other._lwm);
}
public boolean validateWatermarks(boolean exact, Map<String, CheckpointableWatermark> watermarkMap) { if (!watermarkMap.isEmpty()) { // watermark must be <= the index LongWatermark longWatermark = (LongWatermark) watermarkMap.values().iterator().next().getWatermark(); if (exact) { System.out.println(index-1 + ":" + longWatermark.getValue()); return ((index-1) == longWatermark.getValue()); } else { return (index > longWatermark.getValue()); } } return true; }
/**
 * Records {@code nextWatermark} as the actual high watermark on the work unit state and
 * closes {@code closer}.
 *
 * <p>{@code closer} is closed even if updating the watermark throws.
 *
 * @throws IOException if closing {@code closer} fails
 */
@Override
public void close() throws IOException {
  try {
    LOG.info("Updating the current state high water mark with " + nextWatermark);
    this.wuState.setActualHighWatermark(new LongWatermark(nextWatermark));
  } finally {
    // Always release registered resources, including when the watermark update fails.
    closer.close();
  }
}
/**
 * Verifies the committable watermark reported by a {@link PartitionedDataWriter} wrapping a
 * single mocked watermark-aware writer.
 *
 * @param committed value the mocked writer reports as its committable watermark
 * @param unacknowledged value the mocked writer reports as its unacknowledged watermark
 * @param expected expected committable watermark, or {@code null} if none should be reported
 */
public void testWatermarkComputation(Long committed, Long unacknowledged, Long expected) throws IOException {
  State state = new State();
  state.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TestPartitioner.class.getCanonicalName());

  String defaultSource = "default";

  // Underlying writer: watermark capable, reporting the supplied watermarks.
  WatermarkAwareWriter mockDataWriter = mock(WatermarkAwareWriter.class);
  when(mockDataWriter.isWatermarkCapable()).thenReturn(true);

  Map<String, CheckpointableWatermark> committedBySource = Collections.singletonMap(defaultSource,
      new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(committed)));
  when(mockDataWriter.getCommittableWatermark()).thenReturn(committedBySource);

  Map<String, CheckpointableWatermark> unacknowledgedBySource = Collections.singletonMap(defaultSource,
      new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(unacknowledged)));
  when(mockDataWriter.getUnacknowledgedWatermark()).thenReturn(unacknowledgedBySource);

  // Builder that hands back the mocked writer for every partition.
  PartitionAwareDataWriterBuilder builder = mock(PartitionAwareDataWriterBuilder.class);
  when(builder.validatePartitionSchema(any(Schema.class))).thenReturn(true);
  when(builder.forPartition(any(GenericRecord.class))).thenReturn(builder);
  when(builder.withWriterId(any(String.class))).thenReturn(builder);
  when(builder.build()).thenReturn(mockDataWriter);

  PartitionedDataWriter writer = new PartitionedDataWriter<String, String>(builder, state);

  // Write one record carrying an acknowledgable watermark.
  RecordEnvelope<String> recordEnvelope = new RecordEnvelope<String>("0");
  recordEnvelope.addCallBack(
      new AcknowledgableWatermark(new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(0))));
  writer.writeEnvelope(recordEnvelope);

  Map<String, CheckpointableWatermark> watermark = writer.getCommittableWatermark();
  System.out.println(watermark.toString());

  if (expected != null) {
    Assert.assertTrue(watermark.size() == 1);
    LongWatermark reported = (LongWatermark) watermark.values().iterator().next().getWatermark();
    Assert.assertEquals((long) expected, reported.getValue());
  } else {
    Assert.assertTrue(watermark.isEmpty(), "Expected watermark to be absent");
  }
}
public boolean validateWatermarks(boolean exact, Map<String, CheckpointableWatermark> watermarkMap) { if (!watermarkMap.isEmpty()) { // watermark must be <= the index LongWatermark longWatermark = (LongWatermark) watermarkMap.values().iterator().next().getWatermark(); if (exact) { System.out.println(index-1 + ":" + longWatermark.getValue()); return ((index-1) == longWatermark.getValue()); } else { return (index > longWatermark.getValue()); } } return true; }
/**
 * Emits the next test record, or {@code null} once {@code numRecordsPerExtract} records
 * have been produced.
 *
 * <p>Each call sleeps for {@code sleepTimePerRecord} before building the record, then
 * increments the current watermark and the extracted-record counter.
 *
 * @param reuse unused
 * @return the next {@link TestRecord}, or {@code null} when the extract is exhausted
 * @throws RuntimeException wrapping the {@link InterruptedException} if the sleep is interrupted
 */
@Override
public Object readRecord(@Deprecated Object reuse) throws DataRecordException, IOException {
  if (recordsExtracted >= numRecordsPerExtract) {
    return null;
  }

  try {
    Thread.sleep(sleepTimePerRecord);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe the interruption.
    Thread.currentThread().interrupt();
    throw Throwables.propagate(e);
  }

  TestRecord record = new TestRecord(this.partition, this.currentWatermark.getValue(), null);
  log.debug("Extracted record -> {}", record);
  currentWatermark.increment();
  recordsExtracted++;
  return record;
}
/**
 * Writes this partition's state into the given work unit.
 *
 * <p>The low/high watermarks are stored as the work unit's watermark interval. The
 * user-specified-high-watermark and last-partition flags are written only when set.
 *
 * @param workUnit work unit to populate
 */
public void serialize(WorkUnit workUnit) {
  final LongWatermark low = new LongWatermark(lowWatermark);
  final LongWatermark high = new LongWatermark(highWatermark);
  workUnit.setWatermarkInterval(new WatermarkInterval(low, high));

  if (hasUserSpecifiedHighWatermark) {
    workUnit.setProp(Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK, true);
  }

  if (isLastPartition) {
    workUnit.setProp(Partition.IS_LAST_PARTIITON, true);
  }
}
// Expected high watermark: the starting watermark value advanced by numRecordsPerExtract.
LongWatermark expectedWatermark = new LongWatermark(watermark.getValue() + numRecordsPerExtract);
// Interval running from the starting watermark to the expected one.
WatermarkInterval watermarkInterval = new WatermarkInterval(watermark, expectedWatermark);
// Create the work unit for a new extract over that interval.
workUnit = WorkUnit.create(newExtract(tableType, namespace, table), watermarkInterval);
// NOTE(review): the three statements above repeat verbatim below; if both copies share a
// scope the locals are redeclared. Presumably they sit in separate branches; confirm.
LongWatermark expectedWatermark = new LongWatermark(watermark.getValue() + numRecordsPerExtract);
WatermarkInterval watermarkInterval = new WatermarkInterval(watermark, expectedWatermark);
workUnit = WorkUnit.create(newExtract(tableType, namespace, table), watermarkInterval);
/**
 * Flags whether this is the first publish for the table or partition behind the work unit.
 *
 * <p>Watermark work units are left untouched. For all others,
 * {@link SlaEventKeys#IS_FIRST_PUBLISH} is set to {@code true} when the work unit has no
 * low watermark or its value is 0.
 *
 * @param wus work unit state to annotate
 */
public static void setIsFirstPublishMetadata(WorkUnitState wus) {
  if (wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY)) {
    return;
  }

  LongWatermark previousWatermark = wus.getWorkunit().getLowWatermark(LongWatermark.class);
  boolean firstPublish = null == previousWatermark || previousWatermark.getValue() == 0;
  wus.setProp(SlaEventKeys.IS_FIRST_PUBLISH, firstPublish);
}
}
/**
 * Finishes the extraction, committing the high watermark only on full success.
 *
 * <p>If every iterator has been consumed, success is logged and the expected high watermark
 * becomes the actual high watermark. Otherwise an error is logged and the watermark is left
 * unchanged.
 */
@Override
public void close() throws IOException {
  final boolean allIteratorsConsumed = _current == _iterators.size();
  final String fromDate = dateFormatter.print(_startDate);
  final String toDate = dateFormatter.print(_expectedHighWaterMarkDate);

  if (!allIteratorsConsumed) {
    log.error(String.format("Had problems fetching data from Google Search Console from %s to %s.",
        fromDate, toDate));
    return;
  }

  log.info(String.format("Successfully finished fetching data from Google Search Console from %s to %s.",
      fromDate, toDate));
  _wuState.setActualHighWatermark(new LongWatermark(_expectedHighWaterMark));
}
@Test public void testPersistWatermarkStateToZk() throws IOException { CheckpointableWatermark watermark = new DefaultCheckpointableWatermark("source", new LongWatermark(startTime)); TaskState taskState = new TaskState(); taskState.setJobId(TEST_JOB_ID); taskState.setProp(ConfigurationKeys.JOB_NAME_KEY, "JobName-" + startTime); // watermark storage configuration taskState.setProp(StateStoreBasedWatermarkStorage.WATERMARK_STORAGE_TYPE_KEY, "zk"); taskState.setProp(StateStoreBasedWatermarkStorage.WATERMARK_STORAGE_CONFIG_PREFIX + ZkStateStoreConfigurationKeys.STATE_STORE_ZK_CONNECT_STRING_KEY, testingServer.getConnectString()); StateStoreBasedWatermarkStorage watermarkStorage = new StateStoreBasedWatermarkStorage(taskState); watermarkStorage.commitWatermarks(ImmutableList.of(watermark)); Map<String, CheckpointableWatermark> watermarkMap = watermarkStorage.getCommittedWatermarks(DefaultCheckpointableWatermark.class, ImmutableList.of("source")); Assert.assertEquals(watermarkMap.size(), 1); Assert.assertEquals(((LongWatermark) watermarkMap.get("source").getWatermark()).getValue(), startTime); }
/**
 * Rebuilds a {@link Partition} from the state stored in a work unit.
 *
 * <p>Both watermarks fall back to {@link ConfigurationKeys#DEFAULT_WATERMARK_VALUE} when the
 * work unit carries no watermark interval property.
 *
 * @param workUnit work unit to read from
 * @return the reconstructed partition
 */
public static Partition deserialize(WorkUnit workUnit) {
  final boolean hasInterval = workUnit.getProp(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY) != null;

  long lowWatermark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
  long highWatermark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
  if (hasInterval) {
    lowWatermark = workUnit.getLowWatermark(LongWatermark.class).getValue();
    highWatermark = workUnit.getExpectedHighWatermark(LongWatermark.class).getValue();
  }

  final boolean lastPartition = workUnit.getPropAsBoolean(Partition.IS_LAST_PARTIITON);
  final boolean userSpecifiedHigh = workUnit.getPropAsBoolean(Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK);
  return new Partition(lowWatermark, highWatermark, lastPartition, userSpecifiedHigh);
}
}
/**
 * {@inheritDoc}
 *
 * <p>The watermark is the <code>table</code>'s update time as reported by
 * {@link HiveUnitUpdateProvider#getUpdateTime(Table)}; <code>tableProcessTime</code> is not used.
 *
 * @throws UpdateNotFoundException if there was an error fetching update time using
 *         {@link HiveUnitUpdateProvider#getUpdateTime(Table)}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getExpectedHighWatermark(org.apache.hadoop.hive.ql.metadata.Table, long)
 */
@Override
public LongWatermark getExpectedHighWatermark(Table table, long tableProcessTime) {
  final LongWatermark updateTimeWatermark = new LongWatermark(this.updateProvider.getUpdateTime(table));
  return updateTimeWatermark;
}
long baseRevision = workUnitState.getWorkunit().getLowWatermark(LongWatermark.class, new Gson()).getValue(); if (baseRevision < 0) { try { workUnitState.setActualHighWatermark(new LongWatermark(this.lastRevisionId)); this.currentBatch = new LinkedList<>();
@Override public void start(WatermarkStorage watermarkStorage) throws IOException { Preconditions.checkArgument(watermarkStorage != null, "Watermark Storage should not be null"); Map<String, CheckpointableWatermark> watermarkMap = watermarkStorage.getCommittedWatermarks(KafkaWatermark.class, Collections.singletonList(_partition.toString())); KafkaWatermark watermark = (KafkaWatermark) watermarkMap.get(_partition.toString()); if (watermark == null) { LOG.info("Offset is null - seeking to beginning of topic and partition for {} ", _partition.toString()); _consumer.seekToBeginning(_partition); } else { // seek needs to go one past the last committed offset LOG.info("Offset found in consumer for partition {}. Seeking to one past what we found : {}", _partition.toString(), watermark.getLwm().getValue() + 1); _consumer.seek(_partition, watermark.getLwm().getValue() + 1); } _isStarted.set(true); }
/**
 * Looks up the previously recorded high watermark for a table.
 *
 * @param table table whose complete name is used as the lookup key
 * @return the entry stored in {@code tableWatermarks} for the table, or a new
 *         {@link LongWatermark} of 0 when the table has no entry
 */
@Override
public LongWatermark getPreviousHighWatermark(Table table) {
  final String completeName = table.getCompleteName();
  return this.tableWatermarks.containsKey(completeName)
      ? this.tableWatermarks.get(completeName)
      : new LongWatermark(0);
}