/**
 * {@inheritDoc}
 *
 * Uses the supplied {@code tableProcessTime} directly as the expected high watermark value.
 */
@Override
public LongWatermark getExpectedHighWatermark(Table table, long tableProcessTime) {
  // The table process time itself is the watermark; the table argument is unused here.
  long watermarkValue = tableProcessTime;
  return new LongWatermark(watermarkValue);
}
/**
 * Persists the tracked high watermark into the work unit state, then releases all
 * resources registered with the closer.
 *
 * @throws IOException if closing the underlying resources fails
 */
@Override
public void close() throws IOException {
  try {
    LOG.info("Updating the current state high water mark with " + nextWatermark);
    this.wuState.setActualHighWatermark(new LongWatermark(nextWatermark));
  } finally {
    // Always release resources, even if the watermark update above throws.
    closer.close();
  }
}
/**
 * Writes this partition's watermark interval and partition flags onto the given work unit.
 *
 * @param workUnit the work unit to annotate
 */
public void serialize(WorkUnit workUnit) {
  WatermarkInterval interval =
      new WatermarkInterval(new LongWatermark(lowWatermark), new LongWatermark(highWatermark));
  workUnit.setWatermarkInterval(interval);
  if (hasUserSpecifiedHighWatermark) {
    workUnit.setProp(Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK, true);
  }
  if (isLastPartition) {
    // NOTE(review): the constant name is misspelled ("PARTIITON") at its declaration site;
    // it must be fixed there, not here, to avoid breaking other call sites.
    workUnit.setProp(Partition.IS_LAST_PARTIITON, true);
  }
}
/**
 * Finalizes the extraction: records the actual high watermark only when every iterator
 * was fully consumed; otherwise logs an error and leaves the watermark untouched so the
 * range is retried on the next run.
 *
 * @throws IOException declared for the Closeable contract; not thrown by this implementation
 */
@Override
public void close() throws IOException {
  // Shared date-range description; previously this formatting was duplicated in both branches.
  String dateRange = String.format("from %s to %s", dateFormatter.print(_startDate),
      dateFormatter.print(_expectedHighWaterMarkDate));
  if (_current == _iterators.size()) {
    log.info("Successfully finished fetching data from Google Search Console " + dateRange + ".");
    _wuState.setActualHighWatermark(new LongWatermark(_expectedHighWaterMark));
  } else {
    log.error("Had problems fetching data from Google Search Console " + dateRange + ".");
  }
}
/**
 * {@inheritDoc}
 *
 * Uses the <code>table</code>'s modified time as watermark. The modified time is read using
 * {@link HiveUnitUpdateProvider#getUpdateTime(Table)}
 *
 * @throws UpdateNotFoundException if there was an error fetching update time using
 *         {@link HiveUnitUpdateProvider#getUpdateTime(Table)}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getExpectedHighWatermark(org.apache.hadoop.hive.ql.metadata.Table, long)
 */
@Override
public LongWatermark getExpectedHighWatermark(Table table, long tableProcessTime) {
  long updateTime = this.updateProvider.getUpdateTime(table);
  return new LongWatermark(updateTime);
}
/**
 * Returns the previously committed high watermark for the given table, or a zero
 * watermark when no watermark has been recorded for it.
 *
 * @param table the hive table to look up (keyed by its complete name)
 * @return the stored watermark, or {@code new LongWatermark(0)} if absent
 */
@Override
public LongWatermark getPreviousHighWatermark(Table table) {
  // Single map lookup instead of the original containsKey + get double lookup.
  LongWatermark previous = this.tableWatermarks.get(table.getCompleteName());
  return previous != null ? previous : new LongWatermark(0);
}
/**
 * Closes the extractor's read stream after recording the actual high watermark.
 * Connection-close failures are logged rather than propagated (best-effort close).
 */
@Override
public void close() {
  LongWatermark finalWatermark = new LongWatermark(this.highWatermark);
  log.info("Updating the current state high water mark with " + this.highWatermark);
  this.workUnitState.setActualHighWatermark(finalWatermark);
  try {
    this.closeConnection();
  } catch (Exception e) {
    // Deliberately swallowed: a close failure should not fail the task at this point.
    log.error("Failed to close the extractor", e);
  }
}
/**
 * Reads the next record from the wrapped extractor and packages it in an envelope whose
 * checkpointable watermark is keyed by the record's partition and valued by its sequence.
 *
 * @return the record wrapped with its checkpointable watermark
 * @throws DataRecordException if the underlying extractor fails to produce a record
 * @throws IOException on read errors
 */
@Override
public RecordEnvelope<Object> readRecordEnvelope() throws DataRecordException, IOException {
  TestRecord record = (TestRecord) extractor.readRecord(null);
  String watermarkSource = String.valueOf(record.getPartition());
  DefaultCheckpointableWatermark watermark =
      new DefaultCheckpointableWatermark(watermarkSource, new LongWatermark(record.getSequence()));
  return new RecordEnvelope<Object>(record, watermark);
}
/**
 * Advances the internal watermark counter and publishes it as the committable
 * watermark for the single "default" source.
 *
 * @return a singleton map from "default" to the freshly incremented watermark
 */
@Override
public Map<String, CheckpointableWatermark> getCommittableWatermark() {
  watermark++;
  CheckpointableWatermark committable =
      new DefaultCheckpointableWatermark("default", new LongWatermark(watermark));
  return Collections.singletonMap("default", committable);
}
/**
 * Advances the watermark counter and publishes it, deliberately throwing on every
 * {@code failEvery}-th call to simulate commit failures in tests.
 *
 * @return a singleton map from "default" to the incremented watermark
 * @throws RuntimeException on every {@code failEvery}-th invocation
 */
@Override
public Map<String, CheckpointableWatermark> getCommittableWatermark() {
  watermark++;
  boolean shouldFail = (watermark % failEvery == 0);
  if (shouldFail) {
    throw new RuntimeException("Failed because you asked me to");
  }
  CheckpointableWatermark committable =
      new DefaultCheckpointableWatermark("default", new LongWatermark(watermark));
  return Collections.singletonMap("default", committable);
}
/**
 * Records a sequence of committed watermark values for the given source on the tracker.
 *
 * @param watermarkTracker tracker receiving the commits
 * @param source watermark source key
 * @param commit watermark values to commit, in order
 */
private void commits(WatermarkTracker watermarkTracker, String source, int... commit) {
  for (int i = 0; i < commit.length; i++) {
    DefaultCheckpointableWatermark committed =
        new DefaultCheckpointableWatermark(source, new LongWatermark(commit[i]));
    watermarkTracker.committedWatermark(committed);
  }
}
/**
 * Attempts a read that is expected to be interrupted: asserts that the extractor
 * surfaces either a WakeupException or a ClosedChannelException.
 */
public void run() {
  // Removed dead locals (TopicPartition, KafkaWatermark, reuse buffer, unused envelopes)
  // that were constructed but never used by the assertion below.
  try {
    kSSE.readRecordEnvelope();
  } catch (Exception e) {
    Assert.assertTrue((e instanceof WakeupException) || (e instanceof ClosedChannelException));
  }
} };
/**
 * Get the expected high watermark for this partition
 * {@inheritDoc}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getExpectedHighWatermark(org.apache.hadoop.hive.ql.metadata.Partition, long, long)
 */
@Override
public LongWatermark getExpectedHighWatermark(Partition partition, long tableProcessTime,
    long partitionProcessTime) {
  long partitionWatermark = this.expectedHighWatermarks.getPartitionWatermark(
      tableKey(partition.getTable()), partitionKey(partition));
  return new LongWatermark(partitionWatermark);
}
/**
 * Builds a WorkUnitState whose work unit carries a fixed watermark interval
 * (longs encoding 2016-01-01 23:59:59 through 2016-01-02 23:59:59).
 *
 * @return a WorkUnitState over an APPEND_ONLY extract with a preset watermark interval
 */
public static WorkUnitState getWorkUnitState1() {
  Extract extract = new Extract(Extract.TableType.APPEND_ONLY, "namespace", "table");
  WorkUnit workUnit = new WorkUnit(extract);
  LongWatermark low = new LongWatermark(20160101235959L);
  LongWatermark high = new LongWatermark(20160102235959L);
  workUnit.setWatermarkInterval(new WatermarkInterval(low, high));
  return new WorkUnitState(workUnit, new State());
}
}
/**
 * With no whitelist configured, every partition should yield a workunit.
 */
@Test
public void testNoWhitelist() throws Exception {
  BackfillHiveSource backfillHiveSource = new BackfillHiveSource();
  backfillHiveSource.initBackfillHiveSource(new SourceState());
  Partition sourcePartition = Mockito.mock(Partition.class, Mockito.RETURNS_SMART_NULLS);
  boolean shouldCreate =
      backfillHiveSource.shouldCreateWorkunit(sourcePartition, new LongWatermark(0));
  Assert.assertTrue(shouldCreate);
}
/**
 * Writes a single watermarked record through the console writer and verifies that
 * the attached watermark gets acknowledged.
 *
 * @param consoleWriter writer under test
 * @param content record payload
 * @param source watermark source key
 * @param value watermark value
 * @throws IOException if the writer fails
 */
private void writeEnvelope(ConsoleWriter consoleWriter, String content, String source, long value)
    throws IOException {
  AcknowledgableWatermark ackable = new AcknowledgableWatermark(
      new DefaultCheckpointableWatermark(source, new LongWatermark(value)));
  RecordEnvelope<String> mockEnvelope =
      (RecordEnvelope<String>) new RecordEnvelope<>(content).addCallBack(ackable);
  consoleWriter.writeEnvelope(mockEnvelope);
  Assert.assertTrue(ackable.isAcked());
}
@Test public void testPreviousState() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table"); previousWus.setActualHighWatermark(new LongWatermark(100l)); // Watermark will be lowest of 100l and 101l WorkUnitState previousWus1 = new WorkUnitState(); previousWus1.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table"); previousWus1.setActualHighWatermark(new LongWatermark(101l)); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus)); TableLevelWatermarker watermarker = new TableLevelWatermarker(state); Assert.assertEquals(watermarker.getPreviousHighWatermark(mockTable("test_table")), new LongWatermark(100l)); }
/**
 * A previous MultiKeyValueLongWatermark state should map back to per-partition
 * LongWatermarks keyed by partition value.
 */
@Test
public void testGetPreviousHighWatermarkForPartition() throws Exception {
  WorkUnitState previousWus = new WorkUnitState();
  previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  previousWus.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100l, "2016", 101l)));

  PartitionLevelWatermarker watermarker =
      new PartitionLevelWatermarker(new SourceState(new State(), Lists.newArrayList(previousWus)));

  Table table = mockTable("test_dataset_urn");
  Partition partition2015 = mockPartition(table, ImmutableList.of("2015"));
  Partition partition2016 = mockPartition(table, ImmutableList.of("2016"));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(partition2015), new LongWatermark(100l));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(partition2016), new LongWatermark(101l));
}
/**
 * Builds a WorkUnitState carrying the given actual high watermark and a dataset URN
 * of the form {@code dbName@tableName}.
 *
 * @param dbName database portion of the dataset URN
 * @param tableName table portion of the dataset URN
 * @param watermark actual high watermark value to record
 * @return the populated WorkUnitState (job id fixed to "jobId")
 */
public static WorkUnitState createWus(String dbName, String tableName, long watermark) {
  WorkUnitState wus = new WorkUnitState();
  wus.setActualHighWatermark(new LongWatermark(watermark));
  wus.setProp(ConfigurationKeys.DATASET_URN_KEY, dbName + "@" + tableName);
  wus.setProp(ConfigurationKeys.JOB_ID_KEY, "jobId");
  return wus;
}
}
@Test public void testPreviousStateWithPartitionWatermark() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table"); previousWus.setActualHighWatermark(new LongWatermark(100l)); // Watermark workunits created by PartitionLevelWatermarker need to be ignored. WorkUnitState previousWus1 = new WorkUnitState(); previousWus1.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table"); previousWus1.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus1.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("part1", 200l))); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus)); TableLevelWatermarker watermarker = new TableLevelWatermarker(state); Assert.assertEquals(watermarker.getPreviousHighWatermark(mockTable("test_table")), new LongWatermark(100l)); }