/**
 * Records {@code currentWatermark} as the actual high watermark on {@code workUnitState}.
 *
 * @throws IOException declared by the overridden signature; this body performs no I/O itself
 */
@Override
public void close()
    throws IOException {
  workUnitState.setActualHighWatermark(currentWatermark);
}
/**
 * Records {@code nextWatermark} as the actual high watermark on the work unit state and then
 * closes {@code closer}.
 *
 * <p>{@code closer} is closed from a {@code finally} block so that it is still released when
 * logging or the watermark update throws.
 *
 * @throws IOException propagated from {@code closer.close()}
 */
@Override
public void close() throws IOException {
  try {
    LOG.info("Updating the current state high water mark with " + nextWatermark);
    this.wuState.setActualHighWatermark(new LongWatermark(nextWatermark));
  } finally {
    // Always release registered resources, even if the bookkeeping above failed.
    closer.close();
  }
}
/**
 * Reports how the fetch ended.
 *
 * <p>When {@code _current} equals the number of iterators, a success message is logged and
 * {@code _expectedHighWaterMark} is stored as the actual high watermark. Otherwise an error is
 * logged and the work unit state is left untouched.
 */
@Override
public void close() throws IOException {
  if (_current != _iterators.size()) {
    // Not every iterator was consumed: report the problem and do not advance the watermark.
    log.error(String.format("Had problems fetching data from Google Search Console from %s to %s.",
        dateFormatter.print(_startDate), dateFormatter.print(_expectedHighWaterMarkDate)));
    return;
  }

  log.info(String.format("Successfully finished fetching data from Google Search Console from %s to %s.",
      dateFormatter.print(_startDate), dateFormatter.print(_expectedHighWaterMarkDate)));
  _wuState.setActualHighWatermark(new LongWatermark(_expectedHighWaterMark));
}
/**
 * Stores {@code highWatermark} as the actual high watermark of the work unit state, then
 * closes the connection. A failure while closing the connection is logged and not rethrown.
 */
@Override
public void close() {
  log.info("Updating the current state high water mark with " + this.highWatermark);

  LongWatermark actualHighWatermark = new LongWatermark(this.highWatermark);
  this.workUnitState.setActualHighWatermark(actualHighWatermark);

  try {
    this.closeConnection();
  } catch (Exception e) {
    log.error("Failed to close the extractor", e);
  }
}
/**
 * Copies the expected high watermark of the work unit, read as a {@code LongWatermark},
 * into the actual high watermark of the given state.
 *
 * @param wus the work unit state to update
 */
@Override
public void setActualHighWatermark(WorkUnitState wus) {
  wus.setActualHighWatermark(
      wus.getWorkunit().getExpectedHighWatermark(LongWatermark.class));
}
/**
 * Sets the actual high watermark by reading the expected high watermark.
 *
 * <p>When the {@code IS_WATERMARK_WORKUNIT_KEY} property of the state is true, the expected
 * watermark is read as a {@code MultiKeyValueLongWatermark}; otherwise it is read as a
 * {@code LongWatermark}.
 *
 * {@inheritDoc}
 *
 * @param wus the work unit state whose actual high watermark is set
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#setActualHighWatermark(org.apache.gobblin.configuration.WorkUnitState)
 */
@Override
public void setActualHighWatermark(WorkUnitState wus) {
  // getPropAsBoolean already yields a boolean; no Boolean.valueOf boxing round-trip is needed.
  if (wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY)) {
    wus.setActualHighWatermark(wus.getWorkunit().getExpectedHighWatermark(MultiKeyValueLongWatermark.class));
  } else {
    wus.setActualHighWatermark(wus.getWorkunit().getExpectedHighWatermark(LongWatermark.class));
  }
}
// Store the low watermark as the actual high watermark of the work unit state.
// NOTE(review): the enclosing method is not visible here; presumably this is the
// "nothing was pulled" path, so the watermark does not advance — confirm.
this.workUnitState.setActualHighWatermark(this.lowWatermark);
@Override public void close() throws IOException { if (currentPartitionIdx != INITIAL_PARTITION_IDX) { updateStatisticsForCurrentPartition(); } Map<KafkaPartition, Map<String, String>> tagsForPartitionsMap = Maps.newHashMap(); // Add error partition count and error message count to workUnitState this.workUnitState.setProp(ConfigurationKeys.ERROR_PARTITION_COUNT, this.errorPartitions.size()); this.workUnitState.setProp(ConfigurationKeys.ERROR_MESSAGE_UNDECODABLE_COUNT, this.undecodableMessageCount); for (int i = 0; i < this.partitions.size(); i++) { LOG.info(String.format("Actual high watermark for partition %s=%d, expected=%d", this.partitions.get(i), this.nextWatermark.get(i), this.highWatermark.get(i))); tagsForPartitionsMap.put(this.partitions.get(i), createTagsForPartition(i)); } this.workUnitState.setActualHighWatermark(this.nextWatermark); if (isInstrumentationEnabled()) { for (Map.Entry<KafkaPartition, Map<String, String>> eventTags : tagsForPartitionsMap.entrySet()) { new EventSubmitter.Builder(getMetricContext(), GOBBLIN_KAFKA_NAMESPACE).build() .submit(KAFKA_EXTRACTOR_TOPIC_METADATA_EVENT_NAME, eventTags.getValue()); } } this.closer.close(); }
/**
 * Appends the same {@code WorkUnitState} to two sequence files ("seq1" and "seq2" under
 * {@code outputPath}), changing the value of its {@code TestWatermark} between the appends.
 */
@Test
@SuppressWarnings("deprecation")
public void testSerializeToSequenceFile()
    throws IOException {
  Closer resources = Closer.create();
  Configuration hadoopConf = new Configuration();
  WritableShimSerialization.addToHadoopConfiguration(hadoopConf);

  try {
    SequenceFile.Writer firstWriter = resources.register(SequenceFile.createWriter(this.fs, hadoopConf,
        new Path(this.outputPath, "seq1"), Text.class, WorkUnitState.class));

    Text emptyKey = new Text();
    WorkUnitState state = new WorkUnitState();
    TestWatermark testWatermark = new TestWatermark();

    // First file: watermark value 10.
    testWatermark.setLongWatermark(10L);
    state.setActualHighWatermark(testWatermark);
    firstWriter.append(emptyKey, state);

    SequenceFile.Writer secondWriter = resources.register(SequenceFile.createWriter(this.fs, hadoopConf,
        new Path(this.outputPath, "seq2"), Text.class, WorkUnitState.class));

    // Second file: same state object, watermark value 100.
    testWatermark.setLongWatermark(100L);
    state.setActualHighWatermark(testWatermark);
    secondWriter.append(emptyKey, state);
  } catch (Throwable t) {
    throw resources.rethrow(t);
  } finally {
    resources.close();
  }
}
@Test(expectedExceptions = IllegalStateException.class) public void testMoreThanOneWatermarkWorkunits() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn"); previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100l))); WorkUnitState previousWus2 = new WorkUnitState(); previousWus2.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn"); previousWus2.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus2.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2016", 101l))); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus, previousWus2)); // Expecting IllegalStateException new PartitionLevelWatermarker(state); }
/**
 * Stores a {@code TestWatermark} as the actual high watermark of a {@code WorkUnitState},
 * converts the stored JSON back with {@code WatermarkSerializerHelper}, and checks that the
 * long value is unchanged.
 */
@Test
public void testWatermarkWorkUnitStateSerialization() {
  long expectedValue = 50;

  TestWatermark original = new TestWatermark();
  original.setLongWatermark(expectedValue);

  WorkUnitState state = new WorkUnitState();
  state.setActualHighWatermark(original);

  TestWatermark roundTripped =
      WatermarkSerializerHelper.convertJsonToWatermark(state.getActualHighWatermark(), TestWatermark.class);

  Assert.assertEquals(roundTripped.getLongWatermark(), expectedValue);
}
}
@Test public void testDroppedPartitions() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn"); previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus .setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015-01", 100l, "2015-02", 101l))); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus)); PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state); Table table = mockTable("test_dataset_urn"); Mockito.when(table.getPartitionKeys()).thenReturn(ImmutableList.of(new FieldSchema("year", "string", ""))); Partition partition2015 = mockPartition(table, ImmutableList.of("2015")); // partition 2015 replaces 2015-01 and 2015-02 Mockito.when(partition2015.getParameters()).thenReturn( ImmutableMap.of(AbstractAvroToOrcConverter.REPLACED_PARTITIONS_HIVE_METASTORE_KEY, "2015-01|2015-02")); watermarker.onPartitionProcessBegin(partition2015, 0l, 0l); Assert.assertEquals(watermarker.getExpectedHighWatermarks().get("db@test_dataset_urn"), ImmutableMap.of("2015", 0l)); }
/**
 * Creates a {@code WorkUnitState} whose actual high watermark is a {@code LongWatermark} of
 * the given value, with the dataset URN set to {@code dbName@tableName} and the job id set
 * to {@code "jobId"}.
 *
 * @param dbName database name, used as the part of the dataset URN before {@code @}
 * @param tableName table name, used as the part of the dataset URN after {@code @}
 * @param watermark value of the actual high watermark
 * @return the populated work unit state
 */
public static WorkUnitState createWus(String dbName, String tableName, long watermark) {
  String datasetUrn = dbName + "@" + tableName;

  WorkUnitState state = new WorkUnitState();
  state.setActualHighWatermark(new LongWatermark(watermark));
  state.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn);
  state.setProp(ConfigurationKeys.JOB_ID_KEY, "jobId");
  return state;
}
}
@Test public void testNoPreviousWatermarkWorkunits() throws Exception { // Create one previous workunit with IS_WATERMARK_WORKUNIT_KEY set to true WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn"); previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100l))); // Create one previous workunit with IS_WATERMARK_WORKUNIT_KEY not set (false) WorkUnitState previousWus2 = new WorkUnitState(); previousWus2.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn2"); previousWus2.setActualHighWatermark(new LongWatermark(101l)); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus, previousWus2)); PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state); Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 1); Assert.assertEquals(watermarker.getPreviousWatermarks().get("test_dataset_urn"), ImmutableMap.of("2015", 100l)); }
@Test public void testPreviousState() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table"); previousWus.setActualHighWatermark(new LongWatermark(100l)); // Watermark will be lowest of 100l and 101l WorkUnitState previousWus1 = new WorkUnitState(); previousWus1.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table"); previousWus1.setActualHighWatermark(new LongWatermark(101l)); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus)); TableLevelWatermarker watermarker = new TableLevelWatermarker(state); Assert.assertEquals(watermarker.getPreviousHighWatermark(mockTable("test_table")), new LongWatermark(100l)); }
@Test public void testPreviousStateWithPartitionWatermark() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table"); previousWus.setActualHighWatermark(new LongWatermark(100l)); // Watermark workunits created by PartitionLevelWatermarker need to be ignored. WorkUnitState previousWus1 = new WorkUnitState(); previousWus1.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table"); previousWus1.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus1.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("part1", 200l))); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus)); TableLevelWatermarker watermarker = new TableLevelWatermarker(state); Assert.assertEquals(watermarker.getPreviousHighWatermark(mockTable("test_table")), new LongWatermark(100l)); }
@Test public void testReadPreviousWatermarks() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn"); previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100l, "2016", 101l))); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus)); PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state); Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 1); Assert.assertEquals(watermarker.getPreviousWatermarks().get("test_dataset_urn"), ImmutableMap.of("2015", 100l, "2016", 101l)); // Make sure all the previousWatermarks are added into current expectedHighWatermarks Assert.assertEquals(watermarker.getPreviousWatermarks(), watermarker.getExpectedHighWatermarks()); }
/**
 * Supplies one watermark work unit holding watermarks for partitions "2015" and "2016" and
 * expects {@code getPreviousHighWatermark} to return the matching value for each partition.
 */
@Test
public void testGetPreviousHighWatermarkForPartition()
    throws Exception {
  WorkUnitState watermarkWus = new WorkUnitState();
  watermarkWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
  watermarkWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  watermarkWus.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L, "2016", 101L)));

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(watermarkWus));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);

  Table table = mockTable("test_dataset_urn");
  Partition partition2015 = mockPartition(table, ImmutableList.of("2015"));
  Partition partition2016 = mockPartition(table, ImmutableList.of("2016"));

  Assert.assertEquals(watermarker.getPreviousHighWatermark(partition2015), new LongWatermark(100L));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(partition2016), new LongWatermark(101L));
}
@Test public void testReadPreviousWatermarksMultipleTables() throws Exception { WorkUnitState previousWus = new WorkUnitState(); previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn"); previousWus.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100l, "2016", 101l))); WorkUnitState previousWus2 = new WorkUnitState(); previousWus2.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn2"); previousWus2.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true); previousWus2.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("01", 1l, "02", 2l))); SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus, previousWus2)); PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state); Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 2); Assert.assertEquals(watermarker.getPreviousWatermarks().get("test_dataset_urn"), ImmutableMap.of("2015", 100l, "2016", 101l)); Assert.assertEquals(watermarker.getPreviousWatermarks().get("test_dataset_urn2"), ImmutableMap.of("01", 1l, "02", 2l)); // Make sure all the previousWatermarks are added into current expectedHighWatermarks Assert.assertEquals(watermarker.getPreviousWatermarks(), watermarker.getExpectedHighWatermarks()); }
/**
 * Verifies that the table and each of its partitions report the same previous high
 * watermark, namely the one stored for the table.
 */
@Test
public void testPartitionWatermarks()
    throws Exception {
  WorkUnitState tableWus = new WorkUnitState();
  tableWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
  tableWus.setActualHighWatermark(new LongWatermark(100L));

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(tableWus));
  TableLevelWatermarker watermarker = new TableLevelWatermarker(sourceState);

  Table table = mockTable("test_table");

  Assert.assertEquals(watermarker.getPreviousHighWatermark(table), new LongWatermark(100L));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(mockPartition(table, ImmutableList.of("2015"))),
      new LongWatermark(100L));
  Assert.assertEquals(watermarker.getPreviousHighWatermark(mockPartition(table, ImmutableList.of("2016"))),
      new LongWatermark(100L));
}