/**
 * Emits at most one element from {@code reader} per invocation.
 *
 * <p>If the previous call did not leave an element pending ({@code available} is false), the
 * reader is advanced once. When an element is available, its value and timestamp are captured,
 * the reader is advanced again (the outcome is remembered in {@code available} for the next
 * call), and the captured element is emitted on {@code output} as a {@code DataTuple} holding a
 * {@code WindowedValue} in the global window with {@code PaneInfo.NO_FIRING}.
 *
 * <p>Any exception is passed to {@code Throwables.propagateIfPossible}; if that call returns, the
 * exception is wrapped in a {@link RuntimeException}.
 */
@Override
public void emitTuples() {
  try {
    if (!available) {
      available = reader.advance();
    }
    if (!available) {
      // Nothing to emit in this call.
      return;
    }

    // Capture the current element before moving the reader forward.
    final OutputT element = reader.getCurrent();
    final Instant elementTimestamp = reader.getCurrentTimestamp();

    // Look ahead so the next call knows whether an element is already pending.
    available = reader.advance();

    if (traceTuples) {
      LOG.debug("\nemitting '{}' timestamp {}\n", element, elementTimestamp);
    }

    final WindowedValue<OutputT> windowed =
        WindowedValue.of(element, elementTimestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
    output.emit(DataTuple.of(windowed));
  } catch (Exception e) {
    Throwables.propagateIfPossible(e);
    throw new RuntimeException(e);
  }
}
}
CheckpointMarkT mark = (CheckpointMarkT) reader.getCheckpointMark(); checkpointMarks.add(mark); KV<UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> kv =
/**
 * Closes {@code reader} if one is set.
 *
 * @throws RuntimeException wrapping the {@link IOException} raised by {@code reader.close()}
 */
@Override
public void teardown() {
  if (reader == null) {
    // No reader is set, so there is nothing to release.
    return;
  }
  try {
    reader.close();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
.getLocalReaders() .stream() .anyMatch(reader -> reader.getWatermark().getMillis() == 0)) {
int numElements = 0; do { if (deduplicator.shouldOutput(reader.getCurrentRecordId())) { output.add( WindowedValue.timestampedValueInGlobalWindow( reader.getCurrent(), reader.getCurrentTimestamp())); } while (numElements < ARBITRARY_MAX_ELEMENTS && reader.advance()); Instant watermark = reader.getWatermark(); toClose.close(); WindowedValue.timestampedValueInGlobalWindow(residual, watermark))); } else { Instant watermark = reader.getWatermark(); if (watermark.isBefore(BoundedWindow.TIMESTAMP_MAX_VALUE)) { UnboundedReader<?> toClose = reader; toClose.close(); } catch (final IOException closeEx) { if (ioe != null) { reader.close();
source.getCheckpointMarkCoder(), (KafkaCheckpointMark) reader.getCheckpointMark()); advanceOnce(reader, i > initialNumElements); expected.add(i); actual.add(reader.getCurrent().getKV().getValue());
CheckpointMarkT mark = (CheckpointMarkT) reader.getCheckpointMark(); checkpointMarks.add(mark); KV<UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> kv = KV.of(source, mark);
CheckpointMarkT mark = (CheckpointMarkT) reader.getCheckpointMark(); checkpointMarks.add(mark); KV<UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> kv = KV.of(source, mark);
final CheckpointMark oldMark = shard.getCheckpoint(); @SuppressWarnings("unchecked") final CheckpointMarkT mark = (CheckpointMarkT) reader.getCheckpointMark(); if (oldMark != null) { oldMark.finalizeCheckpoint();
assertEquals(numToSkip - 1, (long) reader.getCurrent().getKV().getValue()); assertEquals(numToSkip - 1, reader.getCurrentTimestamp().getMillis()); source.getCheckpointMarkCoder(), (KafkaCheckpointMark) reader.getCheckpointMark()); reader = source.createReader(null, mark); assertEquals(i, (long) reader.getCurrent().getKV().getValue()); assertEquals(i, reader.getCurrentTimestamp().getMillis());
@Override public void onProcessingTime(long timestamp) throws Exception { if (this.isRunning) { synchronized (context.getCheckpointLock()) { // find minimum watermark over all localReaders long watermarkMillis = Long.MAX_VALUE; for (UnboundedSource.UnboundedReader<OutputT> reader: localReaders) { Instant watermark = reader.getWatermark(); if (watermark != null) { watermarkMillis = Math.min(watermark.getMillis(), watermarkMillis); } } context.emitWatermark(new Watermark(watermarkMillis)); if (watermarkMillis >= BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) { this.isRunning = false; } } setNextWatermarkTimer(this.runtimeContext); } }
@Test public void testUnboundedSourceCheckpointMark() throws Exception { UnboundedSource<Long, CounterMark> source = CountingSource.unboundedWithTimestampFn(new ValueAsTimestampFn()); UnboundedReader<Long> reader = source.createReader(null, null); final long numToSkip = 3; assertTrue(reader.start()); // Advance the source numToSkip elements and manually save state. for (long l = 0; l < numToSkip; ++l) { reader.advance(); } // Confirm that we get the expected element in sequence before checkpointing. assertEquals(numToSkip, (long) reader.getCurrent()); assertEquals(numToSkip, reader.getCurrentTimestamp().getMillis()); // Checkpoint and restart, and confirm that the source continues correctly. CounterMark mark = CoderUtils.clone(source.getCheckpointMarkCoder(), (CounterMark) reader.getCheckpointMark()); reader = source.createReader(null, mark); assertTrue(reader.start()); // Confirm that we get the next element in sequence. assertEquals(numToSkip + 1, (long) reader.getCurrent()); assertEquals(numToSkip + 1, reader.getCurrentTimestamp().getMillis()); } }
/**
 * Reads up to {@code shard.getMaxNumRecords()} elements from a fresh reader over a clone of the
 * shard's source and outputs each one, with its record id, at the element's timestamp.
 *
 * <p>Returns without creating a reader when the record limit is not positive or the maximum read
 * time is zero milliseconds. Reading stops early when no element is available and
 * {@code advanceWithBackoff} also reports none. When the loop ends, the reader's checkpoint mark
 * is finalized and the reader is closed by the try-with-resources block.
 *
 * @param shard supplies the source, the record limit and the optional maximum read time
 * @param out receives the elements wrapped in {@code ValueWithRecordId}
 * @param options passed to {@code createReader}
 * @throws Exception propagated from the reader or from the checkpoint finalization
 */
@ProcessElement
public void process(
    @Element Shard<T> shard, OutputReceiver<ValueWithRecordId<T>> out, PipelineOptions options)
    throws Exception {
  // A null deadline means the read time is unbounded.
  Instant endTime = null;
  if (shard.getMaxReadTime() != null) {
    endTime = Instant.now().plus(shard.getMaxReadTime());
  }

  if (shard.getMaxNumRecords() <= 0) {
    return;
  }
  if (shard.getMaxReadTime() != null && shard.getMaxReadTime().getMillis() == 0) {
    return;
  }

  try (UnboundedSource.UnboundedReader<T> reader =
      SerializableUtils.clone(shard.getSource()).createReader(options, null)) {
    long recordsRead = 0L;
    while (recordsRead < shard.getMaxNumRecords()) {
      // The first iteration starts the reader; later iterations advance it.
      boolean hasElement = (recordsRead == 0) ? reader.start() : reader.advance();
      if (!hasElement) {
        hasElement = advanceWithBackoff(reader, endTime);
      }
      if (!hasElement) {
        break;
      }

      out.outputWithTimestamp(
          new ValueWithRecordId<T>(reader.getCurrent(), reader.getCurrentRecordId()),
          reader.getCurrentTimestamp());
      recordsRead++;
    }
    reader.getCheckpointMark().finalizeCheckpoint();
  }
}
@Override public void onProcessingTime(long timestamp) { if (this.isRunning) { synchronized (context.getCheckpointLock()) { // find minimum watermark over all localReaders long watermarkMillis = Long.MAX_VALUE; for (UnboundedSource.UnboundedReader<OutputT> reader : localReaders) { Instant watermark = reader.getWatermark(); if (watermark != null) { watermarkMillis = Math.min(watermark.getMillis(), watermarkMillis); } } context.emitWatermark(new Watermark(watermarkMillis)); if (shutdownOnFinalWatermark && watermarkMillis >= BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) { this.isRunning = false; } } setNextWatermarkTimer(this.runtimeContext); } }
@Override public void onProcessingTime(long timestamp) { if (this.isRunning) { synchronized (context.getCheckpointLock()) { // find minimum watermark over all localReaders long watermarkMillis = Long.MAX_VALUE; for (UnboundedSource.UnboundedReader<OutputT> reader : localReaders) { Instant watermark = reader.getWatermark(); if (watermark != null) { watermarkMillis = Math.min(watermark.getMillis(), watermarkMillis); } } context.emitWatermark(new Watermark(watermarkMillis)); if (watermarkMillis >= BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) { this.isRunning = false; } } setNextWatermarkTimer(this.runtimeContext); } }
/**
 * Moves the reader to its next element and returns that element.
 *
 * <p>The first call starts the reader; every later call advances it. The outcome is stored in
 * {@code isCurrentAvailable}.
 *
 * @return the current element as a timestamped {@code WindowedValue} in the global window
 * @throws RuntimeException wrapping any exception thrown while starting or advancing the reader
 * @throws NoSuchElementException if the reader reports that no element is available
 */
@Override
public Object readCurrent() {
  try {
    if (isStarted) {
      isCurrentAvailable = reader.advance();
    } else {
      // Mark the reader as started before calling start(), as the flag is set even if it throws.
      isStarted = true;
      isCurrentAvailable = reader.start();
    }
  } catch (final Exception e) {
    throw new RuntimeException(e);
  }

  if (!isCurrentAvailable) {
    throw new NoSuchElementException();
  }

  final O current = reader.getCurrent();
  return WindowedValue.timestampedValueInGlobalWindow(current, reader.getCurrentTimestamp());
}
/**
 * Reports the watermark for this source.
 *
 * <p>When {@code reader} is not an {@code UnboundedSource.UnboundedReader}, the result is
 * {@code Watermark.MIN()} while {@code available} is true and {@code Watermark.MAX()} otherwise.
 * For an unbounded reader, the reader's own watermark is used: a value equal to
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE} maps to {@code Watermark.MAX()}, and any other value
 * is converted with {@code TranslatorUtils.jodaTimeToJava8Time}.
 *
 * @return the watermark derived as described above
 */
@Override
public Instant getWatermark() {
  if (!(reader instanceof UnboundedSource.UnboundedReader)) {
    // Non-unbounded readers report no watermark here; the result depends only on `available`.
    if (available) {
      return Watermark.MIN();
    }
    return Watermark.MAX();
  }

  org.joda.time.Instant readerWatermark =
      ((UnboundedSource.UnboundedReader) reader).getWatermark();
  if (readerWatermark.equals(BoundedWindow.TIMESTAMP_MAX_VALUE)) {
    return Watermark.MAX();
  }
  return TranslatorUtils.jodaTimeToJava8Time(readerWatermark);
}
}
/** Emit the current element from the given Reader. The reader is guaranteed to have data. */ private void emitElement( SourceContext<WindowedValue<ValueWithRecordId<OutputT>>> ctx, UnboundedSource.UnboundedReader<OutputT> reader) { // make sure that reader state update and element emission are atomic // with respect to snapshots OutputT item = reader.getCurrent(); byte[] recordId = reader.getCurrentRecordId(); Instant timestamp = reader.getCurrentTimestamp(); WindowedValue<ValueWithRecordId<OutputT>> windowedValue = WindowedValue.of( new ValueWithRecordId<>(item, recordId), timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING); ctx.collectWithTimestamp(windowedValue, timestamp.getMillis()); }
/** * Emit the current element from the given Reader. The reader is guaranteed to have data. */ private void emitElement( SourceContext<WindowedValue<ValueWithRecordId<OutputT>>> ctx, UnboundedSource.UnboundedReader<OutputT> reader) { // make sure that reader state update and element emission are atomic // with respect to snapshots synchronized (ctx.getCheckpointLock()) { OutputT item = reader.getCurrent(); byte[] recordId = reader.getCurrentRecordId(); Instant timestamp = reader.getCurrentTimestamp(); WindowedValue<ValueWithRecordId<OutputT>> windowedValue = WindowedValue.of(new ValueWithRecordId<>(item, recordId), timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING); ctx.collectWithTimestamp(windowedValue, timestamp.getMillis()); } }
private boolean advanceWithBackoff(UnboundedReader<T> reader, Instant endTime) throws IOException { // Try reading from the source with exponential backoff BackOff backoff = BACKOFF_FACTORY.backoff(); long nextSleep = backoff.nextBackOffMillis(); while (true) { if (nextSleep == BackOff.STOP || (endTime != null && Instant.now().isAfter(endTime))) { return false; } if (reader.advance()) { return true; } Uninterruptibles.sleepUninterruptibly(nextSleep, TimeUnit.MILLISECONDS); nextSleep = backoff.nextBackOffMillis(); } } }