/**
 * Builds the {@link SlidingWindowPolicy} that corresponds to this definition,
 * using its {@code windowSize} and {@code slideBy}. This is the policy object
 * the Core API processors use to drive the sliding window logic.
 *
 * @return the sliding window policy for this definition's size and slide step
 */
public SlidingWindowPolicy toSlidingWindowPolicy() {
    final SlidingWindowPolicy policy = slidingWinPolicy(windowSize, slideBy);
    return policy;
}
}
/**
 * Converts this definition to one defining a tumbling window of the same
 * length as this definition's frame. The frame size and frame offset are
 * carried over unchanged; the resulting window consists of a single frame.
 *
 * @return a policy whose window spans exactly one frame
 */
public SlidingWindowPolicy toTumblingByFrame() {
    final int singleFrame = 1;
    return new SlidingWindowPolicy(frameSize, frameOffset, singleFrame);
}
/**
 * Returns the lowest frame timestamp greater than the given timestamp. If
 * there is no such {@code long} value, returns {@code Long.MAX_VALUE}.
 *
 * @param timestamp the timestamp to find the next frame boundary for
 * @return the next frame timestamp above {@code timestamp}
 */
public long higherFrameTs(long timestamp) {
    final long shifted = timestamp + frameSize;
    if (sumHadOverflow(timestamp, frameSize, shifted)) {
        // timestamp + frameSize wrapped around: floor the timestamp first and
        // only then step one frame ahead using the clamping addition
        return addClamped(floorFrameTs(timestamp), frameSize);
    }
    return floorFrameTs(shifted);
}
/**
 * Builds the window that ends at {@code frameTs} from scratch by combining
 * the accumulators of all frames with timestamps from
 * {@code frameTs - windowSize + frameSize} up to and including
 * {@code frameTs}. Frames missing from {@code tsToKeyToAcc} contribute
 * nothing.
 *
 * @param frameTs timestamp of the leading (newest) frame of the window
 * @return a fresh map from key to the combined accumulator for the window
 */
private Map<K, A> recomputeWindow(long frameTs) {
    final long frameStep = winPolicy.frameSize();
    final Map<K, A> combined = new HashMap<>();
    long ts = frameTs - winPolicy.windowSize() + frameStep;
    while (ts <= frameTs) {
        assert combineFn != null : "combineFn == null";
        final Map<K, A> frame = tsToKeyToAcc.getOrDefault(ts, emptyMap());
        for (Entry<K, A> keyAndAcc : frame.entrySet()) {
            // lazily create the per-key accumulator the first time the key is seen
            final A target = combined.computeIfAbsent(keyAndAcc.getKey(), k -> aggrOp.createFn().get());
            combineFn.accept(target, keyAndAcc.getValue());
        }
        ts += frameStep;
    }
    return combined;
}
private void completeWindow(long frameTs) { long frameToEvict = frameTs - winPolicy.windowSize() + winPolicy.frameSize(); Map<K, A> evictedFrame = tsToKeyToAcc.remove(frameToEvict); if (evictedFrame != null) { lazyAdd(totalKeysInFrames, -evictedFrame.size()); lazyAdd(totalFrames, -1); if (!winPolicy.isTumbling() && aggrOp.deductFn() != null) { // deduct trailing-edge frame patchSlidingWindow(aggrOp.deductFn(), evictedFrame); } } assert tsToKeyToAcc.values().stream().mapToInt(Map::size).sum() == totalKeysInFrames.get() : "totalKeysInFrames mismatch, expected=" + tsToKeyToAcc.values().stream().mapToInt(Map::size).sum() + ", actual=" + totalKeysInFrames.get(); }
@Override public boolean finishSnapshotRestore() { // In the first stage we should theoretically have saved `nextWinToEmit` // to the snapshot. We don't bother since the first stage is effectively a // tumbling window and it makes no difference in that case. So we don't // restore and remain at MIN_VALUE. if (isLastStage) { // if nextWinToEmit is not on frame boundary, push it to next boundary nextWinToEmit = minRestoredNextWinToEmit > Long.MIN_VALUE ? winPolicy.higherFrameTs(minRestoredNextWinToEmit - 1) : minRestoredNextWinToEmit; logFine(getLogger(), "Restored nextWinToEmit from snapshot to: %s", nextWinToEmit); // Delete too old restored frames. This can happen when restoring from exported state and new job // has smaller window size if (nextWinToEmit > Long.MIN_VALUE + winPolicy.windowSize()) { for (long ts = minRestoredFrameTs; ts <= nextWinToEmit - winPolicy.windowSize(); ts += winPolicy.frameSize()) { Map<K, A> removed = tsToKeyToAcc.remove(ts); if (removed != null) { lazyAdd(totalFrames, -1); lazyAdd(totalKeysInFrames, -removed.size()); } } } } return true; }
private Traverser<Object> windowTraverserAndEvictor(long wm) { long rangeStart; if (nextWinToEmit != Long.MIN_VALUE) { rangeStart = nextWinToEmit; } else { if (tsToKeyToAcc.isEmpty()) { // no item was observed, but initialize nextWinToEmit to the next window return Traversers.empty(); } // This is the first watermark we are acting upon. Find the lowest frame // timestamp that can be emitted: at most the top existing timestamp lower // than wm, but even lower than that if there are older frames on record. // The above guarantees that the sliding window can be correctly // initialized using the "add leading/deduct trailing" approach because we // start from a window that covers at most one existing frame -- the lowest // one on record. long bottomTs = tsToKeyToAcc .keySet().stream() .min(naturalOrder()) .orElseThrow(() -> new AssertionError("Failed to find the min key in a non-empty map")); rangeStart = min(bottomTs, winPolicy.floorFrameTs(wm)); } return traverseStream(range(rangeStart, wm, winPolicy.frameSize()).boxed()) .flatMap(winEnd -> traverseIterable(computeWindow(winEnd).entrySet()) .map(e -> mapToOutputFn.apply( winEnd - winPolicy.windowSize(), winEnd, e.getKey(), aggrOp.finishFn().apply(e.getValue()))) .onFirstNull(() -> completeWindow(winEnd))); }
/** * The partition count is initially set to 0, call * {@link #increasePartitionCount} to set it. * * @param eventTimePolicy event time policy as passed in {@link * Sources#streamFromProcessorWithWatermarks} **/ public EventTimeMapper(EventTimePolicy<? super T> eventTimePolicy) { this.idleTimeoutNanos = MILLISECONDS.toNanos(eventTimePolicy.idleTimeoutMillis()); this.timestampFn = eventTimePolicy.timestampFn(); this.wrapFn = eventTimePolicy.wrapFn(); this.newWmPolicyFn = eventTimePolicy.newWmPolicyFn(); if (eventTimePolicy.watermarkThrottlingFrameSize() != 0) { this.watermarkThrottlingFrame = tumblingWinPolicy(eventTimePolicy.watermarkThrottlingFrameSize()) .withOffset(eventTimePolicy.watermarkThrottlingFrameOffset()); } else { this.watermarkThrottlingFrame = null; } }
checkTrue(keyFns.size() == aggrOp.arity(), keyFns.size() + " key functions " + "provided for " + aggrOp.arity() + "-arity aggregate operation"); if (!winPolicy.isTumbling()) { requireNonNull(aggrOp.combineFn(), "AggregateOperation.combineFn is required for sliding windows"); wm -> windowTraverserAndEvictor(wm.timestamp()) .append(wm) .onFirstNull(() -> nextWinToEmit = winPolicy.higherFrameTs(wm.timestamp())) ); this.emptyAcc = aggrOp.createFn().get();
/**
 * Adapts a timestamp-extracting function into one that returns the
 * timestamp of the frame the item belongs to.
 * <p>
 * For {@code EVENT} timestamps the result is the lowest frame timestamp
 * above the item's timestamp. For any other kind the timestamp is
 * decremented by one first, so a timestamp that already lies on a frame
 * boundary maps to itself.
 *
 * @param timestampFnX  function extracting the timestamp from an item
 * @param timestampKind how to interpret the extracted timestamp
 * @param winPolicy     the window policy supplying the frame boundaries
 * @return function mapping an item to its frame timestamp
 */
private static DistributedToLongFunction<Object> toFrameTimestampFn(
        @Nonnull DistributedToLongFunction<?> timestampFnX,
        @Nonnull TimestampKind timestampKind,
        @Nonnull SlidingWindowPolicy winPolicy
) {
    @SuppressWarnings("unchecked")
    DistributedToLongFunction<Object> timestampFn = (DistributedToLongFunction<Object>) timestampFnX;
    if (timestampKind == EVENT) {
        return item -> winPolicy.higherFrameTs(timestampFn.applyAsLong(item));
    }
    return item -> winPolicy.higherFrameTs(timestampFn.applyAsLong(item) - 1);
}
timestampFns, timestampKind, winPolicy.toTumblingByFrame(), aggrOp.withIdentityFinish(), TimestampedEntry::fromWindowResult,
private Map<K, A> computeWindow(long frameTs) { if (winPolicy.isTumbling()) { return tsToKeyToAcc.getOrDefault(frameTs, emptyMap()); } if (aggrOp.deductFn() == null) { return recomputeWindow(frameTs); } if (slidingWindow == null) { slidingWindow = recomputeWindow(frameTs); } else { // add leading-edge frame patchSlidingWindow(aggrOp.combineFn(), tsToKeyToAcc.get(frameTs)); } return slidingWindow; }
/**
 * Emits the windows for the data still held in {@code tsToKeyToAcc}. The
 * flushing traverser is created on first call and covers windows up to
 * {@code topTs + windowSize - frameSize}; it clears itself once exhausted.
 *
 * @return {@code true} if there was nothing to flush, otherwise the result
 *         of {@code emitFromTraverser} for the flushing traverser
 */
private boolean flushBuffers() {
    if (flushTraverser != null) {
        // a flush is already in progress, keep draining it
        return emitFromTraverser(flushTraverser);
    }
    if (tsToKeyToAcc.isEmpty()) {
        return true;
    }
    final long flushUpTo = topTs + winPolicy.windowSize() - winPolicy.frameSize();
    flushTraverser = windowTraverserAndEvictor(flushUpTo)
            .onFirstNull(() -> flushTraverser = null);
    return emitFromTraverser(flushTraverser);
}
// higherFrameTs(ts - 1) equals ts only when ts itself lies on a frame boundary
// (assuming higherFrameTs returns the lowest frame timestamp strictly greater
// than its argument), so a mismatch means k.timestamp is off the frame grid.
// NOTE(review): badFrameRestored presumably makes this get reported only once -- confirm.
long higherFrameTs = winPolicy.higherFrameTs(k.timestamp - 1); if (higherFrameTs != k.timestamp) { if (!badFrameRestored) {
/**
 * Returns the definition of a tumbling window of length {@code
 * windowSize}. The tumbling window is a special case of the sliding
 * window with {@code slideBy = windowSize}. Given {@code
 * windowSize = 4}, the generated windows would cover timestamps {@code
 * ..., [-4..0), [0..4), [4..8), ...}
 *
 * @param windowSize the length of the window
 * @return the tumbling window policy
 */
public static SlidingWindowPolicy tumblingWinPolicy(long windowSize) {
    // a tumbling window slides by its own full length
    final long slideBy = windowSize;
    return slidingWinPolicy(windowSize, slideBy);
}
}
@Override protected boolean tryProcess(int ordinal, @Nonnull Object item) { @SuppressWarnings("unchecked") final long frameTs = frameTimestampFns.get(ordinal).applyAsLong(item); assert frameTs == winPolicy.floorFrameTs(frameTs) : "getFrameTsFn returned an invalid frame timestamp"; // Ensure the event isn't late. We don't allow a "partially late" event: // one which belongs to some windows that are already emitted, even though // we still have the frame it belongs to. Such frames were already combined // into `slidingWindow` and we can't modify the value because that would // disturb the value that we'll deduct from `slidingWindow` later on. if (frameTs < nextWinToEmit) { logLateEvent(getLogger(), nextWinToEmit, item); lazyIncrement(lateEventsDropped); return true; } final K key = keyFns.get(ordinal).apply(item); A acc = tsToKeyToAcc .computeIfAbsent(frameTs, createMapPerTsFunction) .computeIfAbsent(key, createAccFunction); aggrOp.accumulateFn(ordinal).accept(acc, item); topTs = max(topTs, frameTs); return true; }
/**
 * Returns a new window definition where all the frames are shifted by the
 * given offset. More formally, it specifies the value of the lowest
 * non-negative frame timestamp. The frame size and the number of frames per
 * window stay the same.
 * <p>
 * Given a tumbling window of {@code windowSize = 4}, with no offset the
 * windows would cover the timestamps {@code ..., [-4..0), [0..4), ...}
 * With {@code offset = 2} they will cover {@code ..., [-2..2), [2..6),
 * ...}
 *
 * @param offset the frame offset of the new definition
 * @return a new policy with the given frame offset
 */
public SlidingWindowPolicy withOffset(long offset) {
    final long framesPerWindow = windowSize / frameSize;
    return new SlidingWindowPolicy(frameSize, offset, framesPerWindow);
}
// Trades are timestamped by Trade::getTime and keyed by Trade::getTicker.
// The window is SLIDING_WINDOW_LENGTH_MILLIS long and slides by SLIDE_STEP_MILLIS.
DistributedToLongFunction<? super Trade> timestampFn = Trade::getTime; DistributedFunction<? super Trade, ?> keyFn = Trade::getTicker; SlidingWindowPolicy winPolicy = slidingWinPolicy( SLIDING_WINDOW_LENGTH_MILLIS, SLIDE_STEP_MILLIS);
// Throttle the watermark: round `min` down to a boundary of the throttling frame.
// Without a throttling frame newWm is Long.MIN_VALUE, which can never exceed
// lastEmittedWm, so no watermark is appended in that case.
// NOTE(review): `min` is presumably the lowest watermark across partitions -- confirm.
long newWm = watermarkThrottlingFrame != null ? watermarkThrottlingFrame.floorFrameTs(min) : Long.MIN_VALUE; if (newWm > lastEmittedWm) { traverser.append(new Watermark(newWm));
/**
 * Returns the definition of a sliding window of length {@code
 * windowSize} that slides by {@code slideBy}. Given {@code
 * windowSize = 4} and {@code slideBy = 2}, the generated windows would
 * cover timestamps {@code ..., [-2..2), [0..4), [2..6), [4..8), [6..10),
 * ...}
 * <p>
 * Since the window will be computed internally by maintaining {@link
 * SlidingWindowPolicy frames} of size equal to the sliding step, the
 * configured window length must be an integer multiple of the sliding
 * step.
 *
 * @param windowSize the length of the window, must be a multiple of {@code slideBy}
 * @param slideBy the amount to slide the window by
 * @return the sliding window policy with frame size {@code slideBy}, zero
 *         frame offset and {@code windowSize / slideBy} frames per window
 */
public static SlidingWindowPolicy slidingWinPolicy(long windowSize, long slideBy) {
    Preconditions.checkPositive(windowSize, "windowSize must be >= 1");
    Preconditions.checkPositive(slideBy, "slideBy must be >= 1");
    // slideBy is validated above, so the remainder and division below are safe
    final boolean wholeNumberOfFrames = windowSize % slideBy == 0;
    Preconditions.checkTrue(wholeNumberOfFrames, "windowSize must be an integer multiple of slideBy");

    final long framesPerWindow = windowSize / slideBy;
    return new SlidingWindowPolicy(slideBy, 0, framesPerWindow);
}