/**
 * Attaches a stage that performs the given group-and-aggregate operation.
 * For every distinct key observed in the input of a given window it emits
 * a single {@link TimestampedEntry} whose value is the result of running
 * {@code aggrOp} over all window items sharing that key.
 *
 * @see com.hazelcast.jet.aggregate.AggregateOperations AggregateOperations
 * @param aggrOp the aggregate operation to perform
 * @param <R> type of the aggregation result
 */
@Nonnull
default <R> StreamStage<TimestampedEntry<K, R>> aggregate(
        @Nonnull AggregateOperation1<? super T, ?, R> aggrOp
) {
    // Delegate to the overload that takes an explicit output function,
    // wrapping each keyed window result in a TimestampedEntry.
    KeyedWindowResultFunction<K, R, TimestampedEntry<K, R>> outputFn =
            TimestampedEntry::fromWindowResult;
    return aggregate(aggrOp, outputFn);
}
@Nonnull AggregateOperation3<? super T, ? super T1, ? super T2, ?, ? extends R> aggrOp ) { return aggregate3(stage1, stage2, aggrOp, TimestampedEntry::fromWindowResult);
/**
 * Attaches a stage that performs the given cogroup-and-aggregate operation
 * over the items of this stage and of {@code stage1}. For every distinct
 * key observed in the input of a given window it emits a single {@link
 * TimestampedEntry} whose value is the result of running {@code aggrOp}
 * over all window items sharing that key.
 * <p>
 * This variant takes a single two-input aggregate operation (see the
 * {@linkplain AggregateOperation2 Javadoc} for a simple example). If your
 * logic decomposes into one single-input aggregate operation per stream,
 * prefer {@link #aggregate2(AggregateOperation1, StreamStageWithKey, AggregateOperation1)
 * stage0.aggregate2(aggrOp0, stage1, aggrOp1)}: it has a simpler API and
 * reuses the predefined single-input operations. Reserve this variant for
 * the case where both input streams must feed the same accumulator.
 *
 * @see com.hazelcast.jet.aggregate.AggregateOperations AggregateOperations
 * @param aggrOp the aggregate operation to perform
 * @param <T1> type of items in {@code stage1}
 * @param <R> type of the aggregation result
 */
@Nonnull
default <T1, R> StreamStage<TimestampedEntry<K, R>> aggregate2(
        @Nonnull StreamStageWithKey<T1, ? extends K> stage1,
        @Nonnull AggregateOperation2<? super T, ? super T1, ?, ? extends R> aggrOp
) {
    // Delegate to the overload with an explicit output function, emitting
    // each keyed window result as a TimestampedEntry.
    KeyedWindowResultFunction<K, R, TimestampedEntry<K, R>> outputFn =
            TimestampedEntry::fromWindowResult;
    return aggregate2(stage1, aggrOp, outputFn);
}
@SuppressWarnings("unchecked")
public GrAggBuilder(StageWithKeyAndWindow<?, K> stage) {
    // Unwrap the underlying compute stage and verify it carries JetEvent items
    // before touching any other state.
    ComputeStageImplBase upstream = ((StageWithGroupingBase) stage).computeStage;
    ensureJetEvents(upstream, "This pipeline stage");
    // Capture the owning pipeline and the window definition of the seed stage.
    pipelineImpl = (PipelineImpl) upstream.getPipeline();
    wDef = stage.windowDefinition();
    // Register this stage's upstream and its key extractor as the builder's
    // initial input.
    keyFns.add(stage.keyFn());
    upstreamStages.add(upstream);
}
/**
 * Attaches a stage that passes through just the items that are distinct
 * within their window according to the grouping key (a window never emits
 * two items mapping to the same key). Which of the items sharing a key
 * passes through is unspecified. Results are emitted as {@link
 * TimestampedItem TimestampedItem(windowEnd, distinctItem)}.
 *
 * @return the newly attached stage
 */
@Nonnull
default StreamStage<TimestampedItem<T>> distinct() {
    // Delegate to the mapping overload, wrapping each surviving item
    // together with its window end timestamp.
    WindowResultFunction<T, TimestampedItem<T>> wrapFn = TimestampedItem::fromWindowResult;
    return distinct(wrapFn);
}
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236 private static Pipeline coGroupWithBuilder() { Pipeline p = Pipeline.create(); StreamStageWithKey<PageVisit, Integer> pageVisits = p .drawFrom(Sources.<PageVisit, Integer, PageVisit>mapJournal(PAGE_VISIT, mapPutEvents(), mapEventNewValue(), START_FROM_OLDEST)) .addTimestamps(pv -> pv.timestamp(), 100) .groupingKey(pv -> pv.userId()); StreamStageWithKey<AddToCart, Integer> addToCarts = p .drawFrom(Sources.<AddToCart, Integer, AddToCart>mapJournal(ADD_TO_CART, mapPutEvents(), mapEventNewValue(), START_FROM_OLDEST)) .addTimestamps(atc -> atc.timestamp(), 100) .groupingKey(atc -> atc.userId()); StreamStageWithKey<Payment, Integer> payments = p .drawFrom(Sources.<Payment, Integer, Payment>mapJournal(PAYMENT, mapPutEvents(), mapEventNewValue(), START_FROM_OLDEST)) .addTimestamps(pm -> pm.timestamp(), 100) .groupingKey(pm -> pm.userId()); StageWithKeyAndWindow<PageVisit, Integer> windowStage = pageVisits.window(sliding(10, 1)); WindowGroupAggregateBuilder<Integer, List<PageVisit>> builder = windowStage.aggregateBuilder(toList()); Tag<List<PageVisit>> pageVisitTag = builder.tag0(); Tag<List<AddToCart>> addToCartTag = builder.add(addToCarts, toList()); Tag<List<Payment>> paymentTag = builder.add(payments, toList()); StreamStage<TimestampedEntry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> coGrouped = builder.build((winStart, winEnd, key, r) -> new TimestampedEntry<>( winEnd, key, tuple3(r.get(pageVisitTag), r.get(addToCartTag), r.get(paymentTag)))); coGrouped.drainTo(Sinks.logger()); return p; }
/**
 * Attaches a stage that passes through just the items that are distinct
 * within their window (a window never emits two equal items). Which of
 * several equal items passes through is unspecified. The supplied {@code
 * mapToOutputFn} produces the items actually emitted.
 *
 * @param mapToOutputFn function that returns the items to emit
 * @return the newly attached stage
 */
@Nonnull
default <R> StreamStage<R> distinct(@Nonnull WindowResultFunction<? super T, ? extends R> mapToOutputFn) {
    // Distinct-by-equality is just distinct-by-key with the whole item as the key.
    StageWithKeyAndWindow<T, T> keyedByItem = groupingKey(wholeItem());
    return keyedByItem.distinct(mapToOutputFn);
}
/**
 * Attaches a stage that passes through just the items that are distinct
 * within their window according to the grouping key (no two items emitted
 * for a window map to the same key). There is no guarantee among the items
 * with the same key which one it will pass through. To create the item to
 * emit, the stage calls the supplied {@code mapToOutputFn}.
 *
 * @param mapToOutputFn function that returns the items to emit
 * @return the newly attached stage
 */
@Nonnull
default <R> StreamStage<R> distinct(@Nonnull WindowResultFunction<? super T, ? extends R> mapToOutputFn) {
    // Implemented as an aggregation: pickAny() keeps an arbitrary single item
    // per key per window, and the window-result function is adapted to the
    // keyed form expected by aggregate().
    return aggregate(pickAny(), mapToOutputFn.toKeyedWindowResultFn());
}
KeyedWindowResultFunction<K, Tuple3<R0, R1, R2>, TimestampedEntry<K, Tuple3<R0, R1, R2>>> outputFn = TimestampedEntry::fromWindowResult; return aggregate3(stage1, stage2, aggregateOperation3(aggrOp0, aggrOp1, aggrOp2, Tuple3::tuple3), outputFn);
/**
 * Attaches a stage that passes through just the items that are distinct
 * within their window (a window never emits two equal items). Which of
 * several equal items passes through is unspecified. Results are emitted
 * as {@link TimestampedItem TimestampedItem(windowEnd, distinctItem)}.
 *
 * @return the newly attached stage
 */
@Nonnull
default StreamStage<TimestampedItem<T>> distinct() {
    // Distinct-by-equality is just distinct-by-key with the whole item as the key.
    StageWithKeyAndWindow<T, T> keyedByItem = groupingKey(wholeItem());
    return keyedByItem.distinct();
}
/**
 * Attaches a stage that performs a cogroup-and-aggregate operation over
 * the items of this stage and of {@code stage1}. For every distinct
 * grouping key observed in the input of a given window it runs each
 * aggregate operation separately over the items sharing that key:
 * {@code aggrOp0} on this stage's items, {@code aggrOp1} on {@code
 * stage1}'s. Once a window is complete it emits, per key, a {@code
 * TimestampedEntry(key, Tuple2(result0, result1))}.
 *
 * @see com.hazelcast.jet.aggregate.AggregateOperations AggregateOperations
 * @param aggrOp0 aggregate operation to perform on this stage
 * @param stage1 the other stage
 * @param aggrOp1 aggregate operation to perform on the other stage
 * @param <T1> type of the items in the other stage
 * @param <R0> type of the aggregated result for this stage
 * @param <R1> type of the aggregated result for the other stage
 */
@Nonnull
default <T1, R0, R1> StreamStage<TimestampedEntry<K, Tuple2<R0, R1>>> aggregate2(
        @Nonnull AggregateOperation1<? super T, ?, ? extends R0> aggrOp0,
        @Nonnull StreamStageWithKey<T1, ? extends K> stage1,
        @Nonnull AggregateOperation1<? super T1, ?, ? extends R1> aggrOp1
) {
    // Compose the two single-input operations into one two-input operation
    // whose result is a Tuple2, then emit each keyed window result as a
    // TimestampedEntry. The local variable pins down the method reference's
    // target type for generic inference.
    KeyedWindowResultFunction<K, Tuple2<R0, R1>, TimestampedEntry<K, Tuple2<R0, R1>>>
            mapToEntryFn = TimestampedEntry::fromWindowResult;
    return aggregate2(stage1, aggregateOperation2(aggrOp0, aggrOp1, Tuple2::tuple2), mapToEntryFn);
}
.groupingKey(CarCount::getLocation) .window(sliding(MINUTES.toMillis(120), MINUTES.toMillis(15))) .aggregate(linearTrend(CarCount::getTime, CarCount::getCount)) .map((TimestampedEntry<String, Double> e) -> entry(new TrendKey(e.getKey(), e.getTimestamp()), e.getValue()))
AggregateOperation3<T, T1, T2, ?, Tuple3<R0, R1, R2>> aggrOp = aggregateOperation3(aggrOp0, aggrOp1, aggrOp2, Tuple3::tuple3); return aggregate3(stage1, stage2, aggrOp, (start, end, key, t3) -> mapToOutputFn.apply(start, end, key, t3.f0(), t3.f1(), t3.f2()));
) { AggregateOperation2<T, T1, ?, Tuple2<R0, R1>> aggrOp = aggregateOperation2(aggrOp0, aggrOp1, Tuple2::tuple2); return aggregate2(stage1, aggrOp, (start, end, key, t2) -> mapToOutputFn.apply(start, end, key, t2.f0(), t2.f1()));
private static Pipeline buildPipeline() {
    Pipeline pipeline = Pipeline.create();
    // Stream trades from the map's event journal, allow up to 3 s of event lag,
    // then count trades per ticker over a sliding window and render each
    // window result as a log line.
    pipeline.drawFrom(Sources.<Trade, Integer, Trade>mapJournal(TRADES_MAP_NAME,
            DistributedPredicate.alwaysTrue(), EventJournalMapEvent::getNewValue, START_FROM_CURRENT))
            .addTimestamps(Trade::getTime, 3000)
            .groupingKey(Trade::getTicker)
            .window(WindowDefinition.sliding(SLIDING_WINDOW_LENGTH_MILLIS, SLIDE_STEP_MILLIS))
            .aggregate(counting(),
                    (start, end, ticker, count) ->
                            String.format("%s %5s %4d", toLocalTime(end), ticker, count))
            .drainTo(Sinks.logger());
    return pipeline;
}
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236 private static Pipeline coGroup() { Pipeline p = Pipeline.create(); StreamStageWithKey<PageVisit, Integer> pageVisits = p .drawFrom(Sources.<PageVisit, Integer, PageVisit>mapJournal(PAGE_VISIT, mapPutEvents(), mapEventNewValue(), START_FROM_OLDEST)) .addTimestamps(pv -> pv.timestamp(), 100) .groupingKey(pv -> pv.userId()); StreamStageWithKey<Payment, Integer> payments = p .drawFrom(Sources.<Payment, Integer, Payment>mapJournal(PAYMENT, mapPutEvents(), mapEventNewValue(), START_FROM_OLDEST)) .addTimestamps(pm -> pm.timestamp(), 100) .groupingKey(pm -> pm.userId()); StreamStageWithKey<AddToCart, Integer> addToCarts = p .drawFrom(Sources.<AddToCart, Integer, AddToCart>mapJournal(ADD_TO_CART, mapPutEvents(), mapEventNewValue(), START_FROM_OLDEST)) .addTimestamps(atc -> atc.timestamp(), 100) .groupingKey(atc -> atc.userId()); StageWithKeyAndWindow<PageVisit, Integer> windowStage = pageVisits.window(sliding(10, 1)); StreamStage<TimestampedEntry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> coGrouped = windowStage.aggregate3(toList(), addToCarts, toList(), payments, toList()); coGrouped.drainTo(Sinks.logger()); return p; }
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236 private static Pipeline groupAndAggregate() { Pipeline p = Pipeline.create(); p.drawFrom(Sources.<PageVisit, Integer, PageVisit>mapJournal(PAGE_VISIT, mapPutEvents(), mapEventNewValue(), START_FROM_OLDEST)) .addTimestamps(pv -> pv.timestamp(), 100) .window(sliding(10, 1)) .groupingKey(pv -> pv.userId()) .aggregate(toList()) .drainTo(Sinks.logger()); return p; }
private static Pipeline buildPipeline() {
    Pipeline pipeline = Pipeline.create();
    // Stream price updates from the "prices" map journal, reconstructing the
    // event object from the journal entry, then count updates per ticker over
    // a sliding window (1 s slide step).
    pipeline.drawFrom(Sources.<PriceUpdateEvent, String, Tuple2<Integer, Long>>mapJournal(
            "prices",
            mapPutEvents(),
            ev -> new PriceUpdateEvent(ev.getKey(), ev.getNewValue().f0(), ev.getNewValue().f1()),
            START_FROM_CURRENT
    ))
            .addTimestamps(PriceUpdateEvent::timestamp, LAG_SECONDS * 1000)
            // single-threaded source mapping keeps journal order per partition
            .setLocalParallelism(1)
            .groupingKey(PriceUpdateEvent::ticker)
            .window(WindowDefinition.sliding(WINDOW_SIZE_SECONDS * 1000, 1000))
            .aggregate(AggregateOperations.counting())
            .drainTo(Sinks.logger());
    return pipeline;
}
private static Pipeline buildPipeline() {
    // We'll calculate two aggregations over the same input data:
    // 1. number of viewed product listings
    // 2. set of purchased product IDs
    // Output of the aggregation will be Tuple2{Long, Set<String>}
    // (the original comment said List{Integer, Set<String>}, which does not
    // match the declared result type below)
    AggregateOperation1<ProductEvent, ?, Tuple2<Long, Set<String>>> aggrOp = allOf(
            // counts 1 per VIEW_LISTING event, 0 otherwise
            summingLong(e -> e.getProductEventType() == VIEW_LISTING ? 1 : 0),
            // NOTE(review): assumes mapping() drops null mapped values, so only
            // PURCHASE events contribute product IDs to the set — confirm
            // against the AggregateOperations.mapping Javadoc
            mapping(e -> e.getProductEventType() == PURCHASE ? e.getProductId() : null, toSet())
    );
    Pipeline p = Pipeline.create();
    // Custom generator source (single processor instance), timestamped with
    // zero allowed lag, grouped per user, aggregated over session windows
    // that close after SESSION_TIMEOUT of inactivity.
    p.drawFrom(Sources.<ProductEvent>streamFromProcessor("generator",
            ProcessorMetaSupplier.of(GenerateEventsP::new, 1)))
            .addTimestamps(ProductEvent::getTimestamp, 0)
            .groupingKey(ProductEvent::getUserId)
            .window(WindowDefinition.session(SESSION_TIMEOUT))
            .aggregate(aggrOp, SessionWindow::sessionToString)
            .drainTo(Sinks.logger());
    return p;
}