/**
 * Builds a stream that, for every batch, groups the {@code Long} values emitted by
 * {@link PartitionOffsetPair} under their {@code Integer} key.
 *
 * <p>Each partition of {@code unionStreams} is converted to {@code (Integer, Long)} pairs by a
 * {@code PartitionOffsetPair}; the pairs are then grouped by key using a single output
 * partition ({@code groupByKey(1)}). Going by the names, the key is presumably a partition id
 * and the values are the offsets seen for it — confirm against {@code PartitionOffsetPair}.
 *
 * @param unionStreams the stream of messages with metadata to extract the pairs from
 * @param props not read by this method; kept so the signature stays unchanged for callers
 * @param <T> payload type carried by each {@code MessageAndMetadata}
 * @return a pair stream mapping each key to all values observed for it in a batch
 */
public static <T> JavaPairDStream<Integer, Iterable<Long>> getPartitionOffset(
    JavaDStream<MessageAndMetadata<T>> unionStreams, Properties props) {
  JavaPairDStream<Integer, Long> offsetsByPartition =
      unionStreams.mapPartitionsToPair(new PartitionOffsetPair<>());
  // A single output partition keeps all groups of a batch together.
  return offsetsByPartition.groupByKey(1);
}
/**
 * Runs the group-by and aggregate phases of an aggregator stage over the wrapped stream.
 *
 * <p>The stream is first keyed with a {@link DynamicAggregatorGroupBy}, then grouped by key, and
 * finally each group is passed through a {@link DynamicAggregatorAggregate}. Both functions share
 * one {@link DynamicDriverContext} built from the stage spec, the {@code sec} field and the
 * statistics collector.
 *
 * @param stageSpec specification of the stage being executed
 * @param partitions number of partitions for the group-by, or {@code null} to call the
 *                   no-argument {@code groupByKey()}
 * @param collector statistics collector handed to the driver context
 * @return the aggregated records, wrapped as a {@link SparkCollection}
 */
@Override
public SparkCollection<RecordInfo<Object>> aggregate(StageSpec stageSpec, @Nullable Integer partitions,
                                                     StageStatisticsCollector collector) {
  DynamicDriverContext driverContext = new DynamicDriverContext(stageSpec, sec, collector);

  JavaPairDStream<Object, T> keyed =
    stream.transformToPair(new DynamicAggregatorGroupBy<Object, T>(driverContext));

  // Only pass an explicit partition count when the caller supplied one.
  JavaPairDStream<Object, Iterable<T>> grouped;
  if (partitions != null) {
    grouped = keyed.groupByKey(partitions);
  } else {
    grouped = keyed.groupByKey();
  }

  return wrap(grouped.transform(new DynamicAggregatorAggregate<Object, T, Object>(driverContext)));
}
static void streamSpansToStorage( JavaDStream<byte[]> stream, ReadSpans readSpans, AdjustAndConsumeSpansSharingTraceId adjustAndConsumeSpansSharingTraceId ) { JavaDStream<Span> spans = stream.flatMap(readSpans); // TODO: plug in some filter to drop spans regardless of trace ID // spans = spans.filter(spanFilter); JavaPairDStream<String, Iterable<Span>> tracesById = spans .mapToPair(s -> new Tuple2<>(Util.toLowerHex(s.traceIdHigh, s.traceId), s)) .groupByKey(); tracesById.foreachRDD(rdd -> { rdd.values().foreachPartition(adjustAndConsumeSpansSharingTraceId); }); }
// Wrap `stream` (declared outside this fragment) as a key/value pair stream so that
// by-key operations such as groupByKey become available.
JavaPairDStream<String, String> pairStream = JavaPairDStream.fromJavaDStream(stream);
// Gather all values that share a key into a single Iterable per key.
JavaPairDStream<String, Iterable<String>> grouped = pairStream.groupByKey();
// Register the grouped stream with the test utilities so its output can be collected.
JavaTestUtils.attachTestOutputStream(grouped);
// Run the streaming context `ssc` and collect the output as nested lists of (key, values) tuples.
// NOTE(review): the two literal 2s are presumably the number of batches to run and the number
// of expected outputs — confirm against JavaTestUtils.runStreams.
List<List<Tuple2<String, Iterable<String>>>> result = JavaTestUtils.runStreams(ssc, 2, 2);
// Wrap `stream` (declared outside this fragment) as a key/value pair stream so that
// by-key operations such as groupByKey become available.
JavaPairDStream<String, String> pairStream = JavaPairDStream.fromJavaDStream(stream);
// Gather all values that share a key into a single Iterable per key.
JavaPairDStream<String, Iterable<String>> grouped = pairStream.groupByKey();
// Register the grouped stream with the test utilities so its output can be collected.
JavaTestUtils.attachTestOutputStream(grouped);
// Run the streaming context `ssc` and collect the output as nested lists of (key, values) tuples.
// NOTE(review): the two literal 2s are presumably the number of batches to run and the number
// of expected outputs — confirm against JavaTestUtils.runStreams.
List<List<Tuple2<String, Iterable<String>>>> result = JavaTestUtils.runStreams(ssc, 2, 2);