/**
 * Wraps a DStream in a new {@link DStreamCollection} built with this instance's {@code sec}.
 *
 * @param stream the stream to wrap
 * @param <U> element type of the stream
 * @return a collection backed by {@code stream}
 */
private <U> SparkCollection<U> wrap(JavaDStream<U> stream) {
  SparkCollection<U> wrapped = new DStreamCollection<>(sec, stream);
  return wrapped;
}
}
/**
 * Flat-maps the underlying stream with the given function and wraps the result.
 *
 * @param stageSpec stage specification; not read by this implementation
 * @param function the flat-map function applied to the stream
 * @param <U> element type produced by {@code function}
 * @return a collection over the flat-mapped stream
 */
@Override
public <U> SparkCollection<U> flatMap(StageSpec stageSpec, FlatMapFunction<T, U> function) {
  SparkCollection<U> flattened = wrap(stream.flatMap(function));
  return flattened;
}
/**
 * Flat-maps the underlying pair stream and exposes the result as a {@link DStreamCollection}.
 *
 * @param function the flat-map function applied to each key/value tuple of {@code pairStream}
 * @param <T> element type produced by {@code function}
 * @return a collection over the flat-mapped stream, built with this instance's {@code sec}
 */
@Override
public <T> SparkCollection<T> flatMap(FlatMapFunction<Tuple2<K, V>, T> function) {
  SparkCollection<T> flattened = new DStreamCollection<>(sec, pairStream.flatMap(function));
  return flattened;
}
/**
 * Calls {@code cache()} on the underlying stream and wraps what it returns.
 *
 * @return a collection over the stream returned by {@code stream.cache()}
 */
@Override
public SparkCollection<T> cache() {
  SparkCollection<T> cached = wrap(stream.cache());
  return cached;
}
/**
 * Merges already-joined inputs by transforming the joined pair stream with a
 * {@link DynamicJoinMerge}.
 *
 * @param stageSpec specification of the stage, passed to the driver context
 * @param joinedInputs joined inputs; its underlying object is used as a
 *                     {@code JavaPairDStream<Object, List<JoinElement<Object>>>}
 * @param collector statistics collector, passed to the driver context
 * @return a {@link DStreamCollection} over the merged stream
 * @throws Exception declared by the overridden method
 */
@Override
protected SparkCollection<Object> mergeJoinResults(
    StageSpec stageSpec,
    SparkPairCollection<Object, List<JoinElement<Object>>> joinedInputs,
    StageStatisticsCollector collector) throws Exception {
  DynamicDriverContext driverContext = new DynamicDriverContext(stageSpec, sec, collector);
  JavaPairDStream<Object, List<JoinElement<Object>>> joinedStream = joinedInputs.getUnderlying();
  JavaDStream<Object> merged = joinedStream.transform(new DynamicJoinMerge<>(driverContext));
  return new DStreamCollection<>(sec, merged);
}
}
/**
 * Unions this collection's stream with the stream underlying {@code other}.
 *
 * @param other the collection to union with; its underlying object is cast to
 *              {@code JavaDStream<T>}
 * @return a collection over the unioned stream
 */
@SuppressWarnings("unchecked")
@Override
public SparkCollection<T> union(SparkCollection<T> other) {
  // This cast is the reason for the method-level @SuppressWarnings("unchecked").
  JavaDStream<T> otherStream = (JavaDStream<T>) other.getUnderlying();
  return wrap(stream.union(otherStream));
}
.transform(new CountingTransformFunction<>(stageSpec.getName(), sec.getMetrics(), "records.out", dataTracer)) .map(new WrapOutputTransformFunction<>(stageSpec.getName())); return new DStreamCollection<>(sec, outputDStream);
/**
 * Transforms the underlying stream with a {@link DynamicTransform} constructed with its
 * boolean flag set to {@code true}.
 *
 * @param stageSpec specification of the stage, passed to the driver context
 * @param collector statistics collector, passed to the driver context
 * @return a collection over the transformed stream
 */
@Override
public SparkCollection<RecordInfo<Object>> multiOutputTransform(StageSpec stageSpec,
                                                                StageStatisticsCollector collector) {
  DynamicDriverContext driverContext = new DynamicDriverContext(stageSpec, sec, collector);
  // NOTE(review): the 'true' flag presumably selects multi-output handling; confirm in DynamicTransform.
  return wrap(stream.transform(new DynamicTransform<T>(driverContext, true)));
}
/**
 * Transforms the underlying stream with a {@link DynamicTransform} constructed with its
 * boolean flag set to {@code false}.
 *
 * @param stageSpec specification of the stage, passed to the driver context
 * @param collector statistics collector, passed to the driver context
 * @return a collection over the transformed stream
 */
@Override
public SparkCollection<RecordInfo<Object>> transform(StageSpec stageSpec,
                                                     StageStatisticsCollector collector) {
  DynamicDriverContext driverContext = new DynamicDriverContext(stageSpec, sec, collector);
  // NOTE(review): the 'false' flag presumably selects single-output handling; confirm in DynamicTransform.
  return wrap(stream.transform(new DynamicTransform<T>(driverContext, false)));
}
/**
 * Runs a group-by followed by an aggregate over the underlying stream.
 *
 * <p>The stream is first keyed with a {@link DynamicAggregatorGroupBy}, then grouped by key,
 * and finally transformed with a {@link DynamicAggregatorAggregate}. Both dynamic functions
 * share one {@link DynamicDriverContext}.
 *
 * @param stageSpec specification of the stage, passed to the driver context
 * @param partitions number of partitions for the group-by, or {@code null} to call
 *                   {@code groupByKey()} without an explicit partition count
 * @param collector statistics collector, passed to the driver context
 * @return a collection over the aggregated stream
 */
@Override
public SparkCollection<RecordInfo<Object>> aggregate(StageSpec stageSpec,
                                                     @Nullable Integer partitions,
                                                     StageStatisticsCollector collector) {
  DynamicDriverContext driverContext = new DynamicDriverContext(stageSpec, sec, collector);

  JavaPairDStream<Object, T> keyed =
      stream.transformToPair(new DynamicAggregatorGroupBy<Object, T>(driverContext));

  JavaPairDStream<Object, Iterable<T>> grouped;
  if (partitions == null) {
    grouped = keyed.groupByKey();
  } else {
    grouped = keyed.groupByKey(partitions);
  }

  return wrap(grouped.transform(new DynamicAggregatorAggregate<Object, T, Object>(driverContext)));
}
/**
 * Initializes the given {@link SparkCompute} and then applies it to the underlying stream.
 *
 * <p>The compute is wrapped in a {@link DynamicSparkCompute}. Its {@code initialize} call is
 * executed through {@code Transactionals.execute} before the stream transformation is set up.
 *
 * @param stageSpec specification of the stage being computed
 * @param compute the compute plugin to wrap, initialize and apply
 * @param <U> element type produced by the compute
 * @return a collection over the stream transformed by a {@link ComputeTransformFunction}
 * @throws Exception propagated from {@code Transactionals.execute}, which is invoked with
 *                   {@code Exception.class}
 */
@Override
public <U> SparkCollection<U> compute(final StageSpec stageSpec, SparkCompute<T, U> compute) throws Exception {
  DynamicDriverContext driverContext =
      new DynamicDriverContext(stageSpec, sec, new NoopStageStatisticsCollector());
  final SparkCompute<T, U> wrappedCompute = new DynamicSparkCompute<>(driverContext, compute);

  TxRunnable initializer = new TxRunnable() {
    @Override
    public void run(DatasetContext datasetContext) throws Exception {
      PipelineRuntime runtime = new SparkPipelineRuntime(sec);
      SparkExecutionPluginContext pluginContext = new BasicSparkExecutionPluginContext(
          sec,
          JavaSparkContext.fromSparkContext(stream.context().sparkContext()),
          datasetContext,
          runtime,
          stageSpec);
      wrappedCompute.initialize(pluginContext);
    }
  };
  Transactionals.execute(sec, initializer, Exception.class);

  return wrap(stream.transform(new ComputeTransformFunction<>(sec, stageSpec, wrappedCompute)));
}
/**
 * Applies a window to the underlying stream, with a {@link CountingTransformFunction} on each side.
 *
 * <p>The function before the window is created with the name {@code "records.in"} and no data
 * tracer. The function after the window is created with {@code "records.out"} and the data
 * tracer for this stage.
 *
 * @param stageSpec specification of the stage; its name is passed to both counting functions
 * @param windower supplies the window width and slide interval, both passed to
 *                 {@code Durations.seconds}
 * @return a collection over the windowed stream
 */
@Override
public SparkCollection<T> window(StageSpec stageSpec, Windower windower) {
  String stageName = stageSpec.getName();
  CountingTransformFunction<T> countIn =
      new CountingTransformFunction<T>(stageName, sec.getMetrics(), "records.in", null);

  return wrap(
      stream
          .transform(countIn)
          .window(Durations.seconds(windower.getWidth()),
                  Durations.seconds(windower.getSlideInterval()))
          .transform(new CountingTransformFunction<T>(stageName, sec.getMetrics(), "records.out",
                                                      sec.getDataTracer(stageName))));
}