@Override
public SparkCollection<T> cache() {
  return wrap(stream.cache());
}
@Override
public <U> SparkCollection<U> flatMap(StageSpec stageSpec, FlatMapFunction<T, U> function) {
  return wrap(stream.flatMap(function));
}
@SuppressWarnings("unchecked") @Override public SparkCollection<T> union(SparkCollection<T> other) { return wrap(stream.union((JavaDStream<T>) other.getUnderlying())); }
@Override
public SparkCollection<RecordInfo<Object>> multiOutputTransform(StageSpec stageSpec,
                                                                StageStatisticsCollector collector) {
  // the boolean flag tells DynamicTransform whether the stage emits to multiple output ports
  return wrap(stream.transform(
    new DynamicTransform<T>(new DynamicDriverContext(stageSpec, sec, collector), true)));
}

@Override
public SparkCollection<RecordInfo<Object>> transform(StageSpec stageSpec, StageStatisticsCollector collector) {
  return wrap(stream.transform(
    new DynamicTransform<T>(new DynamicDriverContext(stageSpec, sec, collector), false)));
}
@Override
public SparkCollection<RecordInfo<Object>> aggregate(StageSpec stageSpec, @Nullable Integer partitions,
                                                     StageStatisticsCollector collector) {
  DynamicDriverContext dynamicDriverContext = new DynamicDriverContext(stageSpec, sec, collector);
  // key each record using the aggregator's groupBy function, resolved at runtime per micro-batch
  JavaPairDStream<Object, T> keyedCollection =
    stream.transformToPair(new DynamicAggregatorGroupBy<Object, T>(dynamicDriverContext));
  // group by key, honoring the configured partition count when one is given
  JavaPairDStream<Object, Iterable<T>> groupedCollection = partitions == null
    ? keyedCollection.groupByKey()
    : keyedCollection.groupByKey(partitions);
  // run the aggregator's aggregate function over each group
  return wrap(groupedCollection.transform(
    new DynamicAggregatorAggregate<Object, T, Object>(dynamicDriverContext)));
}
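// For reference, the shuffle performed above corresponds to the following raw Spark Streaming
// calls (a minimal sketch; "lines" and the key-extraction lambda are illustrative, not part of
// this class):
//
//   JavaPairDStream<String, String> keyed =
//     lines.mapToPair(line -> new Tuple2<>(line.split(",")[0], line));
//   JavaPairDStream<String, Iterable<String>> grouped = keyed.groupByKey(10);
//
// The Dynamic* functions fill the same roles, but look up the stage's aggregator plugin for
// each batch instead of using a fixed key function.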
@Override
public <U> SparkCollection<U> compute(final StageSpec stageSpec, SparkCompute<T, U> compute) throws Exception {
  final SparkCompute<T, U> wrappedCompute =
    new DynamicSparkCompute<>(new DynamicDriverContext(stageSpec, sec, new NoopStageStatisticsCollector()), compute);
  // initialize the compute plugin once, inside a transaction, before any micro-batch runs
  Transactionals.execute(sec, new TxRunnable() {
    @Override
    public void run(DatasetContext datasetContext) throws Exception {
      PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
      SparkExecutionPluginContext sparkPluginContext = new BasicSparkExecutionPluginContext(
        sec, JavaSparkContext.fromSparkContext(stream.context().sparkContext()),
        datasetContext, pipelineRuntime, stageSpec);
      wrappedCompute.initialize(sparkPluginContext);
    }
  }, Exception.class);
  return wrap(stream.transform(new ComputeTransformFunction<>(sec, stageSpec, wrappedCompute)));
}
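// Note: initialize() runs once on the driver here, before the streaming job starts;
// ComputeTransformFunction is then applied per micro-batch and is expected to delegate each
// batch RDD to the wrapped compute's transform().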
@Override
public SparkCollection<T> window(StageSpec stageSpec, Windower windower) {
  String stageName = stageSpec.getName();
  // count records flowing in, apply the window, then count (and optionally trace) records flowing out
  return wrap(stream
    .transform(new CountingTransformFunction<T>(stageName, sec.getMetrics(), "records.in", null))
    .window(Durations.seconds(windower.getWidth()), Durations.seconds(windower.getSlideInterval()))
    .transform(new CountingTransformFunction<T>(stageName, sec.getMetrics(), "records.out",
                                                sec.getDataTracer(stageName))));
}
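// In raw Spark Streaming terms, the core of this method is just (a sketch; the 30s width and
// 10s slide are illustrative values):
//
//   JavaDStream<T> windowed = events.window(Durations.seconds(30), Durations.seconds(10));
//
// The surrounding CountingTransformFunction transforms only add records.in/records.out metrics
// and data tracing around that window() call.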