@Override
public void publishAlerts(final StageSpec stageSpec, StageStatisticsCollector collector) throws Exception {
  Compat.foreachRDD((JavaDStream<Alert>) stream, new StreamingAlertPublishFunction(sec, stageSpec));
}
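// Compat.foreachRDD is a thin wrapper over Spark's DStream.foreachRDD, so StreamingAlertPublishFunction
// runs once per micro-batch; each batch's alerts are published as soon as that batch is processed.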
@Override
public void run() {
  // cache since the streaming sink function will check if the rdd is empty, which can cause recomputation
  // and confusing metrics if it's not cached.
  Compat.foreachRDD(stream.cache(), new StreamingBatchSinkFunction<>(sinkFunction, sec, stageSpec));
}
};
@Override
public void run() {
  // cache for the same reason as the batch sink: the sink function checks whether each rdd is empty,
  // and an uncached stream would be recomputed, producing confusing metrics.
  Compat.foreachRDD(stream.cache(), new StreamingSparkSinkFunction<T>(sec, stageSpec));
}
};
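// Both sink flavors follow the same pattern: the work is wrapped in a Runnable, and the stream is
// cache()d before Compat.foreachRDD because the sink functions test each rdd for emptiness; without
// caching, that emptiness check would recompute the rdd and inflate the stage metrics.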
if (dataTracer.isEnabled()) {
  // limit the number of records emitted during preview runs
  javaDStream = javaDStream.transform(new LimitingFunction<>(spec.getNumOfRecordsPreview()));
}
JavaDStream<RecordInfo<Object>> outputDStream = javaDStream
  .transform(new CountingTransformFunction<>(stageSpec.getName(), sec.getMetrics(), "records.out", dataTracer))
  .map(new WrapOutputTransformFunction<>(stageSpec.getName()));
return new DStreamCollection<>(sec, outputDStream);
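// The chain above instruments the source output: LimitingFunction caps how many records each batch
// emits during preview runs, CountingTransformFunction emits the "records.out" metric for the stage,
// and WrapOutputTransformFunction wraps each record in a RecordInfo tagged with the stage name so that
// downstream stages can tell which stage produced it.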
@Override
public SparkCollection<RecordInfo<Object>> aggregate(StageSpec stageSpec, @Nullable Integer partitions,
                                                     StageStatisticsCollector collector) {
  DynamicDriverContext dynamicDriverContext = new DynamicDriverContext(stageSpec, sec, collector);
  JavaPairDStream<Object, T> keyedCollection =
    stream.transformToPair(new DynamicAggregatorGroupBy<Object, T>(dynamicDriverContext));
  JavaPairDStream<Object, Iterable<T>> groupedCollection = partitions == null
    ? keyedCollection.groupByKey()
    : keyedCollection.groupByKey(partitions);
  return wrap(groupedCollection.transform(new DynamicAggregatorAggregate<Object, T, Object>(dynamicDriverContext)));
}
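// Aggregation happens in two phases: DynamicAggregatorGroupBy extracts a group key for each record,
// Spark's groupByKey gathers the values per key (using the explicit partition count when one is
// configured), and DynamicAggregatorAggregate then applies the aggregator to each group.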
@Override
public <U> SparkCollection<U> compute(final StageSpec stageSpec, SparkCompute<T, U> compute) throws Exception {
  final SparkCompute<T, U> wrappedCompute =
    new DynamicSparkCompute<>(new DynamicDriverContext(stageSpec, sec, new NoopStageStatisticsCollector()), compute);
  Transactionals.execute(sec, new TxRunnable() {
    @Override
    public void run(DatasetContext datasetContext) throws Exception {
      PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
      SparkExecutionPluginContext sparkPluginContext =
        new BasicSparkExecutionPluginContext(sec, JavaSparkContext.fromSparkContext(stream.context().sparkContext()),
                                             datasetContext, pipelineRuntime, stageSpec);
      wrappedCompute.initialize(sparkPluginContext);
    }
  }, Exception.class);
  return wrap(stream.transform(new ComputeTransformFunction<>(sec, stageSpec, wrappedCompute)));
}
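// initialize() is called eagerly inside Transactionals.execute so that any dataset access performed by
// the compute plugin during initialization happens within a transaction; the per-batch work is then
// done by ComputeTransformFunction over the already-initialized, wrapped compute.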
@Override
public SparkCollection<T> window(StageSpec stageSpec, Windower windower) {
  String stageName = stageSpec.getName();
  return wrap(stream.transform(new CountingTransformFunction<T>(stageName, sec.getMetrics(), "records.in", null))
                .window(Durations.seconds(windower.getWidth()), Durations.seconds(windower.getSlideInterval()))
                .transform(new CountingTransformFunction<T>(stageName, sec.getMetrics(), "records.out",
                                                            sec.getDataTracer(stageName))));
}
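// "records.in" is counted before windowing and "records.out" after, so with overlapping windows a
// record is counted once going in but can appear in several output windows. For example (hypothetical
// values), width=60 and slideInterval=30 produce 60-second windows every 30 seconds, and each record
// lands in two consecutive windows.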
@Override
public SparkCollection<RecordInfo<Object>> multiOutputTransform(StageSpec stageSpec,
                                                                StageStatisticsCollector collector) {
  return wrap(stream.transform(new DynamicTransform<T>(new DynamicDriverContext(stageSpec, sec, collector), true)));
}
@Override
public JavaRDD<U> transform(SparkExecutionPluginContext context, JavaRDD<T> input) throws Exception {
  lazyInit(JavaSparkContext.fromSparkContext(input.context()));
  return delegate.transform(context, input);
}
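// lazyInit defers initializing the delegate compute until the first batch arrives, using the
// SparkContext obtained from the incoming rdd; every subsequent call just forwards to the delegate.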
@Override
protected SparkPairCollection<Object, Object> addJoinKey(StageSpec stageSpec, String inputStageName,
                                                         SparkCollection<Object> inputCollection,
                                                         StageStatisticsCollector collector) throws Exception {
  DynamicDriverContext dynamicDriverContext = new DynamicDriverContext(stageSpec, sec, collector);
  JavaDStream<Object> dStream = inputCollection.getUnderlying();
  JavaPairDStream<Object, Object> result =
    dStream.transformToPair(new DynamicJoinOn<>(dynamicDriverContext, inputStageName));
  return new PairDStreamCollection<>(sec, result);
}
@Override
protected SparkCollection<Object> mergeJoinResults(StageSpec stageSpec,
                                                   SparkPairCollection<Object, List<JoinElement<Object>>> joinedInputs,
                                                   StageStatisticsCollector collector) throws Exception {
  DynamicDriverContext dynamicDriverContext = new DynamicDriverContext(stageSpec, sec, collector);
  JavaPairDStream<Object, List<JoinElement<Object>>> pairDStream = joinedInputs.getUnderlying();
  JavaDStream<Object> result = pairDStream.transform(new DynamicJoinMerge<>(dynamicDriverContext));
  return new DStreamCollection<>(sec, result);
}
}
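// Join flow: addJoinKey turns each input into a (joinKey, record) pair stream via DynamicJoinOn; once
// the inputs have been joined on those keys, mergeJoinResults uses DynamicJoinMerge to collapse each
// key's List<JoinElement> back into the joined output records.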
@Override
public SparkCollection<RecordInfo<Object>> transform(StageSpec stageSpec, StageStatisticsCollector collector) {
  return wrap(stream.transform(new DynamicTransform<T>(new DynamicDriverContext(stageSpec, sec, collector), false)));
}
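// This mirrors multiOutputTransform above: the boolean passed to DynamicTransform distinguishes a
// single-output transform (false) from a multi-output transform (true).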