/**
 * Returns the RDD for the next batch: the head of the queue if one is available,
 * otherwise an empty RDD built from the streaming context.
 *
 * <p>Polls exactly once and null-checks the result instead of the original
 * size-check-then-poll sequence, which is a check-then-act race: if another
 * consumer drains the queue between {@code size() > 0} and {@code poll()},
 * {@code poll()} returns {@code null} and the {@code .rdd()} call throws an NPE.
 */
private RDD<WindowedValue<T>> generateRdd() {
  final JavaRDD<WindowedValue<T>> next = rdds.poll();
  return next != null
      ? next.rdd()
      : ssc().sparkContext().emptyRDD(JavaSparkContext$.MODULE$.<WindowedValue<T>>fakeClassTag());
}
/**
 * Creates a {@code SourceDStream} over the given unbounded source.
 *
 * @param ssc the streaming context this DStream is registered with.
 * @param unboundedSource the Beam {@link UnboundedSource} to read from.
 * @param options serializable pipeline options.
 * @param boundMaxRecords upper bound on records read per micro-batch.
 * @throws RuntimeException if splitting the micro-batch source fails.
 */
SourceDStream(
    StreamingContext ssc,
    UnboundedSource<T, CheckpointMarkT> unboundedSource,
    SerializablePipelineOptions options,
    Long boundMaxRecords) {
  super(ssc, JavaSparkContext$.MODULE$.fakeClassTag());
  this.unboundedSource = unboundedSource;
  this.options = options;
  this.boundMaxRecords = boundMaxRecords;

  final SparkPipelineOptions pipelineOptions = options.get().as(SparkPipelineOptions.class);
  // Reader cache expiration interval: 150% of the batch interval, i.e. an extra
  // 50% on top of the batch interval to accommodate latency.
  this.readerCacheInterval = 1.5 * pipelineOptions.getBatchIntervalMillis();
  this.boundReadDuration =
      boundReadDuration(
          pipelineOptions.getReadTimePercentage(), pipelineOptions.getMinReadTimeMillis());

  // Capture the initial parallelism exactly once; it must be a positive partition count.
  this.initialParallelism = ssc().sparkContext().defaultParallelism();
  checkArgument(this.initialParallelism > 0, "Number of partitions must be greater than zero.");

  try {
    this.numPartitions = createMicrobatchSource().split(pipelineOptions).size();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
JavaSparkContext$.MODULE$.fakeClassTag(), JavaSparkContext$.MODULE$.fakeClassTag()) .filter(
JavaSparkContext$.MODULE$.fakeClassTag(), JavaSparkContext$.MODULE$.fakeClassTag(), null);
JavaPairInputDStream$.MODULE$.fromInputDStream( sourceDStream, JavaSparkContext$.MODULE$.fakeClassTag(), JavaSparkContext$.MODULE$.fakeClassTag());
pairDStream.defaultPartitioner(pairDStream.defaultPartitioner$default$1()), true, JavaSparkContext$.MODULE$.fakeClassTag());
public Bounded( SparkContext sc, BoundedSource<T> source, SerializablePipelineOptions options, String stepName) { super(sc, NIL, JavaSparkContext$.MODULE$.fakeClassTag()); this.source = source; this.options = options; // the input parallelism is determined by Spark's scheduler backend. // when running on YARN/SparkDeploy it's the result of max(totalCores, 2). // when running on Mesos it's 8. // when running local it's the total number of cores (local = 1, local[N] = N, // local[*] = estimation of the machine's cores). // ** the configuration "spark.default.parallelism" takes precedence over all of the above ** this.numPartitions = sc.defaultParallelism(); checkArgument(this.numPartitions > 0, "Number of partitions must be greater than zero."); this.bundleSize = options.get().as(SparkPipelineOptions.class).getBundleSize(); this.stepName = stepName; this.metricsAccum = MetricsAccumulator.getInstance(); }
/**
 * Creates a DStream over the read {@code Metadata} produced by {@code parent}.
 *
 * @param parent the metadata-producing parent stream; supplies the streaming context.
 * @param inputDStreamId id of the input DStream being reported on.
 * @param sourceName name of the source — presumably used when reporting; confirm against callers.
 * @param stepName name of the pipeline step.
 */
ReadReportDStream(
    DStream<Metadata> parent, int inputDStreamId, String sourceName, String stepName) {
  super(parent.ssc(), JavaSparkContext$.MODULE$.fakeClassTag());
  this.parent = parent;
  this.stepName = stepName;
  this.sourceName = sourceName;
  this.inputDStreamId = inputDStreamId;
}
/**
 * Creates a DStream backed by a queue of pre-computed RDDs.
 *
 * @param rdds the queue of RDDs to consume, one poll per generated batch.
 * @param batchDuration the configured batch duration — units not visible here;
 *     presumably milliseconds, confirm against the caller.
 * @param ssc the streaming context this DStream is registered with.
 */
public WatermarkSyncedDStream(
    final Queue<JavaRDD<WindowedValue<T>>> rdds,
    final Long batchDuration,
    final StreamingContext ssc) {
  super(ssc, JavaSparkContext$.MODULE$.fakeClassTag());
  this.batchDuration = batchDuration;
  this.rdds = rdds;
}
/**
 * Creates an RDD that reads from an unbounded source via a micro-batch view of it.
 *
 * @param sc the Spark context.
 * @param options serializable pipeline options.
 * @param microbatchSource the micro-batch source wrapping the unbounded source.
 * @param initialNumPartitions number of partitions for the hash partitioner.
 */
public Unbounded(
    SparkContext sc,
    SerializablePipelineOptions options,
    MicrobatchSource<T, CheckpointMarkT> microbatchSource,
    int initialNumPartitions) {
  super(sc, NIL, JavaSparkContext$.MODULE$.fakeClassTag());
  this.microbatchSource = microbatchSource;
  this.options = options;
  this.partitioner = new HashPartitioner(initialNumPartitions);
}