private Duration boundReadDuration(double readTimePercentage, long minReadTimeMillis) {
  long batchDurationMillis = ssc().graph().batchDuration().milliseconds();
  Duration proportionalDuration =
      new Duration(Math.round(batchDurationMillis * readTimePercentage));
  Duration lowerBoundDuration = new Duration(minReadTimeMillis);
  // Read for the configured fraction of the batch, but never less than the minimum.
  Duration readDuration =
      proportionalDuration.isLongerThan(lowerBoundDuration)
          ? proportionalDuration
          : lowerBoundDuration;
  LOG.info("Read duration set to: " + readDuration);
  return readDuration;
}
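For reference, a self-contained sketch of the same lower-bounding rule (the larger of the proportional read time and the configured minimum), using Spark's org.apache.spark.streaming.Duration outside of any streaming context; the class name and the concrete numbers are illustrative, not from the source:

import org.apache.spark.streaming.Duration;

public class BoundReadDurationSketch {
  // Standalone version of the bound: max(batchDuration * readTimePercentage, minReadTime).
  static Duration boundReadDuration(
      long batchDurationMillis, double readTimePercentage, long minReadTimeMillis) {
    Duration proportional = new Duration(Math.round(batchDurationMillis * readTimePercentage));
    Duration lowerBound = new Duration(minReadTimeMillis);
    return proportional.isLongerThan(lowerBound) ? proportional : lowerBound;
  }

  public static void main(String[] args) {
    // With a 1000 ms batch and 10% read time the proportional duration is 100 ms,
    // so the 200 ms minimum wins; at 50% the proportional 500 ms wins.
    System.out.println(boundReadDuration(1000, 0.1, 200)); // 200 ms
    System.out.println(boundReadDuration(1000, 0.5, 200)); // 500 ms
  }
}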
final long batchDuration = jsc.ssc().graph().batchDuration().milliseconds();
ReceiverStreamListener listener = new ReceiverStreamListener(kafkaConfig, batchDuration);
private scala.Option<Long> rateControlledMaxRecords() {
  final scala.Option<RateController> rateControllerOption = rateController();
  final scala.Option<Long> rateLimitPerBatch;
  final long rateLimitPerSec;
  // Note: rateLimitPerSec is assigned inside the condition; a cap is applied only
  // when a rate controller exists and has published a positive rate.
  if (rateControllerOption.isDefined()
      && ((rateLimitPerSec = rateControllerOption.get().getLatestRate()) > 0)) {
    final long batchDurationSec = ssc().graph().batchDuration().milliseconds() / 1000;
    rateLimitPerBatch = scala.Option.apply(rateLimitPerSec * batchDurationSec);
  } else {
    rateLimitPerBatch = scala.Option.empty();
  }
  return rateLimitPerBatch;
}
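The cap is simply the latest published rate times the batch length in whole seconds (note the truncating division: a sub-second batch would yield a cap of 0). A standalone sketch of that conversion, with illustrative names and values:

import scala.Option;

public class RateLimitSketch {
  // Standalone version of the per-batch cap: ratePerSec * batchSeconds,
  // or empty when no positive rate has been published.
  static Option<Long> maxRecordsPerBatch(long latestRatePerSec, long batchDurationMillis) {
    if (latestRatePerSec > 0) {
      return Option.apply(latestRatePerSec * (batchDurationMillis / 1000));
    }
    return Option.empty();
  }

  public static void main(String[] args) {
    // A published rate of 1000 records/sec with a 2000 ms batch caps the read at 2000 records.
    System.out.println(maxRecordsPerBatch(1000, 2000)); // Some(2000)
    System.out.println(maxRecordsPerBatch(0, 2000));    // None
  }
}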
@Override
public JavaStreamingContext call() throws Exception {
  LOG.info("Creating a new Spark Streaming Context");
  // Validate unbounded read properties.
  checkArgument(
      options.getMinReadTimeMillis() < options.getBatchIntervalMillis(),
      "Minimum read time has to be less than batch time.");
  checkArgument(
      options.getReadTimePercentage() > 0 && options.getReadTimePercentage() < 1,
      "Read time percentage is bound to (0, 1).");

  SparkPipelineTranslator translator =
      new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
  Duration batchDuration = new Duration(options.getBatchIntervalMillis());
  LOG.info("Setting Spark streaming batchDuration to {} msec", batchDuration.milliseconds());

  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);

  // We must first init accumulators since translators expect them to be instantiated.
  SparkRunner.initAccumulators(options, jsc);
  // No need to create a MetricsPusher instance here because it is called in SparkRunner.run().
  EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options, jssc);

  // Update cache candidates.
  SparkRunner.updateCacheCandidates(pipeline, translator, ctxt);

  pipeline.traverseTopologically(new SparkRunner.Evaluator(translator, ctxt));
  ctxt.computeOutputs();

  checkpoint(jssc, checkpointDir);
  return jssc;
}
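This call() has the shape Spark expects from a context factory, so it can back checkpoint recovery via JavaStreamingContext.getOrCreate, which only invokes the factory when no checkpoint exists yet. A minimal, self-contained sketch of that wiring (the path, master, and socket source are hypothetical, not from the source):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.Function0;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class GetOrCreateSketch {
  public static void main(String[] args) throws Exception {
    final String checkpointDir = "/tmp/streaming-checkpoint"; // hypothetical path
    // Invoked only when no checkpoint exists at checkpointDir; on restart the
    // context (DStream graph, pending batches) is restored from the checkpoint instead.
    Function0<JavaStreamingContext> factory = () -> {
      SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("getOrCreate-sketch");
      JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));
      jssc.socketTextStream("localhost", 9999).print(); // some output op must be registered
      jssc.checkpoint(checkpointDir);
      return jssc;
    };
    JavaStreamingContext jssc = JavaStreamingContext.getOrCreate(checkpointDir, factory);
    jssc.start();
    jssc.awaitTermination();
  }
}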