BatchUpdateFunction(Config config,
                    Class<K> keyClass,
                    Class<M> messageClass,
                    Class<? extends Writable> keyWritableClass,
                    Class<? extends Writable> messageWritableClass,
                    String dataDirString,
                    String modelDirString,
                    BatchLayerUpdate<K,M,U> updateInstance,
                    JavaStreamingContext streamingContext) {
  this.keyClass = keyClass;
  this.messageClass = messageClass;
  this.keyWritableClass = keyWritableClass;
  this.messageWritableClass = messageWritableClass;
  this.dataDirString = dataDirString;
  this.modelDirString = modelDirString;
  this.updateBroker = ConfigUtils.getOptionalString(config, "oryx.update-topic.broker");
  this.updateTopic = ConfigUtils.getOptionalString(config, "oryx.update-topic.message.topic");
  this.updateInstance = updateInstance;
  this.sparkContext = streamingContext.sparkContext();
}
Configuration hadoopConf = streamingContext.sparkContext().hadoopConfiguration();
new Thread(LoggingCallable.log(() -> {
  try {
JavaSparkContext sparkContext = streamingContext.sparkContext();
Configuration hadoopConf = sparkContext.hadoopConfiguration();
StreamingMode(final Future<?> pipelineExecution,
              final JavaStreamingContext javaStreamingContext) {
  super(pipelineExecution, javaStreamingContext.sparkContext());
  this.javaStreamingContext = javaStreamingContext;
}
List<Tuple2<Tuple3<String, Long, Object>, Tuple2<Long, Object[]>>> list = Arrays.asList();
// Arbitrary batch duration, for the sake of the example.
JavaStreamingContext jssc = new JavaStreamingContext("local[*]", "TestApp", new Duration(4000));
Queue<JavaRDD<Tuple2<Tuple3<String, Long, Object>, Tuple2<Long, Object[]>>>> rddQueue = new LinkedList<>();
rddQueue.add(jssc.sparkContext().parallelize(list));
JavaDStream<Tuple2<Tuple3<String, Long, Object>, Tuple2<Long, Object[]>>> dStream = jssc.queueStream(rddQueue);
// Create the context
JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

// Create the queue through which RDDs can be pushed to a QueueInputDStream
Queue<JavaRDD<Integer>> rddQueue = new LinkedList<JavaRDD<Integer>>();

// Create and push some RDDs into the queue
List<Integer> list = Lists.newArrayList();
for (int i = 0; i < 1000; i++) {
  list.add(i);
}
for (int i = 0; i < 30; i++) {
  rddQueue.add(ssc.sparkContext().parallelize(list));
}

// Create the QueueInputDStream and use it to do some processing
JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue);
JavaPairDStream<Integer, Integer> mappedStream = inputStream.mapToPair(
    new PairFunction<Integer, Integer, Integer>() {
      @Override
      public Tuple2<Integer, Integer> call(Integer i) {
        return new Tuple2<Integer, Integer>(i % 10, 1);
      }
    });
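To finish a pipeline like this, the mapped stream is typically reduced by key and an output operation registered before the context is started. A minimal sketch of that continuation, assuming the mappedStream and ssc variables from the snippet above:

// Count occurrences per key and print each batch; an output operation is required
// before ssc.start(), otherwise the streaming job has nothing to execute.
JavaPairDStream<Integer, Integer> reducedStream = mappedStream.reduceByKey(
    (Integer a, Integer b) -> a + b);
reducedStream.print();
ssc.start();
ssc.awaitTermination();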
private void checkpoint(JavaStreamingContext jssc, CheckpointDir checkpointDir) {
  Path rootCheckpointPath = checkpointDir.getRootCheckpointDir();
  Path sparkCheckpointPath = checkpointDir.getSparkCheckpointDir();
  Path beamCheckpointPath = checkpointDir.getBeamCheckpointDir();

  try {
    FileSystem fileSystem =
        rootCheckpointPath.getFileSystem(jssc.sparkContext().hadoopConfiguration());
    if (!fileSystem.exists(rootCheckpointPath)) {
      fileSystem.mkdirs(rootCheckpointPath);
    }
    if (!fileSystem.exists(sparkCheckpointPath)) {
      fileSystem.mkdirs(sparkCheckpointPath);
    }
    if (!fileSystem.exists(beamCheckpointPath)) {
      fileSystem.mkdirs(beamCheckpointPath);
    }
  } catch (IOException e) {
    throw new RuntimeException("Failed to create checkpoint dir", e);
  }

  jssc.checkpoint(sparkCheckpointPath.toString());
}
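Once a checkpoint directory has been registered this way, the driver can recover a context from it after a restart via the standard JavaStreamingContext.getOrCreate API. A minimal sketch under stated assumptions: checkpointPath and sparkConf are illustrative names, not part of the snippet above.

// Assumption: checkpointPath points at a durable directory and sparkConf is defined elsewhere.
String checkpointPath = "/tmp/spark-checkpoints";
JavaStreamingContext jssc = JavaStreamingContext.getOrCreate(checkpointPath, () -> {
  JavaStreamingContext created = new JavaStreamingContext(sparkConf, new Duration(1000));
  created.checkpoint(checkpointPath);
  // Define the DStream graph here before returning the new context.
  return created;
});
jssc.start();
jssc.awaitTermination();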
@Test
public void testInitialization() {
  Assert.assertNotNull(ssc.sparkContext());
}
final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);
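Broadcast variables created from the streaming context's JavaSparkContext are read back on the executors with value(). A minimal sketch of how these two broadcasts might be consumed inside a DStream action; the records stream is an assumption made for illustration, not part of the snippet above.

// Assumption: 'records' is a JavaDStream<String> defined elsewhere in the job.
records.foreachRDD(rdd ->
    rdd.foreachPartition(partition -> {
      String table = broadcastTableName.value();            // resolved on the executor
      String columnFamily = broadcastColumnFamily.value();
      while (partition.hasNext()) {
        String record = partition.next();
        // Write 'record' into 'table' / 'columnFamily' using the real sink client here.
      }
    }));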
Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues = jssc.sparkContext().broadcast(new Tuple3<>(poiData,"Route-37","Truck"));
new Tuple2<>("new york", 2)); JavaRDD<Tuple2<String, Integer>> tmpRDD = ssc.sparkContext().parallelize(initial); JavaPairRDD<String, Integer> initialRDD = JavaPairRDD.fromJavaRDD(tmpRDD);
new Tuple2<>("new york", 2)); JavaRDD<Tuple2<String, Integer>> tmpRDD = ssc.sparkContext().parallelize(initial); JavaPairRDD<String, Integer> initialRDD = JavaPairRDD.fromJavaRDD(tmpRDD);
public static void main(String[] args) throws DataIngestException {
  CmdLineParser cmdLineParser = new CmdLineParser();
  final AppArgs appArgs = cmdLineParser.validateArgs(args);
  System.setProperty("HADOOP_USER_NAME", appArgs.getProperty(DiPConfiguration.HADOOP_USER_NAME));
  SparkConf conf = new SparkConf().setAppName("SparkTwitterStreaming").setMaster("local[*]");
  try (JavaStreamingContext jsc =
           new JavaStreamingContext(new JavaSparkContext(conf), new Duration(1000))) {
    JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(jsc,
        appArgs.getProperty(DiPConfiguration.ZK_HOST) + ":" + appArgs.getProperty(DiPConfiguration.ZK_PORT),
        "spark-stream", getKafkaTopics(appArgs));
    JavaDStream<Object[]> twitterStreams = stream
        .map(tuple -> FlatJsonConverter.convertToValuesArray(tuple._2))
        .cache();
    SparkHdfsWriter.write(twitterStreams, appArgs);
    new SparkHBaseWriter(jsc.sparkContext(), appArgs).write(twitterStreams);
    SparkJdbcSourceWriter jdbcSourceWriter =
        new SparkJdbcSourceWriter(new SQLContext(jsc.sparkContext()), appArgs);
    new TopNLocationByTweets(jdbcSourceWriter, Integer.valueOf(appArgs.getProperty("topN")))
        .compute(twitterStreams);
    new TopNUsersWithMaxFollowers(jdbcSourceWriter, Integer.valueOf(appArgs.getProperty("topN")))
        .compute(twitterStreams);
    jsc.start();
    jsc.awaitTermination();
  }
}
@SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator(); List<List<Integer>> inputData = Arrays.asList( Arrays.asList(1,1,1), Arrays.asList(1,1,1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output stream.foreachRDD(rdd -> { accumRdd.add(1); rdd.foreach(i -> accumEle.add(1)); }); // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java stream.foreachRDD((rdd, time) -> {}); JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(2, accumRdd.value().intValue()); Assert.assertEquals(6, accumEle.value().intValue()); }
@SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator(); List<List<Integer>> inputData = Arrays.asList( Arrays.asList(1,1,1), Arrays.asList(1,1,1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output stream.foreachRDD(rdd -> { accumRdd.add(1); rdd.foreach(i -> accumEle.add(1)); }); // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java stream.foreachRDD((rdd, time) -> {}); JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(2, accumRdd.value().intValue()); Assert.assertEquals(6, accumEle.value().intValue()); }
initAccumulators(mOptions, jssc.sparkContext());
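initAccumulators and mOptions belong to the surrounding project and are not shown here. A plausible sketch, purely an assumption about what such an initializer could do, using only the standard named-accumulator API on the SparkContext obtained from jssc.sparkContext(); the parameter and accumulator names are illustrative.

// Hypothetical helper: registers named accumulators on the driver so that
// executors can update them during each micro-batch.
private void initAccumulators(Object options, JavaSparkContext sparkContext) {
  // 'recordsProcessed' and 'recordsFailed' are illustrative names, not from the original code.
  LongAccumulator recordsProcessed = sparkContext.sc().longAccumulator("recordsProcessed");
  LongAccumulator recordsFailed = sparkContext.sc().longAccumulator("recordsFailed");
  // A real implementation would keep references to these (e.g. in fields) for later reporting.
}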