public void await() throws InterruptedException {
    Preconditions.checkState(streamingContext != null);
    log.info("Spark Streaming is running");
    streamingContext.awaitTermination();
}
public void await() throws InterruptedException {
    JavaStreamingContext theStreamingContext;
    synchronized (this) {
        theStreamingContext = streamingContext;
        Preconditions.checkState(theStreamingContext != null);
    }
    log.info("Spark Streaming is running");
    theStreamingContext.awaitTermination(); // Can't block here while holding the lock
}
public static void main(String[] args) throws Exception {
    String zkQuorum = args[0];
    String group = args[1];
    SparkConf conf = new SparkConf().setAppName("KafkaInput");
    // Create a StreamingContext with a 1 second batch size
    JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));
    Map<String, Integer> topics = new HashMap<String, Integer>();
    topics.put("pandas", 1);
    JavaPairDStream<String, String> input = KafkaUtils.createStream(jssc, zkQuorum, group, topics);
    input.print();
    // Start our streaming context and wait for it to "finish"
    jssc.start();
    // Wait for 10 seconds then exit. To run forever, call without a timeout.
    jssc.awaitTermination(10000);
    // Stop the streaming context
    jssc.stop();
}
public static void main(String[] args) throws Exception {
    String master = args[0];
    JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
    // Create a StreamingContext with a 1 second batch size
    JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
    // Create a DStream from all the input on port 7777
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
    // Filter our DStream for lines with "error"
    JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
        public Boolean call(String line) {
            return line.contains("error");
        }
    });
    // Print out the lines with errors, which causes this DStream to be evaluated
    errorLines.print();
    // Start our streaming context and wait for it to "finish"
    jssc.start();
    // Wait for 10 seconds then exit. To run forever, call without a timeout.
    jssc.awaitTermination(10000);
    // Stop the streaming context
    jssc.stop();
}
jssc.awaitTermination(); // Wait for the computation to terminate
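The call above blocks indefinitely. When a bounded wait is preferable, the timeout variant can be polled instead; a minimal sketch, assuming Spark 1.3 or later (where JavaStreamingContext exposes awaitTerminationOrTimeout) and with the stop-flag check as a hypothetical placeholder:

// Poll in 10 second slices instead of blocking forever.
// awaitTerminationOrTimeout returns true once the context has actually stopped.
while (!jssc.awaitTerminationOrTimeout(10_000)) {
    // Still running; a real application could check an external stop flag here
    // and call jssc.stop(true, true) to shut down gracefully.
}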
/** Use this to block on {@link #close()}. */
public void awaitTermination() {
    if (started.get()) {
        jsc().awaitTermination();
    }
}
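The Javadoc above points at a close() method elsewhere in the class. A plausible counterpart, sketched under the assumption that started is an AtomicBoolean and jsc() returns the running JavaStreamingContext, would stop the context and thereby unblock any thread parked in awaitTermination():

// Hypothetical counterpart: stopping the context unblocks awaitTermination().
public void close() {
    if (started.getAndSet(false)) {
        jsc().stop(true, true); // also stop the SparkContext, shutting down gracefully
    }
}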
SparkConf conf = new SparkConf().setAppName("log jamming").setMaster("local[2]");
JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(1));
JavaReceiverInputDStream<byte[]> bytes = jsc.rawSocketStream("localhost", 9999);
// Have fun with the RDD; note that at least one output operation (e.g. print()
// or foreachRDD()) must be registered on the stream before start(), or Spark
// Streaming will refuse to start.
jsc.start();
jsc.awaitTermination();
public void run() {
    try {
        streamingContext.start();
        if (deploymentConfig.isRunLocal()) {
            streamingContext.awaitTermination();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Print the status text of some of the tweets.
 */
public void tweetPrint() {
    JavaDStream<Status> tweetsStream = loadData();

    // Print the status text here
    // TODO write code here
    // Hint: use the print method
    JavaDStream<String> statusText = null;

    // Start the context
    jssc.start();
    jssc.awaitTermination();
}
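One way the TODO above could be filled in, shown only as a hypothetical solution; it assumes twitter4j.Status and that print() is the intended output operation:

// Map each tweet to its status text and print a sample of every batch
JavaDStream<String> statusText = tweetsStream.map(Status::getText);
statusText.print();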
/**
 * Find the 10 most popular hashtags of the last minute.
 */
public String top10Hashtag() {
    JavaDStream<Status> tweetsStream = loadData();

    // First, find all hashtags
    // TODO write code here
    JavaDStream<String> hashtags = null;

    // Make a "word count" on the hashtags
    // Hint: define a 1 second window (in milliseconds) for the reduce step.
    // TODO write code here
    JavaPairDStream<Integer, String> hashtagMention = null;

    // Then sort the hashtags
    // Hint: look at the transformToPair method
    // TODO write code here
    JavaPairDStream<Integer, String> sortedHashtag = null;

    // ...and return the 10 most popular
    // Hint: loop on the RDD and take the 10 most popular
    // TODO write code here
    List<Tuple2<Integer, String>> mostPopulars = new ArrayList<>();

    // We need to tell the context to start running the computation we have set up;
    // it won't work if you don't add this!
    jssc.start();
    jssc.awaitTermination();

    return "Most popular hashtags: " + mostPopulars;
}
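A hypothetical fill-in for the TODOs above, for orientation only: the variable names mirror the skeleton, the 60-second window is an assumption drawn from the Javadoc, and java.util.Arrays, scala.Tuple2 and Spark 2.x lambda signatures are taken for granted.

// 1. Extract hashtags from the tweet text
JavaDStream<String> hashtags = tweetsStream
    .flatMap(status -> Arrays.asList(status.getText().split(" ")).iterator())
    .filter(word -> word.startsWith("#"));

// 2. "Word count" per hashtag over the last minute
JavaPairDStream<Integer, String> hashtagMention = hashtags
    .mapToPair(tag -> new Tuple2<>(tag, 1))
    .reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(60))
    .mapToPair(Tuple2::swap);

// 3. Sort by count, descending, using a per-RDD transformation
JavaPairDStream<Integer, String> sortedHashtag = hashtagMention
    .transformToPair(rdd -> rdd.sortByKey(false));

// 4. Collect the 10 most popular of each batch on the driver
List<Tuple2<Integer, String>> mostPopulars = new ArrayList<>();
sortedHashtag.foreachRDD(rdd -> mostPopulars.addAll(rdd.take(10)));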
public static void main(String[] args) {
    SparkConf conf = new SparkConf()
        .setAppName("kafka-sandbox")
        .setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

    Set<String> topics = Collections.singleton("mytopic");
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", "localhost:9092");

    JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
        String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

    directKafkaStream.foreachRDD(rdd -> {
        System.out.println("--- New RDD with " + rdd.partitions().size()
            + " partitions and " + rdd.count() + " records");
        rdd.foreach(record -> System.out.println(record._2));
    });

    ssc.start();
    ssc.awaitTermination();
}
streamingContext.awaitTermination();
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: JavaFlumeEventCount <host> <port>");
        System.exit(1);
    }

    String host = args[0];
    int port = Integer.parseInt(args[1]);

    Duration batchInterval = new Duration(2000);
    SparkConf sparkConf = new SparkConf().setAppName("JavaFlumeEventCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, batchInterval);
    JavaReceiverInputDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(ssc, host, port);

    flumeStream.count();
    flumeStream.count().map(in -> "Received " + in + " flume events.").print();

    ssc.start();
    ssc.awaitTermination();
}
public static void main(String[] args) {
    SparkConf conf = new SparkConf()
        .setAppName("kafka-sandbox")
        .setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

    Set<String> topics = Collections.singleton("mytopic");
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", "localhost:9092");

    JavaPairInputDStream<String, byte[]> directKafkaStream = KafkaUtils.createDirectStream(ssc,
        String.class, byte[].class, StringDecoder.class, DefaultDecoder.class, kafkaParams, topics);

    directKafkaStream
        .map(message -> recordInjection.invert(message._2).get())
        .foreachRDD(rdd -> {
            rdd.foreach(record -> {
                System.out.println("str1= " + record.get("str1")
                    + ", str2= " + record.get("str2")
                    + ", int1=" + record.get("int1"));
            });
        });

    ssc.start();
    ssc.awaitTermination();
}
context.awaitTermination();
private void start() {
    // Create a local StreamingContext with two working threads and a batch
    // interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
private void start() {
    // Create a local StreamingContext with two working threads and a batch
    // interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]")
        .setAppName("Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    // Turn each micro-batch into JavaRDD<Row>
    msgDataStream.foreachRDD(new RowProcessor());

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
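RowProcessor itself is not part of this snippet. One plausible shape, sketched purely as an assumption that it wraps each line of the micro-batch in a single-column Row, could look like this:

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

// Hypothetical implementation: invoked once per micro-batch by foreachRDD().
public class RowProcessor implements VoidFunction<JavaRDD<String>> {
    private static final long serialVersionUID = 1L;

    @Override
    public void call(JavaRDD<String> rdd) {
        // Wrap every text line of the batch in a single-column Row
        JavaRDD<Row> rowRdd = rdd.map(line -> RowFactory.create(line));
        System.out.println("Batch converted to " + rowRdd.count() + " row(s)");
    }
}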
public static void main(String[] args) throws DataIngestException {
    CmdLineParser cmdLineParser = new CmdLineParser();
    final AppArgs appArgs = cmdLineParser.validateArgs(args);
    System.setProperty("HADOOP_USER_NAME", appArgs.getProperty(DiPConfiguration.HADOOP_USER_NAME));

    SparkConf conf = new SparkConf().setAppName("SparkTwitterStreaming").setMaster("local[*]");
    try (JavaStreamingContext jsc = new JavaStreamingContext(new JavaSparkContext(conf), new Duration(1000))) {
        JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(jsc,
            appArgs.getProperty(DiPConfiguration.ZK_HOST) + ":" + appArgs.getProperty(DiPConfiguration.ZK_PORT),
            "spark-stream", getKafkaTopics(appArgs));

        JavaDStream<Object[]> twitterStreams = stream
            .map(tuple -> FlatJsonConverter.convertToValuesArray(tuple._2))
            .cache();

        SparkHdfsWriter.write(twitterStreams, appArgs);
        new SparkHBaseWriter(jsc.sparkContext(), appArgs).write(twitterStreams);

        SparkJdbcSourceWriter jdbcSourceWriter = new SparkJdbcSourceWriter(new SQLContext(jsc.sparkContext()), appArgs);
        new TopNLocationByTweets(jdbcSourceWriter, Integer.valueOf(appArgs.getProperty("topN"))).compute(twitterStreams);
        new TopNUsersWithMaxFollowers(jdbcSourceWriter, Integer.valueOf(appArgs.getProperty("topN"))).compute(twitterStreams);

        jsc.start();
        jsc.awaitTermination();
    }
}