public static void main(String[] args) throws Exception {
  String zkQuorum = args[0];
  String group = args[1];
  SparkConf conf = new SparkConf().setAppName("KafkaInput");
  // Create a StreamingContext with a 1 second batch size
  JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));
  Map<String, Integer> topics = new HashMap<String, Integer>();
  topics.put("pandas", 1);
  JavaPairDStream<String, String> input =
      KafkaUtils.createStream(jssc, zkQuorum, group, topics);
  input.print();
  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for 10 seconds then exit. To run forever, call without a timeout
  jssc.awaitTermination(10000);
  // Stop the streaming context
  jssc.stop();
}
streamingContext.start();
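The bare start() call above only kicks off the computation; a complete program also registers at least one output operation before starting, then blocks with awaitTermination() (or a timed variant) and finally stops the context. A minimal, self-contained sketch of that lifecycle, assuming a text server on localhost:9999 purely for illustration (class name and port are hypothetical):

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class StartStopLifecycle {
  public static void main(String[] args) throws InterruptedException {
    // Local context with two threads: one for the receiver, one for processing
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("StartStopLifecycle");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));

    // Hypothetical source: any text server listening on localhost:9999
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);
    lines.print(); // an output operation is required before start()

    jssc.start();                          // begin receiving and processing
    jssc.awaitTerminationOrTimeout(10000); // block for up to 10 seconds
    jssc.stop();                           // stop the streaming context (and the SparkContext)
  }
}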
public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
  // Create a StreamingContext with a 1 second batch size
  JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
  // Create a DStream from all the input on port 7777
  JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
  // Filter our DStream for lines with "error"
  JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
    public Boolean call(String line) {
      return line.contains("error");
    }
  });
  // Print out the lines with errors, which causes this DStream to be evaluated
  errorLines.print();
  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for 10 seconds then exit. To run forever, call without a timeout
  jssc.awaitTermination(10000);
  // Stop the streaming context
  jssc.stop();
}
jssc.start(); // Start the computation
SparkConf conf = new SparkConf().setAppName("log jamming").setMaster("local[2]");
JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(1));
JavaReceiverInputDStream<byte[]> bytes = jsc.rawSocketStream("localhost", 9999);
// Have fun with the RDD
jsc.start();
jsc.awaitTermination();
public void run() {
  try {
    streamingContext.start();
    if (deploymentConfig.isRunLocal()) {
      streamingContext.awaitTermination();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
/**
 * Print the status text of some of the tweets
 */
public void tweetPrint() {
  JavaDStream<Status> tweetsStream = loadData();
  // Completed per the hint: map each Status to its text, then print it
  JavaDStream<String> statusText = tweetsStream.map(status -> status.getText());
  statusText.print();
  // Start the context
  jssc.start();
  jssc.awaitTermination();
}
@Override
public void startHDFSTxtFileStreams() {
  String hdfsUri = hadoopClusterService.getHDFSUri() + "/searchevents" + getCurrentStreamUri();
  QueryStringJDStreams queryStringJDStreams = new QueryStringJDStreams();
  JavaDStream<String> fileStream = jssc.textFileStream(hdfsUri);
  queryStringJDStreams.topQueryStringsCountInLastOneHour(fileStream);
  queryStringJDStreams.topProductViewsCountInLastOneHour(fileStream);
  LOG.debug("Starting streaming context!");
  jssc.start();
  LOG.debug("Streaming context running!");
}
@Override
public void startFlumeStream() {
  JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(
      jssc, "localhost", 41111, StorageLevels.MEMORY_AND_DISK);
  QueryStringJDStreams queryStringJDStreams = new QueryStringJDStreams();
  // Run top search query string stream
  queryStringJDStreams
      .topQueryStringsCountInLastOneHourUsingSparkFlumeEvent(flumeStream);
  // Run top product view stream
  // TODO: uncomment to get both stats.
  // queryStringJDStreams
  //     .topProductViewsCountInLastOneHourUsingSparkFlumeEvent(flumeStream);
  jssc.start();
}
public static void main(String[] args) {
  SparkConf conf = new SparkConf()
      .setAppName("kafka-sandbox")
      .setMaster("local[*]");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

  Set<String> topics = Collections.singleton("mytopic");
  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", "localhost:9092");

  JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
      String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

  directKafkaStream.foreachRDD(rdd -> {
    System.out.println("--- New RDD with " + rdd.partitions().size()
        + " partitions and " + rdd.count() + " records");
    rdd.foreach(record -> System.out.println(record._2));
  });

  ssc.start();
  ssc.awaitTermination();
}
public static void main(String[] args) {
  SparkConf conf = new SparkConf()
      .setAppName("kafka-sandbox")
      .setMaster("local[*]");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

  Set<String> topics = Collections.singleton("mytopic");
  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", "localhost:9092");

  JavaPairInputDStream<String, byte[]> directKafkaStream = KafkaUtils.createDirectStream(ssc,
      String.class, byte[].class, StringDecoder.class, DefaultDecoder.class, kafkaParams, topics);

  directKafkaStream
      .map(message -> recordInjection.invert(message._2).get())
      .foreachRDD(rdd -> {
        rdd.foreach(record -> {
          System.out.println("str1= " + record.get("str1")
              + ", str2= " + record.get("str2")
              + ", int1=" + record.get("int1"));
        });
      });

  ssc.start();
  ssc.awaitTermination();
}
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: JavaFlumeEventCount <host> <port>");
    System.exit(1);
  }
  String host = args[0];
  int port = Integer.parseInt(args[1]);
  Duration batchInterval = new Duration(2000);
  SparkConf sparkConf = new SparkConf().setAppName("JavaFlumeEventCount");
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, batchInterval);
  JavaReceiverInputDStream<SparkFlumeEvent> flumeStream =
      FlumeUtils.createStream(ssc, host, port);
  flumeStream.count();
  flumeStream.count().map(in -> "Received " + in + " flume events.").print();
  ssc.start();
  ssc.awaitTermination();
}
private void start() {
  // Create a local StreamingContext with two working threads and a batch
  // interval of 5 seconds
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
  msgDataStream.print();
  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
/** Starts the streaming job. Use {@link #close()} to stop it */
public SparkStreamingJob start() {
  if (!started.compareAndSet(false, true)) return this;

  Runnable logInitializer = LogInitializer.create(zipkinLogLevel());
  logInitializer.run(); // Ensures local log commands emit
  streamSpansToStorage(
      streamFactory().create(jsc()),
      new AutoValue_ReadSpans(logInitializer),
      new AutoValue_AdjustAndConsumeSpansSharingTraceId(logInitializer, adjusters(), consumer())
  );

  jsc().start();
  return this;
}
@SuppressWarnings("unchecked") @Test public void testContextState() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4)); Assert.assertEquals(StreamingContextState.INITIALIZED, ssc.getState()); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream); Assert.assertEquals(StreamingContextState.INITIALIZED, ssc.getState()); ssc.start(); Assert.assertEquals(StreamingContextState.ACTIVE, ssc.getState()); ssc.stop(); Assert.assertEquals(StreamingContextState.STOPPED, ssc.getState()); }
@SuppressWarnings("unchecked") @Test public void testContextState() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4)); Assert.assertEquals(StreamingContextState.INITIALIZED, ssc.getState()); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream); Assert.assertEquals(StreamingContextState.INITIALIZED, ssc.getState()); ssc.start(); Assert.assertEquals(StreamingContextState.ACTIVE, ssc.getState()); ssc.stop(); Assert.assertEquals(StreamingContextState.STOPPED, ssc.getState()); }
public static void main(String[] args) throws DataIngestException {
  CmdLineParser cmdLineParser = new CmdLineParser();
  final AppArgs appArgs = cmdLineParser.validateArgs(args);
  System.setProperty("HADOOP_USER_NAME", appArgs.getProperty(DiPConfiguration.HADOOP_USER_NAME));
  SparkConf conf = new SparkConf().setAppName("SparkTwitterStreaming").setMaster("local[*]");
  try (JavaStreamingContext jsc =
      new JavaStreamingContext(new JavaSparkContext(conf), new Duration(1000))) {
    JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(jsc,
        appArgs.getProperty(DiPConfiguration.ZK_HOST) + ":" + appArgs.getProperty(DiPConfiguration.ZK_PORT),
        "spark-stream", getKafkaTopics(appArgs));
    JavaDStream<Object[]> twitterStreams =
        stream.map(tuple -> FlatJsonConverter.convertToValuesArray(tuple._2)).cache();
    SparkHdfsWriter.write(twitterStreams, appArgs);
    new SparkHBaseWriter(jsc.sparkContext(), appArgs).write(twitterStreams);
    SparkJdbcSourceWriter jdbcSourceWriter =
        new SparkJdbcSourceWriter(new SQLContext(jsc.sparkContext()), appArgs);
    new TopNLocationByTweets(jdbcSourceWriter, Integer.valueOf(appArgs.getProperty("topN"))).compute(twitterStreams);
    new TopNUsersWithMaxFollowers(jdbcSourceWriter, Integer.valueOf(appArgs.getProperty("topN"))).compute(twitterStreams);
    jsc.start();
    jsc.awaitTermination();
  }
}
private <K, S, T> void testOperation(
    List<List<K>> input,
    StateSpec<K, Integer, S, T> mapWithStateSpec,
    List<Set<T>> expectedOutputs,
    List<Set<Tuple2<K, S>>> expectedStateSnapshots) {
  int numBatches = expectedOutputs.size();
  JavaDStream<K> inputStream = JavaTestUtils.attachTestInputStream(ssc, input, 2);
  JavaMapWithStateDStream<K, Integer, S, T> mapWithStateDStream =
      JavaPairDStream.fromJavaDStream(
          inputStream.map(x -> new Tuple2<>(x, 1))).mapWithState(mapWithStateSpec);

  List<Set<T>> collectedOutputs =
      Collections.synchronizedList(new ArrayList<Set<T>>());
  mapWithStateDStream.foreachRDD(rdd -> collectedOutputs.add(Sets.newHashSet(rdd.collect())));
  List<Set<Tuple2<K, S>>> collectedStateSnapshots =
      Collections.synchronizedList(new ArrayList<Set<Tuple2<K, S>>>());
  mapWithStateDStream.stateSnapshots().foreachRDD(rdd ->
      collectedStateSnapshots.add(Sets.newHashSet(rdd.collect())));

  BatchCounter batchCounter = new BatchCounter(ssc.ssc());
  ssc.start();
  ((ManualClock) ssc.ssc().scheduler().clock())
      .advance(ssc.ssc().progressListener().batchDuration() * numBatches + 1);
  batchCounter.waitUntilBatchesCompleted(numBatches, 10000);

  Assert.assertEquals(expectedOutputs, collectedOutputs);
  Assert.assertEquals(expectedStateSnapshots, collectedStateSnapshots);
}