public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
  // Create a StreamingContext with a 1-second batch size
  JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
  // Create a DStream from all the input on port 7777
  JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
  // Filter our DStream for lines with "error"
  JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
    public Boolean call(String line) {
      return line.contains("error");
    }
  });
  // Print out the lines with errors, which causes this DStream to be evaluated
  errorLines.print();
  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for 10 seconds, then exit. To run forever, call without a timeout
  jssc.awaitTermination(10000);
  // Stop the streaming context
  jssc.stop();
}
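// To exercise the example above, something must be writing text to port 7777;
// socketTextStream connects as a client, so the data source has to be a server.
// A minimal sketch of such a source (the class name and log lines are
// illustrative, not part of the original example):
import java.io.PrintWriter;
import java.net.ServerSocket;
import java.net.Socket;

public class FakeLogServer {
  public static void main(String[] args) throws Exception {
    try (ServerSocket server = new ServerSocket(7777);
         Socket client = server.accept();
         PrintWriter out = new PrintWriter(client.getOutputStream(), true)) {
      out.println("INFO everything is fine");
      out.println("error something went wrong"); // the only line the filter keeps
    }
  }
}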
public static void main(String[] args) throws Exception {
  String zkQuorum = args[0];
  String group = args[1];
  SparkConf conf = new SparkConf().setAppName("KafkaInput");
  // Create a StreamingContext with a 1-second batch size
  JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));
  // Map of topic name -> number of receiver threads for that topic
  Map<String, Integer> topics = new HashMap<String, Integer>();
  topics.put("pandas", 1);
  JavaPairDStream<String, String> input =
      KafkaUtils.createStream(jssc, zkQuorum, group, topics);
  input.print();
  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for 10 seconds, then exit. To run forever, call without a timeout
  jssc.awaitTermination(10000);
  // Stop the streaming context
  jssc.stop();
}
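// Because the topics map is topic name -> receiver thread count, one stream can
// subscribe to several topics at once. A sketch of adding a second, busier
// topic (the topic name "logs" is an assumption):
Map<String, Integer> topics = new HashMap<String, Integer>();
topics.put("pandas", 1);
topics.put("logs", 2); // two consumer threads for a higher-volume topic
JavaPairDStream<String, String> input =
    KafkaUtils.createStream(jssc, zkQuorum, group, topics);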
public static void setFromCommandLineArgs(Options options, String[] args) {
  CommandLineParser parser = new PosixParser();
  try {
    CommandLine cl = parser.parse(options, args);
    THE_INSTANCE.windowLength = new Duration(Integer.parseInt(
        cl.getOptionValue(LogAnalyzerAppMain.WINDOW_LENGTH, "30")) * 1000);
    THE_INSTANCE.slideInterval = new Duration(Integer.parseInt(
        cl.getOptionValue(LogAnalyzerAppMain.SLIDE_INTERVAL, "5")) * 1000);
    THE_INSTANCE.logsDirectory = cl.getOptionValue(
        LogAnalyzerAppMain.LOGS_DIRECTORY, "/tmp/logs");
    THE_INSTANCE.outputHtmlFile = cl.getOptionValue(
        LogAnalyzerAppMain.OUTPUT_HTML_FILE, "/tmp/log_stats.html");
    THE_INSTANCE.checkpointDirectory = cl.getOptionValue(
        LogAnalyzerAppMain.CHECKPOINT_DIRECTORY, "/tmp/log-analyzer-streaming");
    THE_INSTANCE.indexHtmlTemplate = cl.getOptionValue(
        LogAnalyzerAppMain.INDEX_HTML_TEMPLATE,
        "./src/main/resources/index.html.template");
    THE_INSTANCE.outputDirectory = cl.getOptionValue(
        LogAnalyzerAppMain.OUTPUT_DIRECTORY, "/tmp/pandaout");
    THE_INSTANCE.initialized = true;
  } catch (ParseException e) {
    THE_INSTANCE.initialized = false;
    System.err.println("Parsing failed. Reason: " + e.getMessage());
  }
}
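// A sketch of how the Options instance handed to setFromCommandLineArgs might
// be declared with Commons CLI; the constants are the same ones read above,
// while the help strings are illustrative:
Options options = new Options();
options.addOption(LogAnalyzerAppMain.WINDOW_LENGTH, true, "window length, in seconds");
options.addOption(LogAnalyzerAppMain.SLIDE_INTERVAL, true, "slide interval, in seconds");
options.addOption(LogAnalyzerAppMain.LOGS_DIRECTORY, true, "directory to monitor for log files");
setFromCommandLineArgs(options, args);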
protected final JavaStreamingContext buildStreamingContext() {
  log.info("Starting SparkContext with interval {} seconds", generationIntervalSec);

  SparkConf sparkConf = new SparkConf();

  // Only for tests, really
  if (sparkConf.getOption("spark.master").isEmpty()) {
    log.info("Overriding master to {} for tests", streamingMaster);
    sparkConf.setMaster(streamingMaster);
  }
  // Only for tests, really
  if (sparkConf.getOption("spark.app.name").isEmpty()) {
    String appName = "Oryx" + getLayerName();
    if (id != null) {
      appName = appName + "-" + id;
    }
    log.info("Overriding app name to {} for tests", appName);
    sparkConf.setAppName(appName);
  }
  extraSparkConfig.forEach((key, value) -> sparkConf.setIfMissing(key, value.toString()));

  // Turn this down to prevent long blocking at shutdown
  sparkConf.setIfMissing(
      "spark.streaming.gracefulStopTimeout",
      Long.toString(TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS)));
  sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));

  long generationIntervalMS =
      TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);

  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
  return new JavaStreamingContext(jsc, new Duration(generationIntervalMS));
}
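// A hedged sketch of driving the context built above; what a subclass attaches
// between construction and start() is its own business and elided here:
JavaStreamingContext jssc = buildStreamingContext();
// ... define input DStreams and transformations on jssc ...
jssc.start();
jssc.awaitTermination();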
@Test
public void testMilliseconds() {
  Assert.assertEquals(new Duration(100), Durations.milliseconds(100));
}
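// The same factory/equality pattern extends to the coarser units on
// org.apache.spark.streaming.Durations:
@Test
public void testSeconds() {
  Assert.assertEquals(new Duration(30 * 1000), Durations.seconds(30));
}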
StreamingContextJavaFunctions(StreamingContext ssc) {
  super(ssc.sparkContext());
  this.ssc = ssc;
}
@Override
public WriteAheadLogRecordHandle write(ByteBuffer record, long time) {
  index += 1;
  records.add(new Record(time, index, record));
  return new JavaWriteAheadLogSuiteHandle(index);
}
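// A possible read counterpart for the test write-ahead log above. This is a
// sketch: the Record field names and the handle's index field are assumptions
// inferred from the write method:
@Override
public ByteBuffer read(WriteAheadLogRecordHandle handle) {
  int readIndex = ((JavaWriteAheadLogSuiteHandle) handle).index;
  for (Record record : records) {
    if (record.index == readIndex) {
      return record.buffer;
    }
  }
  return null;
}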
@Override
public void call(JavaPairRDD<K,M> rdd, Time time) throws IOException {
  if (rdd.isEmpty()) {
    log.info("RDD was empty, not saving to HDFS");
  } else {
    String file = prefix + "-" + time.milliseconds() + "." + suffix;
    Path path = new Path(file);
    FileSystem fs = FileSystem.get(path.toUri(), hadoopConf);
    if (fs.exists(path)) {
      log.warn("Saved data already existed, possibly from a failed job. Deleting {}", path);
      fs.delete(path, true);
    }
    log.info("Saving RDD to HDFS at {}", file);
    rdd.mapToPair(
        new ValueToWritableFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass)
    ).saveAsNewAPIHadoopFile(
        file,
        keyWritableClass,
        messageWritableClass,
        SequenceFileOutputFormat.class,
        hadoopConf);
  }
}
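// A sketch of attaching the save function above to a stream: foreachRDD invokes
// call(rdd, batchTime) once per batch. Here saveFunction stands for an instance
// of the enclosing class, constructed elsewhere, and input for a
// JavaPairDStream<K,M>; both names are assumptions for illustration:
input.foreachRDD(saveFunction);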
public Builder checkpointIntervalMillis(int checkpointIntervalMillis) {
  this.checkpointInterval = new Duration(checkpointIntervalMillis);
  return this;
}
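// Duration counts milliseconds, so these two express the same 10-second
// interval (builder stands for an instance of the enclosing Builder):
builder.checkpointIntervalMillis(10000);
Duration tenSeconds = Durations.seconds(10); // equal to new Duration(10000)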
log.info("Not producing updates to update topic since none was configured"); updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newData, pastData, new TopicProducerImpl<>(updateBroker, updateTopic, false)) { updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newData, pastData,