@Test
public void testForeachBatchAPI() {
  // Smoke test for the Java foreachBatch API: a no-op batch handler must be
  // accepted and the resulting query must start and stop cleanly.
  // NOTE: an anonymous class (not a lambda) is used on purpose — a lambda is
  // ambiguous between the Scala and Java foreachBatch overloads.
  StreamingQuery query =
      spark
          .readStream()
          .textFile(input)
          .writeStream()
          .foreachBatch(
              new VoidFunction2<Dataset<String>, Long>() {
                @Override
                public void call(Dataset<String> batchData, Long batchId) throws Exception {}
              })
          .start();
  query.stop();
}
@Test
public void testForeachBatchAPI() {
  // Smoke test for the Java foreachBatch API: register a no-op per-batch
  // handler, start the query, then stop it.
  VoidFunction2<Dataset<String>, Long> batchHandler =
      new VoidFunction2<Dataset<String>, Long>() {
        @Override
        public void call(Dataset<String> batchData, Long batchId) throws Exception {
          // intentionally empty — only the API wiring is under test
        }
      };

  StreamingQuery query =
      spark.readStream().textFile(input).writeStream().foreachBatch(batchHandler).start();
  query.stop();
}
@Test
public void testForeachAPI() {
  // Smoke test for the Java foreach (row-at-a-time) sink API: a ForeachWriter
  // that accepts every partition and does nothing per record.
  ForeachWriter<String> writer =
      new ForeachWriter<String>() {
        @Override
        public boolean open(long partitionId, long epochId) {
          return true; // always process this partition/epoch
        }

        @Override
        public void process(String record) {}

        @Override
        public void close(Throwable errorOrNull) {}
      };

  StreamingQuery query =
      spark.readStream().textFile(input).writeStream().foreach(writer).start();
  query.stop();
}
}
@Test
public void testForeachAPI() {
  // Smoke test for the Java foreach (ForeachWriter) sink API: the query must
  // start and stop cleanly with a writer that accepts everything and no-ops.
  StreamingQuery query =
      spark
          .readStream()
          .textFile(input)
          .writeStream()
          .foreach(
              new ForeachWriter<String>() {
                @Override
                public boolean open(long partitionId, long epochId) {
                  // true = process this partition for this epoch
                  return true;
                }

                @Override
                public void process(String element) {}

                @Override
                public void close(Throwable errorOrNull) {}
              })
          .start();
  query.stop();
}
}
private void start() { log.debug("-> start()"); SparkSession spark = SparkSession.builder() .appName("Read lines over a file stream").master("local") .getOrCreate(); Dataset<Row> df = spark .readStream() .format("text") .load(StreamingUtils.getInputDirectory()); StreamingQuery query = df.writeStream().outputMode(OutputMode.Update()) .format("console").start(); try { query.awaitTermination(); } catch (StreamingQueryException e) { log.error("Exception while waiting for query to end {}.", e .getMessage(), e); } // In this case everything is a string df.show(); df.printSchema(); } }
.readStream() .format("text") .load(StreamingUtils.getInputDirectory());
Dataset<String> lines = spark.readStream() .format("loghub") .option("sls.project", logProject)
Dataset<Row> streamingInput = spark.readStream().schema(inputDF1.schema()) .json(streamingSourcePath);
Dataset<String> lines = spark.readStream() .format("loghub") .option("sls.project", logProject)