// Per-batch statistics over Apache access logs (the stats computation is truncated in the original).
public void processAccessLogs(String outDir, JavaDStream<ApacheAccessLog> accessLogsDStream) {
  accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
    public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
      // Tuple4<Long, Long, Long, Long> stats = ...
      return null;
    }
  });
}

// Sampling up to 100 IP addresses from each batch.
ipAddressDStream.foreachRDD(new Function<JavaRDD<String>, Void>() {
  public Void call(JavaRDD<String> rdd) {
    List<String> currentIPAddresses = rdd.take(100);
    // ...
    return null;
  }
});
accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
  public Void call(JavaRDD<ApacheAccessLog> rdd) {
    // ...
    return null;
  }
});
windowDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
  public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
    // Tuple4<Long, Long, Long, Long> contentSizeStats = ...
    return null;
  }
});
// Adapter: exposes foreachRDD through the legacy Function2<JavaRDD<T>, Time, Void>
// signature by delegating to the VoidFunction2-based overload.
public static <T> void foreachRDD(JavaDStream<T> stream, final Function2<JavaRDD<T>, Time, Void> func) {
  stream.foreachRDD(new VoidFunction2<JavaRDD<T>, Time>() {
    @Override
    public void call(JavaRDD<T> v1, Time v2) throws Exception {
      func.call(v1, v2);
    }
  });
}
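A minimal call-site sketch for the adapter above; the `wordCounts` stream and the body are illustrative assumptions, not part of the original code:

  // Hypothetical usage: wordCounts is assumed to be an existing JavaDStream.
  foreachRDD(wordCounts, (rdd, time) -> {
    System.out.println("Batch at " + time + ": " + rdd.count() + " records");
    return null; // Function2<..., Void> must return an explicit null
  });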
@Override
public JavaDStream<String> process(JavaDStream<String> input) {
  input.foreachRDD(new Function<JavaRDD<String>, Void>() {
    public Void call(JavaRDD<String> rdd) {
      // ...
      return null;
    }
  });
  // ...
}
@Override
public void action() {
  // Force computation of DStream.
  dStream.foreachRDD(rdd -> rdd.foreach(TranslationUtils.<WindowedValue<T>>emptyVoidFunction()));
}
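For context, the "empty" VoidFunction referenced above can be a one-liner; this is a sketch of the idea under that assumption, not necessarily how TranslationUtils defines it:

  // Hypothetical no-op helper: iterating with it forces evaluation of the RDD
  // without producing any output or side effects.
  static <T> VoidFunction<T> emptyVoidFunction() {
    return t -> { /* intentionally empty */ };
  }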
public <T> void compute(JavaDStream<T> twitterStream) {
  twitterStream.foreachRDD(rdd -> {
    List<Row> rows = new ArrayList<>();
    // The intersection cast keeps the comparator Serializable so it can ship to executors.
    doMapToPair(rdd)
        .top(topN, (Comparator<Tuple2<K, V>> & Serializable) (tuple1, tuple2) -> tuple1._2.compareTo(tuple2._2))
        .forEach(tuple -> rows.add(createRow(tuple)));
    rdbmsWriter.write(rows, schema, tableName);
  });
}
public void run() {
  Map<String, Integer> topicMap = new HashMap<>();
  topicMap.put(topic, 1);
  JavaPairReceiverInputDStream<String, String> stream =
      KafkaUtils.createStream(scc, zkString, "groupId", topicMap);
  JavaDStream<String> messages = stream.map(r -> r._2());
  messages.foreachRDD(r -> {
    System.out.println("========================");
    // Printing the RDD itself would only show its toString(); collect the
    // (small) batch to print the actual messages.
    r.collect().forEach(System.out::println);
  });
}
@SuppressWarnings("deprecation") public static void persists(DStream<Tuple2<Integer, Iterable<Long>>> partitonOffset, Properties props) { ClassTag<Tuple2<Integer, Iterable<Long>>> tuple2ClassTag = ScalaUtil.<Integer, Iterable<Long>>getTuple2ClassTag(); JavaDStream<Tuple2<Integer, Iterable<Long>>> jpartitonOffset = new JavaDStream<Tuple2<Integer, Iterable<Long>>>(partitonOffset, tuple2ClassTag); jpartitonOffset.foreachRDD(new VoidFunction<JavaRDD<Tuple2<Integer, Iterable<Long>>>>() { @Override public void call(JavaRDD<Tuple2<Integer, Iterable<Long>>> po) throws Exception { List<Tuple2<Integer, Iterable<Long>>> poList = po.collect(); doPersists(poList, props); } }); }
public static <T> void write(JavaDStream<T> javaDStream, AppArgs appArgs) {
  javaDStream.foreachRDD(rdd -> {
    rdd.map(record -> {
      // Join the record's fields with the configured output delimiter.
      StringBuilder recordBuilder = new StringBuilder();
      for (Object e : (Object[]) record) {
        recordBuilder.append(e);
        recordBuilder.append(appArgs.getProperty(DiPConfiguration.HDFS_OUTPUT_DELIMITER));
      }
      // Drop the trailing delimiter.
      return StringUtils.removeEnd(recordBuilder.toString(),
          appArgs.getProperty(DiPConfiguration.HDFS_OUTPUT_DELIMITER));
    }).saveAsTextFile(appArgs.getProperty(DiPConfiguration.CLUSTER_FS_URL)
        + appArgs.getProperty(DiPConfiguration.HDFS_OUTPUT_PATH)
        + System.currentTimeMillis());
  });
}
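Because the path above ends in System.currentTimeMillis(), each batch lands in a directory named after the wall-clock write time. A hedged variation, assuming an `outputBase` path of your own, derives the directory from the batch time via the two-argument foreachRDD overload instead:

  javaDStream.foreachRDD((rdd, time) -> {
    // `time` is the batch time, so a recomputed batch writes to the same path.
    rdd.saveAsTextFile(outputBase + "/" + time.milliseconds());
  });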
@Override
public boolean execute(JavaDStream<Map<String, Object>>... convertedStream) {
  JavaDStream<Map<String, Object>> applicationLogic = getApplicationLogic(convertedStream);
  // applicationLogic.print();
  if (isOutputKafkaProtocol()) {
    applicationLogic.foreachRDD(SimpleKafkaSerializer.getInstance(kafkaParams,
        protocol().getTopicDefinition().getActualTopicName()));
  } else {
    // TODO: JMS
  }
  thread = new Thread(this);
  thread.start();
  return true;
}
countAndSum.foreachRDD(new Function<JavaRDD<Tuple2<Integer, Integer>>, Void>() {
  @Override
  public Void call(JavaRDD<Tuple2<Integer, Integer>> tuple2JavaRDD) throws Exception {
    // ...
    return null;
  }
});
public static void main(String[] args) {
  SparkConf conf = new SparkConf()
      .setAppName("kafka-sandbox")
      .setMaster("local[*]");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

  Set<String> topics = Collections.singleton("mytopic");
  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", "localhost:9092");
  JavaPairInputDStream<String, byte[]> directKafkaStream = KafkaUtils.createDirectStream(ssc,
      String.class, byte[].class, StringDecoder.class, DefaultDecoder.class, kafkaParams, topics);

  directKafkaStream
      // recordInjection (an Avro <-> byte[] codec) is defined outside this snippet.
      .map(message -> recordInjection.invert(message._2).get())
      .foreachRDD(rdd -> {
        rdd.foreach(record -> {
          System.out.println("str1= " + record.get("str1")
              + ", str2= " + record.get("str2")
              + ", int1=" + record.get("int1"));
        });
      });

  ssc.start();
  ssc.awaitTermination();
}
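The `recordInjection` used above is defined outside this snippet; with Twitter's bijection-avro library it is typically derived from the Avro schema. A sketch, assuming a schema matching the three fields printed above:

  // Assumed Avro schema for the fields printed above (str1, str2, int1).
  private static final String USER_SCHEMA = "{"
      + "\"type\":\"record\",\"name\":\"myrecord\",\"fields\":["
      + "{\"name\":\"str1\",\"type\":\"string\"},"
      + "{\"name\":\"str2\",\"type\":\"string\"},"
      + "{\"name\":\"int1\",\"type\":\"int\"}]}";

  private static final Schema schema = new Schema.Parser().parse(USER_SCHEMA);

  // Injection that round-trips GenericRecord to/from the byte[] payloads read from Kafka.
  private static final Injection<GenericRecord, byte[]> recordInjection =
      GenericAvroCodecs.toBinary(schema);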
private void configureDataContext(JavaStreamingContext context) {
  Map<String, Integer> baseTopicMap = new HashMap<>();
  configurationContext.getDataTopics().forEach(dataTopic -> baseTopicMap.put(dataTopic, 1));

  kafkaTopicService.createTopicsIfNotExist(configurationContext.getDataTopics(),
      configurationContext.getKafkaReplicationFactor(), configurationContext.getKafkaPartitions());

  HashMap<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("zookeeper.connect", configurationContext.getZookeeperHostsQuorumWithPath());
  kafkaParams.put("group.id", configurationContext.getGroupId());

  /* groupId must be the cluster groupId. Kafka assigns each partition of a topic to one,
     and only one, consumer of the group. Decision topics have only one partition (by default),
     so if two or more decision instances (consumers) read the same topic with the same
     groupId, only one instance will be able to read from the topic. */
  JavaPairDStream<String, byte[]> messages = KafkaUtils.createStream(context, String.class,
      byte[].class, kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class,
      kafkaParams, baseTopicMap, StorageLevel.MEMORY_AND_DISK_SER());

  AvroDeserializeMessageFunction avroDeserializeMessageFunction = new AvroDeserializeMessageFunction();
  JavaDStream<StratioStreamingMessage> insertRequests = messages
      .filter(new FilterAvroMessagesByOperationFunction(STREAM_OPERATIONS.MANIPULATION.INSERT))
      .map(avroDeserializeMessageFunction);

  InsertIntoStreamFunction insertIntoStreamFunction = new InsertIntoStreamFunction(
      streamOperationService, configurationContext.getZookeeperHostsQuorum());
  insertRequests.foreachRDD(insertIntoStreamFunction);
}
private void start() {
  // Create a local StreamingContext with two working threads and a batch interval of 5 seconds.
  SparkConf conf = new SparkConf().setMaster("local[2]")
      .setAppName("Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
  msgDataStream.print();

  // Turn each batch into JavaRDD<Row>.
  msgDataStream.foreachRDD(new RowProcessor());

  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
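`RowProcessor` itself is not shown in the snippet; a minimal sketch of what such a VoidFunction could look like, assuming a single-column row layout:

  // Hypothetical implementation: wraps each input line in a one-column Row.
  public class RowProcessor implements VoidFunction<JavaRDD<String>> {
    @Override
    public void call(JavaRDD<String> rdd) throws Exception {
      JavaRDD<Row> rowRdd = rdd.map(line -> RowFactory.create(line));
      // Downstream, the rows would typically be combined with a schema into a Dataframe.
      rowRdd.collect().forEach(System.out::println);
    }
  }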
JavaDStream<String> input = ssc.receiverStream(new JavaSocketReceiver("localhost", server.port()));
JavaDStream<String> mapped = input.map((Function<String, String>) v1 -> v1 + ".");
mapped.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
  long count = rdd.count();
  dataCounter.addAndGet(count);
});
@SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator(); List<List<Integer>> inputData = Arrays.asList( Arrays.asList(1,1,1), Arrays.asList(1,1,1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output stream.foreachRDD(rdd -> { accumRdd.add(1); rdd.foreach(i -> accumEle.add(1)); }); // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java stream.foreachRDD((rdd, time) -> {}); JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(2, accumRdd.value().intValue()); Assert.assertEquals(6, accumEle.value().intValue()); }
@SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator(); List<List<Integer>> inputData = Arrays.asList( Arrays.asList(1,1,1), Arrays.asList(1,1,1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output stream.foreachRDD(rdd -> { accumRdd.add(1); rdd.foreach(i -> accumEle.add(1)); }); // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java stream.foreachRDD((rdd, time) -> {}); JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(2, accumRdd.value().intValue()); Assert.assertEquals(6, accumEle.value().intValue()); }