public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Integer> trainingData = env.addSource(new FiniteTrainingDataSource());
    DataStream<Integer> newData = env.addSource(new FiniteNewDataSource());

    // build a new partial model on every 5 seconds of training data
    DataStream<Double[]> model = trainingData
            .assignTimestampsAndWatermarks(new LinearTimestamp())
            .timeWindowAll(Time.of(5000, TimeUnit.MILLISECONDS))
            .apply(new PartialModelBuilder());

    // use partial model for newData
    DataStream<Integer> prediction = newData.connect(model).map(new Predictor());

    // emit result
    if (params.has("output")) {
        prediction.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        prediction.print();
    }

    // execute program
    env.execute("Streaming Incremental Learning");
}
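The connect/map step above feeds both the fresh elements and the periodically rebuilt model into a single co-operator. A minimal sketch of what a CoMapFunction such as Predictor could look like follows; the class body is not part of the example above, so the model field and the scoring logic here are assumptions rather than the actual implementation.

import org.apache.flink.streaming.api.functions.co.CoMapFunction;

// Sketch only: the real Predictor is not shown above; its state and logic are assumed here.
public static class Predictor implements CoMapFunction<Integer, Double[], Integer> {
    private static final long serialVersionUID = 1L;

    // most recent partial model received from the model stream
    private Double[] model = new Double[]{1.0};

    @Override
    public Integer map1(Integer value) {
        // score a new data element against the current model (placeholder arithmetic)
        return (int) (value * model[0]);
    }

    @Override
    public Integer map2(Double[] newModel) {
        // store the refreshed model; emit a dummy value since both inputs share the Integer output type
        this.model = newModel;
        return 0;
    }
}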
.timeWindowAll(Time.milliseconds(1), Time.milliseconds(1))
    .process(new ProcessAllWindowFunction<Integer, Integer, TimeWindow>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void process(Context context, Iterable<Integer> elements, Collector<Integer> out) {
            // pass-through body shown as a placeholder; a real function would compute per-window results here
            for (Integer element : elements) {
                out.collect(element);
            }
        }
    });
.timeWindowAll(Time.milliseconds(1), Time.milliseconds(1))
    .sideOutputLateData(lateDataTag)
    .apply(new AllWindowFunction<Integer, Integer, TimeWindow>() {
        @Override
        public void apply(TimeWindow window, Iterable<Integer> values, Collector<Integer> out) {
            // pass-through body shown as a placeholder; a real function would compute per-window results here
            for (Integer value : values) {
                out.collect(value);
            }
        }
    });
public static void main(String[] args) throws Exception {

    if (!parseParameters(args)) {
        return;
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    SimpleEdgeStream<Long, NullValue> edges = getGraphStream(env);

    DataStream<Tuple2<Integer, Long>> triangleCount = edges
            .slice(windowTime, EdgeDirection.ALL)
            .applyOnNeighbors(new GenerateCandidateEdges())
            .keyBy(0, 1).timeWindow(windowTime)
            .apply(new CountTriangles())
            .timeWindowAll(windowTime).sum(0);

    if (fileOutput) {
        triangleCount.writeAsText(outputPath);
    } else {
        triangleCount.print();
    }

    env.execute("Naive window triangle count");
}
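The triangle-count example relies on helpers that are not shown here: parseParameters, getGraphStream, and the fileOutput, outputPath, and windowTime fields they populate. One plausible shape for the parameter handling, written purely as an assumption about those helpers, is sketched below.

// Sketch only: field names and argument order are assumptions, not the example's actual code.
private static boolean fileOutput = false;
private static String edgeInputPath = null;
private static String outputPath = null;
private static Time windowTime = Time.of(1, TimeUnit.SECONDS);

private static boolean parseParameters(String[] args) {
    if (args.length > 0) {
        if (args.length != 3) {
            System.err.println("Usage: <input edges path> <output path> <window time (ms)>");
            return false;
        }
        fileOutput = true;
        edgeInputPath = args[0];
        outputPath = args[1];
        windowTime = Time.of(Long.parseLong(args[2]), TimeUnit.MILLISECONDS);
    } else {
        System.out.println("Executing example with default parameters and built-in default data.");
    }
    return true;
}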
@SuppressWarnings("unchecked") @Override public DataStream<T> run(final DataStream<Edge<K, EV>> edgeStream) { //For parallel window support we key the edge stream by partition and apply a parallel fold per partition. //Finally, we merge all locally combined results into our final graph aggregation property. TupleTypeInfo edgeTypeInfo = (TupleTypeInfo) edgeStream.getType(); TypeInformation<S> returnType = TypeExtractor.createTypeInfo(EdgesFold.class, getUpdateFun().getClass(), 2, edgeTypeInfo.getTypeAt(0), edgeTypeInfo.getTypeAt(2)); TypeInformation<Tuple2<Integer, Edge<K, EV>>> typeInfo = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, edgeStream.getType()); DataStream<S> partialAgg = edgeStream .map(new PartitionMapper<>()).returns(typeInfo) .keyBy(0) .timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS)) .fold(getInitialValue(), new PartialAgg<>(getUpdateFun(),returnType)) .timeWindowAll(Time.of(timeMillis, TimeUnit.MILLISECONDS)) .reduce(getCombineFun()) .flatMap(getAggregator(edgeStream)).setParallelism(1); if (getTransform() != null) { return partialAgg.map(getTransform()); } return (DataStream<T>) partialAgg; }
@SuppressWarnings("unchecked") @Override public DataStream<T> run(final DataStream<Edge<K, EV>> edgeStream) { TypeInformation<Tuple2<Integer, Edge<K, EV>>> basicTypeInfo = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, edgeStream.getType()); TupleTypeInfo edgeTypeInfo = (TupleTypeInfo) edgeStream.getType(); TypeInformation<S> partialAggType = TypeExtractor.createTypeInfo(EdgesFold.class, getUpdateFun().getClass(), 2, edgeTypeInfo.getTypeAt(0), edgeTypeInfo.getTypeAt(2)); TypeInformation<Tuple2<Integer, S>> partialTypeInfo = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, partialAggType); degree = (degree == -1) ? edgeStream.getParallelism() : degree; DataStream<S> partialAgg = edgeStream .map(new PartitionMapper<>()).returns(basicTypeInfo) .setParallelism(degree) .keyBy(0) .timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS)) .fold(getInitialValue(), new PartialAgg<>(getUpdateFun(), partialAggType)).setParallelism(degree); //split here DataStream<Tuple2<Integer, S>> treeAgg = enhance(partialAgg.map(new PartitionMapper<>()).setParallelism(degree).returns(partialTypeInfo), partialTypeInfo); DataStream<S> resultStream = treeAgg.map(new PartitionStripper<>()).setParallelism(treeAgg.getParallelism()) .timeWindowAll(Time.of(timeMillis, TimeUnit.MILLISECONDS)) .reduce(getCombineFun()) .flatMap(getAggregator(edgeStream)).setParallelism(1); return (getTransform() != null) ? resultStream.map(getTransform()) : (DataStream<T>) resultStream; }