.groupBy(0, 1) .reduceGroup(new GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() { @Override
@Test
public void testCorrectnessOfDistinctOnTuplesWithKeyFieldSelector() throws Exception {
    /*
     * check correctness of distinct on tuples with key field selector
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // duplicate the small tuple set via union, then deduplicate on all three fields
    DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.getSmall3TupleDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> deduplicated = tuples.union(tuples).distinct(0, 1, 2);

    List<Tuple3<Integer, Long, String>> actual = deduplicated.collect();

    // distinct must collapse the self-union back to the original three tuples
    String expected = "1,1,Hi\n2,2,Hello\n3,2,Hello world\n";

    compareResultAsTuples(actual, expected);
}
.map(new IdentityMapper<>()); .filter(x -> true).name("postFilter1") .groupBy(0) .reduceGroup(new IdentityGroupReducer<>()).name("reducer1"); DataSet<Tuple2<Long, Long>> reduced2 = s1 .union(s2) .filter(x -> true).name("postFilter2") .groupBy(1) .reduceGroup(new IdentityGroupReducer<>()).name("reducer2"); .output(new DiscardingOutputFormat<>());
/**
 * Performs union on the vertices and edges sets of the input graphs
 * removing duplicate vertices but maintaining duplicate edges.
 *
 * <p>Vertices are deduplicated via {@code distinct()}; edges are concatenated
 * as-is, so an edge present in both graphs appears twice in the result.
 *
 * @param graph the graph to perform union with
 * @return a new graph
 */
public Graph<K, VV, EV> union(Graph<K, VV, EV> graph) {
    DataSet<Vertex<K, VV>> unionedVertices = graph
        .getVertices()
        .union(this.getVertices())
        .name("Vertices")
        // NOTE(review): both the union and the distinct operators are named
        // "Vertices" — presumably intentional for plan readability; confirm
        // whether the distinct should carry its own name.
        .distinct()
        .name("Vertices");

    DataSet<Edge<K, EV>> unionedEdges = graph
        .getEdges()
        .union(this.getEdges())
        .name("Edges");

    // the merged graph reuses this graph's execution context
    return new Graph<>(unionedVertices, unionedEdges, this.context);
}
dataset1.union(dataset2).union(dataset3) .groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "") .sortGroup((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "", Order.ASCENDING) .reduceGroup((GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>) (values, out) -> {})
.setParallelism(parallelism) .name("Union with source vertices") .map(new AdjustScores<K>(dampingFactor)) .withBroadcastSet(sumOfScores, SUM_OF_SCORES) .withBroadcastSet(vertexCount, VERTEX_COUNT)
@Test
public void testUnion5IdenticalDataSets() throws Exception {
    /*
     * Union of 5 same Data Sets, with multiple unions
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // start with one copy of the tuple set and chain four more unions onto it
    DataSet<Tuple3<Integer, Long, String>> combined = CollectionDataSets.get3TupleDataSet(env);
    for (int i = 0; i < 4; i++) {
        combined = combined.union(CollectionDataSets.get3TupleDataSet(env));
    }

    List<Tuple3<Integer, Long, String>> actual = combined.collect();

    // each of the five inputs contributes one full copy of the tuple set
    String expected = FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING
            + FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING;

    compareResultAsTuples(actual, expected);
}
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read the points with parallelism 1 so the input is produced by a single task
    DataSet<Tuple2<Integer, CoordVector>> points =
            env.readFile(new PointInFormat(), this.dataPath).setParallelism(1);

    // run two iterations whose step unions the feedback with itself
    IterativeDataSet<Tuple2<Integer, CoordVector>> loop = points.iterate(2);

    DataSet<Tuple2<Integer, CoordVector>> stepResult = loop.union(loop).map(new IdentityMapper());

    loop.closeWith(stepResult).writeAsFormattedText(this.resultPath, new PointFormatter());

    env.execute();
}
/**
 * Resolves the two parent data sets referenced by {@code info}, unions them,
 * and registers the result under the new set id.
 */
private <IN> void createUnionOperation(PythonOperationInfo info) {
    DataSet<IN> first = sets.getDataSet(info.parentID);
    DataSet<IN> second = sets.getDataSet(info.otherID);
    DataSet<IN> unioned = first.union(second).name("Union");
    sets.add(info.setID, unioned);
}
/**
 * Creates a union of this DataSet with another DataSet. The other DataSet must be of the same data type.
 *
 * <p>The union is a plain concatenation: no deduplication is performed.
 *
 * @param other The other DataSet which is unioned with the current DataSet.
 * @return The resulting DataSet.
 */
public UnionOperator<T> union(DataSet<T> other){
    // the call-location name is captured so the operator shows up usefully in plans
    return new UnionOperator<>(this, other, Utils.getCallLocationName());
}
/**
 * Performs union on the vertices and edges sets of the input graphs
 * removing duplicate vertices but maintaining duplicate edges.
 *
 * @param graph the graph to perform union with
 * @return a new graph
 */
public Graph<K, VV, EV> union(Graph<K, VV, EV> graph) {
    // merge both vertex sets and drop duplicates
    DataSet<Vertex<K, VV>> mergedVertices =
            graph.getVertices().union(this.getVertices()).name("Vertices").distinct().name("Vertices");

    // edges are concatenated without deduplication
    DataSet<Edge<K, EV>> mergedEdges = graph.getEdges().union(this.getEdges()).name("Edges");

    return new Graph<>(mergedVertices, mergedEdges, this.context);
}
.setParallelism(parallelism) .name("Union with source vertices") .map(new AdjustScores<>(dampingFactor)) .withBroadcastSet(sumOfScores, SUM_OF_SCORES) .withBroadcastSet(vertexCount, VERTEX_COUNT)
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> staticInput = env.generateSequence(1, 4);
    DataSet<Long> iterationInput = env.generateSequence(1, 4);

    // three iterations; the feedback unions the static input with itself
    // and the loop partial solution with itself
    IterativeDataSet<Long> loop = iterationInput.iterate(3);
    DataSet<Long> closed = loop.closeWith(staticInput.union(staticInput).union(loop.union(loop)));

    closed.output(new LocalCollectionOutputFormat<Long>(this.result));

    env.execute();
}