.combineGroup( new GroupCombineFunction<Tuple3<Integer, Long, String>, Tuple2<Long, Integer>>() { @Override
@Test public void testIdentityWithGroupBy() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> reduceDs = ds .groupBy(1) // combine .combineGroup(new IdentityFunction()) // fully reduce .reduceGroup(new IdentityFunction()); List<Tuple3<Integer, Long, String>> result = reduceDs.collect(); compareResultAsTuples(result, identityResult); }
/**
 * Verifies that the output type of {@code combineGroup} can be supplied via a
 * {@code returns(TypeInformation)} type hint.
 */
@Test
public void testCombineGroupWithTypeInformationTypeHint() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.getSmall3TupleDataSet(env);

    DataSet<Integer> combined = input
            .groupBy(0)
            .combineGroup(new GroupCombiner<Tuple3<Integer, Long, String>, Integer>())
            .returns(BasicTypeInfo.INT_TYPE_INFO);

    List<Integer> result = combined.collect();

    String expectedResult = "2\n" + "3\n" + "1\n";

    compareResultAsText(result, expectedResult);
}
.combineGroup( new GroupCombineFunction<Tuple3<Integer, Long, String>, Tuple2<Long, Integer>>() { @Override
@Test public void testPartialReduceWithIdenticalInputOutputType() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // data DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env); DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> dsWrapped = ds // wrap values as Kv pairs with the grouping key as key .map(new Tuple3KvWrapper()); List<Tuple3<Integer, Long, String>> result = dsWrapped .groupBy(0) // reduce partially .combineGroup(new Tuple3toTuple3GroupReduce()) .groupBy(0) // reduce fully to check result .reduceGroup(new Tuple3toTuple3GroupReduce()) //unwrap .map(new MapFunction<Tuple2<Long, Tuple3<Integer, Long, String>>, Tuple3<Integer, Long, String>>() { @Override public Tuple3<Integer, Long, String> map(Tuple2<Long, Tuple3<Integer, Long, String>> value) throws Exception { return value.f1; } }).collect(); String expected = "1,1,combined\n" + "5,4,combined\n" + "15,9,combined\n" + "34,16,combined\n" + "65,25,combined\n" + "111,36,combined\n"; compareResultAsTuples(result, expected); }
@Test public void testPartialReduceWithDifferentInputOutputType() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // data DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env); DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> dsWrapped = ds // wrap values as Kv pairs with the grouping key as key .map(new Tuple3KvWrapper()); List<Tuple2<Integer, Long>> result = dsWrapped .groupBy(0) // reduce partially .combineGroup(new Tuple3toTuple2GroupReduce()) .groupBy(0) // reduce fully to check result .reduceGroup(new Tuple2toTuple2GroupReduce()) //unwrap .map(new MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>>() { @Override public Tuple2<Integer, Long> map(Tuple2<Long, Tuple2<Integer, Long>> value) throws Exception { return value.f1; } }).collect(); String expected = "1,3\n" + "5,20\n" + "15,58\n" + "34,52\n" + "65,70\n" + "111,96\n"; compareResultAsTuples(result, expected); }
@Test // check if all API methods are callable public void testAPI() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple1<String>> ds = CollectionDataSets.getStringDataSet(env).map(new MapFunction<String, Tuple1<String>>() { @Override public Tuple1<String> map(String value) throws Exception { return new Tuple1<>(value); } }); // all methods on DataSet ds.combineGroup(new GroupCombineFunctionExample()) .output(new DiscardingOutputFormat<Tuple1<String>>()); // all methods on UnsortedGrouping ds.groupBy(0).combineGroup(new GroupCombineFunctionExample()) .output(new DiscardingOutputFormat<Tuple1<String>>()); // all methods on SortedGrouping ds.groupBy(0).sortGroup(0, Order.ASCENDING).combineGroup(new GroupCombineFunctionExample()) .output(new DiscardingOutputFormat<Tuple1<String>>()); env.execute(); }
/**
 * Checks the semantic properties derived for a combine on a key-selector grouping
 * with {@code DummyGroupCombineFunction4}.
 */
@Test
public void testSemanticPropsWithKeySelector7() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs =
            env.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupCombineOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> combineOp =
            tupleDs
                    .groupBy(new DummyTestKeySelector())
                    .combineGroup(new DummyGroupCombineFunction4());

    SemanticProperties semProps = combineOp.getSemanticProperties();

    // NOTE(review): input indices 0-6 suggest the key selector prepends extracted key
    // fields ahead of the five tuple fields — confirm against DummyTestKeySelector.
    assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 2).contains(0));
    assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 5).contains(3));
    assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 0);

    assertTrue(semProps.getForwardingSourceField(0, 0) == 2);
    assertTrue(semProps.getForwardingSourceField(0, 1) == 3);
    assertTrue(semProps.getForwardingSourceField(0, 2) < 0);
    assertTrue(semProps.getForwardingSourceField(0, 3) == 5);
    assertTrue(semProps.getForwardingSourceField(0, 4) < 0);

    assertTrue(semProps.getReadFields(0) == null);
}
/**
 * Checks the semantic properties derived for a combine on a key-selector grouping
 * when forwarded fields are declared via {@code withForwardedFields}.
 */
@Test
public void testSemanticPropsWithKeySelector5() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs =
            env.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupCombineOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> combineOp =
            tupleDs
                    .groupBy(new DummyTestKeySelector())
                    .combineGroup(new DummyGroupCombineFunction3())
                    .withForwardedFields("4->0;3;3->1;2");

    SemanticProperties semProps = combineOp.getSemanticProperties();

    assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 2);
    assertTrue(semProps.getForwardingTargetFields(0, 5).contains(1));
    assertTrue(semProps.getForwardingTargetFields(0, 5).contains(3));
    assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 6).contains(0));

    assertTrue(semProps.getForwardingSourceField(0, 0) == 6);
    assertTrue(semProps.getForwardingSourceField(0, 1) == 5);
    assertTrue(semProps.getForwardingSourceField(0, 2) == 4);
    assertTrue(semProps.getForwardingSourceField(0, 3) == 5);
    assertTrue(semProps.getForwardingSourceField(0, 4) < 0);

    assertTrue(semProps.getReadFields(0) == null);
}
/**
 * Checks the semantic properties derived for a combine on a key-selector grouping
 * with {@code DummyGroupCombineFunction1} (forwarded and read fields from annotations).
 */
@Test
public void testSemanticPropsWithKeySelector1() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs =
            env.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupCombineOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> combineOp =
            tupleDs
                    .groupBy(new DummyTestKeySelector())
                    .combineGroup(new DummyGroupCombineFunction1());

    SemanticProperties semProps = combineOp.getSemanticProperties();

    assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 2);
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(3));
    assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 0);

    assertTrue(semProps.getForwardingSourceField(0, 0) < 0);
    assertTrue(semProps.getForwardingSourceField(0, 1) == 3);
    assertTrue(semProps.getForwardingSourceField(0, 2) == 4);
    assertTrue(semProps.getForwardingSourceField(0, 3) == 3);
    assertTrue(semProps.getForwardingSourceField(0, 4) == 2);

    assertTrue(semProps.getReadFields(0).size() == 3);
    assertTrue(semProps.getReadFields(0).contains(2));
    assertTrue(semProps.getReadFields(0).contains(5));
    assertTrue(semProps.getReadFields(0).contains(6));
}
/**
 * Checks the semantic properties derived for a combine on a key-selector grouping
 * when forwarded fields are declared via {@code withForwardedFields}; expects the
 * same properties as the annotation-based variant.
 */
@Test
public void testSemanticPropsWithKeySelector3() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs =
            env.fromCollection(emptyTupleData, tupleTypeInfo);

    GroupCombineOperator<Tuple5<Integer, Long, String, Long, Integer>, Tuple5<Integer, Long, String, Long, Integer>> combineOp =
            tupleDs
                    .groupBy(new DummyTestKeySelector())
                    .combineGroup(new DummyGroupCombineFunction2())
                    .withForwardedFields("0->4;1;1->3;2");

    SemanticProperties semProps = combineOp.getSemanticProperties();

    assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
    assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 2);
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(semProps.getForwardingTargetFields(0, 3).contains(3));
    assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
    assertTrue(semProps.getForwardingTargetFields(0, 4).contains(2));
    assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
    assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 0);

    assertTrue(semProps.getForwardingSourceField(0, 0) < 0);
    assertTrue(semProps.getForwardingSourceField(0, 1) == 3);
    assertTrue(semProps.getForwardingSourceField(0, 2) == 4);
    assertTrue(semProps.getForwardingSourceField(0, 3) == 3);
    assertTrue(semProps.getForwardingSourceField(0, 4) == 2);

    assertTrue(semProps.getReadFields(0).size() == 3);
    assertTrue(semProps.getReadFields(0).contains(2));
    assertTrue(semProps.getReadFields(0).contains(5));
    assertTrue(semProps.getReadFields(0).contains(6));
}
/**
 * Applies an aggregate function to the partitions of an edge data set.
 *
 * @param edges    edge data set
 * @param graphIds graph ids to aggregate
 * @return partition aggregate value
 */
private DataSet<Tuple2<GradoopId, Map<String, PropertyValue>>> aggregateEdges(
  DataSet<Edge> edges, DataSet<GradoopId> graphIds) {
  // restrict to elements of the selected graphs, then combine per graph id
  // using only the edge-aggregation functions
  return edges
    .flatMap(new ElementsOfSelectedGraphs<>())
    .withBroadcastSet(graphIds, ElementsOfSelectedGraphs.GRAPH_IDS)
    .groupBy(0)
    .combineGroup(new ApplyAggregateElements<>(
      aggregateFunctions.stream()
        .filter(AggregateFunction::isEdgeAggregation)
        .collect(Collectors.toSet())));
}
/**
 * Applies an aggregate function to the partitions of a vertex data set.
 *
 * @param vertices vertex data set
 * @param graphIds graph ids to aggregate
 * @return partition aggregate value
 */
private DataSet<Tuple2<GradoopId, Map<String, PropertyValue>>> aggregateVertices(
  DataSet<Vertex> vertices, DataSet<GradoopId> graphIds) {
  // restrict to elements of the selected graphs, then combine per graph id
  // using only the vertex-aggregation functions
  return vertices
    .flatMap(new ElementsOfSelectedGraphs<>())
    .withBroadcastSet(graphIds, ElementsOfSelectedGraphs.GRAPH_IDS)
    .groupBy(0)
    .combineGroup(new ApplyAggregateElements<>(
      aggregateFunctions.stream()
        .filter(AggregateFunction::isVertexAggregation)
        .collect(Collectors.toSet())));
}
/**
 * Applies an aggregate function to the partitions of a vertex data set.
 *
 * @param vertices vertex data set
 * @param graphIds graph ids to aggregate
 * @return partition aggregate value
 */
private DataSet<Tuple2<GradoopId, Map<String, PropertyValue>>> aggregateVertices(
  DataSet<Vertex> vertices, DataSet<GradoopId> graphIds) {
  // keep only elements contained in the selected graphs, group them by
  // graph id, and partially aggregate with the vertex-aggregation functions
  return vertices
    .flatMap(new ElementsOfSelectedGraphs<>())
    .withBroadcastSet(graphIds, ElementsOfSelectedGraphs.GRAPH_IDS)
    .groupBy(0)
    .combineGroup(new ApplyAggregateElements<>(
      aggregateFunctions.stream()
        .filter(AggregateFunction::isVertexAggregation)
        .collect(Collectors.toSet())));
}
/**
 * Applies an aggregate function to the partitions of an edge data set.
 *
 * @param edges    edge data set
 * @param graphIds graph ids to aggregate
 * @return partition aggregate value
 */
private DataSet<Tuple2<GradoopId, Map<String, PropertyValue>>> aggregateEdges(
  DataSet<Edge> edges, DataSet<GradoopId> graphIds) {
  // keep only elements contained in the selected graphs, group them by
  // graph id, and partially aggregate with the edge-aggregation functions
  return edges
    .flatMap(new ElementsOfSelectedGraphs<>())
    .withBroadcastSet(graphIds, ElementsOfSelectedGraphs.GRAPH_IDS)
    .groupBy(0)
    .combineGroup(new ApplyAggregateElements<>(
      aggregateFunctions.stream()
        .filter(AggregateFunction::isEdgeAggregation)
        .collect(Collectors.toSet())));
}
}
/** * Prepares the initial working set for the bulk iteration. * * @param triples matching triples from the input graph * @return data set containing fat vertices */ private DataSet<FatVertex> buildInitialWorkingSet( DataSet<TripleWithCandidates<GradoopId>> triples) { return triples.flatMap(new CloneAndReverse()) .groupBy(1) // sourceId .combineGroup(new BuildFatVertex(getQuery())) .groupBy(0) // vertexId .reduceGroup(new GroupedFatVertices()); }
/** * Prepares the initial working set for the bulk iteration. * * @param triples matching triples from the input graph * @return data set containing fat vertices */ private DataSet<FatVertex> buildInitialWorkingSet( DataSet<TripleWithCandidates<GradoopId>> triples) { return triples.flatMap(new CloneAndReverse()) .groupBy(1) // sourceId .combineGroup(new BuildFatVertex(getQuery())) .groupBy(0) // vertexId .reduceGroup(new GroupedFatVertices()); }
/**
 * Filters and casts EPGM edges from a given set of {@link Element}.
 *
 * @param elements   EPGM elements
 * @param edgeType   edge type
 * @param mayOverlap edges may be contained in multiple graphs
 * @return EPGM edges
 */
public static DataSet<Edge> extractEdges(DataSet<Element> elements,
  Class<Edge> edgeType, boolean mayOverlap) {
  DataSet<Edge> result = elements
    .filter(new IsInstance<>(edgeType))
    .map(new Cast<>(edgeType))
    .returns(TypeExtractor.createTypeInfo(edgeType));

  if (!mayOverlap) {
    return result;
  }

  // merge the graph memberships of duplicate edges (combine, then reduce)
  return result
    .groupBy(new Id<>())
    .combineGroup(new MergedGraphIds<>())
    .groupBy(new Id<>())
    .reduceGroup(new MergedGraphIds<>());
}
/**
 * Filters and casts EPGM edges from a given set of {@link Element}.
 *
 * @param elements   EPGM elements
 * @param edgeType   edge type
 * @param mayOverlap edges may be contained in multiple graphs
 * @return EPGM edges
 */
public static DataSet<Edge> extractEdges(DataSet<Element> elements,
  Class<Edge> edgeType, boolean mayOverlap) {
  DataSet<Edge> edges = elements
    .filter(new IsInstance<>(edgeType))
    .map(new Cast<>(edgeType))
    .returns(TypeExtractor.createTypeInfo(edgeType));

  if (mayOverlap) {
    // duplicate edges may carry different graph ids: merge their memberships
    edges = edges
      .groupBy(new Id<>())
      .combineGroup(new MergedGraphIds<>())
      .groupBy(new Id<>())
      .reduceGroup(new MergedGraphIds<>());
  }

  return edges;
}
@Override
public DataSet<GraphTransaction> getGraphTransactions() {
  // pair every vertex with each graph id it belongs to
  DataSet<Tuple2<GradoopId, GraphElement>> vertexTuples = getVertices()
    .map(new Cast<>(GraphElement.class))
    .returns(TypeExtractor.getForClass(GraphElement.class))
    .flatMap(new GraphElementExpander<>());

  // pair every edge with each graph id it belongs to
  DataSet<Tuple2<GradoopId, GraphElement>> edgeTuples = getEdges()
    .map(new Cast<>(GraphElement.class))
    .returns(TypeExtractor.getForClass(GraphElement.class))
    .flatMap(new GraphElementExpander<>());

  // collect the vertex and edge sets per graph id (combine, then reduce)
  DataSet<Tuple3<GradoopId, Set<Vertex>, Set<Edge>>> transactions = vertexTuples
    .union(edgeTuples)
    .groupBy(0)
    .combineGroup(new GraphVerticesEdges())
    .groupBy(0)
    .reduceGroup(new GraphVerticesEdges());

  // attach each graph head to its element sets; the left outer join keeps
  // graph heads without any elements
  return getGraphHeads()
    .leftOuterJoin(transactions)
    .where(new Id<>()).equalTo(0)
    .with(new TransactionFromSets());
}