return false; }).withBroadcastSet(solution, "SOLUTION")).output(new DiscardingOutputFormat<Long>()); env.execute();
/**
 * Applies {@code RichFilter1} to the 3-tuple data set while the integer data set is attached as
 * the broadcast set named "ints", then compares the collected tuples with the expected rows.
 */
@Test
public void testRichFilterOnStringTupleField() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Integer> broadcastInts = CollectionDataSets.getIntegerDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);

    // Filter with the broadcast set attached and materialize the outcome in one pipeline.
    List<Tuple3<Integer, Long, String>> actual = tuples
        .filter(new RichFilter1())
        .withBroadcastSet(broadcastInts, "ints")
        .collect();

    String expected =
        "1,1,Hi\n"
            + "2,2,Hello\n"
            + "3,2,Hello world\n"
            + "4,3,Hello world, how are you?\n";

    compareResultAsTuples(actual, expected);
}
/**
 * Applies {@code RichFilter2} to the 3-tuple data set while the integer data set is attached as
 * the broadcast set named "ints", then compares the collected tuples with the expected rows.
 */
@Test
public void testFilterWithBroadcastVariables() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Integer> broadcastInts = CollectionDataSets.getIntegerDataSet(env);
    DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);

    // Filter with the broadcast set attached and materialize the outcome in one pipeline.
    List<Tuple3<Integer, Long, String>> actual = tuples
        .filter(new RichFilter2())
        .withBroadcastSet(broadcastInts, "ints")
        .collect();

    String expected =
        "11,5,Comment#5\n"
            + "12,5,Comment#6\n"
            + "13,5,Comment#7\n"
            + "14,5,Comment#8\n"
            + "15,5,Comment#9\n";

    compareResultAsTuples(actual, expected);
}
/**
 * Restricts an edge dataset by means of a set of graph identifiers.
 *
 * <p>The identifiers are handed to an {@code InAllGraphsBroadcast} filter as the broadcast set
 * registered under {@code InAllGraphsBroadcast.GRAPH_IDS}.
 *
 * @param edges edges to be filtered
 * @param ids   graph identifiers made available to the filter
 * @return edges accepted by the filter
 */
protected DataSet<Edge> getEdges(DataSet<Edge> edges, DataSet<GradoopId> ids) {
  InAllGraphsBroadcast<Edge> containmentFilter = new InAllGraphsBroadcast<>();
  return edges.filter(containmentFilter).withBroadcastSet(ids, InAllGraphsBroadcast.GRAPH_IDS);
}
}
/**
 * Restricts a vertex dataset by means of a set of graph identifiers.
 *
 * <p>The identifiers are handed to an {@code InAllGraphsBroadcast} filter as the broadcast set
 * registered under {@code InAllGraphsBroadcast.GRAPH_IDS}.
 *
 * @param vertices vertices to be filtered
 * @param ids      graph identifiers made available to the filter
 * @return vertices accepted by the filter
 */
protected DataSet<Vertex> getVertices(DataSet<Vertex> vertices, DataSet<GradoopId> ids) {
  InAllGraphsBroadcast<Vertex> containmentFilter = new InAllGraphsBroadcast<>();
  return vertices.filter(containmentFilter).withBroadcastSet(ids, InAllGraphsBroadcast.GRAPH_IDS);
}
/**
 * Narrows the given vertices down using the supplied graph identifiers.
 *
 * <p>An {@code InAllGraphsBroadcast} filter is applied and receives the identifiers through the
 * broadcast set named {@code InAllGraphsBroadcast.GRAPH_IDS}.
 *
 * @param vertices vertices to be filtered
 * @param ids      graph identifiers broadcast to the filter
 * @return vertices that pass the filter
 */
protected DataSet<Vertex> getVertices(DataSet<Vertex> vertices, DataSet<GradoopId> ids) {
  InAllGraphsBroadcast<Vertex> inAllGraphs = new InAllGraphsBroadcast<>();
  return vertices.filter(inAllGraphs).withBroadcastSet(ids, InAllGraphsBroadcast.GRAPH_IDS);
}
/**
 * Narrows the given edges down using the supplied graph identifiers.
 *
 * <p>An {@code InAllGraphsBroadcast} filter is applied and receives the identifiers through the
 * broadcast set named {@code InAllGraphsBroadcast.GRAPH_IDS}.
 *
 * @param edges edges to be filtered
 * @param ids   graph identifiers broadcast to the filter
 * @return edges that pass the filter
 */
protected DataSet<Edge> getEdges(DataSet<Edge> edges, DataSet<GradoopId> ids) {
  InAllGraphsBroadcast<Edge> inAllGraphs = new InAllGraphsBroadcast<>();
  return edges.filter(inAllGraphs).withBroadcastSet(ids, InAllGraphsBroadcast.GRAPH_IDS);
}
}
/**
 * Filters the vertices of the first collection with {@code InAnyGraphBroadcast}, broadcasting
 * the identifiers of the second collection's graph heads under
 * {@code GraphsContainmentFilterBroadcast.GRAPH_IDS}.
 *
 * @param newSubgraphs graph heads of the result; not read by this implementation
 * @return vertices of the first collection that pass the filter
 */
@Override
protected DataSet<Vertex> computeNewVertices(DataSet<GraphHead> newSubgraphs) {
  DataSet<GraphHead> secondHeads = secondCollection.getGraphHeads();
  DataSet<GradoopId> secondIds = secondHeads.map(new Id<GraphHead>());

  DataSet<Vertex> firstVertices = firstCollection.getVertices();
  return firstVertices
    .filter(new InAnyGraphBroadcast<Vertex>())
    .withBroadcastSet(secondIds, GraphsContainmentFilterBroadcast.GRAPH_IDS);
}
/**
 * Keeps those vertices of the first collection that pass {@code InAnyGraphBroadcast}; the filter
 * is given the identifiers of the second collection's graph heads as the broadcast set named
 * {@code GraphsContainmentFilterBroadcast.GRAPH_IDS}.
 *
 * @param newSubgraphs graph heads of the result; not read by this implementation
 * @return vertices of the first collection that pass the filter
 */
@Override
protected DataSet<Vertex> computeNewVertices(DataSet<GraphHead> newSubgraphs) {
  DataSet<GraphHead> headsOfSecond = secondCollection.getGraphHeads();
  DataSet<GradoopId> idsOfSecond = headsOfSecond.map(new Id<GraphHead>());

  DataSet<Vertex> verticesOfFirst = firstCollection.getVertices();
  return verticesOfFirst
    .filter(new InAnyGraphBroadcast<Vertex>())
    .withBroadcastSet(idsOfSecond, GraphsContainmentFilterBroadcast.GRAPH_IDS);
}
}
/**
 * Derives the vertex set of the resulting graph collection.
 *
 * <p>The identifiers of the resulting graph heads are broadcast under
 * {@code GraphsContainmentFilterBroadcast.GRAPH_IDS} to an {@code InAnyGraphBroadcast} filter
 * that is applied to the vertices of the first collection.
 *
 * @param newGraphHeads graph heads of the resulting graph collection
 * @return vertex set of the resulting graph collection
 */
@Override
protected DataSet<Vertex> computeNewVertices(DataSet<GraphHead> newGraphHeads) {
  DataSet<GradoopId> resultIds = newGraphHeads.map(new Id<GraphHead>());

  DataSet<Vertex> firstVertices = firstCollection.getVertices();
  return firstVertices
    .filter(new InAnyGraphBroadcast<Vertex>())
    .withBroadcastSet(resultIds, GraphsContainmentFilterBroadcast.GRAPH_IDS);
}
/**
 * Produces the vertices belonging to the resulting graph collection.
 *
 * <p>Each resulting graph head is mapped to its identifier; these identifiers are broadcast
 * under {@code GraphsContainmentFilterBroadcast.GRAPH_IDS} to an {@code InAnyGraphBroadcast}
 * filter that runs over the vertices of the first collection.
 *
 * @param newGraphHeads graph heads of the resulting graph collection
 * @return vertex set of the resulting graph collection
 */
@Override
protected DataSet<Vertex> computeNewVertices(DataSet<GraphHead> newGraphHeads) {
  DataSet<GradoopId> headIds = newGraphHeads.map(new Id<GraphHead>());

  DataSet<Vertex> verticesOfFirst = firstCollection.getVertices();
  return verticesOfFirst
    .filter(new InAnyGraphBroadcast<Vertex>())
    .withBroadcastSet(headIds, GraphsContainmentFilterBroadcast.GRAPH_IDS);
}
}
/**
 * Extracts the frequent subgraphs from a set of embeddings.
 *
 * <p>Every embeddings record is mapped with {@code TFSMSubgraphOnly}; the mapped records are
 * grouped on field 0 and field 1 is summed. The aggregated records are then passed through the
 * {@code Frequent} filter, which receives {@code minFrequency} as the broadcast set named
 * {@code DIMSpanConstants.MIN_FREQUENCY}.
 *
 * @param embeddings set of embeddings
 * @return frequent subgraphs
 */
private DataSet<TFSMSubgraph> getFrequentSubgraphs(
  DataSet<TFSMSubgraphEmbeddings> embeddings) {

  // aggregate first ...
  DataSet<TFSMSubgraph> aggregated = embeddings
    .map(new TFSMSubgraphOnly())
    .groupBy(0)
    .sum(1);

  // ... then keep only what the frequency filter accepts
  return aggregated
    .filter(new Frequent<>())
    .withBroadcastSet(minFrequency, DIMSpanConstants.MIN_FREQUENCY);
}
/**
 * Builds a graph collection from the first {@code limit} graph heads of the input.
 *
 * <p>The identifiers of the selected heads are broadcast under
 * {@code GraphsContainmentFilterBroadcast.GRAPH_IDS} to {@code InAnyGraphBroadcast} filters that
 * run over the input's vertices and edges.
 *
 * @param collection input collection
 * @return collection created from the selected heads and the filtered vertices and edges
 */
@Override
public GraphCollection execute(GraphCollection collection) {
  DataSet<GraphHead> selectedHeads = collection.getGraphHeads().first(limit);
  DataSet<GradoopId> selectedIds = selectedHeads.map(new Id<>());

  DataSet<Vertex> allVertices = collection.getVertices();
  DataSet<Vertex> keptVertices = allVertices
    .filter(new InAnyGraphBroadcast<>())
    .withBroadcastSet(selectedIds, GraphsContainmentFilterBroadcast.GRAPH_IDS);

  DataSet<Edge> allEdges = collection.getEdges();
  DataSet<Edge> keptEdges = allEdges
    .filter(new InAnyGraphBroadcast<>())
    .withBroadcastSet(selectedIds, GraphsContainmentFilterBroadcast.GRAPH_IDS);

  return collection.getConfig()
    .getGraphCollectionFactory()
    .fromDataSets(selectedHeads, keptVertices, keptEdges);
}
/**
 * Computes the frequent subgraphs contained in a set of embeddings.
 *
 * <p>The embeddings are mapped with {@code TFSMSubgraphOnly}, grouped on field 0 and summed on
 * field 1. The {@code Frequent} filter is applied to that aggregate with {@code minFrequency}
 * attached as the broadcast set named {@code DIMSpanConstants.MIN_FREQUENCY}.
 *
 * @param embeddings set of embeddings
 * @return frequent subgraphs
 */
private DataSet<TFSMSubgraph> getFrequentSubgraphs(
  DataSet<TFSMSubgraphEmbeddings> embeddings) {

  DataSet<TFSMSubgraph> summed = embeddings
    .map(new TFSMSubgraphOnly())
    .groupBy(0)
    .sum(1);

  return summed
    .filter(new Frequent<>())
    .withBroadcastSet(minFrequency, DIMSpanConstants.MIN_FREQUENCY);
}
/**
 * Creates a graph collection restricted to the first {@code limit} graph heads of the input.
 *
 * <p>Vertices and edges of the input are each passed through an {@code InAnyGraphBroadcast}
 * filter that receives the identifiers of the selected heads as the broadcast set named
 * {@code GraphsContainmentFilterBroadcast.GRAPH_IDS}.
 *
 * @param collection input collection
 * @return collection created from the selected heads and the filtered vertices and edges
 */
@Override
public GraphCollection execute(GraphCollection collection) {
  DataSet<GraphHead> headsWithinLimit = collection.getGraphHeads().first(limit);
  DataSet<GradoopId> idsWithinLimit = headsWithinLimit.map(new Id<>());

  DataSet<Vertex> inputVertices = collection.getVertices();
  DataSet<Vertex> verticesWithinLimit = inputVertices
    .filter(new InAnyGraphBroadcast<>())
    .withBroadcastSet(idsWithinLimit, GraphsContainmentFilterBroadcast.GRAPH_IDS);

  DataSet<Edge> inputEdges = collection.getEdges();
  DataSet<Edge> edgesWithinLimit = inputEdges
    .filter(new InAnyGraphBroadcast<>())
    .withBroadcastSet(idsWithinLimit, GraphsContainmentFilterBroadcast.GRAPH_IDS);

  return collection.getConfig()
    .getGraphCollectionFactory()
    .fromDataSets(headsWithinLimit, verticesWithinLimit, edgesWithinLimit);
}
}
/** * Selects vertices and edges for a selected subset of graph heads / graph ids. * Creates a graph collection representing selection result. * * @param collection input collection * @param graphHeads selected graph heads * * @return selection result */ protected GraphCollection selectVerticesAndEdges( GraphCollection collection, DataSet<GraphHead> graphHeads) { // get the identifiers of these logical graphs DataSet<GradoopId> graphIds = graphHeads.map(new Id<GraphHead>()); // use graph ids to filter vertices from the actual graph structure DataSet<Vertex> vertices = collection.getVertices() .filter(new InAnyGraphBroadcast<>()) .withBroadcastSet(graphIds, GraphsContainmentFilterBroadcast.GRAPH_IDS); DataSet<Edge> edges = collection.getEdges() .filter(new InAnyGraphBroadcast<>()) .withBroadcastSet(graphIds, GraphsContainmentFilterBroadcast.GRAPH_IDS); return collection.getConfig().getGraphCollectionFactory() .fromDataSets(graphHeads, vertices, edges); }
@Override public GraphCollection execute(GraphCollection collection) { // create canonical labels for all graph heads and choose representative for all distinct ones DataSet<GradoopId> graphIds = getCanonicalLabels(collection) .distinct(1) .map(new IdFromGraphHeadString()); DataSet<GraphHead> graphHeads = collection.getGraphHeads() .filter(new IdInBroadcast<>()) .withBroadcastSet(graphIds, IdInBroadcast.IDS); return selectVerticesAndEdges(collection, graphHeads); }
@Override public GraphCollection execute(GraphCollection collection) { // create canonical labels for all graph heads and choose representative for all distinct ones DataSet<GradoopId> graphIds = getCanonicalLabels(collection) .distinct(1) .map(new IdFromGraphHeadString()); DataSet<GraphHead> graphHeads = collection.getGraphHeads() .filter(new IdInBroadcast<>()) .withBroadcastSet(graphIds, IdInBroadcast.IDS); return selectVerticesAndEdges(collection, graphHeads); } }
/**
 * Builds a logical graph from the elements of the starting graph that are not part of any other
 * graph in the given collection.
 *
 * <p>Graph heads are filtered with {@code ByDifferentId(startId)} and mapped to their
 * identifiers. Vertices and edges are first restricted with {@code InGraph(startId)} and then
 * passed through {@code NotInGraphsBroadcast}, which receives those identifiers as the
 * broadcast set named {@code NotInGraphsBroadcast.GRAPH_IDS}.
 *
 * @param collection input collection
 * @return excluded graph
 */
@Override
public LogicalGraph execute(GraphCollection collection) {
  // identifiers of the graph heads kept by the ByDifferentId filter
  DataSet<GraphHead> otherHeads = collection.getGraphHeads()
    .filter(new ByDifferentId<GraphHead>(startId));
  DataSet<GradoopId> otherIds = otherHeads.map(new Id<GraphHead>());

  // vertices: restrict to the start graph, then apply the broadcast filter
  DataSet<Vertex> startVertices = collection.getVertices()
    .filter(new InGraph<Vertex>(startId));
  DataSet<Vertex> exclusiveVertices = startVertices
    .filter(new NotInGraphsBroadcast<Vertex>())
    .withBroadcastSet(otherIds, NotInGraphsBroadcast.GRAPH_IDS);

  // edges: same two-stage filtering
  DataSet<Edge> startEdges = collection.getEdges()
    .filter(new InGraph<Edge>(startId));
  DataSet<Edge> exclusiveEdges = startEdges
    .filter(new NotInGraphsBroadcast<Edge>())
    .withBroadcastSet(otherIds, NotInGraphsBroadcast.GRAPH_IDS);

  return collection.getConfig()
    .getLogicalGraphFactory()
    .fromDataSets(exclusiveVertices, exclusiveEdges);
}
/**
 * Filters a dataset of graph elements against a single logical graph.
 *
 * <p>The graph head of {@code g} is mapped to its identifier and broadcast under
 * {@code GraphContainmentFilterBroadcast.GRAPH_ID}. Depending on {@code inGraph}, either an
 * {@code InGraphBroadcast} or a {@code NotInGraphBroadcast} filter is applied.
 *
 * @param collection elements to be filtered
 * @param g          graph whose head identifier is broadcast to the filter
 * @param inGraph    {@code true} applies {@code InGraphBroadcast} (intersection),
 *                   {@code false} applies {@code NotInGraphBroadcast} (difference)
 * @param <P>        element type, e.g. vertices or edges
 * @return the filtered dataset
 */
public static <P extends GraphElement> DataSet<P> areElementsInGraph(DataSet<P> collection,
  LogicalGraph g, boolean inGraph) {

  DataSet<?> graphId = g.getGraphHead().map(new Id<>());

  if (inGraph) {
    return collection
      .filter(new InGraphBroadcast<>())
      .withBroadcastSet(graphId, GraphContainmentFilterBroadcast.GRAPH_ID);
  }

  return collection
    .filter(new NotInGraphBroadcast<>())
    .withBroadcastSet(graphId, GraphContainmentFilterBroadcast.GRAPH_ID);
}