/**
 * Builds a graph from an existing vertex DataSet and an existing edge DataSet.
 *
 * <p>Both DataSets are handed to the {@code Graph} constructor unchanged.
 *
 * @param vertices the DataSet holding the vertices of the graph.
 * @param edges the DataSet holding the edges of the graph.
 * @param context the Flink execution environment.
 * @param <K> the type of the vertex IDs
 * @param <VV> the type of the vertex values
 * @param <EV> the type of the edge values
 * @return a new graph wrapping the given vertices and edges.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromDataSet(
		DataSet<Vertex<K, VV>> vertices,
		DataSet<Edge<K, EV>> edges,
		ExecutionEnvironment context) {

	final Graph<K, VV, EV> graph = new Graph<>(vertices, edges, context);
	return graph;
}
/**
 * Returns the sub-graph obtained by keeping only the edges accepted by the
 * given predicate. The vertex set is carried over untouched.
 *
 * @param edgeFilter the predicate applied to every edge.
 * @return a new graph with the same vertices and the filtered edges.
 */
public Graph<K, VV, EV> filterOnEdges(FilterFunction<Edge<K, EV>> edgeFilter) {
	DataSet<Edge<K, EV>> keptEdges = edges
		.filter(edgeFilter)
		.name("Filter on edges");

	return new Graph<>(vertices, keptEdges, context);
}
/**
 * Produces a graph in which every edge has been passed through a
 * {@code ReverseEdgesMap}, i.e. the direction of each edge is reversed.
 * The vertex set is reused as is.
 *
 * @return a new graph with all edges reversed
 * @throws UnsupportedOperationException declared in the signature; nothing in
 *         this method body throws it explicitly
 */
public Graph<K, VV, EV> reverse() throws UnsupportedOperationException {
	DataSet<Edge<K, EV>> flippedEdges = this.edges
		.map(new ReverseEdgesMap<>())
		.name("Reverse edges");

	return new Graph<>(this.vertices, flippedEdges, this.context);
}
/**
 * Builds the undirected version of this graph by adding the
 * inverse-direction counterpart of every edge.
 *
 * <p>The edges are flat-mapped through a {@code RegularAndReversedEdgesMap};
 * the vertices are reused unchanged.
 *
 * @return the undirected graph.
 */
public Graph<K, VV, EV> getUndirected() {
	DataSet<Edge<K, EV>> bothDirections = this.edges
		.flatMap(new RegularAndReversedEdgesMap<>())
		.name("To undirected graph");

	return new Graph<>(this.vertices, bothDirections, this.context);
}
/**
 * Drops every edge matching the given edge from the graph.
 *
 * <p>The current edges are filtered with an {@code EdgeRemovalEdgeFilter}
 * constructed from {@code edge}; the vertices are left as they are.
 *
 * @param edge the edge to remove
 * @return a new graph holding the existing vertices and the edges that
 *         survived the removal
 */
public Graph<K, VV, EV> removeEdge(Edge<K, EV> edge) {
	final EdgeRemovalEdgeFilter<K, EV> removalFilter = new EdgeRemovalEdgeFilter<>(edge);

	DataSet<Edge<K, EV>> survivingEdges = getEdges()
		.filter(removalFilter)
		.name("Remove edge");

	return new Graph<>(vertices, survivingEdges, context);
}
/**
 * Executes a configured Gather-Sum-Apply iteration over this graph.
 *
 * <p>The iteration is built on top of the current edges, configured with
 * {@code parameters} and then run as a custom operation on the vertex
 * DataSet. The resulting graph shares the edges of this graph.
 *
 * @param gatherFunction the gather function collects information about adjacent vertices and edges
 * @param sumFunction the sum function aggregates the gathered information
 * @param applyFunction the apply function updates the vertex values with the aggregates
 * @param maximumNumberOfIterations maximum number of iterations to perform
 * @param parameters the iteration configuration parameters
 * @param <M> the intermediate type used between gather, sum and apply
 *
 * @return the updated Graph after the gather-sum-apply iteration has converged or
 *         after maximumNumberOfIterations.
 */
public <M> Graph<K, VV, EV> runGatherSumApplyIteration(
		org.apache.flink.graph.gsa.GatherFunction<VV, EV, M> gatherFunction,
		SumFunction<VV, EV, M> sumFunction,
		ApplyFunction<K, VV, M> applyFunction,
		int maximumNumberOfIterations,
		GSAConfiguration parameters) {

	GatherSumApplyIteration<K, VV, EV, M> gsaIteration =
		GatherSumApplyIteration.withEdges(
			this.edges,
			gatherFunction,
			sumFunction,
			applyFunction,
			maximumNumberOfIterations);

	// the configuration has to be in place before the operation is run
	gsaIteration.configure(parameters);

	DataSet<Vertex<K, VV>> updatedVertices = this.vertices.runOperation(gsaIteration);

	return new Graph<>(updatedVertices, edges, context);
}
/**
 * Executes a configured {@link VertexCentricIteration} over this graph.
 *
 * <p>The iteration is built on top of the current edges, configured with
 * {@code parameters} and then run as a custom operation on the vertex
 * DataSet. The resulting graph shares the edges of this graph.
 *
 * @param computeFunction the vertex compute function
 * @param combiner an optional message combiner
 * @param maximumNumberOfIterations maximum number of iterations to perform
 * @param parameters the {@link VertexCentricConfiguration} parameters
 * @param <M> the type parameter shared by the compute function and the
 *        message combiner (presumably the message type)
 *
 * @return the updated Graph after the vertex-centric iteration has converged or
 *         after maximumNumberOfIterations.
 */
public <M> Graph<K, VV, EV> runVertexCentricIteration(
		ComputeFunction<K, VV, EV, M> computeFunction,
		MessageCombiner<K, M> combiner,
		int maximumNumberOfIterations,
		VertexCentricConfiguration parameters) {

	VertexCentricIteration<K, VV, EV, M> vertexCentric =
		VertexCentricIteration.withEdges(
			this.edges,
			computeFunction,
			combiner,
			maximumNumberOfIterations);

	// the configuration has to be in place before the operation is run
	vertexCentric.configure(parameters);

	DataSet<Vertex<K, VV>> updatedVertices = getVertices().runOperation(vertexCentric);

	return new Graph<>(updatedVertices, edges, context);
}
/**
 * Executes a configured ScatterGather iteration over this graph.
 *
 * <p>The iteration is built on top of the current edges, configured with
 * {@code parameters} and then run as a custom operation on the vertex
 * DataSet. The resulting graph shares the edges of this graph.
 *
 * @param scatterFunction the scatter function
 * @param gatherFunction the gather function
 * @param maximumNumberOfIterations maximum number of iterations to perform
 * @param parameters the iteration configuration parameters
 * @param <M> the type parameter shared by the scatter and gather functions
 *        (presumably the message type)
 *
 * @return the updated Graph after the scatter-gather iteration has converged or
 *         after maximumNumberOfIterations.
 */
public <M> Graph<K, VV, EV> runScatterGatherIteration(
		ScatterFunction<K, VV, M, EV> scatterFunction,
		org.apache.flink.graph.spargel.GatherFunction<K, VV, M> gatherFunction,
		int maximumNumberOfIterations,
		ScatterGatherConfiguration parameters) {

	ScatterGatherIteration<K, VV, M, EV> scatterGather =
		ScatterGatherIteration.withEdges(
			this.edges,
			scatterFunction,
			gatherFunction,
			maximumNumberOfIterations);

	// the configuration has to be in place before the operation is run
	scatterGather.configure(parameters);

	DataSet<Vertex<K, VV>> updatedVertices = getVertices().runOperation(scatterGather);

	return new Graph<>(updatedVertices, edges, context);
}
/**
 * Builds a graph from a DataSet of edges only.
 *
 * <p>The vertex set is derived from the edges: every edge is flat-mapped
 * through an {@code EmitSrcAndTarget} function and the result is
 * de-duplicated. The derived vertices carry {@link NullValue} as value.
 *
 * @param edges a DataSet of edges.
 * @param context the Flink execution environment.
 * @param <K> the type of the vertex IDs
 * @param <EV> the type of the edge values
 * @return the newly created graph.
 */
public static <K, EV> Graph<K, NullValue, EV> fromDataSet(
		DataSet<Edge<K, EV>> edges,
		ExecutionEnvironment context) {

	DataSet<Vertex<K, NullValue>> derivedVertices = edges
		.flatMap(new EmitSrcAndTarget<>())
			.name("Source and target IDs")
		.distinct()
			.name("IDs");

	return new Graph<>(derivedVertices, edges, context);
}
/**
 * Transforms the value of every vertex with the given function, keeping the
 * vertex IDs and the edges of the graph.
 *
 * @param mapper the map function to apply; it receives the whole vertex and
 *        returns the new vertex value.
 * @param returnType the explicit type information of the produced vertices.
 * @param <NV> the type of the new vertex values
 * @return a new graph with the mapped vertices and the edges of this graph
 */
public <NV> Graph<K, NV, EV> mapVertices(
		final MapFunction<Vertex<K, VV>, NV> mapper,
		TypeInformation<Vertex<K, NV>> returnType) {

	final MapFunction<Vertex<K, VV>, Vertex<K, NV>> vertexValueMapper =
		new MapFunction<Vertex<K, VV>, Vertex<K, NV>>() {
			// a single instance is re-populated and returned for every record
			private Vertex<K, NV> output = new Vertex<>();

			@Override
			public Vertex<K, NV> map(Vertex<K, VV> value) throws Exception {
				// the ID is copied over as is, only the value is replaced
				output.f0 = value.f0;
				output.f1 = mapper.map(value);
				return output;
			}
		};

	DataSet<Vertex<K, NV>> transformedVertices = this.vertices
		.map(vertexValueMapper)
		.returns(returnType)
		.withForwardedFields("f0")
		.name("Map vertices");

	return new Graph<>(transformedVertices, edges, context);
}
/** * Adds the list of vertices, passed as input, to the graph. * If the vertices already exist in the graph, they will not be added once more. * * @param verticesToAdd the list of vertices to add * @return the new graph containing the existing and newly added vertices */ public Graph<K, VV, EV> addVertices(List<Vertex<K, VV>> verticesToAdd) { // Add the vertices DataSet<Vertex<K, VV>> newVertices = this.vertices.coGroup(this.context.fromCollection(verticesToAdd)) .where(0).equalTo(0).with(new VerticesUnionCoGroup<>()).name("Add vertices"); return new Graph<>(newVertices, this.edges, this.context); }
/**
 * Transforms the value of every edge with the given function, keeping the
 * first two fields of each edge and the vertices of the graph.
 *
 * @param mapper the map function to apply; it receives the whole edge and
 *        returns the new edge value.
 * @param returnType the explicit type information of the produced edges.
 * @param <NV> the type of the new edge values
 * @return a new graph with the vertices of this graph and the mapped edges
 */
public <NV> Graph<K, VV, NV> mapEdges(
		final MapFunction<Edge<K, EV>, NV> mapper,
		TypeInformation<Edge<K, NV>> returnType) {

	final MapFunction<Edge<K, EV>, Edge<K, NV>> edgeValueMapper =
		new MapFunction<Edge<K, EV>, Edge<K, NV>>() {
			// a single instance is re-populated and returned for every record
			private Edge<K, NV> output = new Edge<>();

			@Override
			public Edge<K, NV> map(Edge<K, EV> value) throws Exception {
				// the first two fields are copied over as is, only the value is replaced
				output.f0 = value.f0;
				output.f1 = value.f1;
				output.f2 = mapper.map(value);
				return output;
			}
		};

	DataSet<Edge<K, NV>> transformedEdges = this.edges
		.map(edgeValueMapper)
		.returns(returnType)
		.withForwardedFields("f0; f1")
		.name("Map edges");

	return new Graph<>(vertices, transformedEdges, context);
}
/**
 * Removes from the graph every edge that matches one of the given edges.
 *
 * <p>The list is turned into a DataSet through the execution environment and
 * co-grouped with the current edges on the first two fields, using an
 * {@code EdgeRemovalCoGroup}. The vertices are left as they are.
 *
 * @param edgesToBeRemoved the list of edges to be removed
 * @return a new graph where the edges have been removed and in which the vertices remained intact
 */
public Graph<K, VV, EV> removeEdges(List<Edge<K, EV>> edgesToBeRemoved) {
	DataSet<Edge<K, EV>> currentEdges = getEdges();
	DataSet<Edge<K, EV>> removals = this.context.fromCollection(edgesToBeRemoved);

	DataSet<Edge<K, EV>> survivingEdges = currentEdges
		.coGroup(removals)
		.where(0, 1)
		.equalTo(0, 1)
		.with(new EdgeRemovalCoGroup<>())
		.name("Remove edges");

	return new Graph<>(this.vertices, survivingEdges, this.context);
}
/**
 * Matches the edges against an input DataSet on the composite key made of
 * source and target ID, and applies a user-defined transformation to the
 * values of the matched records.
 *
 * <p>The edges are co-grouped with the input on the first two fields of both
 * sides, through an {@code ApplyCoGroupToEdgeValues} wrapping the given
 * function. The vertices are carried over unchanged.
 *
 * @param inputDataSet the DataSet to join with.
 *        The first two fields of the Tuple3 are used as the composite join key
 *        and the third field is passed as a parameter to the transformation function.
 * @param edgeJoinFunction the transformation function to apply.
 *        The first parameter is the current edge value and the second parameter is the value
 *        of the matched Tuple3 from the input DataSet.
 * @param <T> the type of the third field of the input Tuple3 DataSet.
 * @return a new Graph, where the edge values have been updated according to the
 *         result of the edgeJoinFunction.
 */
public <T> Graph<K, VV, EV> joinWithEdges(
		DataSet<Tuple3<K, K, T>> inputDataSet,
		final EdgeJoinFunction<EV, T> edgeJoinFunction) {

	DataSet<Edge<K, EV>> currentEdges = this.getEdges();

	DataSet<Edge<K, EV>> updatedEdges = currentEdges
		.coGroup(inputDataSet)
		.where(0, 1)
		.equalTo(0, 1)
		.with(new ApplyCoGroupToEdgeValues<>(edgeJoinFunction))
		.name("Join with edges");

	return new Graph<>(vertices, updatedEdges, context);
}
/**
 * Matches the edges against an input Tuple2 DataSet and applies a
 * user-defined transformation to the values of the matched records.
 * The source ID of the edges and the first field of the input DataSet
 * serve as join keys.
 *
 * <p>The edges (field 0) are co-grouped with the input (field 0) through an
 * {@code ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget} wrapping the given
 * function. The vertices are carried over unchanged.
 *
 * @param inputDataSet the DataSet to join with.
 *        The first field of the Tuple2 is used as the join key
 *        and the second field is passed as a parameter to the transformation function.
 * @param edgeJoinFunction the transformation function to apply.
 *        The first parameter is the current edge value and the second parameter is the value
 *        of the matched Tuple2 from the input DataSet.
 * @param <T> the type of the second field of the input Tuple2 DataSet.
 * @return a new Graph, where the edge values have been updated according to the
 *         result of the edgeJoinFunction.
 */
public <T> Graph<K, VV, EV> joinWithEdgesOnSource(
		DataSet<Tuple2<K, T>> inputDataSet,
		final EdgeJoinFunction<EV, T> edgeJoinFunction) {

	DataSet<Edge<K, EV>> currentEdges = this.getEdges();

	DataSet<Edge<K, EV>> updatedEdges = currentEdges
		.coGroup(inputDataSet)
		.where(0)
		.equalTo(0)
		.with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget<>(edgeJoinFunction))
		.name("Join with edges on source");

	return new Graph<>(vertices, updatedEdges, context);
}
/**
 * Matches the edges against an input Tuple2 DataSet and applies a
 * user-defined transformation to the values of the matched records.
 * The target ID of the edges and the first field of the input DataSet
 * serve as join keys.
 *
 * <p>The edges (field 1) are co-grouped with the input (field 0) through an
 * {@code ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget} wrapping the given
 * function. The vertices are carried over unchanged.
 *
 * @param inputDataSet the DataSet to join with.
 *        The first field of the Tuple2 is used as the join key
 *        and the second field is passed as a parameter to the transformation function.
 * @param edgeJoinFunction the transformation function to apply.
 *        The first parameter is the current edge value and the second parameter is the value
 *        of the matched Tuple2 from the input DataSet.
 * @param <T> the type of the second field of the input Tuple2 DataSet.
 * @return a new Graph, where the edge values have been updated according to the
 *         result of the edgeJoinFunction.
 */
public <T> Graph<K, VV, EV> joinWithEdgesOnTarget(
		DataSet<Tuple2<K, T>> inputDataSet,
		final EdgeJoinFunction<EV, T> edgeJoinFunction) {

	DataSet<Edge<K, EV>> currentEdges = this.getEdges();

	DataSet<Edge<K, EV>> updatedEdges = currentEdges
		.coGroup(inputDataSet)
		.where(1)
		.equalTo(0)
		.with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget<>(edgeJoinFunction))
		.name("Join with edges on target");

	return new Graph<>(vertices, updatedEdges, context);
}
/**
 * Combines this graph with another one: the vertex sets are unioned and
 * de-duplicated, the edge sets are unioned with duplicates retained.
 *
 * <p>The resulting graph uses the execution environment of this graph.
 *
 * @param graph the graph to perform union with
 * @return a new graph
 */
public Graph<K, VV, EV> union(Graph<K, VV, EV> graph) {
	// vertices: union followed by distinct, so duplicates are dropped
	DataSet<Vertex<K, VV>> allVertices = graph.getVertices()
		.union(this.getVertices())
			.name("Vertices")
		.distinct()
			.name("Vertices");

	// edges: plain union, duplicates are kept
	DataSet<Edge<K, EV>> allEdges = graph.getEdges()
		.union(this.getEdges())
			.name("Edges");

	return new Graph<>(allVertices, allEdges, context);
}
/**
 * Returns the sub-graph induced by the vertices accepted by the given
 * predicate.
 *
 * <p>The edges are joined with the accepted vertices twice, first on the
 * edge's field 0 and then on its field 1, so only edges matched on both
 * fields remain.
 *
 * @param vertexFilter the filter function for vertices.
 * @return the resulting sub-graph.
 */
public Graph<K, VV, EV> filterOnVertices(FilterFunction<Vertex<K, VV>> vertexFilter) {
	DataSet<Vertex<K, VV>> keptVertices = vertices.filter(vertexFilter);

	DataSet<Edge<K, EV>> keptEdges = edges
		// first join: edge field 0 against the kept vertices
		.join(keptVertices)
			.where(0)
			.equalTo(0)
			.with(new ProjectEdge<>())
		// second join: edge field 1 against the kept vertices
		.join(keptVertices)
			.where(1)
			.equalTo(0)
			.with(new ProjectEdge<>())
			.name("Filter on vertices");

	return new Graph<>(keptVertices, keptEdges, context);
}
/**
 * Returns the sub-graph that satisfies both a vertex predicate and an edge
 * predicate.
 *
 * <p>The vertices are filtered first. The edges are then joined with the
 * accepted vertices twice, first on the edge's field 0 and then on its
 * field 1. The edge predicate is applied last, to the edges left after
 * both joins.
 *
 * @param vertexFilter the filter function for vertices.
 * @param edgeFilter the filter function for edges.
 * @return the resulting sub-graph.
 */
public Graph<K, VV, EV> subgraph(
		FilterFunction<Vertex<K, VV>> vertexFilter,
		FilterFunction<Edge<K, EV>> edgeFilter) {

	DataSet<Vertex<K, VV>> keptVertices = vertices.filter(vertexFilter);

	DataSet<Edge<K, EV>> edgesBetweenKeptVertices = edges
		// first join: edge field 0 against the kept vertices
		.join(keptVertices)
			.where(0)
			.equalTo(0)
			.with(new ProjectEdge<>())
		// second join: edge field 1 against the kept vertices
		.join(keptVertices)
			.where(1)
			.equalTo(0)
			.with(new ProjectEdge<>())
			.name("Subgraph");

	return new Graph<>(
		keptVertices,
		edgesBetweenKeptVertices.filter(edgeFilter),
		context);
}
/** * Removes the given list of vertices and its edges from the graph. * * @param verticesToBeRemoved the DataSet of vertices to be removed * @return the resulted graph containing the initial vertices and edges minus the vertices * and edges removed. */ private Graph<K, VV, EV> removeVertices(DataSet<Vertex<K, VV>> verticesToBeRemoved) { DataSet<Vertex<K, VV>> newVertices = getVertices().coGroup(verticesToBeRemoved).where(0).equalTo(0) .with(new VerticesRemovalCoGroup<>()).name("Remove vertices"); DataSet <Edge< K, EV>> newEdges = newVertices.join(getEdges()).where(0).equalTo(0) // if the edge source was removed, the edge will also be removed .with(new ProjectEdgeToBeRemoved<>()).name("Edges to be removed") // if the edge target was removed, the edge will also be removed .join(newVertices).where(1).equalTo(0) .with(new ProjectEdge<>()).name("Remove edges"); return new Graph<>(newVertices, newEdges, context); }