private <VVWithDegree> void configureUpdateFunction(CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updates) { // configure coGroup update function with name and broadcast variables updates = updates.name("Vertex State Updates"); if (this.configuration != null) { for (Tuple2<String, DataSet<?>> e : this.configuration.getGatherBcastVars()) { updates = updates.withBroadcastSet(e.f1, e.f0); } } // let the operator know that we preserve the key field updates.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0"); } }
/**
 * Computes the out-degree of every vertex in the graph.
 *
 * <p>The vertex set is co-grouped with the edge set, matching field 0 of the vertices
 * against field 0 of the edges.
 *
 * @return A DataSet of {@code Tuple2<vertexId, outDegree>}
 */
public DataSet<Tuple2<K, LongValue>> outDegrees() {
    DataSet<Tuple2<K, LongValue>> degrees = vertices
        .coGroup(edges)
        .where(0)
        .equalTo(0)
        .with(new CountNeighborsCoGroup<>())
        .name("Out-degree");
    return degrees;
}
/**
 * Computes the in-degree of every vertex in the graph.
 *
 * <p>The vertex set is co-grouped with the edge set, matching field 0 of the vertices
 * against field 1 of the edges.
 *
 * @return A DataSet of {@code Tuple2<vertexId, inDegree>}
 */
public DataSet<Tuple2<K, LongValue>> inDegrees() {
    DataSet<Tuple2<K, LongValue>> degrees = vertices
        .coGroup(edges)
        .where(0)
        .equalTo(1)
        .with(new CountNeighborsCoGroup<>())
        .name("In-degree");
    return degrees;
}
/**
 * Intersects the edge set of this graph with the given edge set. Edges are matched on
 * fields 0, 1 and 2; for all matching pairs, both edges end up in the resulting data set.
 *
 * @param edges edges to compute intersection with
 * @return edge set containing both edges from all matching pairs of the same edge
 */
private DataSet<Edge<K, EV>> getPairwiseEdgeIntersection(DataSet<Edge<K, EV>> edges) {
    DataSet<Edge<K, EV>> matchingPairs = this.getEdges()
        .coGroup(edges)
        .where(0, 1, 2)
        .equalTo(0, 1, 2)
        .with(new MatchingEdgeReducer<>())
        .name("Intersect edges");
    return matchingPairs;
}
/** * Adds the list of vertices, passed as input, to the graph. * If the vertices already exist in the graph, they will not be added once more. * * @param verticesToAdd the list of vertices to add * @return the new graph containing the existing and newly added vertices */ public Graph<K, VV, EV> addVertices(List<Vertex<K, VV>> verticesToAdd) { // Add the vertices DataSet<Vertex<K, VV>> newVertices = this.vertices.coGroup(this.context.fromCollection(verticesToAdd)) .where(0).equalTo(0).with(new VerticesUnionCoGroup<>()).name("Add vertices"); return new Graph<>(newVertices, this.edges, this.context); }
case IN: return vertices.coGroup(edges).where(0).equalTo(1) .with(new ApplyCoGroupFunction<>(edgesFunction)).name("GroupReduce on in-edges"); case OUT: return vertices.coGroup(edges).where(0).equalTo(0) .with(new ApplyCoGroupFunction<>(edgesFunction)).name("GroupReduce on out-edges"); case ALL: return vertices.coGroup(edges.flatMap(new EmitOneEdgePerNode<>()) .name("Emit edge")) .where(0).equalTo(0).with(new ApplyCoGroupFunctionOnAllEdges<>(edgesFunction)) .name("GroupReduce on in- and out-edges"); default: throw new IllegalArgumentException("Illegal edge direction");
/**
 * Removes from the graph all edges that match an edge in the given list.
 * Edges are matched on fields 0 and 1.
 *
 * @param edgesToBeRemoved the list of edges to be removed
 * @return a new graph where the edges have been removed and in which the vertices remained intact
 */
public Graph<K, VV, EV> removeEdges(List<Edge<K, EV>> edgesToBeRemoved) {
    // co-group the current edges with the edges to drop on fields 0 and 1
    DataSet<Edge<K, EV>> remainingEdges = getEdges()
        .coGroup(this.context.fromCollection(edgesToBeRemoved))
        .where(0, 1)
        .equalTo(0, 1)
        .with(new EdgeRemovalCoGroup<>())
        .name("Remove edges");

    // the vertex set is carried over unchanged
    return new Graph<>(this.vertices, remainingEdges, context);
}
/**
 * Joins the edge DataSet with an input DataSet on the composite key of source and
 * target ID, and applies a user-defined transformation to the values of the matched
 * records. The first two fields of the input DataSet serve as the join keys.
 *
 * @param inputDataSet the DataSet to join with.
 *        The first two fields of the Tuple3 form the composite join key
 *        and the third field is handed to the transformation function.
 * @param edgeJoinFunction the transformation function to apply.
 *        Its first parameter is the current edge value and its second parameter is the
 *        value of the matched Tuple3 from the input DataSet.
 * @param <T> the type of the third field of the input Tuple3 DataSet.
 * @return a new Graph, where the edge values have been updated according to the
 *         result of the edgeJoinFunction.
 */
public <T> Graph<K, VV, EV> joinWithEdges(DataSet<Tuple3<K, K, T>> inputDataSet,
        final EdgeJoinFunction<EV, T> edgeJoinFunction) {

    // co-group on fields (0, 1) of both the edges and the input
    DataSet<Edge<K, EV>> updatedEdges = this.getEdges()
        .coGroup(inputDataSet)
        .where(0, 1)
        .equalTo(0, 1)
        .with(new ApplyCoGroupToEdgeValues<>(edgeJoinFunction))
        .name("Join with edges");

    return new Graph<>(this.vertices, updatedEdges, this.context);
}
/**
 * Joins the edge DataSet with an input Tuple2 DataSet and applies a user-defined
 * transformation to the values of the matched records.
 * The join keys are the source ID of the edges and the first field of the input DataSet.
 *
 * @param inputDataSet the DataSet to join with.
 *        The first field of the Tuple2 is the join key
 *        and the second field is handed to the transformation function.
 * @param edgeJoinFunction the transformation function to apply.
 *        Its first parameter is the current edge value and its second parameter is the
 *        value of the matched Tuple2 from the input DataSet.
 * @param <T> the type of the second field of the input Tuple2 DataSet.
 * @return a new Graph, where the edge values have been updated according to the
 *         result of the edgeJoinFunction.
 */
public <T> Graph<K, VV, EV> joinWithEdgesOnSource(DataSet<Tuple2<K, T>> inputDataSet,
        final EdgeJoinFunction<EV, T> edgeJoinFunction) {

    // co-group field 0 of the edges with field 0 of the input
    DataSet<Edge<K, EV>> updatedEdges = this.getEdges()
        .coGroup(inputDataSet)
        .where(0)
        .equalTo(0)
        .with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget<>(edgeJoinFunction))
        .name("Join with edges on source");

    return new Graph<>(this.vertices, updatedEdges, this.context);
}
/**
 * Joins the edge DataSet with an input Tuple2 DataSet and applies a user-defined
 * transformation to the values of the matched records.
 * The join keys are the target ID of the edges and the first field of the input DataSet.
 *
 * @param inputDataSet the DataSet to join with.
 *        The first field of the Tuple2 is the join key
 *        and the second field is handed to the transformation function.
 * @param edgeJoinFunction the transformation function to apply.
 *        Its first parameter is the current edge value and its second parameter is the
 *        value of the matched Tuple2 from the input DataSet.
 * @param <T> the type of the second field of the input Tuple2 DataSet.
 * @return a new Graph, where the edge values have been updated according to the
 *         result of the edgeJoinFunction.
 */
public <T> Graph<K, VV, EV> joinWithEdgesOnTarget(DataSet<Tuple2<K, T>> inputDataSet,
        final EdgeJoinFunction<EV, T> edgeJoinFunction) {

    // co-group field 1 of the edges with field 0 of the input
    DataSet<Edge<K, EV>> updatedEdges = this.getEdges()
        .coGroup(inputDataSet)
        .where(1)
        .equalTo(0)
        .with(new ApplyCoGroupToEdgeValuesOnEitherSourceOrTarget<>(edgeJoinFunction))
        .name("Join with edges on target");

    return new Graph<>(this.vertices, updatedEdges, this.context);
}
/**
 * Joins the vertex DataSet of this graph with an input Tuple2 DataSet and applies
 * a user-defined transformation to the values of the matched records.
 * The join keys are the vertex ID and the first field of the Tuple2 DataSet.
 *
 * @param inputDataSet the Tuple2 DataSet to join with.
 *        The first field of the Tuple2 is the join key and the second field is handed
 *        to the transformation function.
 * @param vertexJoinFunction the transformation function to apply.
 *        Its first parameter is the current vertex value and its second parameter is the
 *        value of the matched Tuple2 from the input DataSet.
 * @param <T> the type of the second field of the input Tuple2 DataSet.
 * @return a new Graph, where the vertex values have been updated according to the
 *         result of the vertexJoinFunction.
 */
public <T> Graph<K, VV, EV> joinWithVertices(DataSet<Tuple2<K, T>> inputDataSet,
        final VertexJoinFunction<VV, T> vertexJoinFunction) {

    // co-group field 0 of the vertices with field 0 of the input
    DataSet<Vertex<K, VV>> updatedVertices = this.getVertices()
        .coGroup(inputDataSet)
        .where(0)
        .equalTo(0)
        .with(new ApplyCoGroupToVertexValues<>(vertexJoinFunction))
        .name("Join with vertices");

    return new Graph<>(updatedVertices, this.edges, this.context);
}
return vertices.coGroup(edges).where(0).equalTo(1) .with(new ApplyCoGroupFunction<>(edgesFunction)) .name("GroupReduce on in-edges").returns(typeInfo); case OUT: return vertices.coGroup(edges).where(0).equalTo(0) .with(new ApplyCoGroupFunction<>(edgesFunction)) .name("GroupReduce on out-edges").returns(typeInfo); case ALL: return vertices.coGroup(edges.flatMap(new EmitOneEdgePerNode<>()) .name("Emit edge")) .where(0).equalTo(0).with(new ApplyCoGroupFunctionOnAllEdges<>(edgesFunction)) .name("GroupReduce on in- and out-edges").returns(typeInfo); default: throw new IllegalArgumentException("Illegal edge direction");
messages = messages.name("Messaging");
messages = messages.name("Messaging"); if (this.configuration != null) { for (Tuple2<String, DataSet<?>> e : this.configuration.getScatterBcastVars()) {
return vertices.coGroup(edgesWithSources) .where(0).equalTo("f0.f1") .with(new ApplyNeighborCoGroupFunction<>(neighborsFunction)).name("Neighbors function"); case OUT: return vertices.coGroup(edgesWithTargets) .where(0).equalTo("f0.f0") .with(new ApplyNeighborCoGroupFunction<>(neighborsFunction)).name("Neighbors function"); case ALL: .with(new ApplyCoGroupFunctionOnAllNeighbors<>(neighborsFunction)).name("Neighbors function"); default: throw new IllegalArgumentException("Illegal edge direction");
/** * Removes the given list of vertices and its edges from the graph. * * @param verticesToBeRemoved the DataSet of vertices to be removed * @return the resulted graph containing the initial vertices and edges minus the vertices * and edges removed. */ private Graph<K, VV, EV> removeVertices(DataSet<Vertex<K, VV>> verticesToBeRemoved) { DataSet<Vertex<K, VV>> newVertices = getVertices().coGroup(verticesToBeRemoved).where(0).equalTo(0) .with(new VerticesRemovalCoGroup<>()).name("Remove vertices"); DataSet <Edge< K, EV>> newEdges = newVertices.join(getEdges()).where(0).equalTo(0) // if the edge source was removed, the edge will also be removed .with(new ProjectEdgeToBeRemoved<>()).name("Edges to be removed") // if the edge target was removed, the edge will also be removed .join(newVertices).where(1).equalTo(0) .with(new ProjectEdge<>()).name("Remove edges"); return new Graph<>(newVertices, newEdges, context); }
.with(new IdentityCoGrouper<Long>()).name("CoGroup 1"); .with(new IdentityCoGrouper<Long>()).name("CoGroup 2"); .with(new IdentityCoGrouper<Long>()).name("CoGroup 3"); .with(new IdentityCoGrouper<Long>()).name("CoGroup 4"); .with(new IdentityCoGrouper<Long>()).name("CoGroup 5"); .with(new IdentityCoGrouper<Long>()).name("CoGroup 6"); .with(new IdentityCoGrouper<Long>()).name("CoGroup 7");
/**
 * Assembles and executes a delta-iteration program whose step function joins the workset
 * with the edges and then co-groups the result with the solution set.
 *
 * @param args program arguments: {@code args[0]} parallelism, {@code args[1]} vertices CSV
 *        path, {@code args[2]} edges CSV path, {@code args[3]} output CSV path,
 *        {@code args[4]} maximum number of iterations
 * @throws Exception if building or executing the program fails
 */
public static void connectedComponentsWithCoGroup(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(Integer.parseInt(args[0]));

    // inputs
    DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(args[1])
        .types(Long.class)
        .name(VERTEX_SOURCE);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(args[2])
        .types(Long.class, Long.class)
        .name(EDGES_SOURCE);

    DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.flatMap(new DummyMapFunction());

    // the same data set is the initial solution set and the initial workset; key is field 0
    int maxIterations = Integer.parseInt(args[4]);
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId
        .iterateDelta(verticesWithId, maxIterations, 0)
        .name(ITERATION_NAME);

    // step function: workset joined with edges, then co-grouped with the solution set
    DataSet<Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset()
        .join(edges)
        .where(0)
        .equalTo(0)
        .with(new DummyJoinFunction())
        .name(JOIN_NEIGHBORS_MATCH);

    DataSet<Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors
        .coGroup(iteration.getSolutionSet())
        .where(0)
        .equalTo(0)
        .with(new DummyCoGroupFunction())
        .name(MIN_ID_AND_UPDATE);

    // the coGroup result is both the solution-set delta and the next workset
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(minAndUpdate, minAndUpdate);
    result.writeAsCsv(args[3]).name(SINK);

    env.execute();
}
/**
 * Builds and runs the "Workset Connected Components" job: a delta iteration over
 * (vertex ID, ID) pairs whose step joins the workset with the edges and co-groups the
 * result with the solution set. The result is written as CSV to {@code resultPath}.
 *
 * @throws Exception if building or executing the job fails
 */
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // space-delimited inputs
    DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
        .fieldDelimiter(" ")
        .types(Long.class)
        .name("Vertices");

    DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
        .fieldDelimiter(" ")
        .types(Long.class, Long.class)
        .name("Edges");

    // pairs each vertex ID with itself
    MapFunction<Tuple1<Long>, Tuple2<Long, Long>> pairWithOwnId =
        new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Tuple1<Long> vertex) throws Exception {
                return new Tuple2<>(vertex.f0, vertex.f0);
            }
        };

    DataSet<Tuple2<Long, Long>> verticesWithId = vertexIds
        .map(pairWithOwnId)
        .name("Assign Vertex Ids");

    // the same data set is the initial solution set and the initial workset; key is field 0
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

    // emits (f1 of the edge, f1 of the workset element) for every matched pair
    JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidateForNeighbor =
        new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> edge)
                    throws Exception {
                return new Tuple2<>(edge.f1, candidate.f1);
            }
        };

    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors =
        iteration.getWorkset()
            .join(edgePairs)
            .where(0)
            .equalTo(0)
            .with(candidateForNeighbor)
            .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate =
        joinWithNeighbors
            .coGroup(iteration.getSolutionSet())
            .where(0)
            .equalTo(0)
            .with(new MinIdAndUpdate())
            .name("min Id and Update");

    // the coGroup result is both the solution-set delta and the next workset
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(minAndUpdate, minAndUpdate);
    result.writeAsCsv(resultPath, "\n", " ").name("Result");

    env.execute("Workset Connected Components");
}
.sortFirstGroup(5, Order.DESCENDING) .sortSecondGroup(1, Order.DESCENDING).sortSecondGroup(4, Order.ASCENDING) .with(new IdentityCoGrouper<Tuple7<Long, Long, Long, Long, Long, Long, Long>>()).name("CoGroup") .output(new DiscardingOutputFormat<Tuple7<Long, Long, Long, Long, Long, Long, Long>>()).name("Sink");