/**
 * Assembles and executes a delta-iteration job: the workset is joined with the edges and the
 * join result is then co-grouped with the solution set to produce both the solution set delta
 * and the next workset.
 *
 * <p>Program arguments, as read by this method:
 * <ul>
 *   <li>{@code args[0]} - parallelism</li>
 *   <li>{@code args[1]} - path of the vertex CSV input</li>
 *   <li>{@code args[2]} - path of the edge CSV input</li>
 *   <li>{@code args[3]} - path of the CSV output</li>
 *   <li>{@code args[4]} - maximum number of iterations</li>
 * </ul>
 *
 * @param args the program arguments listed above
 * @throws Exception if the job cannot be built or executed
 */
public static void connectedComponentsWithCoGroup(String[] args) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final int parallelism = Integer.parseInt(args[0]);
    env.setParallelism(parallelism);

    // Sources: single-column vertex file and two-column edge file.
    DataSet<Tuple1<Long>> vertexInput = env.readCsvFile(args[1])
            .types(Long.class)
            .name(VERTEX_SOURCE);
    DataSet<Tuple2<Long, Long>> edgeInput = env.readCsvFile(args[2])
            .types(Long.class, Long.class)
            .name(EDGES_SOURCE);

    DataSet<Tuple2<Long, Long>> labelledVertices = vertexInput.flatMap(new DummyMapFunction());

    // The labelled vertices act as both the initial solution set and the initial workset,
    // keyed on field 0.
    final int maxIterations = Integer.parseInt(args[4]);
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaLoop =
            labelledVertices.iterateDelta(labelledVertices, maxIterations, 0).name(ITERATION_NAME);

    DataSet<Tuple2<Long, Long>> candidates = deltaLoop.getWorkset()
            .join(edgeInput)
            .where(0)
            .equalTo(0)
            .with(new DummyJoinFunction())
            .name(JOIN_NEIGHBORS_MATCH);

    DataSet<Tuple2<Long, Long>> updates = candidates
            .coGroup(deltaLoop.getSolutionSet())
            .where(0)
            .equalTo(0)
            .with(new DummyCoGroupFunction())
            .name(MIN_ID_AND_UPDATE);

    // The same data set serves as solution set delta and as next workset.
    DataSet<Tuple2<Long, Long>> converged = deltaLoop.closeWith(updates, updates);
    converged.writeAsCsv(args[3]).name(SINK);

    env.execute();
}
// Join the workset with the solution set obtained from `loop`, matching field 0 on both sides,
// pass the pairs through an IdentityJoiner, and label the operator "Solution set delta".
DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0).with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");
.equalTo(0, 1) .with(new ProjectTriangles<>()) .name("Triangle listing");
/**
 * Builds and runs the "Workset Connected Components" job: every vertex starts with its own id
 * as label, candidate labels are sent along the edges inside a delta iteration, and
 * {@code MinIdAndUpdate} is co-grouped with the solution set to produce the updates.
 *
 * <p>Input files are read with a single space as field delimiter; the result is written with
 * newline-separated rows and space-separated fields.
 *
 * @throws Exception if the job cannot be built or executed
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
            .fieldDelimiter(" ")
            .types(Long.class)
            .name("Vertices");

    DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
            .fieldDelimiter(" ")
            .types(Long.class, Long.class)
            .name("Edges");

    // Pairs each vertex id with itself: (id, id).
    MapFunction<Tuple1<Long>, Tuple2<Long, Long>> selfLabel =
            new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
                    return new Tuple2<>(value.f0, value.f0);
                }
            };
    DataSet<Tuple2<Long, Long>> labelled = vertexIds.map(selfLabel).name("Assign Vertex Ids");

    // The labelled vertices are both the initial solution set and the initial workset (key: field 0).
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaLoop =
            labelled.iterateDelta(labelled, MAX_ITERATIONS, 0);

    // Emits (second field of the edge, second field of the workset element).
    JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> forwardLabel =
            new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
                    return new Tuple2<>(second.f1, first.f1);
                }
            };
    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates =
            deltaLoop.getWorkset()
                    .join(edgePairs)
                    .where(0)
                    .equalTo(0)
                    .with(forwardLabel)
                    .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates =
            candidates
                    .coGroup(deltaLoop.getSolutionSet())
                    .where(0)
                    .equalTo(0)
                    .with(new MinIdAndUpdate())
                    .name("min Id and Update");

    // The same data set is used as solution set delta and as next workset.
    DataSet<Tuple2<Long, Long>> components = deltaLoop.closeWith(updates, updates);
    components.writeAsCsv(resultPath, "\n", " ").name("Result");

    env.execute("Workset Connected Components");
}
.name(JOIN_WITH_SOLUTION_SET); if(joinPreservesSolutionSet) { ((JoinOperator<?,?,?>)join2).withForwardedFieldsFirst("*");
/**
 * Projects this bipartite graph onto its top vertices. Two top vertices are linked in the
 * resulting graph only if at least one bottom vertex of the bipartite graph is connected to
 * both of them.
 *
 * <p>This is the full projection: it performs three joins, and every resulting edge carries
 * the ID and value of the connecting vertex, the values of both top vertices, and the values
 * of both bipartite edges.
 *
 * <p>Note: KT must override .equals(). This requirement may be removed in a future release.
 *
 * @return full top projection of the bipartite graph
 */
public Graph<KT, VVT, Projection<KB, VVB, VVT, EV>> projectionTopFull() {
    DataSet<Tuple5<KT, KB, EV, VVT, VVB>> enrichedEdges = joinEdgeWithVertices();

    // Self-join on field 1 of the enriched edge tuples.
    DataSet<Edge<KT, Projection<KB, VVB, VVT, EV>>> projectedEdges = enrichedEdges
            .join(enrichedEdges)
            .where(1)
            .equalTo(1)
            .with(new ProjectionTopFull<>())
            .name("Full top projection");

    return Graph.fromDataSet(topVertices, projectedEdges, context);
}
/**
 * Projects this bipartite graph onto an undirected graph made up of its top vertices only.
 * Two top vertices are linked in the resulting graph only if the bipartite graph contains a
 * bottom vertex that both of them are connected to.
 *
 * <p>This is the simple projection: it performs a single join, and every resulting edge
 * carries the two bipartite edge values.
 *
 * <p>Note: KT must override .equals(). This requirement may be removed in a future release.
 *
 * @return simple top projection of the bipartite graph
 */
public Graph<KT, VVT, Tuple2<EV, EV>> projectionTopSimple() {
    // Self-join of the bipartite edges on field 1.
    DataSet<Edge<KT, Tuple2<EV, EV>>> projectedEdges = edges
            .join(edges)
            .where(1)
            .equalTo(1)
            .with(new ProjectionTopSimple<>())
            .name("Simple top projection");

    return Graph.fromDataSet(topVertices, projectedEdges, context);
}
public static void tcph3(String[] args) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(Integer.parseInt(args[0])); //order id, order status, order data, order prio, ship prio DataSet<Tuple5<Long, String, String, String, Integer>> orders = env.readCsvFile(args[1]) .fieldDelimiter("|").lineDelimiter("\n") .includeFields("101011001").types(Long.class, String.class, String.class, String.class, Integer.class) .name(ORDERS); //order id, extended price DataSet<Tuple2<Long, Double>> lineItems = env.readCsvFile(args[2]) .fieldDelimiter("|").lineDelimiter("\n") .includeFields("100001").types(Long.class, Double.class) .name(LINEITEM); DataSet<Tuple2<Long, Integer>> filterO = orders.flatMap(new FilterO()).name(MAPPER_NAME); DataSet<Tuple3<Long, Integer, Double>> joinLiO = filterO.join(lineItems).where(0).equalTo(0).with(new JoinLiO()).name(JOIN_NAME); DataSet<Tuple3<Long, Integer, Double>> aggLiO = joinLiO.groupBy(0, 1).reduceGroup(new AggLiO()).name(REDUCE_NAME); aggLiO.writeAsCsv(args[3], "\n", "|").name(SINK); env.execute(); }
/**
 * Projects this bipartite graph onto its bottom vertices. Two bottom vertices are linked in
 * the resulting graph only if at least one top vertex of the bipartite graph is connected to
 * both of them.
 *
 * <p>This is the full projection: it performs three joins, and every resulting edge carries
 * the ID and value of the connecting vertex, the values of both bottom vertices, and the
 * values of both bipartite edges.
 *
 * <p>Note: KB must override .equals(). This requirement may be removed in a future release.
 *
 * @return full bottom projection of the bipartite graph
 */
public Graph<KB, VVB, Projection<KT, VVT, VVB, EV>> projectionBottomFull() {
    DataSet<Tuple5<KT, KB, EV, VVT, VVB>> enrichedEdges = joinEdgeWithVertices();

    // Self-join on field 0 of the enriched edge tuples.
    DataSet<Edge<KB, Projection<KT, VVT, VVB, EV>>> projectedEdges = enrichedEdges
            .join(enrichedEdges)
            .where(0)
            .equalTo(0)
            .with(new ProjectionBottomFull<>())
            .name("Full bottom projection");

    return Graph.fromDataSet(bottomVertices, projectedEdges, context);
}
/**
 * Builds the sub-graph whose vertices satisfy {@code vertexFilter} and whose edges connect
 * two such vertices and additionally satisfy {@code edgeFilter}.
 *
 * @param vertexFilter the filter function for vertices.
 * @param edgeFilter the filter function for edges.
 * @return the resulting sub-graph.
 */
public Graph<K, VV, EV> subgraph(FilterFunction<Vertex<K, VV>> vertexFilter, FilterFunction<Edge<K, EV>> edgeFilter) {
    DataSet<Vertex<K, VV>> keptVertices = vertices.filter(vertexFilter);

    // First join: edge field 0 against the ids of the kept vertices.
    DataSet<Edge<K, EV>> matchedOnSource = edges
            .join(keptVertices)
            .where(0)
            .equalTo(0)
            .with(new ProjectEdge<>());

    // Second join: edge field 1 against the ids of the kept vertices.
    DataSet<Edge<K, EV>> matchedOnBothEnds = matchedOnSource
            .join(keptVertices)
            .where(1)
            .equalTo(0)
            .with(new ProjectEdge<>())
            .name("Subgraph");

    // The edge predicate is applied last, on edges whose endpoints both survived.
    DataSet<Edge<K, EV>> keptEdges = matchedOnBothEnds.filter(edgeFilter);

    return new Graph<>(keptVertices, keptEdges, context);
}
/**
 * Creates a program plan with a bulk iteration (at most 10 iterations) in which the static
 * "Small" input is the first join input and the iteration's data set is the second.
 *
 * @param strategy value stored under {@code Optimizer.HINT_LOCAL_STRATEGY} in the join's parameters
 * @return the program plan created by the execution environment
 */
private Plan getTestPlanLeftStatic(String strategy) {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> big = env
            .fromElements(
                    new Tuple3<Long, Long, Long>(1L, 2L, 3L),
                    new Tuple3<Long, Long, Long>(1L, 2L, 3L),
                    new Tuple3<Long, Long, Long>(1L, 2L, 3L))
            .name("Big");

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> small = env
            .fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L))
            .name("Small");

    IterativeDataSet<Tuple3<Long, Long, Long>> loop = big.iterate(10);

    Configuration joinHints = new Configuration();
    joinHints.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);

    // Static input on the left, iteration data on the right.
    DataSet<Tuple3<Long, Long, Long>> stepResult = small
            .join(loop)
            .where(0)
            .equalTo(0)
            .with(new DummyJoiner())
            .name("DummyJoiner")
            .withParameters(joinHints);

    DataSet<Tuple3<Long, Long, Long>> iterationResult = loop.closeWith(stepResult);
    iterationResult.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}
/**
 * Projects this bipartite graph onto an undirected graph made up of its bottom vertices only.
 * Two bottom vertices are linked in the resulting graph only if the bipartite graph contains
 * a top vertex that both of them are connected to.
 *
 * <p>This is the simple projection: it performs a single join, and every resulting edge
 * carries the two bipartite edge values.
 *
 * <p>Note: KB must override .equals(). This requirement may be removed in a future release.
 *
 * @return simple bottom projection of the bipartite graph
 */
public Graph<KB, VVB, Tuple2<EV, EV>> projectionBottomSimple() {
    // Self-join of the bipartite edges on field 0.
    DataSet<Edge<KB, Tuple2<EV, EV>>> projectedEdges = edges
            .join(edges)
            .where(0)
            .equalTo(0)
            .with(new ProjectionBottomSimple<>())
            .name("Simple bottom projection");

    return Graph.fromDataSet(bottomVertices, projectedEdges, context);
}
/**
 * Adds the given list of edges to the graph.
 *
 * <p>When adding an edge for a non-existing set of vertices, the edge is considered invalid and ignored.
 *
 * @param newEdges the list of edges to be added
 * @return a new graph containing the existing edges plus the newly added edges.
 */
public Graph<K, VV, EV> addEdges(List<Edge<K, EV>> newEdges) {
    DataSet<Edge<K, EV>> candidateEdges = context.fromCollection(newEdges);

    // Two joins against the vertex set: first on edge field 0, then on edge field 1.
    DataSet<Edge<K, EV>> acceptedEdges = getVertices()
            .join(candidateEdges)
            .where(0)
            .equalTo(0)
            .with(new JoinVerticesWithEdgesOnSrc<>())
            .name("Join with source")
            .join(getVertices())
            .where(1)
            .equalTo(0)
            .with(new JoinWithVerticesOnTrg<>())
            .name("Join with target");

    DataSet<Edge<K, EV>> allEdges = edges.union(acceptedEdges);
    return Graph.fromDataSet(vertices, allEdges, context);
}
/**
 * Creates a program plan with a bulk iteration (at most 10 iterations) in which the
 * iteration's data set is the first join input and the static "smallFile" input is the second.
 *
 * <p>The join is always hinted to use the repartition-hash ship strategy. A local strategy
 * hint is added only when {@code strategy} is not the empty string.
 *
 * @param strategy local strategy hint for the join, or the empty string for none
 * @return the program plan created by the execution environment
 */
private Plan getTestPlanRightStatic(String strategy) {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple3<Long, Long, Long>> big = env.readCsvFile("file://bigFile")
            .types(Long.class, Long.class, Long.class)
            .name("bigFile");

    DataSet<Tuple3<Long, Long, Long>> small = env.readCsvFile("file://smallFile")
            .types(Long.class, Long.class, Long.class)
            .name("smallFile");

    IterativeDataSet<Tuple3<Long, Long, Long>> loop = big.iterate(10);

    Configuration joinHints = new Configuration();
    joinHints.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);
    if (!strategy.isEmpty()) {
        joinHints.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
    }

    // Iteration data on the left, static input on the right.
    DataSet<Tuple3<Long, Long, Long>> stepResult = loop
            .join(small)
            .where(0)
            .equalTo(0)
            .with(new DummyJoiner())
            .name("DummyJoiner")
            .withParameters(joinHints);

    DataSet<Tuple3<Long, Long, Long>> iterationResult = loop.closeWith(stepResult);
    iterationResult.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}
/** * Removes the given list of vertices and its edges from the graph. * * @param verticesToBeRemoved the DataSet of vertices to be removed * @return the resulted graph containing the initial vertices and edges minus the vertices * and edges removed. */ private Graph<K, VV, EV> removeVertices(DataSet<Vertex<K, VV>> verticesToBeRemoved) { DataSet<Vertex<K, VV>> newVertices = getVertices().coGroup(verticesToBeRemoved).where(0).equalTo(0) .with(new VerticesRemovalCoGroup<>()).name("Remove vertices"); DataSet <Edge< K, EV>> newEdges = newVertices.join(getEdges()).where(0).equalTo(0) // if the edge source was removed, the edge will also be removed .with(new ProjectEdgeToBeRemoved<>()).name("Edges to be removed") // if the edge target was removed, the edge will also be removed .join(newVertices).where(1).equalTo(0) .with(new ProjectEdge<>()).name("Remove edges"); return new Graph<>(newVertices, newEdges, context); }
/**
 * Exposes every edge value together with the values of the edge's source and target vertices.
 *
 * <p>The result is built with two joins: the vertices are first joined with the edges on
 * edge field 0, and that intermediate result is then joined with the vertices on its field 1.
 *
 * @return a triplet DataSet consisting of (srcVertexId, trgVertexId, srcVertexValue, trgVertexValue, edgeValue)
 */
public DataSet<Triplet<K, VV, EV>> getTriplets() {
    DataSet<Triplet<K, VV, EV>> triplets = getVertices()
            .join(getEdges())
            .where(0)
            .equalTo(0)
            .with(new ProjectEdgeWithSrcValue<>())
            .name("Project edge with source value")
            .join(getVertices())
            .where(1)
            .equalTo(0)
            .with(new ProjectEdgeWithVertexValues<>())
            .name("Project edge with vertex values");

    return triplets;
}
/**
 * Builds the sub-graph whose vertices satisfy {@code vertexFilter}; only edges connecting two
 * such vertices are retained.
 *
 * @param vertexFilter the filter function for vertices.
 * @return the resulting sub-graph.
 */
public Graph<K, VV, EV> filterOnVertices(FilterFunction<Vertex<K, VV>> vertexFilter) {
    DataSet<Vertex<K, VV>> keptVertices = vertices.filter(vertexFilter);

    // First join: edge field 0 against the ids of the kept vertices.
    DataSet<Edge<K, EV>> matchedOnSource = edges
            .join(keptVertices)
            .where(0)
            .equalTo(0)
            .with(new ProjectEdge<>());

    // Second join: edge field 1 against the ids of the kept vertices.
    DataSet<Edge<K, EV>> keptEdges = matchedOnSource
            .join(keptVertices)
            .where(1)
            .equalTo(0)
            .with(new ProjectEdge<>())
            .name("Filter on vertices");

    return new Graph<>(keptVertices, keptEdges, context);
}
/**
 * Builds a delta iteration over {@code initialData} whose step function joins against the
 * solution set twice (operators named {@code JOIN_1} and {@code JOIN_2}).
 *
 * @param initialData used as both the initial solution set and the initial workset (key: field 0)
 * @param numIterations maximum number of iterations of the delta iteration
 * @return the data set obtained by closing the iteration
 */
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {
    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> deltaLoop =
            initialData.iterateDelta(initialData, numIterations, 0);

    DataSet<Tuple2<Long, Double>> solutionDelta = deltaLoop.getSolutionSet()
            // first join: solution set with the duplicated workset
            .join(deltaLoop.getWorkset().flatMap(new Duplicator()))
            .where(0)
            .equalTo(0)
            .with(new SummingJoin())
            .name(JOIN_1)
            // per-key minimum of field 1, then expansion
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1)
            .map(new Expander())
            // second join: back against the solution set
            .join(deltaLoop.getSolutionSet())
            .where(0)
            .equalTo(0)
            .with(new SummingJoinProject())
            .name(JOIN_2);

    // The next workset is the per-key sum of field 1 of the delta.
    DataSet<Tuple2<Long, Double>> nextWorkset = solutionDelta.groupBy(0).aggregate(Aggregations.SUM, 1);

    return deltaLoop.closeWith(solutionDelta, nextWorkset);
}