/**
 * Apply a filtering function to the graph and return a sub-graph that
 * satisfies the predicates only for the edges.
 *
 * @param edgeFilter the filter function for edges.
 * @return the resulting sub-graph.
 */
public Graph<K, VV, EV> filterOnEdges(FilterFunction<Edge<K, EV>> edgeFilter) {
    // Vertices are kept untouched; only the edge set is restricted.
    DataSet<Edge<K, EV>> remainingEdges = this.edges
        .filter(edgeFilter)
        .name("Filter on edges");

    return new Graph<>(this.vertices, remainingEdges, this.context);
}
@Override public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input) throws Exception { // Edges DataSet<Edge<K, EV>> edges = input .getEdges() .filter(new RemoveSelfLoops<>()) .setParallelism(parallelism) .name("Remove self-loops") .distinct(0, 1) .setCombineHint(CombineHint.NONE) .setParallelism(parallelism) .name("Remove duplicate edges"); // Graph return Graph.fromDataSet(input.getVertices(), edges, input.getContext()); }
/**
 * Verifies that an explicitly assigned operator name ("GivenName")
 * shows up in the generated program plan.
 */
@Test
public void testGivenName() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> input = env.fromCollection(Arrays.asList("a", "b"));

    FilterFunction<String> keepOnlyA = new FilterFunction<String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(String value) throws Exception {
            return value.equals("a");
        }
    };

    input.filter(keepOnlyA).name("GivenName").output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();
    testForName("GivenName", plan);
}
.sum(2) .filter(new CategoryFrequent()) .withBroadcastSet(categoryMinFrequencies, DIMSpanConstants.MIN_FREQUENCY) .map(new LabelOnly()) .distinct(); .sum(2) .filter(new CategoryFrequent()) .withBroadcastSet(categoryMinFrequencies, DIMSpanConstants.MIN_FREQUENCY) .map(new LabelOnly()) .distinct();
/**
 * Creates a new logical graph that contains only vertices and edges that
 * are contained in the starting graph but not in any other graph that is part
 * of the given collection.
 *
 * @param collection input collection
 * @return excluded graph
 */
@Override
public LogicalGraph execute(GraphCollection collection) {
    // Ids of every graph head in the collection except the starting graph.
    DataSet<GradoopId> otherGraphIds = collection.getGraphHeads()
        .filter(new ByDifferentId<GraphHead>(startId))
        .map(new Id<GraphHead>());

    // Keep vertices of the starting graph that appear in none of the others.
    DataSet<Vertex> retainedVertices = collection.getVertices()
        .filter(new InGraph<Vertex>(startId))
        .filter(new NotInGraphsBroadcast<Vertex>())
        .withBroadcastSet(otherGraphIds, NotInGraphsBroadcast.GRAPH_IDS);

    // Same exclusion applied to the edge set.
    DataSet<Edge> retainedEdges = collection.getEdges()
        .filter(new InGraph<Edge>(startId))
        .filter(new NotInGraphsBroadcast<Edge>())
        .withBroadcastSet(otherGraphIds, NotInGraphsBroadcast.GRAPH_IDS);

    return collection.getConfig()
        .getLogicalGraphFactory()
        .fromDataSets(retainedVertices, retainedEdges);
}
@Override
protected DataSet<ExpandEmbedding> iterate(DataSet<ExpandEmbedding> initialWorkingSet) {
    // Bulk iteration bounded by (upperBound - 1) expansion rounds.
    IterativeDataSet<ExpandEmbedding> iteration = initialWorkingSet
        .iterate(upperBound - 1)
        .name(getName());

    // Grow each embedding from the previous round by joining one more
    // candidate edge; the merge enforces distinctness and closing constraints.
    DataSet<ExpandEmbedding> grown = iteration
        .filter(new FilterPreviousExpandEmbedding())
        .name(getName() + " - FilterRecent")
        .join(candidateEdgeTuples, joinHint)
        .where(2).equalTo(0)
        .with(new MergeExpandEmbeddings(
            distinctVertexColumns,
            distinctEdgeColumns,
            closingColumn))
        .name(getName() + " - Expansion");

    // Solution set accumulates everything seen so far; the freshly grown
    // embeddings alone form the next working set.
    return iteration.closeWith(grown.union(iteration), grown);
}
}
/**
 * Reads the ARFF data file and builds a data set of instances, using the
 * previously loaded attribute definitions (loading them first if needed).
 */
private DataSet<DataInstance> loadDataSet(ExecutionEnvironment env){
    if (attributes == null) {
        this.loadHeader(env);
    }

    DataSet<Attributes> attributesDataSet = env.fromElements(attributes);
    DataSource<String> rawLines = env.readTextFile(pathFileData);

    Configuration parserConfig = new Configuration();
    parserConfig.setString(DataFlinkLoader.RELATION_NAME, this.relationName);

    // Keep only actual data rows: drop blank lines, '%' comments and
    // ARFF header lines before parsing each row into a DataInstance.
    return rawLines
        .filter(line -> !line.isEmpty())
        .filter(line -> !line.startsWith("%"))
        .filter(line -> !line.startsWith("@attribute"))
        .filter(line -> !line.startsWith("@relation"))
        .filter(line -> !line.startsWith("@data"))
        .map(new DataInstanceBuilder(isNormalize()))
        .withParameters(parserConfig)
        .withBroadcastSet(attributesDataSet, DataFlinkLoader.ATTRIBUTES_NAME + "_" + this.relationName);
}
.filter(x -> true).name("preFilter1"); DataSet<Tuple2<Long, Long>> s2 = u1 .filter(x -> true).name("preFilter2"); .filter(x -> true).name("postFilter1") .groupBy(0) .reduceGroup(new IdentityGroupReducer<>()).name("reducer1"); DataSet<Tuple2<Long, Long>> reduced2 = s1 .union(s2) .filter(x -> true).name("postFilter2") .groupBy(1) .reduceGroup(new IdentityGroupReducer<>()).name("reducer2");
/**
 * Verifies that a filter operator without an explicit name receives a
 * default name derived from its call site (method, file and line number).
 */
@Test
public void testDefaultName() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> strs = env.fromCollection(Arrays.asList("a", "b"));

    // WARNING: The test will fail if this line is being moved down in the file (the line-number is hard-coded)
    strs.filter(new FilterFunction<String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(String value) throws Exception {
            return value.equals("a");
        }
    }).output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();
    // The expected default name embeds the hard-coded source line (":55").
    testForName("Filter at testDefaultName(NamesTest.java:55)", plan);
}
long edgesToChange = social.getEdges().filter(new ByLabel<>(toInvertLabel)).count(); long edgesAfter = invertedEdgeGraph.getEdges().count(); long oldEdgeCount = invertedEdgeGraph.getEdges().filter(new ByLabel<>(toInvertLabel)).count(); Assert.assertEquals(oldEdgeCount, 0); // no edges with the old label should exist long invertedEdgeCount = invertedEdgeGraph.getEdges().filter(new ByLabel<>(invertedLabel)).count(); Assert.assertEquals(edgesToChange, invertedEdgeCount); invertedEdgeGraph.getVertices() .filter(new Or<>(new ByLabel<>("Person"), new ByLabel<>("Tag"))) .output(new LocalCollectionOutputFormat<>(vertices));
.setParallelism(1) .filter(w -> !w.isEmpty()) .filter(w -> !w.startsWith("%")) .filter(line -> line.startsWith("@relation")) .first(1) .collect(); }catch (Exception ex){ .setParallelism(1) .filter(w -> !w.isEmpty()) .filter(w -> !w.startsWith("%")) .filter(line -> line.startsWith("@attribute")) .collect();
/**
 * Verifies that the runtime context (configuration parameters) reaches both
 * rich filter functions combined with {@code And}: with KEY set to 2, only
 * the single element 2 survives the combined filter.
 */
@Test
public void testIfContextIsSet() throws Exception {
    DataSet<Integer> elements = getExecutionEnvironment().fromElements(1, 2, 3, 4, 5);

    TestRichCombinableFilters firstFilter = new TestRichCombinableFilters();
    TestRichCombinableFilters secondFilter = new TestRichCombinableFilters();

    Configuration configuration = new Configuration();
    configuration.setInteger(TestRichCombinableFilters.KEY, 2);

    List<Integer> result = new ArrayList<>();
    elements
        .filter(new And<>(firstFilter, secondFilter))
        .withParameters(configuration)
        .output(new LocalCollectionOutputFormat<>(result));
    getExecutionEnvironment().execute();

    assertEquals(1, result.size());
    assertEquals(2, result.get(0).intValue());
}
/**
 * Function that verifies whether the edge to be removed is part of the SSSP or not.
 * If it is, the src vertex will be invalidated.
 *
 * @param edgeToBeRemoved the edge whose SSSP membership is checked
 * @param edgesInSSSP the edges currently forming the SSSP
 * @return true if the edge is contained in the SSSP, false otherwise
 */
public static boolean isInSSSP(final Edge<Long, Double> edgeToBeRemoved,
        DataSet<Edge<Long, Double>> edgesInSSSP) throws Exception {

    FilterFunction<Edge<Long, Double>> matchesRemovedEdge =
        new FilterFunction<Edge<Long, Double>>() {
            @Override
            public boolean filter(Edge<Long, Double> edge) throws Exception {
                return edge.equals(edgeToBeRemoved);
            }
        };

    // count() triggers job execution; any match means the edge is in the SSSP.
    return edgesInSSSP.filter(matchesRemovedEdge).count() > 0;
}
public static void main(String[] args) throws Exception { // parse parameters ParameterTool params = ParameterTool.fromArgs(args); String input = params.getRequired("input"); // obtain an execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read messageId, sender, and reply-to fields from the input data set DataSet<Tuple3<String, String, String>> mails = env.readCsvFile(input) .lineDelimiter(MBoxParser.MAIL_RECORD_DELIM) .fieldDelimiter(MBoxParser.MAIL_FIELD_DELIM) // messageId at position 0, sender at 2, reply-to at 5 .includeFields("101001") .types(String.class, String.class, String.class); // extract email addresses and filter out mails from bots DataSet<Tuple3<String, String, String>> addressMails = mails .map(new EmailExtractor()) .filter(new ExcludeEmailFilter("git@git.apache.org")) .filter(new ExcludeEmailFilter("jira@apache.org")); // construct reply connections by joining on messageId and reply-To DataSet<Tuple2<String, String>> replyConnections = addressMails .join(addressMails).where(2).equalTo(0).projectFirst(1).projectSecond(1); // count reply connections for each pair of email addresses replyConnections .groupBy(0, 1).reduceGroup(new ConnectionCounter()) .print(); }