/**
 * Apply a filtering function to the graph and return a sub-graph that
 * satisfies the predicates only for the edges.
 *
 * @param edgeFilter the filter function for edges.
 * @return the resulting sub-graph.
 */
public Graph<K, VV, EV> filterOnEdges(FilterFunction<Edge<K, EV>> edgeFilter) {
    // Vertices are kept untouched; only the edge set is restricted.
    DataSet<Edge<K, EV>> remainingEdges = this.edges
        .filter(edgeFilter)
        .name("Filter on edges");

    return new Graph<>(this.vertices, remainingEdges, this.context);
}
@Override public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input) throws Exception { // Edges DataSet<Edge<K, EV>> edges = input .getEdges() .filter(new RemoveSelfLoops<>()) .setParallelism(parallelism) .name("Remove self-loops") .distinct(0, 1) .setCombineHint(CombineHint.NONE) .setParallelism(parallelism) .name("Remove duplicate edges"); // Graph return Graph.fromDataSet(input.getVertices(), edges, input.getContext()); }
/**
 * Verifies that an explicitly assigned operator name ("GivenName")
 * shows up in the generated program plan.
 */
@Test
public void testGivenName() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> input = env.fromCollection(Arrays.asList("a", "b"));

    FilterFunction<String> keepOnlyA = new FilterFunction<String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(String value) throws Exception {
            return value.equals("a");
        }
    };

    input.filter(keepOnlyA).name("GivenName").output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();
    testForName("GivenName", plan);
}
.sum(2) .filter(new CategoryFrequent()) .withBroadcastSet(categoryMinFrequencies, DIMSpanConstants.MIN_FREQUENCY) .map(new LabelOnly()) .distinct(); .sum(2) .filter(new CategoryFrequent()) .withBroadcastSet(categoryMinFrequencies, DIMSpanConstants.MIN_FREQUENCY) .map(new LabelOnly()) .distinct();
/**
 * Creates a new logical graph that contains only vertices and edges that
 * are contained in the starting graph but not in any other graph that is part
 * of the given collection.
 *
 * @param collection input collection
 * @return excluded graph
 */
@Override
public LogicalGraph execute(GraphCollection collection) {
    // Ids of every graph head in the collection except the starting graph.
    DataSet<GradoopId> otherGraphIds = collection.getGraphHeads()
        .filter(new ByDifferentId<GraphHead>(startId))
        .map(new Id<GraphHead>());

    // Keep vertices of the starting graph that appear in none of the others.
    DataSet<Vertex> retainedVertices = collection.getVertices()
        .filter(new InGraph<Vertex>(startId))
        .filter(new NotInGraphsBroadcast<Vertex>())
        .withBroadcastSet(otherGraphIds, NotInGraphsBroadcast.GRAPH_IDS);

    // Same exclusion applied to the edge set.
    DataSet<Edge> retainedEdges = collection.getEdges()
        .filter(new InGraph<Edge>(startId))
        .filter(new NotInGraphsBroadcast<Edge>())
        .withBroadcastSet(otherGraphIds, NotInGraphsBroadcast.GRAPH_IDS);

    return collection.getConfig()
        .getLogicalGraphFactory()
        .fromDataSets(retainedVertices, retainedEdges);
}
@Override
protected DataSet<ExpandEmbedding> iterate(DataSet<ExpandEmbedding> initialWorkingSet) {
    // Bulk iteration bounded by (upperBound - 1) expansion rounds.
    IterativeDataSet<ExpandEmbedding> iteration = initialWorkingSet
        .iterate(upperBound - 1)
        .name(getName());

    // Grow each embedding from the previous round by joining one more
    // candidate edge; the merge enforces distinctness and closing constraints.
    DataSet<ExpandEmbedding> grown = iteration
        .filter(new FilterPreviousExpandEmbedding())
        .name(getName() + " - FilterRecent")
        .join(candidateEdgeTuples, joinHint)
        .where(2).equalTo(0)
        .with(new MergeExpandEmbeddings(
            distinctVertexColumns,
            distinctEdgeColumns,
            closingColumn))
        .name(getName() + " - Expansion");

    // Solution set accumulates everything seen so far; the freshly grown
    // embeddings alone form the next working set.
    return iteration.closeWith(grown.union(iteration), grown);
}
}
/**
 * Reads the ARFF data file and builds a data set of instances, using the
 * previously loaded attribute definitions (loading them first if needed).
 */
private DataSet<DataInstance> loadDataSet(ExecutionEnvironment env){
    if (attributes == null) {
        this.loadHeader(env);
    }

    DataSet<Attributes> attributesDataSet = env.fromElements(attributes);
    DataSource<String> rawLines = env.readTextFile(pathFileData);

    Configuration parserConfig = new Configuration();
    parserConfig.setString(DataFlinkLoader.RELATION_NAME, this.relationName);

    // Keep only actual data rows: drop blank lines, '%' comments and
    // ARFF header lines before parsing each row into a DataInstance.
    return rawLines
        .filter(line -> !line.isEmpty())
        .filter(line -> !line.startsWith("%"))
        .filter(line -> !line.startsWith("@attribute"))
        .filter(line -> !line.startsWith("@relation"))
        .filter(line -> !line.startsWith("@data"))
        .map(new DataInstanceBuilder(isNormalize()))
        .withParameters(parserConfig)
        .withBroadcastSet(attributesDataSet, DataFlinkLoader.ATTRIBUTES_NAME + "_" + this.relationName);
}
.filter(x -> true).name("preFilter1"); DataSet<Tuple2<Long, Long>> s2 = u1 .filter(x -> true).name("preFilter2"); .filter(x -> true).name("postFilter1") .groupBy(0) .reduceGroup(new IdentityGroupReducer<>()).name("reducer1"); DataSet<Tuple2<Long, Long>> reduced2 = s1 .union(s2) .filter(x -> true).name("postFilter2") .groupBy(1) .reduceGroup(new IdentityGroupReducer<>()).name("reducer2");
/**
 * Verifies that a filter operator without an explicit name receives a
 * default name derived from its call site (method, file and line number).
 */
@Test
public void testDefaultName() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> strs = env.fromCollection(Arrays.asList("a", "b"));

    // WARNING: The test will fail if this line is being moved down in the file (the line-number is hard-coded)
    strs.filter(new FilterFunction<String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(String value) throws Exception {
            return value.equals("a");
        }
    }).output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();
    // The expected default name embeds the hard-coded source line (":55").
    testForName("Filter at testDefaultName(NamesTest.java:55)", plan);
}
long edgesToChange = social.getEdges().filter(new ByLabel<>(toInvertLabel)).count(); long edgesAfter = invertedEdgeGraph.getEdges().count(); long oldEdgeCount = invertedEdgeGraph.getEdges().filter(new ByLabel<>(toInvertLabel)).count(); Assert.assertEquals(oldEdgeCount, 0); // no edges with the old label should exist long invertedEdgeCount = invertedEdgeGraph.getEdges().filter(new ByLabel<>(invertedLabel)).count(); Assert.assertEquals(edgesToChange, invertedEdgeCount); invertedEdgeGraph.getVertices() .filter(new Or<>(new ByLabel<>("Person"), new ByLabel<>("Tag"))) .output(new LocalCollectionOutputFormat<>(vertices));
.setParallelism(1) .filter(w -> !w.isEmpty()) .filter(w -> !w.startsWith("%")) .filter(line -> line.startsWith("@relation")) .first(1) .collect(); }catch (Exception ex){ .setParallelism(1) .filter(w -> !w.isEmpty()) .filter(w -> !w.startsWith("%")) .filter(line -> line.startsWith("@attribute")) .collect();
/**
 * Verifies that the runtime context (configuration parameters) reaches both
 * rich filter functions combined with {@code And}: with KEY set to 2, only
 * the single element 2 survives the combined filter.
 */
@Test
public void testIfContextIsSet() throws Exception {
    DataSet<Integer> elements = getExecutionEnvironment().fromElements(1, 2, 3, 4, 5);

    TestRichCombinableFilters firstFilter = new TestRichCombinableFilters();
    TestRichCombinableFilters secondFilter = new TestRichCombinableFilters();

    Configuration configuration = new Configuration();
    configuration.setInteger(TestRichCombinableFilters.KEY, 2);

    List<Integer> result = new ArrayList<>();
    elements
        .filter(new And<>(firstFilter, secondFilter))
        .withParameters(configuration)
        .output(new LocalCollectionOutputFormat<>(result));
    getExecutionEnvironment().execute();

    assertEquals(1, result.size());
    assertEquals(2, result.get(0).intValue());
}
/**
 * Function that verifies whether the edge to be removed is part of the SSSP or not.
 * If it is, the src vertex will be invalidated.
 *
 * @param edgeToBeRemoved the edge whose SSSP membership is checked
 * @param edgesInSSSP the edges currently forming the SSSP
 * @return true if the edge is contained in the SSSP, false otherwise
 */
public static boolean isInSSSP(final Edge<Long, Double> edgeToBeRemoved,
        DataSet<Edge<Long, Double>> edgesInSSSP) throws Exception {

    FilterFunction<Edge<Long, Double>> matchesRemovedEdge =
        new FilterFunction<Edge<Long, Double>>() {
            @Override
            public boolean filter(Edge<Long, Double> edge) throws Exception {
                return edge.equals(edgeToBeRemoved);
            }
        };

    // count() triggers job execution; any match means the edge is in the SSSP.
    return edgesInSSSP.filter(matchesRemovedEdge).count() > 0;
}
public static void main(String[] args) throws Exception { // parse parameters ParameterTool params = ParameterTool.fromArgs(args); String input = params.getRequired("input"); // obtain an execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read messageId, sender, and reply-to fields from the input data set DataSet<Tuple3<String, String, String>> mails = env.readCsvFile(input) .lineDelimiter(MBoxParser.MAIL_RECORD_DELIM) .fieldDelimiter(MBoxParser.MAIL_FIELD_DELIM) // messageId at position 0, sender at 2, reply-to at 5 .includeFields("101001") .types(String.class, String.class, String.class); // extract email addresses and filter out mails from bots DataSet<Tuple3<String, String, String>> addressMails = mails .map(new EmailExtractor()) .filter(new ExcludeEmailFilter("git@git.apache.org")) .filter(new ExcludeEmailFilter("jira@apache.org")); // construct reply connections by joining on messageId and reply-To DataSet<Tuple2<String, String>> replyConnections = addressMails .join(addressMails).where(2).equalTo(0).projectFirst(1).projectSecond(1); // count reply connections for each pair of email addresses replyConnections .groupBy(0, 1).reduceGroup(new ConnectionCounter()) .print(); }