How to use the filter method in org.apache.flink.api.java.operators.FilterOperator

Best Java code snippets using org.apache.flink.api.java.operators.FilterOperator.filter (Showing top 6 results out of 315)

/**
 * Builds the data set of instances from the text file at {@code pathFileData}.
 *
 * <p>If {@code attributes} is still {@code null}, the header is loaded first. Empty lines,
 * lines starting with {@code %}, and lines starting with {@code @attribute},
 * {@code @relation} or {@code @data} are dropped; every remaining line is passed to a
 * {@code DataInstanceBuilder}. The relation name is handed to the mapper through the
 * configuration and the attributes through a broadcast set.
 *
 * @param env execution environment used to read the file and to wrap the attributes
 * @return the data set obtained by mapping the remaining lines
 */
private DataSet<DataInstance> loadDataSet(ExecutionEnvironment env){
  // Make sure the header information is available before it is wrapped below.
  if (attributes == null) {
    this.loadHeader(env);
  }

  DataSet<Attributes> attsDataSet = env.fromElements(attributes);
  DataSource<String> data = env.readTextFile(pathFileData);

  // The relation name travels to the mapper via the operator configuration.
  Configuration config = new Configuration();
  config.setString(DataFlinkLoader.RELATION_NAME, this.relationName);

  // Drop blank lines, '%' lines and the header lines; the rest is handed to the builder.
  DataSet<String> instanceLines = data
      .filter(line -> !line.isEmpty())
      .filter(line -> !line.startsWith("%"))
      .filter(line -> !line.startsWith("@attribute"))
      .filter(line -> !line.startsWith("@relation"))
      .filter(line -> !line.startsWith("@data"));

  return instanceLines
      .map(new DataInstanceBuilder(isNormalize()))
      .withParameters(config)
      .withBroadcastSet(attsDataSet, DataFlinkLoader.ATTRIBUTES_NAME + "_" + this.relationName);
}

.setParallelism(1)
.filter(w -> !w.isEmpty())
.filter(w -> !w.startsWith("%"))
.filter(line -> line.startsWith("@relation"))
.first(1)
.collect();
.setParallelism(1)
.filter(w -> !w.isEmpty())
.filter(w -> !w.startsWith("%"))
.filter(line -> line.startsWith("@attribute"))
.collect();

.setParallelism(1)
.filter(w -> !w.isEmpty())
.filter(w -> !w.startsWith("%"))
.filter(line -> line.startsWith("@attribute"))
.collect();

/**
 * Builds a logical graph from those vertices and edges of the collection that
 * belong to the starting graph and to no other graph of the given collection.
 *
 * @param collection input collection
 * @return excluded graph
 */
@Override
public LogicalGraph execute(GraphCollection collection) {
 // Graph heads selected by ByDifferentId for startId, reduced to their ids.
 DataSet<GraphHead> otherGraphHeads = collection.getGraphHeads()
  .filter(new ByDifferentId<GraphHead>(startId));
 DataSet<GradoopId> excludedGraphIds = otherGraphHeads.map(new Id<GraphHead>());

 // Vertices: first restrict to the starting graph, then apply the broadcast-based filter.
 DataSet<Vertex> startVertices = collection.getVertices()
  .filter(new InGraph<Vertex>(startId));
 DataSet<Vertex> vertices = startVertices
  .filter(new NotInGraphsBroadcast<Vertex>())
  .withBroadcastSet(excludedGraphIds, NotInGraphsBroadcast.GRAPH_IDS);

 // Edges: same two-step filtering as for the vertices.
 DataSet<Edge> startEdges = collection.getEdges()
  .filter(new InGraph<Edge>(startId));
 DataSet<Edge> edges = startEdges
  .filter(new NotInGraphsBroadcast<Edge>())
  .withBroadcastSet(excludedGraphIds, NotInGraphsBroadcast.GRAPH_IDS);

 return collection.getConfig().getLogicalGraphFactory().fromDataSets(vertices, edges);
}

 /**
  * Produces the logical graph made of the vertices and edges that are part of
  * the starting graph but of no other graph contained in the given collection.
  *
  * @param collection input collection
  * @return excluded graph
  */
 @Override
 public LogicalGraph execute(GraphCollection collection) {
  // Ids of the graph heads that pass the ByDifferentId filter for startId.
  DataSet<GraphHead> remainingHeads = collection.getGraphHeads()
   .filter(new ByDifferentId<GraphHead>(startId));
  DataSet<GradoopId> excludedGraphIds = remainingHeads.map(new Id<GraphHead>());

  // Keep vertices of the starting graph, then filter them against the broadcast ids.
  DataSet<Vertex> verticesInStart = collection.getVertices()
   .filter(new InGraph<Vertex>(startId));
  DataSet<Vertex> vertices = verticesInStart
   .filter(new NotInGraphsBroadcast<Vertex>())
   .withBroadcastSet(excludedGraphIds, NotInGraphsBroadcast.GRAPH_IDS);

  // Keep edges of the starting graph, then filter them against the broadcast ids.
  DataSet<Edge> edgesInStart = collection.getEdges()
   .filter(new InGraph<Edge>(startId));
  DataSet<Edge> edges = edgesInStart
   .filter(new NotInGraphsBroadcast<Edge>())
   .withBroadcastSet(excludedGraphIds, NotInGraphsBroadcast.GRAPH_IDS);

  return collection.getConfig().getLogicalGraphFactory().fromDataSets(vertices, edges);
 }
}

/**
 * Reads mail records from the file given by the required {@code input} parameter,
 * joins mails on message id and reply-to to form reply connections, and prints
 * the result of grouping those connections by both address fields.
 *
 * @param args command-line arguments; must contain the {@code input} parameter
 * @throws Exception if parsing the arguments or running the job fails
 */
public static void main(String[] args) throws Exception {
  // The only parameter needed is the path of the input file.
  final String input = ParameterTool.fromArgs(args).getRequired("input");

  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // The field mask "101001" selects positions 0, 2 and 5:
  // messageId, sender and reply-to.
  DataSet<Tuple3<String, String, String>> mails = env
      .readCsvFile(input)
      .lineDelimiter(MBoxParser.MAIL_RECORD_DELIM)
      .fieldDelimiter(MBoxParser.MAIL_FIELD_DELIM)
      .includeFields("101001")
      .types(String.class, String.class, String.class);

  // Extract the email addresses, then drop mails matching the two excluded addresses.
  DataSet<Tuple3<String, String, String>> addressMails = mails
      .map(new EmailExtractor())
      .filter(new ExcludeEmailFilter("git@git.apache.org"))
      .filter(new ExcludeEmailFilter("jira@apache.org"));

  // Self-join: field 2 (reply-to) of one mail equals field 0 (messageId) of another;
  // field 1 of both sides is kept.
  DataSet<Tuple2<String, String>> replyConnections = addressMails
      .join(addressMails)
      .where(2)
      .equalTo(0)
      .projectFirst(1)
      .projectSecond(1);

  // Group by both address fields, reduce each group with ConnectionCounter, and print.
  replyConnections
      .groupBy(0, 1)
      .reduceGroup(new ConnectionCounter())
      .print();
}

How to use the filter method in org.apache.flink.api.java.operators.FilterOperator

Best Java code snippets using org.apache.flink.api.java.operators.FilterOperator.filter (Showing top 6 results out of 315)

How to use the filter method in org.apache.flink.api.java.operators.FilterOperator