@Test(expected = InvalidProgramException.class)
public void testJoinKeyInvalidAtomic5() {
    // A "*" key selector on the left side must be rejected, because ArrayList
    // is not a valid atomic key type.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<ArrayList<Integer>> listInput = env.fromElements(new ArrayList<Integer>());
    DataSet<Integer> intInput = env.fromElements(0, 0, 0);

    // should throw InvalidProgramException
    listInput.join(intInput).where("*").equalTo("*");
}
/**
 * Convert a bipartite graph into an undirected graph that contains only bottom vertices. An edge between two
 * vertices in the new graph will exist only if the original bipartite graph contains a top vertex they are both
 * connected to.
 *
 * <p>The simple projection performs a single join and returns edges containing the bipartite edge values.
 *
 * <p>Note: KB must override .equals(). This requirement may be removed in a future release.
 *
 * @return simple bottom projection of the bipartite graph
 */
public Graph<KB, VVB, Tuple2<EV, EV>> projectionBottomSimple() {
    // Self-join the edge set on the top vertex ID (field 0): every pair of
    // edges sharing a top vertex yields a projected bottom-to-bottom edge.
    DataSet<Edge<KB, Tuple2<EV, EV>>> projectedEdges = edges
        .join(edges)
        .where(0)
        .equalTo(0)
        .with(new ProjectionBottomSimple<>())
        .name("Simple bottom projection");

    return Graph.fromDataSet(bottomVertices, projectedEdges, context);
}
@Test
public void testJoinWithHuge() throws Exception {
    /*
     * Join with Huge
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> small = CollectionDataSets.getSmall3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> huge = CollectionDataSets.get5TupleDataSet(env);

    // Hint that the second input is the large one; join on the Long field of each tuple.
    DataSet<Tuple2<String, String>> joined = small
        .joinWithHuge(huge)
        .where(1)
        .equalTo(1)
        .with(new T3T5FlatJoin());

    List<Tuple2<String, String>> result = joined.collect();

    String expected = "Hi,Hallo\n" + "Hello,Hallo Welt\n" + "Hello world,Hallo Welt\n";

    compareResultAsTuples(result, expected);
}
@Test
public void reuseSinglePartitioningJoin2() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> left =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> right =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    // Pre-partition the left input on (0,1); the mapper forwards both fields,
    // so the optimizer may reuse that partitioning for the repartition-hash join
    // keyed on (0,1) == (2,1).
    DataSet<Tuple3<Integer, Integer, Integer>> joined = left
        .partitionByHash(0,1)
        .map(new MockMapper()).withForwardedFields("0;1")
        .join(right, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
        .where(0,1).equalTo(2,1).with(new MockJoin());

    joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource();

    checkValidJoinInputProperties(join);
}
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Generate one million records and self-join them on field 0 == field 1.
    DataSet<Tuple3<Integer, Integer, String>> generated =
        env.createInput(new LargeJoinDataGeneratorInputFormat(1000000));

    generated
        .join(generated)
        .where(0)
        .equalTo(1)
        .with(new Joiner())
        .writeAsText(resultPath);

    env.execute("Local Selfjoin Test Job");
}
public static void connectedComponentsWithCoGroup(String[] args) throws Exception {
    // args: [0]=parallelism, [1]=vertex file, [2]=edge file, [3]=output path, [4]=max iterations
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(Integer.parseInt(args[0]));

    DataSet<Tuple1<Long>> initialVertices =
        env.readCsvFile(args[1]).types(Long.class).name(VERTEX_SOURCE);
    DataSet<Tuple2<Long, Long>> edges =
        env.readCsvFile(args[2]).types(Long.class, Long.class).name(EDGES_SOURCE);

    // Each vertex starts with its own ID as component ID.
    DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.flatMap(new DummyMapFunction());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithId.iterateDelta(verticesWithId, Integer.parseInt(args[4]), 0)
            .name(ITERATION_NAME);

    // Propagate component IDs along the edges of the workset vertices.
    DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
        .join(edges)
        .where(0).equalTo(0)
        .with(new DummyJoinFunction()).name(JOIN_NEIGHBORS_MATCH);

    // Compare candidates with the solution set and keep only improvements.
    DataSet<Tuple2<Long, Long>> updates = candidates
        .coGroup(iteration.getSolutionSet())
        .where(0).equalTo(0)
        .with(new DummyCoGroupFunction()).name(MIN_ID_AND_UPDATE);

    iteration.closeWith(updates, updates).writeAsCsv(args[3]).name(SINK);

    env.execute();
}
public static DataSet<Tuple2<Long, Long>> doBulkIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) { // open a bulk iteration IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20); DataSet<Tuple2<Long, Long>> changes = iteration .join(edges).where(0).equalTo(0).with(new Join222()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration).where(0).equalTo(0) .flatMap(new FlatMapJoin()); // close the bulk iteration return iteration.closeWith(changes); }
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {

    // Delta iteration keyed on field 0; initial data serves as both solution set and workset.
    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration =
        initialData.iterateDelta(initialData, numIterations, 0);

    // Join the solution set with the duplicated workset, take the per-key minimum,
    // expand, and join against the solution set once more.
    DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet()
        .join(iteration.getWorkset().flatMap(new Duplicator()))
            .where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1)
        .groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander())
        .join(iteration.getSolutionSet())
            .where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2);

    // Next workset: per-key sums of the delta.
    DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1);

    return iteration.closeWith(delta, changes);
}
@Test(expected = InvalidProgramException.class)
public void testJoinKeyInvalidAtomic6() {
    // Mirror case of testJoinKeyInvalidAtomic5: here the RIGHT input has the
    // invalid atomic key type (ArrayList), which must be rejected for "*".
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Integer> intInput = env.fromElements(0, 0, 0);
    DataSet<ArrayList<Integer>> listInput = env.fromElements(new ArrayList<Integer>());

    // should throw InvalidProgramException
    intInput.join(listInput).where("*").equalTo("*");
}
@Test
public void reuseSinglePartitioningJoin4() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> left =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> right =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    // Left input is pre-partitioned on field 0 only, while the join is keyed
    // on (0,1): the partitioning covers a strict subset of the join keys.
    DataSet<Tuple3<Integer, Integer, Integer>> joined = left
        .partitionByHash(0)
        .map(new MockMapper()).withForwardedFields("0")
        .join(right, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
        .where(0,1).equalTo(2,1).with(new MockJoin());

    joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource();

    checkValidJoinInputProperties(join);
}
private void executeTask(JoinFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> joiner, boolean slow, int parallelism) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Two infinite sources feed a sort-merge join; the job is expected to be
    // canceled, never to finish on its own.
    DataSet<Tuple2<Integer, Integer>> left = env.createInput(new InfiniteIntegerTupleInputFormat(slow));
    DataSet<Tuple2<Integer, Integer>> right = env.createInput(new InfiniteIntegerTupleInputFormat(slow));

    left.join(right, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
        .where(0)
        .equalTo(0)
        .with(joiner)
        .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

    env.setParallelism(parallelism);

    // cancel after 5s, give up waiting after 10s
    runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}
/**
 * Convert a bipartite graph into an undirected graph that contains only top vertices. An edge between two vertices
 * in the new graph will exist only if the original bipartite graph contains a bottom vertex they are both
 * connected to.
 *
 * <p>The simple projection performs a single join and returns edges containing the bipartite edge values.
 *
 * <p>Note: KT must override .equals(). This requirement may be removed in a future release.
 *
 * @return simple top projection of the bipartite graph
 */
public Graph<KT, VVT, Tuple2<EV, EV>> projectionTopSimple() {
    // Self-join the edge set on the bottom vertex ID (field 1): every pair of
    // edges sharing a bottom vertex yields a projected top-to-top edge.
    DataSet<Edge<KT, Tuple2<EV, EV>>> projectedEdges = edges
        .join(edges)
        .where(1)
        .equalTo(1)
        .with(new ProjectionTopSimple<>())
        .name("Simple top projection");

    return Graph.fromDataSet(topVertices, projectedEdges, context);
}
private Plan getTestPlanRightStatic(String strategy) {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple3<Long, Long, Long>> bigInput =
        env.readCsvFile("file://bigFile").types(Long.class, Long.class, Long.class).name("bigFile");
    DataSet<Tuple3<Long, Long, Long>> smallInput =
        env.readCsvFile("file://smallFile").types(Long.class, Long.class, Long.class).name("smallFile");

    IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);

    // Force a repartition-hash ship strategy; optionally pin the local strategy too.
    Configuration joinStrategy = new Configuration();
    joinStrategy.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);
    if (!strategy.equals("")) {
        joinStrategy.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
    }

    // Join the iterative (dynamic) path with the static small input.
    DataSet<Tuple3<Long, Long, Long>> inner = iteration
        .join(smallInput)
        .where(0).equalTo(0)
        .with(new DummyJoiner()).name("DummyJoiner")
        .withParameters(joinStrategy);

    DataSet<Tuple3<Long, Long, Long>> output = iteration.closeWith(inner);

    output.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}
@Test
public void testJoinKeyNestedTuples() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Tuple2<Integer, String>, Long, String, Long, Integer>> nested =
        env.fromCollection(emptyNestedTupleData, nestedTupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> flat =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // A nested field expression ("f0.f0") joined against a flat position key
    // must be accepted without throwing.
    try {
        nested.join(flat).where("f0.f0").equalTo(4);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail();
    }
}
@Test
public void reuseSinglePartitioningJoin1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> left =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> right =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    // Left input is pre-partitioned on exactly the join keys (0,1) and the
    // mapper forwards them, so the optimizer can reuse the partitioning.
    DataSet<Tuple3<Integer, Integer, Integer>> joined = left
        .partitionByHash(0,1)
        .map(new MockMapper()).withForwardedFields("0;1")
        .join(right, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
        .where(0,1).equalTo(0,1).with(new MockJoin());

    joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource();

    checkValidJoinInputProperties(join);
}
private void executeTaskWithGenerator(
        JoinFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> joiner,
        int keys,
        int vals,
        int msecsTillCanceling,
        int maxTimeTillCanceled) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Two identically-distributed generated inputs feed a sort-merge join
    // that is canceled mid-flight by the runner.
    DataSet<Tuple2<Integer, Integer>> left = env.createInput(new UniformIntTupleGeneratorInputFormat(keys, vals));
    DataSet<Tuple2<Integer, Integer>> right = env.createInput(new UniformIntTupleGeneratorInputFormat(keys, vals));

    left.join(right, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
        .where(0)
        .equalTo(0)
        .with(joiner)
        .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

    env.setParallelism(PARALLELISM);

    runAndCancelJob(env.createProgramPlan(), msecsTillCanceling, maxTimeTillCanceled);
}
/**
 * Convert a bipartite graph into a graph that contains only bottom vertices. An edge between two vertices in the
 * new graph will exist only if the original bipartite graph contains at least one top vertex they both connect to.
 *
 * <p>The full projection performs three joins and returns edges containing the connecting vertex ID and value,
 * both bottom vertex values, and both bipartite edge values.
 *
 * <p>Note: KB must override .equals(). This requirement may be removed in a future release.
 *
 * @return full bottom projection of the bipartite graph
 */
public Graph<KB, VVB, Projection<KT, VVT, VVB, EV>> projectionBottomFull() {
    // Enrich each edge with both endpoint vertex values, then self-join on
    // the top vertex ID (field 0) to produce the full projection edges.
    DataSet<Tuple5<KT, KB, EV, VVT, VVB>> edgesWithVertices = joinEdgeWithVertices();

    DataSet<Edge<KB, Projection<KT, VVT, VVB, EV>>> projectedEdges = edgesWithVertices
        .join(edgesWithVertices)
        .where(0)
        .equalTo(0)
        .with(new ProjectionBottomFull<>())
        .name("Full bottom projection");

    return Graph.fromDataSet(bottomVertices, projectedEdges, context);
}
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> data1 = env.generateSequence(1, 100);
    DataSet<Long> data2 = env.generateSequence(1, 100);

    // First iteration: identity-map until convergence cap (100 supersteps).
    IterativeDataSet<Long> firstIteration = data1.iterate(100);
    DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdMapper()));

    // Second iteration consumes the result of the first one inside its step function.
    IterativeDataSet<Long> mainIteration = data2.map(new IdMapper()).iterate(100);
    DataSet<Long> joined = mainIteration
        .join(firstResult)
        .where(new IdKeyExtractor()).equalTo(new IdKeyExtractor())
        .with(new Joiner());
    DataSet<Long> mainResult = mainIteration.closeWith(joined);

    mainResult.output(new DiscardingOutputFormat<Long>());

    env.execute();
}
@Test
public void testJoinKeyWithCustomContainingTuple0() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<CustomTypeWithTuple> left = env.fromCollection(customTypeWithTupleData);
    DataSet<CustomTypeWithTuple> right = env.fromCollection(customTypeWithTupleData);

    // Joining a nested tuple field inside a POJO against a plain POJO field
    // must be accepted without throwing.
    try {
        left.join(right).where("intByString.f0").equalTo("myInt");
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail();
    }
}
public static void tcph3(String[] args) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(Integer.parseInt(args[0])); //order id, order status, order data, order prio, ship prio DataSet<Tuple5<Long, String, String, String, Integer>> orders = env.readCsvFile(args[1]) .fieldDelimiter("|").lineDelimiter("\n") .includeFields("101011001").types(Long.class, String.class, String.class, String.class, Integer.class) .name(ORDERS); //order id, extended price DataSet<Tuple2<Long, Double>> lineItems = env.readCsvFile(args[2]) .fieldDelimiter("|").lineDelimiter("\n") .includeFields("100001").types(Long.class, Double.class) .name(LINEITEM); DataSet<Tuple2<Long, Integer>> filterO = orders.flatMap(new FilterO()).name(MAPPER_NAME); DataSet<Tuple3<Long, Integer, Double>> joinLiO = filterO.join(lineItems).where(0).equalTo(0).with(new JoinLiO()).name(JOIN_NAME); DataSet<Tuple3<Long, Integer, Double>> aggLiO = joinLiO.groupBy(0, 1).reduceGroup(new AggLiO()).name(REDUCE_NAME); aggLiO.writeAsCsv(args[3], "\n", "|").name(SINK); env.execute(); }