org.apache.flink.api.java.operators.JoinOperator$JoinOperatorSets$JoinOperatorSetsPredicate java code examples

/**
 * Checks that a join can be keyed by a {@link KeySelector} on the first input
 * (returning {@code myInt}) and by the field expression {@code "myInt"} on the
 * second input without the key definition being rejected.
 */
@Test
public void testJoinKeyMixedKeySelectorTurned() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
  DataSet<CustomType> ds2 = env.fromCollection(customTypeData);
  try {
    ds1.join(ds2).where(new KeySelector<CustomType, Integer>() {
      @Override
      public Integer getKey(CustomType value) throws Exception {
        return value.myInt;
      }
    }).equalTo("myInt");
  } catch (Exception e) {
    // Attach the triggering exception as the cause so the test report shows why
    // the key definition was rejected, instead of printing to stderr and failing blindly.
    throw new AssertionError(
        "Joining a KeySelector key with the field expression \"myInt\" should be accepted", e);
  }
}

/**
 * Builds a plan in which the "Small" input is the first (left) input of a join
 * whose second input is the iterative data set derived from the "Big" input.
 * The join result closes the iteration and is written to a discarding sink.
 *
 * @param strategy value stored under {@code Optimizer.HINT_LOCAL_STRATEGY} in the join's parameters
 * @return the program plan created by the execution environment
 */
private Plan getTestPlanLeftStatic(String strategy) {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  environment.setParallelism(DEFAULT_PARALLELISM);

  // The local strategy hint is passed to the join through its parameters.
  final Configuration joinHints = new Configuration();
  joinHints.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);

  @SuppressWarnings("unchecked")
  DataSet<Tuple3<Long, Long, Long>> big = environment
      .fromElements(
          new Tuple3<Long, Long, Long>(1L, 2L, 3L),
          new Tuple3<Long, Long, Long>(1L, 2L, 3L),
          new Tuple3<Long, Long, Long>(1L, 2L, 3L))
      .name("Big");

  @SuppressWarnings("unchecked")
  DataSet<Tuple3<Long, Long, Long>> small = environment
      .fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L))
      .name("Small");

  // Iterate at most 10 times over the big input.
  IterativeDataSet<Tuple3<Long, Long, Long>> loop = big.iterate(10);

  // Join on field 0 of both sides; the small input is the left side.
  DataSet<Tuple3<Long, Long, Long>> stepResult = small
      .join(loop)
      .where(0)
      .equalTo(0)
      .with(new DummyJoiner())
      .name("DummyJoiner")
      .withParameters(joinHints);

  DataSet<Tuple3<Long, Long, Long>> loopResult = loop.closeWith(stepResult);
  loopResult.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

  return environment.createProgramPlan();
}

/**
 * Joins a POJO data set, keyed by the nested field expression
 * {@code "nestedPojo.longNumber"}, against a tuple data set keyed by
 * field position 6, and compares the collected result with the expected rows.
 */
@Test
public void testJoinNestedPojoAgainstTupleSelectedUsingInteger() throws Exception {
  final String expected = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
      "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
      "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";

  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<POJO> pojos = CollectionDataSets.getSmallPojoDataSet(environment);
  DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> tuples =
      CollectionDataSets.getSmallTuplebasedDataSet(environment);

  // The tuple side is keyed by an integer position here (not by a field name).
  DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joined =
      pojos.join(tuples)
          .where("nestedPojo.longNumber")
          .equalTo(6);

  List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> actual =
      joined.collect();
  compareResultAsTuples(actual, expected);
}

/**
 * Joins a POJO data set, keyed by the nested field expression
 * {@code "nestedPojo.longNumber"}, against a tuple data set keyed by the
 * field expression {@code "f6"}, and compares the collected result with the
 * expected rows.
 */
@Test
public void testJoinNestedPojoAgainstTupleSelectedUsingString() throws Exception {
  final String expected = "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
      "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
      "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";

  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<POJO> pojos = CollectionDataSets.getSmallPojoDataSet(environment);
  DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> tuples =
      CollectionDataSets.getSmallTuplebasedDataSet(environment);

  // The tuple side is keyed by a string field expression here.
  DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joined =
      pojos.join(tuples)
          .where("nestedPojo.longNumber")
          .equalTo("f6");

  List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> actual =
      joined.collect();
  compareResultAsTuples(actual, expected);
}

/**
 * Builds a plan in which the iterative data set derived from "bigFile" is the
 * first (left) join input and the "smallFile" input is the second (right) one.
 * The join always carries the repartition-hash ship strategy hint; the local
 * strategy hint is only set when a non-empty strategy is given.
 *
 * @param strategy value for {@code Optimizer.HINT_LOCAL_STRATEGY}; an empty string leaves the hint unset
 * @return the program plan created by the execution environment
 */
private Plan getTestPlanRightStatic(String strategy) {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  environment.setParallelism(DEFAULT_PARALLELISM);

  DataSet<Tuple3<Long, Long, Long>> big = environment
      .readCsvFile("file://bigFile")
      .types(Long.class, Long.class, Long.class)
      .name("bigFile");

  DataSet<Tuple3<Long, Long, Long>> small = environment
      .readCsvFile("file://smallFile")
      .types(Long.class, Long.class, Long.class)
      .name("smallFile");

  // Iterate at most 10 times over the big input.
  IterativeDataSet<Tuple3<Long, Long, Long>> loop = big.iterate(10);

  final Configuration joinHints = new Configuration();
  joinHints.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);
  if (!strategy.isEmpty()) {
    joinHints.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
  }

  // Join on field 0 of both sides; the iterative data set is the left side.
  DataSet<Tuple3<Long, Long, Long>> stepResult = loop
      .join(small)
      .where(0)
      .equalTo(0)
      .with(new DummyJoiner())
      .name("DummyJoiner")
      .withParameters(joinHints);

  DataSet<Tuple3<Long, Long, Long>> loopResult = loop.closeWith(stepResult);
  loopResult.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

  return environment.createProgramPlan();
}

/**
 * Attaches field 1 of the matching top vertex and field 1 of the matching
 * bottom vertex to every edge, using two consecutive repartition-hash joins.
 *
 * @return tuples made of edge fields 0, 1 and 2, followed by field 1 of the top
 *         vertex and field 1 of the bottom vertex
 */
private DataSet<Tuple5<KT, KB, EV, VVT, VVB>> joinEdgeWithVertices() {
  // Step 1: match edge field 0 against top vertex field 0 and append the vertex's field 1.
  DataSet<Tuple4<KT, KB, EV, VVT>> edgeWithTopVertex = edges
    .join(topVertices, JoinHint.REPARTITION_HASH_SECOND)
    .where(0)
    .equalTo(0)
    .projectFirst(0, 1, 2)
    .<Tuple4<KT, KB, EV, VVT>>projectSecond(1)
      .name("Edge with vertex");

  // Step 2: match field 1 of the intermediate tuple against bottom vertex field 0
  // and append that vertex's field 1.
  return edgeWithTopVertex
    .join(bottomVertices, JoinHint.REPARTITION_HASH_SECOND)
    .where(1)
    .equalTo(0)
    .projectFirst(0, 1, 2, 3)
    .<Tuple5<KT, KB, EV, VVT, VVB>>projectSecond(1)
      .name("Edge with vertices");
}

/**
 * Runs a program with two iterations: the result of a first, closed iteration
 * is joined inside the step function of a second iteration, whose result is
 * written to a discarding sink.
 */
@Override
protected void testProgram() throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

  DataSet<Long> firstSource = environment.generateSequence(1, 100);
  DataSet<Long> secondSource = environment.generateSequence(1, 100);

  // First iteration: apply IdMapper in each step, at most 100 iterations.
  IterativeDataSet<Long> firstLoop = firstSource.iterate(100);
  DataSet<Long> firstLoopResult = firstLoop.closeWith(firstLoop.map(new IdMapper()));

  // Second iteration: each step joins the iterative data set with the first iteration's result.
  IterativeDataSet<Long> mainLoop = secondSource.map(new IdMapper()).iterate(100);
  DataSet<Long> stepResult = mainLoop
      .join(firstLoopResult)
      .where(new IdKeyExtractor())
      .equalTo(new IdKeyExtractor())
      .with(new Joiner());
  DataSet<Long> mainLoopResult = mainLoop.closeWith(stepResult);

  mainLoopResult.output(new DiscardingOutputFormat<Long>());
  environment.execute();
}

/**
 * Compiles a join of two CSV inputs on fields (0, 1) = (0, 1) with the
 * REPARTITION_HASH_FIRST hint and checks the input properties of the
 * resulting join plan node.
 */
@Test
public void noPreviousPartitioningJoin1() {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

  DataSet<Tuple3<Integer, Integer, Integer>> left =
      environment.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
  DataSet<Tuple3<Integer, Integer, Integer>> right =
      environment.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

  DataSet<Tuple3<Integer, Integer, Integer>> joinResult = left
      .join(right, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
      .where(0, 1)
      .equalTo(0, 1)
      .with(new MockJoin());
  joinResult.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

  OptimizedPlan optimized = compileWithStats(environment.createProgramPlan());

  // The join is the direct predecessor of the first data sink.
  SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
  DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getInput().getSource();
  checkValidJoinInputProperties(joinNode);
}

/**
 * Checks that chaining {@code projectSecond(0, 2)}, {@code projectFirst(1, 4)}
 * and {@code projectFirst(1)} on a join of two 5-field tuple data sets is accepted.
 */
@Test
public void testJoinProjection5() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  // should work
  try {
    ds1.join(ds2).where(0).equalTo(0)
    .projectSecond(0, 2)
    .projectFirst(1, 4)
    .projectFirst(1);
  } catch (Exception e) {
    // Keep the triggering exception as the cause so the failure is diagnosable.
    throw new AssertionError("Valid join projection was rejected", e);
  }
}

/**
 * Checks that chaining {@code projectFirst(0, 2)}, {@code projectSecond(1, 4)}
 * and {@code projectFirst(1)} on a join of two 5-field tuple data sets is accepted.
 */
@Test
public void testJoinProjection4() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  // should work
  try {
    ds1.join(ds2).where(0).equalTo(0)
    .projectFirst(0, 2)
    .projectSecond(1, 4)
    .projectFirst(1);
  } catch (Exception e) {
    // Keep the triggering exception as the cause so the failure is diagnosable.
    throw new AssertionError("Valid join projection was rejected", e);
  }
}

/**
 * Compiles a join of two CSV inputs on fields (0, 1) = (2, 1) with the
 * REPARTITION_HASH_FIRST hint and checks the input properties of the
 * resulting join plan node.
 */
@Test
public void noPreviousPartitioningJoin2() {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

  DataSet<Tuple3<Integer, Integer, Integer>> left =
      environment.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
  DataSet<Tuple3<Integer, Integer, Integer>> right =
      environment.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

  DataSet<Tuple3<Integer, Integer, Integer>> joinResult = left
      .join(right, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
      .where(0, 1)
      .equalTo(2, 1)
      .with(new MockJoin());
  joinResult.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

  OptimizedPlan optimized = compileWithStats(environment.createProgramPlan());

  // The join is the direct predecessor of the first data sink.
  SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
  DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getInput().getSource();
  checkValidJoinInputProperties(joinNode);
}

/**
 * Filters the vertices of this graph and returns the sub-graph made of the
 * vertices that pass the filter and the edges that still match a remaining
 * vertex on both edge field 0 and edge field 1.
 *
 * @param vertexFilter the filter function for vertices.
 * @return the resulting sub-graph.
 */
public Graph<K, VV, EV> filterOnVertices(FilterFunction<Vertex<K, VV>> vertexFilter) {
  DataSet<Vertex<K, VV>> keptVertices = this.vertices.filter(vertexFilter);

  // First pass: keep edges whose field 0 matches field 0 of a kept vertex.
  DataSet<Edge<K, EV>> edgesMatchedOnFirstField = this.edges
      .join(keptVertices)
      .where(0)
      .equalTo(0)
      .with(new ProjectEdge<>());

  // Second pass: of those, keep edges whose field 1 matches field 0 of a kept vertex.
  DataSet<Edge<K, EV>> keptEdges = edgesMatchedOnFirstField
      .join(keptVertices)
      .where(1)
      .equalTo(0)
      .with(new ProjectEdge<>())
      .name("Filter on vertices");

  return new Graph<>(keptVertices, keptEdges, this.context);
}

/**
 * This method allows access to the graph's edge values along with its source and target vertex values.
 *
 * <p>The result is produced by two chained joins: the vertices are first joined with the
 * edges, and the outcome of that join is then joined with the vertices a second time.
 *
 * @return a triplet DataSet consisting of (srcVertexId, trgVertexId, srcVertexValue, trgVertexValue, edgeValue)
 */
public DataSet<Triplet<K, VV, EV>> getTriplets() {
  return this.getVertices()
    // First join: vertex field 0 against edge field 0
    // (presumably vertex ID against the edge's source ID -- confirm against Vertex/Edge).
    .join(this.getEdges()).where(0).equalTo(0)
    .with(new ProjectEdgeWithSrcValue<>())
      .name("Project edge with source value")
    // Second join: field 1 of the intermediate result against vertex field 0
    // (presumably the edge's target ID against the vertex ID -- confirm).
    .join(this.getVertices()).where(1).equalTo(0)
    .with(new ProjectEdgeWithVertexValues<>())
      .name("Project edge with vertex values");
}

/**
 * Checks that chaining a parameterless {@code projectSecond()} with
 * {@code projectFirst(1, 4)} on a join of two 5-field tuple data sets is accepted.
 */
@Test
public void testJoinProjection7() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  // should work
  try {
    ds1.join(ds2).where(0).equalTo(0)
    .projectSecond()
    .projectFirst(1, 4);
  } catch (Exception e) {
    // Keep the triggering exception as the cause so the failure is diagnosable.
    throw new AssertionError("Valid join projection was rejected", e);
  }
}

/**
 * Checks that chaining {@code projectFirst(0)} with {@code projectSecond(3)}
 * on a join of two 5-field tuple data sets is accepted.
 */
@Test
public void testJoinProjection3() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  // should work
  try {
    ds1.join(ds2).where(0).equalTo(0)
    .projectFirst(0)
    .projectSecond(3);
  } catch (Exception e) {
    // Keep the triggering exception as the cause so the failure is diagnosable.
    throw new AssertionError("Valid join projection was rejected", e);
  }
}

/**
 * Checks that chaining a parameterless {@code projectSecond()} with
 * {@code projectFirst(1, 4)} on a join of two 5-field tuple data sets is accepted.
 */
@Test
public void testJoinProjection27() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  // should work
  try {
    ds1.join(ds2).where(0).equalTo(0)
    .projectSecond()
    .projectFirst(1, 4);
  } catch (Exception e) {
    // Keep the triggering exception as the cause so the failure is diagnosable.
    throw new AssertionError("Valid join projection was rejected", e);
  }
}

/**
 * Builds a sort-merge join over two generated integer tuple inputs, using the
 * given join function, and hands the resulting plan to {@code runAndCancelJob}.
 *
 * @param joiner join function applied to matching tuple pairs
 * @param keys first argument passed to each generator input format
 * @param vals second argument passed to each generator input format
 * @param msecsTillCanceling forwarded unchanged to {@code runAndCancelJob}
 * @param maxTimeTillCanceled forwarded unchanged to {@code runAndCancelJob}
 */
private void executeTaskWithGenerator(
    JoinFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> joiner,
    int keys, int vals, int msecsTillCanceling, int maxTimeTillCanceled) throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

  DataSet<Tuple2<Integer, Integer>> left =
      environment.createInput(new UniformIntTupleGeneratorInputFormat(keys, vals));
  DataSet<Tuple2<Integer, Integer>> right =
      environment.createInput(new UniformIntTupleGeneratorInputFormat(keys, vals));

  // Join both generated inputs on field 0 with the sort-merge hint.
  DataSet<Tuple2<Integer, Integer>> joined = left
      .join(right, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
      .where(0)
      .equalTo(0)
      .with(joiner);
  joined.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

  environment.setParallelism(PARALLELISM);

  Plan plan = environment.createProgramPlan();
  runAndCancelJob(plan, msecsTillCanceling, maxTimeTillCanceled);
}

/**
 * Expects an {@link InvalidProgramException} when the first join input is keyed
 * by the field expression {@code "myString"} while the second input is keyed by
 * a {@link KeySelector} that returns an {@code Integer}.
 */
@Test(expected = InvalidProgramException.class)
public void testJoinKeyMixedWrong() {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<CustomType> left = environment.fromCollection(customTypeData);
  DataSet<CustomType> right = environment.fromCollection(customTypeData);

  // Key selector yielding an Integer key for the second input.
  KeySelector<CustomType, Integer> intKey = new KeySelector<CustomType, Integer>() {
    @Override
    public Integer getKey(CustomType value) throws Exception {
      return value.myInt;
    }
  };

  // wrongly mix String and Integer
  left.join(right).where("myString").equalTo(intKey);
}

/**
 * Checks that a join keyed by the nested field expression {@code "f0.myInt"} on
 * the first input and by field position 4 on the second input is accepted, and
 * that the type of the join result is a {@link CompositeType}.
 */
@Test
public void testJoinKeyNestedTuplesWithCustom() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<CustomType, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyNestedCustomTupleData, nestedCustomTupleTypeInfo);
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  try {
    TypeInformation<?> t = ds1.join(ds2).where("f0.myInt").equalTo(4).getType();
    assertTrue("not a composite type", t instanceof CompositeType);
  } catch (Exception e) {
    // Attach the triggering exception as the cause so the test report shows why
    // the join was rejected, instead of printing to stderr and failing blindly.
    throw new AssertionError(
        "Joining on nested field \"f0.myInt\" against tuple position 4 should be accepted", e);
  }
}

/**
 * Checks that {@code projectFirst(0, 3)} on a join of two 5-field tuple data
 * sets is accepted.
 */
@Test
public void testJoinProjection2() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
  // should work
  try {
    ds1.join(ds2).where(0).equalTo(0)
    .projectFirst(0, 3);
  } catch (Exception e) {
    // Keep the triggering exception as the cause so the failure is diagnosable.
    throw new AssertionError("Valid join projection was rejected", e);
  }
}

Most used methods

Popular in Java

Making http post requests using okhttp
getSystemService (Context)
getSupportFragmentManager (FragmentActivity)
setContentView (Activity)
System (java.lang)
Provides access to system-related information and resources including standard input and output. Ena
HttpServlet (javax.servlet.http)
Provides an abstract class to be subclassed to create an HTTP servlet suitable for a Web site. A sub
IsNull (org.hamcrest.core)
Is the value null?
Rectangle (java.awt)
A Rectangle specifies an area in a coordinate space that is enclosed by the Rectangle object's top-
Modifier (javassist)
The Modifier class provides static methods and constants to decode class and member access modifiers
Location (org.springframework.beans.factory.parsing)
Class that models an arbitrary location in a Resource. Typically used to track the location of proble
Top Vim plugins

How to use JoinOperator$JoinOperatorSets$JoinOperatorSetsPredicate in org.apache.flink.api.java.operators

Best Java code snippets using org.apache.flink.api.java.operators.JoinOperator$JoinOperatorSets$JoinOperatorSetsPredicate (Showing top 20 results out of 315)

How to use
JoinOperator$JoinOperatorSets$JoinOperatorSetsPredicate
in
org.apache.flink.api.java.operators