/**
 * Starts the definition of a Left Outer Join between this DataSet and {@code other}.
 *
 * <p>An Outer Join pairs elements of two {@link DataSet DataSets} whose keys are equal and
 * offers several ways of combining each pair into an element of the result DataSet.
 *
 * <p>Every element of the <b>left</b> DataSet (i.e. {@code this}) without a matching partner
 * on the other side is paired with {@code null} and still emitted to the result DataSet.
 *
 * <p>The join is created with {@code JoinHint.OPTIMIZER_CHOOSES}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSet to continue the definition of the Join transformation.
 *
 * @see org.apache.flink.api.java.operators.join.JoinOperatorSetsBase
 * @see DataSet
 */
public <R> JoinOperatorSetsBase<T, R> leftOuterJoin(DataSet<R> other) {
    final JoinOperatorSetsBase<T, R> leftOuterJoinSets =
            new JoinOperatorSetsBase<>(this, other, JoinHint.OPTIMIZER_CHOOSES, JoinType.LEFT_OUTER);
    return leftOuterJoinSets;
}
private void testRightOuterStrategies(JoinHint hint) { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.rightOuterJoin(ds2, hint) .where(0).equalTo(4) .with(new DummyJoin()); }
private void testLeftOuterStrategies(JoinHint hint) { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.leftOuterJoin(ds2, hint) .where(0).equalTo(4) .with(new DummyJoin()); }
private void testFullOuterStrategies(JoinHint hint) { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.fullOuterJoin(ds2, hint) .where(0).equalTo(4) .with(new DummyJoin()); }
/**
 * Starts the definition of a Right Outer Join between this DataSet and {@code other}.
 *
 * <p>An Outer Join pairs elements of two {@link DataSet DataSets} whose keys are equal and
 * offers several ways of combining each pair into an element of the result DataSet.
 *
 * <p>Every element of the <b>right</b> DataSet (i.e. {@code other}) without a matching partner
 * on {@code this} side is paired with {@code null} and still emitted to the result DataSet.
 *
 * <p>The join is created with {@code JoinHint.OPTIMIZER_CHOOSES}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSet to continue the definition of the Join transformation.
 *
 * @see org.apache.flink.api.java.operators.join.JoinOperatorSetsBase
 * @see DataSet
 */
public <R> JoinOperatorSetsBase<T, R> rightOuterJoin(DataSet<R> other) {
    final JoinOperatorSetsBase<T, R> rightOuterJoinSets =
            new JoinOperatorSetsBase<>(this, other, JoinHint.OPTIMIZER_CHOOSES, JoinType.RIGHT_OUTER);
    return rightOuterJoinSets;
}
@Test public void testRightOuter1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.rightOuterJoin(ds2) .where(0).equalTo(4) .with(new DummyJoin()); }
/**
 * Starts the definition of a Full Outer Join between this DataSet and {@code other}.
 *
 * <p>An Outer Join pairs elements of two {@link DataSet DataSets} whose keys are equal and
 * offers several ways of combining each pair into an element of the result DataSet.
 *
 * <p>Elements of <b>both</b> DataSets without a matching partner on the opposing side are
 * paired with {@code null} and still emitted to the result DataSet.
 *
 * <p>The join is created with {@code JoinHint.OPTIMIZER_CHOOSES}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSet to continue the definition of the Join transformation.
 *
 * @see org.apache.flink.api.java.operators.join.JoinOperatorSetsBase
 * @see DataSet
 */
public <R> JoinOperatorSetsBase<T, R> fullOuterJoin(DataSet<R> other) {
    final JoinOperatorSetsBase<T, R> fullOuterJoinSets =
            new JoinOperatorSetsBase<>(this, other, JoinHint.OPTIMIZER_CHOOSES, JoinType.FULL_OUTER);
    return fullOuterJoinSets;
}
@Test(expected = IndexOutOfBoundsException.class) public void testFullOuter7() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // invalid key position ds1.fullOuterJoin(ds2) .where(5).equalTo(0) .with(new DummyJoin()); }
case REPARTITION_HASH_SECOND: case BROADCAST_HASH_SECOND: return new JoinOperatorSetsBase<>(this, other, strategy, JoinType.LEFT_OUTER); default: throw new InvalidProgramException("Invalid JoinHint for LeftOuterJoin: " + strategy);
@Test public void testLeftOuter1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.leftOuterJoin(ds2) .where(0).equalTo(4) .with(new DummyJoin()); }
/**
 * Initiates a Full Outer Join transformation.
 *
 * <p>An Outer Join transformation joins two elements of two
 * {@link DataSet DataSets} on key equality and provides multiple ways to combine
 * joining elements into one DataSet.
 *
 * <p>Elements of <b>both</b> DataSets that do not have a matching
 * element on the opposing side are joined with {@code null} and emitted to the
 * resulting DataSet.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @param strategy The strategy that should be used to execute the join. If {@code null} is given,
 *                 then the optimizer will pick the join strategy.
 * @return A JoinOperatorSet to continue the definition of the Join transformation.
 * @throws InvalidProgramException if {@code strategy} is a hint other than
 *         {@code OPTIMIZER_CHOOSES}, {@code REPARTITION_SORT_MERGE},
 *         {@code REPARTITION_HASH_FIRST} or {@code REPARTITION_HASH_SECOND}.
 *
 * @see org.apache.flink.api.java.operators.join.JoinOperatorSetsBase
 * @see DataSet
 */
public <R> JoinOperatorSetsBase<T, R> fullOuterJoin(DataSet<R> other, JoinHint strategy) {
    // A switch over a null enum reference throws NullPointerException, which would contradict
    // the documented contract. Map null to the "optimizer decides" hint before dispatching.
    final JoinHint effectiveStrategy = (strategy == null) ? JoinHint.OPTIMIZER_CHOOSES : strategy;

    switch (effectiveStrategy) {
        case OPTIMIZER_CHOOSES:
        case REPARTITION_SORT_MERGE:
        case REPARTITION_HASH_FIRST:
        case REPARTITION_HASH_SECOND:
            return new JoinOperatorSetsBase<>(this, other, effectiveStrategy, JoinType.FULL_OUTER);
        default:
            throw new InvalidProgramException("Invalid JoinHint for FullOuterJoin: " + effectiveStrategy);
    }
}
@Test public void testFullOuter2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.fullOuterJoin(ds2) .where("f1").equalTo("f3") .with(new DummyJoin()); }
case REPARTITION_HASH_SECOND: case BROADCAST_HASH_FIRST: return new JoinOperatorSetsBase<>(this, other, strategy, JoinType.RIGHT_OUTER); default: throw new InvalidProgramException("Invalid JoinHint for RightOuterJoin: " + strategy);
@Test(expected = CompositeType.InvalidFieldReferenceException.class) public void testFullOuter8() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // invalid key reference ds1.fullOuterJoin(ds2) .where(1).equalTo("f5") .with(new DummyJoin()); }
/**
 * Starts the definition of a Right Outer Join between this DataSet and {@code other}.
 *
 * <p>An Outer Join pairs elements of two {@link DataSet DataSets} whose keys are equal and
 * offers several ways of combining each pair into an element of the result DataSet.
 *
 * <p>Every element of the <b>right</b> DataSet (i.e. {@code other}) without a matching partner
 * on {@code this} side is paired with {@code null} and still emitted to the result DataSet.
 *
 * <p>The join is created with {@code JoinHint.OPTIMIZER_CHOOSES}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSet to continue the definition of the Join transformation.
 *
 * @see org.apache.flink.api.java.operators.join.JoinOperatorSetsBase
 * @see DataSet
 */
public <R> JoinOperatorSetsBase<T, R> rightOuterJoin(DataSet<R> other) {
    final JoinOperatorSetsBase<T, R> rightOuterJoinSets =
            new JoinOperatorSetsBase<>(this, other, JoinHint.OPTIMIZER_CHOOSES, JoinType.RIGHT_OUTER);
    return rightOuterJoinSets;
}
@Test public void testRightOuter2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.rightOuterJoin(ds2) .where("f1").equalTo("f3") .with(new DummyJoin()); }
/**
 * Starts the definition of a Full Outer Join between this DataSet and {@code other}.
 *
 * <p>An Outer Join pairs elements of two {@link DataSet DataSets} whose keys are equal and
 * offers several ways of combining each pair into an element of the result DataSet.
 *
 * <p>Elements of <b>both</b> DataSets without a matching partner on the opposing side are
 * paired with {@code null} and still emitted to the result DataSet.
 *
 * <p>The join is created with {@code JoinHint.OPTIMIZER_CHOOSES}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSet to continue the definition of the Join transformation.
 *
 * @see org.apache.flink.api.java.operators.join.JoinOperatorSetsBase
 * @see DataSet
 */
public <R> JoinOperatorSetsBase<T, R> fullOuterJoin(DataSet<R> other) {
    final JoinOperatorSetsBase<T, R> fullOuterJoinSets =
            new JoinOperatorSetsBase<>(this, other, JoinHint.OPTIMIZER_CHOOSES, JoinType.FULL_OUTER);
    return fullOuterJoinSets;
}
@Test public void testRightOuter6() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.rightOuterJoin(ds2) .where("f0").equalTo(4) .with(new DummyJoin()); }
/**
 * Starts the definition of a Left Outer Join between this DataSet and {@code other}.
 *
 * <p>An Outer Join pairs elements of two {@link DataSet DataSets} whose keys are equal and
 * offers several ways of combining each pair into an element of the result DataSet.
 *
 * <p>Every element of the <b>left</b> DataSet (i.e. {@code this}) without a matching partner
 * on the other side is paired with {@code null} and still emitted to the result DataSet.
 *
 * <p>The join is created with {@code JoinHint.OPTIMIZER_CHOOSES}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSet to continue the definition of the Join transformation.
 *
 * @see org.apache.flink.api.java.operators.join.JoinOperatorSetsBase
 * @see DataSet
 */
public <R> JoinOperatorSetsBase<T, R> leftOuterJoin(DataSet<R> other) {
    final JoinOperatorSetsBase<T, R> leftOuterJoinSets =
            new JoinOperatorSetsBase<>(this, other, JoinHint.OPTIMIZER_CHOOSES, JoinType.LEFT_OUTER);
    return leftOuterJoinSets;
}
@Test(expected = CompositeType.InvalidFieldReferenceException.class) public void testRightOuter8() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // invalid key reference ds1.rightOuterJoin(ds2) .where(1).equalTo("f5") .with(new DummyJoin()); }