public SortPartitionOperator(DataSet<T> dataSet, String sortField, Order sortOrder, String sortLocationName) { this(dataSet, sortLocationName); this.useKeySelector = false; ensureSortableKey(sortField); keys.add(new Keys.ExpressionKeys<>(sortField, getType())); orders.add(sortOrder); }
public SortPartitionOperator(DataSet<T> dataSet, int sortField, Order sortOrder, String sortLocationName) { this(dataSet, sortLocationName); this.useKeySelector = false; ensureSortableKey(sortField); keys.add(new Keys.ExpressionKeys<>(sortField, getType())); orders.add(sortOrder); }
/**
 * Builds a raw co-group operator from the two data sets referenced by {@code info}
 * and stores it in {@code sets} under {@code info.setID}.
 *
 * @param info description of the operation: parent/other set ids, key field
 *             expressions for both sides, environment id, name and parallelism
 * @param type type information passed to both the Python co-group function and the operator
 */
private <IN1, IN2, OUT> void createCoGroupOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
    DataSet<IN1> firstInput = sets.getDataSet(info.parentID);
    DataSet<IN2> secondInput = sets.getDataSet(info.otherID);

    // key expressions of the first input
    String[] firstKeyFields = info.keys1.toArray(new String[info.keys1.size()]);
    Keys.ExpressionKeys<IN1> firstKeys = new Keys.ExpressionKeys<>(firstKeyFields, firstInput.getType());

    // key expressions of the second input
    String[] secondKeyFields = info.keys2.toArray(new String[info.keys2.size()]);
    Keys.ExpressionKeys<IN2> secondKeys = new Keys.ExpressionKeys<>(secondKeyFields, secondInput.getType());

    PythonCoGroup<IN1, IN2, OUT> coGroupFunction =
        new PythonCoGroup<>(operatorConfig, info.envID, info.setID, type);

    sets.add(
        info.setID,
        new CoGroupRawOperator<>(firstInput, secondInput, firstKeys, secondKeys, coGroupFunction, type, info.name)
            .setParallelism(info.parallelism));
}
/**
 * Checks two field expressions on {@code Pojo1} against a single tuple position;
 * the compatibility check is expected to throw {@code Keys.IncompatibleKeysException}.
 */
@Test(expected = Keys.IncompatibleKeysException.class)
public void testAreCompatible7() throws Keys.IncompatibleKeysException {
    TypeInformation<Pojo1> pojoType = TypeExtractor.getForClass(Pojo1.class);
    TypeInformation<Tuple2<String, Long>> tupleType =
        new TupleTypeInfo<>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);

    ExpressionKeys<Pojo1> pojoKeys = new ExpressionKeys<>(new String[] {"a", "b"}, pojoType);
    ExpressionKeys<Tuple2<String, Long>> tupleKeys = new ExpressionKeys<>(0, tupleType);

    pojoKeys.areCompatible(tupleKeys);
}
@Test(expected = InvalidProgramException.class) public void testGenericNonKeyType() { // Fail: GenericType cannot be used as key TypeInformation<GenericNonKeyType> genericType = new GenericTypeInfo<>(GenericNonKeyType.class); new ExpressionKeys<>("*", genericType); }
/**
 * Creates a range-partitioning operator over {@code input}, keyed by field expressions.
 *
 * @param input        the DataSet to partition
 * @param distribution the data distribution handed to the partition operator
 * @param fields       field expressions that form the partitioning key
 * @return the partition operator configured with {@code PartitionMethod.RANGE}
 */
public static <T> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, String... fields) {
    final Keys.ExpressionKeys<T> rangeKeys = new Keys.ExpressionKeys<>(fields, input.getType());

    return new PartitionOperator<>(
        input,
        PartitionOperatorBase.PartitionMethod.RANGE,
        rangeKeys,
        distribution,
        Utils.getCallLocationName());
}
/**
 * Creates a range-partitioning operator over {@code input}, keyed by tuple field positions.
 *
 * @param input        the DataSet to partition
 * @param distribution the data distribution handed to the partition operator
 * @param fields       tuple field positions that form the partitioning key
 * @return the partition operator configured with {@code PartitionMethod.RANGE}
 */
public static <T> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, int... fields) {
    // NOTE(review): the trailing 'false' is forwarded as-is to the ExpressionKeys
    // constructor; its meaning is defined there and is not visible here.
    final Keys.ExpressionKeys<T> rangeKeys = new Keys.ExpressionKeys<>(fields, input.getType(), false);

    return new PartitionOperator<>(
        input,
        PartitionOperatorBase.PartitionMethod.RANGE,
        rangeKeys,
        distribution,
        Utils.getCallLocationName());
}
/**
 * {@inheritDoc}
 *
 * <p>The given field expressions are resolved against the type of the first input.
 *
 * @param fields field expressions that select the key of the first input
 * @return An incomplete Join transformation. To continue the Join, call
 *         {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(int...)} or
 *         {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(KeySelector)}.
 */
@Override
public JoinOperatorSetsPredicate where(String... fields) {
    return new JoinOperatorSetsPredicate(
        new Keys.ExpressionKeys<>(fields, input1.getType()));
}
/**
 * Creates a distinct operator over {@code input}.
 *
 * @param input                the DataSet to de-duplicate; its type is also passed to the super constructor
 * @param keys                 the keys to be distinct on, or {@code null} to be distinct on all fields
 * @param distinctLocationName location name stored on this operator
 */
public DistinctOperator(DataSet<T> input, Keys<T> keys, String distinctLocationName) {
    super(input, input.getType());
    this.distinctLocationName = distinctLocationName;

    if (keys != null) {
        this.keys = keys;
    } else {
        // no keys given: fall back to expression keys built from the input type alone,
        // so that distinction is done on all fields
        this.keys = new Keys.ExpressionKeys<>(input.getType());
    }
}
/**
 * Two nested field expressions on {@code PojoWithMultiplePojos} must be compatible,
 * in both directions, with the {@code "*"} expression on a (String, String) tuple.
 */
@Test
public void testAreCompatible5() throws Keys.IncompatibleKeysException {
    TypeInformation<PojoWithMultiplePojos> pojoType = TypeExtractor.getForClass(PojoWithMultiplePojos.class);
    TypeInformation<Tuple2<String, String>> tupleType =
        new TupleTypeInfo<>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

    ExpressionKeys<PojoWithMultiplePojos> pojoKeys =
        new ExpressionKeys<>(new String[] {"p1.b", "p2.a2"}, pojoType);
    ExpressionKeys<Tuple2<String, String>> tupleKeys = new ExpressionKeys<>("*", tupleType);

    Assert.assertTrue(pojoKeys.areCompatible(tupleKeys));
    Assert.assertTrue(tupleKeys.areCompatible(pojoKeys));
}
/**
 * The {@code "*"} expression on the generic type info of {@code GenericKeyType}
 * must yield the single logical key position 0.
 */
@Test
public void testKeyGenericType() {
    TypeInformation<GenericKeyType> keyType = new GenericTypeInfo<>(GenericKeyType.class);

    ExpressionKeys<GenericKeyType> allFields = new ExpressionKeys<>("*", keyType);

    Assert.assertArrayEquals(new int[] {0}, allFields.computeLogicalKeyPositions());
}
/**
 * Range-partitions this DataSet using the given field expressions as key.
 *
 * <p><b>Important:</b> This operation needs an additional pass over the DataSet to determine the
 * range boundaries, and it shuffles the whole DataSet over the network. It can therefore take a
 * significant amount of time.
 *
 * @param fields The field expressions on which the DataSet is range-partitioned.
 * @return The partitioned DataSet.
 */
public PartitionOperator<T> partitionByRange(String... fields) {
    final Keys.ExpressionKeys<T> rangeKeys = new Keys.ExpressionKeys<>(fields, getType());

    return new PartitionOperator<>(
        this,
        PartitionMethod.RANGE,
        rangeKeys,
        Utils.getCallLocationName());
}
/**
 * Creates a comparator for {@code PojoContainingTuple} keyed on the expression
 * {@code "theTuple.*"}, using the same sort direction for every key field.
 *
 * @param ascending sort direction applied to all key fields
 * @return the comparator created by the composite type
 */
@Override
protected TypeComparator<PojoContainingTuple> createComparator(boolean ascending) {
    // the cast below relies on the type under test being a composite type
    Assert.assertTrue(type instanceof CompositeType);
    CompositeType<PojoContainingTuple> compositeType = (CompositeType<PojoContainingTuple>) type;

    String[] keyExpressions = {"theTuple.*"};
    ExpressionKeys<PojoContainingTuple> tupleKeys =
        new ExpressionKeys<PojoContainingTuple>(keyExpressions, compositeType);

    // one direction flag per key field, all set to the requested direction
    boolean[] directions = new boolean[tupleKeys.getNumberOfKeyFields()];
    Arrays.fill(directions, ascending);

    return compositeType.createComparator(
        tupleKeys.computeLogicalKeyPositions(),
        directions,
        0,
        new ExecutionConfig());
}
public SortedGrouping(DataSet<T> set, Keys<T> keys, String field, Order order) { super(set, keys); if (!Keys.ExpressionKeys.isSortKey(field, inputDataSet.getType())) { throw new InvalidProgramException("Selected sort key is not a sortable type"); } // resolve String-field to int using the expression keys ExpressionKeys<T> ek = new ExpressionKeys<>(field, inputDataSet.getType()); this.groupSortKeyPositions = ek.computeLogicalKeyPositions(); this.groupSortOrders = new Order[groupSortKeyPositions.length]; Arrays.fill(this.groupSortOrders, order); // if field == "*" }
/**
 * The {@code "*"} expression on a String type must be compatible, in both directions,
 * with position 0 of a (String, Long) tuple.
 */
@Test
public void testAreCompatible3() throws Keys.IncompatibleKeysException {
    TypeInformation<String> stringType = BasicTypeInfo.STRING_TYPE_INFO;
    TypeInformation<Tuple2<String, Long>> tupleType =
        new TupleTypeInfo<>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);

    ExpressionKeys<String> stringKeys = new ExpressionKeys<>("*", stringType);
    ExpressionKeys<Tuple2<String, Long>> tupleKeys = new ExpressionKeys<>(0, tupleType);

    Assert.assertTrue(stringKeys.areCompatible(tupleKeys));
    Assert.assertTrue(tupleKeys.areCompatible(stringKeys));
}
/**
 * Hash-partitions this DataSet using the given field positions as key.
 *
 * <p><b>Important:</b> This operation shuffles the whole DataSet over the network and can
 * therefore take a significant amount of time.
 *
 * @param fields The field indexes on which the DataSet is hash-partitioned.
 * @return The partitioned DataSet.
 */
public PartitionOperator<T> partitionByHash(int... fields) {
    final Keys.ExpressionKeys<T> hashKeys = new Keys.ExpressionKeys<>(fields, getType());

    return new PartitionOperator<>(
        this,
        PartitionMethod.HASH,
        hashKeys,
        Utils.getCallLocationName());
}
/**
 * {@inheritDoc}
 *
 * <p>The given field positions are resolved against the type of the first input.
 *
 * @param fields field positions that select the key of the first input
 * @return An incomplete Join transformation. To continue the Join, call
 *         {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(int...)} or
 *         {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(KeySelector)}.
 */
@Override
public JoinOperatorSetsPredicate where(int... fields) {
    return new JoinOperatorSetsPredicate(
        new Keys.ExpressionKeys<>(fields, input1.getType()));
}
/**
 * Expression keys on fields {@code "a"} and {@code "b"} of {@code Pojo1} must be
 * compatible with each other in both directions.
 */
@Test
public void testAreCompatible1() throws Keys.IncompatibleKeysException {
    TypeInformation<Pojo1> pojoType = TypeExtractor.getForClass(Pojo1.class);

    ExpressionKeys<Pojo1> keysOnA = new ExpressionKeys<>("a", pojoType);
    ExpressionKeys<Pojo1> keysOnB = new ExpressionKeys<>("b", pojoType);

    Assert.assertTrue(keysOnA.areCompatible(keysOnB));
    Assert.assertTrue(keysOnB.areCompatible(keysOnA));
}
@Test(expected = InvalidProgramException.class) public void testTupleNonKeyField() { // selected field is not a key type TypeInformation<Tuple3<String, Long, GenericNonKeyType>> ti = new TupleTypeInfo<>( BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, TypeExtractor.getForClass(GenericNonKeyType.class) ); new ExpressionKeys<>(2, ti); }
/**
 * The {@code "*"} expression on the basic Long type must yield the single
 * logical key position 0.
 */
@Test
public void testBasicType() {
    TypeInformation<Long> basicLongType = BasicTypeInfo.LONG_TYPE_INFO;

    ExpressionKeys<Long> allFields = new ExpressionKeys<>("*", basicLongType);

    Assert.assertArrayEquals(new int[] {0}, allFields.computeLogicalKeyPositions());
}