/** * Range-partitions a DataSet using the specified key selector function. */ public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, KeySelector<T, K> keyExtractor) { final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input.getType()); return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE, new Keys.SelectorFunctionKeys<>(input.clean(keyExtractor), input.getType(), keyType), distribution, Utils.getCallLocationName()); }
/** * Groups a {@link DataSet} using a {@link KeySelector} function. * The KeySelector function is called for each element of the DataSet and extracts a single * key value on which the DataSet is grouped. * * <p>This method returns an {@link UnsortedGrouping} on which one of the following grouping transformation * can be applied. * <ul> * <li>{@link UnsortedGrouping#sortGroup(int, org.apache.flink.api.common.operators.Order)} to get a {@link SortedGrouping}. * <li>{@link UnsortedGrouping#aggregate(Aggregations, int)} to apply an Aggregate transformation. * <li>{@link UnsortedGrouping#reduce(org.apache.flink.api.common.functions.ReduceFunction)} to apply a Reduce transformation. * <li>{@link UnsortedGrouping#reduceGroup(org.apache.flink.api.common.functions.GroupReduceFunction)} to apply a GroupReduce transformation. * </ul> * * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which it is grouped. * @return An UnsortedGrouping on which a transformation needs to be applied to obtain a transformed DataSet. * * @see KeySelector * @see UnsortedGrouping * @see AggregateOperator * @see ReduceOperator * @see org.apache.flink.api.java.operators.GroupReduceOperator * @see DataSet */ public <K> UnsortedGrouping<T> groupBy(KeySelector<T, K> keyExtractor) { TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType()); return new UnsortedGrouping<>(this, new Keys.SelectorFunctionKeys<>(clean(keyExtractor), getType(), keyType)); }
/** * Sorts elements within a group on a key extracted by the specified {@link org.apache.flink.api.java.functions.KeySelector} * in the specified {@link Order}. * * <p>Chaining {@link #sortGroup(KeySelector, Order)} calls is not supported. * * @param keySelector The KeySelector with which the group is sorted. * @param order The Order in which the extracted key is sorted. * @return A SortedGrouping with specified order of group element. * * @see Order */ public <K> SortedGrouping<T> sortGroup(KeySelector<T, K> keySelector, Order order) { if (!(this.getKeys() instanceof Keys.SelectorFunctionKeys)) { throw new InvalidProgramException("KeySelector group-sorting keys can only be used with KeySelector grouping keys."); } TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keySelector, this.inputDataSet.getType()); SortedGrouping<T> sg = new SortedGrouping<T>(this.inputDataSet, this.keys, new Keys.SelectorFunctionKeys<T, K>(keySelector, this.inputDataSet.getType(), keyType), order); sg.customPartitioner = getCustomPartitioner(); return sg; }
@Test public void testAreCompatible2() throws Keys.IncompatibleKeysException { TypeInformation<PojoWithMultiplePojos> t1 = TypeExtractor.getForClass(PojoWithMultiplePojos.class); TypeInformation<Tuple3<Long, Pojo1, Integer>> t2 = new TupleTypeInfo<>( BasicTypeInfo.LONG_TYPE_INFO, TypeExtractor.getForClass(Pojo1.class), BasicTypeInfo.INT_TYPE_INFO); TypeInformation<Tuple2<Integer, String>> kt = new TupleTypeInfo<>( BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO ); Keys<PojoWithMultiplePojos> k1 = new Keys.SelectorFunctionKeys<>( new KeySelector3(), t1, kt ); Keys<Tuple3<Long, Pojo1, Integer>> k2 = new Keys.SelectorFunctionKeys<>( new KeySelector4(), t2, kt ); Assert.assertTrue(k1.areCompatible(k2)); Assert.assertTrue(k2.areCompatible(k1)); }
/** * Locally sorts the partitions of the DataSet on the extracted key in the specified order. * The DataSet can be sorted on multiple values by returning a tuple from the KeySelector. * * <p>Note that no additional sort keys can be appended to a KeySelector sort keys. To sort * the partitions by multiple values using KeySelector, the KeySelector must return a tuple * consisting of the values. * * @param keyExtractor The KeySelector function which extracts the key values from the DataSet * on which the DataSet is sorted. * @param order The order in which the DataSet is sorted. * @return The DataSet with sorted local partitions. */ public <K> SortPartitionOperator<T> sortPartition(KeySelector<T, K> keyExtractor, Order order) { final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType()); return new SortPartitionOperator<>(this, new Keys.SelectorFunctionKeys<>(clean(keyExtractor), getType(), keyType), order, Utils.getCallLocationName()); }
/** * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first co-grouped {@link DataSet}. * * <p>The KeySelector function is called for each element of the first DataSet and extracts a single * key value on which the DataSet is grouped. * * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which it is grouped. * @return An incomplete CoGroup transformation. * Call {@link org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate#equalTo(int...)} to continue the CoGroup. * * @see KeySelector * @see DataSet */ public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) { TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input1.getType()); return new CoGroupOperatorSetsPredicate(new SelectorFunctionKeys<>(input1.clean(keyExtractor), input1.getType(), keyType)); }
/** * Continues a Join transformation and defines a {@link KeySelector} function for the first join {@link DataSet}. * * <p>The KeySelector function is called for each element of the first DataSet and extracts a single * key value on which the DataSet is joined. * * @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is joined. * @return An incomplete Join transformation. * Call {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or * {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)} * to continue the Join. * * @see KeySelector * @see DataSet */ public <K> JoinOperatorSetsPredicateBase where(KeySelector<I1, K> keySelector) { TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keySelector, input1.getType()); return new JoinOperatorSetsPredicateBase(new Keys.SelectorFunctionKeys<>(keySelector, input1.getType(), keyType)); }
@Test public void testAreCompatible1() throws Keys.IncompatibleKeysException { TypeInformation<Pojo2> t1 = TypeExtractor.getForClass(Pojo2.class); TypeInformation<Tuple2<Integer, String>> t2 = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO); Keys<Pojo2> k1 = new Keys.SelectorFunctionKeys<>( new KeySelector1(), t1, BasicTypeInfo.STRING_TYPE_INFO ); Keys<Tuple2<Integer, String>> k2 = new Keys.SelectorFunctionKeys<>( new KeySelector2(), t2, BasicTypeInfo.STRING_TYPE_INFO ); Assert.assertTrue(k1.areCompatible(k2)); Assert.assertTrue(k2.areCompatible(k1)); }
@Test public void testAreCompatible9() throws Keys.IncompatibleKeysException { TypeInformation<Tuple3<String, Long, Integer>> t1 = new TupleTypeInfo<>( BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO ); TypeInformation<PojoWithMultiplePojos> t2 = TypeExtractor.getForClass(PojoWithMultiplePojos.class); ExpressionKeys<Tuple3<String, Long, Integer>> ek1 = new ExpressionKeys<>(new int[]{2,0}, t1); Keys<PojoWithMultiplePojos> ek2 = new Keys.SelectorFunctionKeys<>( new KeySelector3(), t2, new TupleTypeInfo<Tuple2<Integer, String>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO) ); Assert.assertTrue(ek1.areCompatible(ek2)); }
@Test public void testAreCompatible4() throws Keys.IncompatibleKeysException { TypeInformation<Tuple3<String, Long, Integer>> t1 = new TupleTypeInfo<>( BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO ); TypeInformation<PojoWithMultiplePojos> t2 = TypeExtractor.getForClass(PojoWithMultiplePojos.class); Keys.ExpressionKeys<Tuple3<String, Long, Integer>> ek1 = new Keys.ExpressionKeys<>(new int[]{2,0}, t1); Keys<PojoWithMultiplePojos> sk2 = new Keys.SelectorFunctionKeys<>( new KeySelector3(), t2, new TupleTypeInfo<Tuple2<Integer, String>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO) ); Assert.assertTrue(sk2.areCompatible(ek1)); }
/** * Range-partitions a DataSet using the specified KeySelector. * * <p><b>Important:</b>This operation requires an extra pass over the DataSet to compute the range boundaries and * shuffles the whole DataSet over the network. This can take significant amount of time. * * @param keyExtractor The KeyExtractor with which the DataSet is range-partitioned. * @return The partitioned DataSet. * * @see KeySelector */ public <K extends Comparable<K>> PartitionOperator<T> partitionByRange(KeySelector<T, K> keyExtractor) { final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType()); return new PartitionOperator<>(this, PartitionMethod.RANGE, new Keys.SelectorFunctionKeys<>(clean(keyExtractor), this.getType(), keyType), Utils.getCallLocationName()); }
/** * Partitions a DataSet on the key returned by the selector, using a custom partitioner. * This method takes the key selector to get the key to partition on, and a partitioner that * accepts the key type. * * <p>Note: This method works only on single field keys, i.e. the selector cannot return tuples * of fields. * * @param partitioner The partitioner to assign partitions to keys. * @param keyExtractor The KeyExtractor with which the DataSet is partitioned. * @return The partitioned DataSet. * * @see KeySelector */ public <K extends Comparable<K>> PartitionOperator<T> partitionCustom(Partitioner<K> partitioner, KeySelector<T, K> keyExtractor) { final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType()); return new PartitionOperator<>(this, new Keys.SelectorFunctionKeys<>(keyExtractor, getType(), keyType), clean(partitioner), Utils.getCallLocationName()); }
@Test public void testOriginalTypes2() throws Exception { final TupleTypeInfo<Tuple2<Integer, String>> t1 = new TupleTypeInfo<>( BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO ); TypeInformation<PojoWithMultiplePojos> t2 = TypeExtractor.getForClass(PojoWithMultiplePojos.class); Keys<PojoWithMultiplePojos> sk = new Keys.SelectorFunctionKeys<>( new KeySelector3(), t2, t1 ); Assert.assertArrayEquals( new TypeInformation<?>[] { t1 }, sk.getOriginalKeyFieldTypes() ); }
@Test public void testOriginalTypes1() throws Keys.IncompatibleKeysException { TypeInformation<Tuple2<Integer, String>> t2 = new TupleTypeInfo<>( BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO ); Keys<Tuple2<Integer, String>> k = new Keys.SelectorFunctionKeys<>( new KeySelector2(), t2, BasicTypeInfo.STRING_TYPE_INFO ); Assert.assertArrayEquals( new TypeInformation<?>[] { BasicTypeInfo.STRING_TYPE_INFO }, k.getOriginalKeyFieldTypes() ); }
/** * {@inheritDoc} * * @return An incomplete Join transformation. * Call {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(int...)} or * {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(KeySelector)} * to continue the Join. */ @Override public <K> JoinOperatorSetsPredicate where(KeySelector<I1, K> keySelector) { TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keySelector, input1.getType()); return new JoinOperatorSetsPredicate(new SelectorFunctionKeys<>(input1.clean(keySelector), input1.getType(), keyType)); }
@Test public void testAreCompatible8() throws Keys.IncompatibleKeysException { TypeInformation<String> t1 = BasicTypeInfo.STRING_TYPE_INFO; TypeInformation<Pojo2> t2 = TypeExtractor.getForClass(Pojo2.class); ExpressionKeys<String> ek1 = new ExpressionKeys<>("*", t1); Keys<Pojo2> ek2 = new Keys.SelectorFunctionKeys<>( new KeySelector1(), t2, BasicTypeInfo.STRING_TYPE_INFO ); Assert.assertTrue(ek1.areCompatible(ek2)); }
/** * Partitions a DataSet using the specified KeySelector. * * <p><b>Important:</b>This operation shuffles the whole DataSet over the network and can take significant amount of time. * * @param keyExtractor The KeyExtractor with which the DataSet is hash-partitioned. * @return The partitioned DataSet. * * @see KeySelector */ public <K extends Comparable<K>> PartitionOperator<T> partitionByHash(KeySelector<T, K> keyExtractor) { final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType()); return new PartitionOperator<>(this, PartitionMethod.HASH, new Keys.SelectorFunctionKeys<>(clean(keyExtractor), this.getType(), keyType), Utils.getCallLocationName()); }
/** * Returns a distinct set of a {@link DataSet} using a {@link KeySelector} function. * * <p>The KeySelector function is called for each element of the DataSet and extracts a single key value on which the * decision is made if two items are distinct or not. * * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which the * distinction of the DataSet is decided. * @return A DistinctOperator that represents the distinct DataSet. */ public <K> DistinctOperator<T> distinct(KeySelector<T, K> keyExtractor) { TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, getType()); return new DistinctOperator<>(this, new Keys.SelectorFunctionKeys<>(keyExtractor, getType(), keyType), Utils.getCallLocationName()); }
@Test public void testAreCompatible3() throws Keys.IncompatibleKeysException { TypeInformation<String> t1 = BasicTypeInfo.STRING_TYPE_INFO; TypeInformation<Pojo2> t2 = TypeExtractor.getForClass(Pojo2.class); Keys.ExpressionKeys<String> ek1 = new Keys.ExpressionKeys<>("*", t1); Keys<Pojo2> sk2 = new Keys.SelectorFunctionKeys<>( new KeySelector1(), t2, BasicTypeInfo.STRING_TYPE_INFO ); Assert.assertTrue(sk2.areCompatible(ek1)); }
/** * Range-partitions a DataSet using the specified key selector function. */ public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, KeySelector<T, K> keyExtractor) { final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input.getType()); return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE, new Keys.SelectorFunctionKeys<>(input.clean(keyExtractor), input.getType(), keyType), distribution, Utils.getCallLocationName()); }