/**
 * Determines the type information of the key returned by the given {@link KeySelector}.
 *
 * <p>Convenience overload: it forwards to the four-argument {@code getKeySelectorTypes}
 * variant, passing {@code null} and {@code false} for the two additional arguments.
 *
 * @param selectorInterface the key selector whose key (output) type is requested
 * @param inType type information of the elements handed to the selector
 * @param <IN> element type consumed by the selector
 * @param <OUT> key type produced by the selector
 * @return the key type information, exactly as computed by the four-argument overload
 */
@PublicEvolving
public static <IN, OUT> TypeInformation<OUT> getKeySelectorTypes(KeySelector<IN, OUT> selectorInterface,
        TypeInformation<IN> inType) {
    return getKeySelectorTypes(selectorInterface, inType, null, false);
}
/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * <p>The key type is not supplied by the caller. It is derived from the selector and the
 * type of {@code dataStream} via {@code TypeExtractor.getKeySelectorTypes(...)}, and the
 * result is passed on to the three-argument constructor.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector) {
    // Must stay a single this(...) call: Java requires it to be the first statement.
    this(dataStream, keySelector,
        TypeExtractor.getKeySelectorTypes(keySelector, dataStream.getType()));
}
/**
 * Defines the key of the first input by means of a {@link KeySelector}.
 *
 * <p>The selector is null-checked first. The key's type information is then derived from the
 * selector and the type of the first input, and both are handed to the two-argument
 * {@code where} variant.
 *
 * @param keySelector The KeySelector to be used for extracting the key for partitioning.
 * @param <KEY> The type of the extracted key.
 * @return The result of the two-argument {@code where} call.
 */
public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector) {
    requireNonNull(keySelector);
    return where(
        keySelector,
        TypeExtractor.getKeySelectorTypes(keySelector, input1.getType()));
}
/**
 * Treats an existing {@link DataStream} as a {@link KeyedStream} whose keys are extracted with the
 * given {@link KeySelector}. The key type is derived from the selector and the stream's type and
 * passed to the three-argument overload.
 *
 * <p>IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be
 * partitioned exactly in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 *
 * @param stream The data stream to reinterpret. For every partition, this stream must be partitioned exactly
 *     in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 * @param keySelector Function that defines how keys are extracted from the data stream.
 * @param <T> Type of events in the data stream.
 * @param <K> Type of the extracted keys.
 * @return The reinterpretation of the {@link DataStream} as a {@link KeyedStream}.
 */
public static <T, K> KeyedStream<T, K> reinterpretAsKeyedStream(
        DataStream<T> stream,
        KeySelector<T, K> keySelector) {

    final TypeInformation<K> inferredKeyType =
        TypeExtractor.getKeySelectorTypes(keySelector, stream.getType());

    return reinterpretAsKeyedStream(stream, keySelector, inferredKeyType);
}
/**
 * Defines how the partitioning key is extracted from elements of the first input.
 *
 * <p>After a null check on the selector, the key's type information is derived from the selector
 * and the first input's type; the work is then delegated to the two-argument {@code where} variant.
 *
 * @param keySelector The KeySelector to be used for extracting the first input's key for partitioning.
 * @param <KEY> The type of the extracted key.
 * @return The result of the two-argument {@code where} call.
 */
public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector) {
    Preconditions.checkNotNull(keySelector);

    final TypeInformation<KEY> firstInputKeyType =
        TypeExtractor.getKeySelectorTypes(keySelector, input1.getType());

    return where(keySelector, firstInputKeyType);
}
/**
 * {@inheritDoc}
 *
 * <p>This variant passes the key selector through {@code input1.clean(...)} before wrapping it
 * into the selector-function keys.
 *
 * @return An incomplete Join transformation.
 *     Call {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(int...)} or
 *     {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(KeySelector)}
 *     to continue the Join.
 */
@Override
public <K> JoinOperatorSetsPredicate where(KeySelector<I1, K> keySelector) {
    final TypeInformation<K> joinKeyType =
        TypeExtractor.getKeySelectorTypes(keySelector, input1.getType());

    final SelectorFunctionKeys<I1, K> firstInputKeys =
        new SelectorFunctionKeys<>(input1.clean(keySelector), input1.getType(), joinKeyType);

    return new JoinOperatorSetsPredicate(firstInputKeys);
}
@SuppressWarnings({ "serial", "unchecked", "rawtypes" }) @Test public void testExtractKeySelector() { KeySelector<String, Integer> selector = new KeySelector<String, Integer>() { @Override public Integer getKey(String value) { return null; } }; TypeInformation<?> ti = TypeExtractor.getKeySelectorTypes(selector, BasicTypeInfo.STRING_TYPE_INFO); Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, ti); try { TypeExtractor.getKeySelectorTypes((KeySelector) selector, BasicTypeInfo.BOOLEAN_TYPE_INFO); Assert.fail(); } catch (InvalidTypesException e) { // good } catch (Exception e) { Assert.fail("wrong exception type"); } }
/**
 * Continues a Join transformation by defining a {@link KeySelector} function for the first join {@link DataSet}.
 *
 * <p>The KeySelector function is called for each element of the first DataSet and extracts a single
 * key value on which the DataSet is joined. The key's type information is derived from the selector
 * and the type of the first input.
 *
 * @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is joined.
 * @param <K> The type of the extracted key.
 * @return An incomplete Join transformation.
 *     Call {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or
 *     {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)}
 *     to continue the Join.
 *
 * @see KeySelector
 * @see DataSet
 */
public <K> JoinOperatorSetsPredicateBase where(KeySelector<I1, K> keySelector) {
    final TypeInformation<K> joinKeyType =
        TypeExtractor.getKeySelectorTypes(keySelector, input1.getType());

    final Keys.SelectorFunctionKeys<I1, K> firstInputKeys =
        new Keys.SelectorFunctionKeys<>(keySelector, input1.getType(), joinKeyType);

    return new JoinOperatorSetsPredicateBase(firstInputKeys);
}
/**
 * Produces the distinct set of this {@link DataSet}, where distinctness is decided on a key
 * extracted by a {@link KeySelector} function.
 *
 * <p>The KeySelector function is called for each element of the DataSet and extracts a single key value on which the
 * decision is made if two items are distinct or not.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which the
 *     distinction of the DataSet is decided.
 * @param <K> The type of the extracted key.
 * @return A DistinctOperator that represents the distinct DataSet.
 */
public <K> DistinctOperator<T> distinct(KeySelector<T, K> keyExtractor) {
    final TypeInformation<K> distinctKeyType =
        TypeExtractor.getKeySelectorTypes(keyExtractor, getType());

    final Keys.SelectorFunctionKeys<T, K> distinctKeys =
        new Keys.SelectorFunctionKeys<>(keyExtractor, getType(), distinctKeyType);

    // The call location is looked up from within this method, as in the single-expression form.
    return new DistinctOperator<>(this, distinctKeys, Utils.getCallLocationName());
}
/**
 * Range-partitions a DataSet using the specified key selector function.
 *
 * <p>The key type is derived from the selector and the input's type; the selector is passed through
 * {@code input.clean(...)} before it is wrapped into the partitioning keys.
 *
 * @param input The DataSet to partition.
 * @param distribution The data distribution handed to the partition operator.
 * @param keyExtractor The key selector that extracts the partitioning key from each element.
 * @param <T> The element type of the DataSet.
 * @param <K> The (comparable) type of the extracted key.
 * @return The range-partitioning {@link PartitionOperator}.
 */
public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(
        DataSet<T> input,
        DataDistribution distribution,
        KeySelector<T, K> keyExtractor) {

    final TypeInformation<K> rangeKeyType =
        TypeExtractor.getKeySelectorTypes(keyExtractor, input.getType());

    final Keys.SelectorFunctionKeys<T, K> rangeKeys =
        new Keys.SelectorFunctionKeys<>(input.clean(keyExtractor), input.getType(), rangeKeyType);

    return new PartitionOperator<>(
        input,
        PartitionOperatorBase.PartitionMethod.RANGE,
        rangeKeys,
        distribution,
        Utils.getCallLocationName());
}
/**
 * Hash-partitions this DataSet on the key returned by the given KeySelector.
 *
 * <p><b>Important:</b>This operation shuffles the whole DataSet over the network and can take significant amount of time.
 *
 * @param keyExtractor The KeyExtractor with which the DataSet is hash-partitioned.
 * @param <K> The (comparable) type of the extracted key.
 * @return The partitioned DataSet.
 *
 * @see KeySelector
 */
public <K extends Comparable<K>> PartitionOperator<T> partitionByHash(KeySelector<T, K> keyExtractor) {
    final TypeInformation<K> hashKeyType =
        TypeExtractor.getKeySelectorTypes(keyExtractor, getType());

    final Keys.SelectorFunctionKeys<T, K> hashKeys =
        new Keys.SelectorFunctionKeys<>(clean(keyExtractor), this.getType(), hashKeyType);

    return new PartitionOperator<>(this, PartitionMethod.HASH, hashKeys, Utils.getCallLocationName());
}
/**
 * Continues a CoGroup transformation by defining a {@link KeySelector} function for the first co-grouped {@link DataSet}.
 *
 * <p>The KeySelector function is called for each element of the first DataSet and extracts a single
 * key value on which the DataSet is grouped. The selector is passed through {@code input1.clean(...)}
 * before it is wrapped into the grouping keys.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which it is grouped.
 * @param <K> The type of the extracted key.
 * @return An incomplete CoGroup transformation.
 *     Call {@link org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate#equalTo(int...)} to continue the CoGroup.
 *
 * @see KeySelector
 * @see DataSet
 */
public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) {
    final TypeInformation<K> groupKeyType =
        TypeExtractor.getKeySelectorTypes(keyExtractor, input1.getType());

    final SelectorFunctionKeys<I1, K> firstInputKeys =
        new SelectorFunctionKeys<>(input1.clean(keyExtractor), input1.getType(), groupKeyType);

    return new CoGroupOperatorSetsPredicate(firstInputKeys);
}
/**
 * Range-partitions this DataSet on the key returned by the given KeySelector.
 *
 * <p><b>Important:</b>This operation requires an extra pass over the DataSet to compute the range boundaries and
 * shuffles the whole DataSet over the network. This can take significant amount of time.
 *
 * @param keyExtractor The KeyExtractor with which the DataSet is range-partitioned.
 * @param <K> The (comparable) type of the extracted key.
 * @return The partitioned DataSet.
 *
 * @see KeySelector
 */
public <K extends Comparable<K>> PartitionOperator<T> partitionByRange(KeySelector<T, K> keyExtractor) {
    final TypeInformation<K> rangeKeyType =
        TypeExtractor.getKeySelectorTypes(keyExtractor, getType());

    final Keys.SelectorFunctionKeys<T, K> rangeKeys =
        new Keys.SelectorFunctionKeys<>(clean(keyExtractor), this.getType(), rangeKeyType);

    return new PartitionOperator<>(this, PartitionMethod.RANGE, rangeKeys, Utils.getCallLocationName());
}
/**
 * Partitions this DataSet with a custom partitioner, applied to the key returned by the selector.
 * The key selector yields the key to partition on, and the partitioner must accept that key type.
 *
 * <p>Note: This method works only on single field keys, i.e. the selector cannot return tuples
 * of fields.
 *
 * @param partitioner The partitioner to assign partitions to keys.
 * @param keyExtractor The KeyExtractor with which the DataSet is partitioned.
 * @param <K> The (comparable) type of the extracted key.
 * @return The partitioned DataSet.
 *
 * @see KeySelector
 */
public <K extends Comparable<K>> PartitionOperator<T> partitionCustom(
        Partitioner<K> partitioner,
        KeySelector<T, K> keyExtractor) {

    final TypeInformation<K> partitionKeyType =
        TypeExtractor.getKeySelectorTypes(keyExtractor, getType());

    final Keys.SelectorFunctionKeys<T, K> partitionKeys =
        new Keys.SelectorFunctionKeys<>(keyExtractor, getType(), partitionKeyType);

    // Only the partitioner goes through clean(...) here; the selector is wrapped as given.
    return new PartitionOperator<>(this, partitionKeys, clean(partitioner), Utils.getCallLocationName());
}
/**
 * Sorts each partition of the DataSet locally on the key extracted by the given KeySelector, in the
 * specified order. Sorting on multiple values is possible by returning a tuple from the KeySelector.
 *
 * <p>Note that no additional sort keys can be appended to a KeySelector sort keys. To sort
 * the partitions by multiple values using KeySelector, the KeySelector must return a tuple
 * consisting of the values.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet
 *     on which the DataSet is sorted.
 * @param order The order in which the DataSet is sorted.
 * @param <K> The type of the extracted sort key.
 * @return The DataSet with sorted local partitions.
 */
public <K> SortPartitionOperator<T> sortPartition(KeySelector<T, K> keyExtractor, Order order) {
    final TypeInformation<K> sortKeyType =
        TypeExtractor.getKeySelectorTypes(keyExtractor, getType());

    final Keys.SelectorFunctionKeys<T, K> sortKeys =
        new Keys.SelectorFunctionKeys<>(clean(keyExtractor), getType(), sortKeyType);

    return new SortPartitionOperator<>(this, sortKeys, order, Utils.getCallLocationName());
}
/**
 * Groups this {@link DataSet} on a key computed by a {@link KeySelector} function.
 * The KeySelector function is called for each element of the DataSet and extracts a single
 * key value on which the DataSet is grouped.
 *
 * <p>The returned {@link UnsortedGrouping} accepts one of the following grouping transformations:
 * <ul>
 *   <li>{@link UnsortedGrouping#sortGroup(int, org.apache.flink.api.common.operators.Order)} to get a {@link SortedGrouping}.
 *   <li>{@link UnsortedGrouping#aggregate(Aggregations, int)} to apply an Aggregate transformation.
 *   <li>{@link UnsortedGrouping#reduce(org.apache.flink.api.common.functions.ReduceFunction)} to apply a Reduce transformation.
 *   <li>{@link UnsortedGrouping#reduceGroup(org.apache.flink.api.common.functions.GroupReduceFunction)} to apply a GroupReduce transformation.
 * </ul>
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which it is grouped.
 * @param <K> The type of the extracted key.
 * @return An UnsortedGrouping on which a transformation needs to be applied to obtain a transformed DataSet.
 *
 * @see KeySelector
 * @see UnsortedGrouping
 * @see AggregateOperator
 * @see ReduceOperator
 * @see org.apache.flink.api.java.operators.GroupReduceOperator
 * @see DataSet
 */
public <K> UnsortedGrouping<T> groupBy(KeySelector<T, K> keyExtractor) {
    final TypeInformation<K> groupKeyType =
        TypeExtractor.getKeySelectorTypes(keyExtractor, getType());

    final Keys.SelectorFunctionKeys<T, K> groupKeys =
        new Keys.SelectorFunctionKeys<>(clean(keyExtractor), getType(), groupKeyType);

    return new UnsortedGrouping<>(this, groupKeys);
}
/**
 * Sorts the elements of each group on a key extracted by the given
 * {@link org.apache.flink.api.java.functions.KeySelector}, in the given {@link Order}.
 *
 * <p>This is only permitted when the grouping itself is defined by a KeySelector; otherwise an
 * {@link InvalidProgramException} is thrown. The custom partitioner of this grouping is carried
 * over to the returned {@link SortedGrouping}.
 *
 * <p>Chaining {@link #sortGroup(KeySelector, Order)} calls is not supported.
 *
 * @param keySelector The KeySelector with which the group is sorted.
 * @param order The Order in which the extracted key is sorted.
 * @param <K> The type of the extracted sort key.
 * @return A SortedGrouping with specified order of group element.
 * @throws InvalidProgramException if the grouping keys are not KeySelector-based keys.
 *
 * @see Order
 */
public <K> SortedGrouping<T> sortGroup(KeySelector<T, K> keySelector, Order order) {
    final boolean groupedBySelector = this.getKeys() instanceof Keys.SelectorFunctionKeys;
    if (!groupedBySelector) {
        throw new InvalidProgramException("KeySelector group-sorting keys can only be used with KeySelector grouping keys.");
    }

    final TypeInformation<K> sortKeyType =
        TypeExtractor.getKeySelectorTypes(keySelector, this.inputDataSet.getType());

    final Keys.SelectorFunctionKeys<T, K> sortKeys =
        new Keys.SelectorFunctionKeys<T, K>(keySelector, this.inputDataSet.getType(), sortKeyType);

    final SortedGrouping<T> sorted = new SortedGrouping<T>(this.inputDataSet, this.keys, sortKeys, order);
    sorted.customPartitioner = getCustomPartitioner();
    return sorted;
}
private <K> void testKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<Integer[], String>> input = env.fromElements( new Tuple2<>(new Integer[] {1, 2}, "barfoo") ); Assert.assertEquals(expectedKeyType, TypeExtractor.getKeySelectorTypes(keySelector, input.getType())); // adjust the rule expectedException.expect(InvalidProgramException.class); expectedException.expectMessage(new StringStartsWith("Type " + expectedKeyType + " cannot be used as key.")); input.keyBy(keySelector); }
/**
 * Extracts the key type of a lambda {@link KeySelector} that maps a nested tuple to
 * {@code Tuple2<Tuple1<Integer>, String>}.
 *
 * <p>Extraction is invoked with {@code true} as last argument, and the structural assertions
 * are skipped when the result is a {@link MissingTypeInfo}. Otherwise the result must be a
 * 2-ary tuple type whose first field is itself a tuple and whose second field is STRING.
 */
@Test
public void testKeySelectorLambda() {
    KeySelector<Tuple2<Tuple1<Integer>, Boolean>, Tuple2<Tuple1<Integer>, String>> f = (i) -> null;

    TypeInformation<?> ti = TypeExtractor.getKeySelectorTypes(f, NESTED_TUPLE_BOOLEAN_TYPE, null, true);

    // NOTE(review): presumably the lambda's generic signature is not always recoverable,
    // hence the MissingTypeInfo escape hatch - confirm.
    if (!(ti instanceof MissingTypeInfo)) {
        assertTrue(ti.isTupleType());
        assertEquals(2, ti.getArity());

        TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) ti;
        assertTrue(tupleType.getTypeAt(0).isTupleType());
        // Expected value goes first so that a failure message reads correctly.
        assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tupleType.getTypeAt(1));
    }
}
@SuppressWarnings({ "unchecked", "rawtypes" }) @Test public void testValue() { // use getKeyExtractorType() KeySelector<?, ?> function = new KeySelector<StringValue, StringValue>() { private static final long serialVersionUID = 1L; @Override public StringValue getKey(StringValue value) { return null; } }; TypeInformation<?> ti = TypeExtractor.getKeySelectorTypes(function, (TypeInformation) TypeInformation.of(new TypeHint<StringValue>(){})); Assert.assertFalse(ti.isBasicType()); Assert.assertFalse(ti.isTupleType()); Assert.assertTrue(ti instanceof ValueTypeInfo); Assert.assertEquals(ti.getTypeClass(), StringValue.class); // use getForClass() Assert.assertTrue(TypeExtractor.getForClass(StringValue.class) instanceof ValueTypeInfo); Assert.assertEquals(TypeExtractor.getForClass(StringValue.class).getTypeClass(), ti.getTypeClass()); // use getForObject() StringValue v = new StringValue("Hello"); Assert.assertTrue(TypeExtractor.getForObject(v) instanceof ValueTypeInfo); Assert.assertEquals(TypeExtractor.getForObject(v).getTypeClass(), ti.getTypeClass()); }