/** * Returns the value coder for the given PCollection. Assumes that the value coder is an instance * of {@code KvCoder<K, V>}. */ private <V> Coder<V> getValueCoder(PCollection<KV<K, V>> pCollection) { // Assumes that the PCollection uses a KvCoder. Coder<?> entryCoder = pCollection.getCoder(); if (!(entryCoder instanceof KvCoder<?, ?>)) { throw new IllegalArgumentException("PCollection does not use a KvCoder"); } @SuppressWarnings("unchecked") KvCoder<K, V> coder = (KvCoder<K, V>) entryCoder; return coder.getValueCoder(); }
/**
 * Resolves the default output coder by delegating to {@code fn}, passing it the value coder
 * obtained from {@code inputCoder}.
 *
 * <p>The {@code accumulatorCoder} argument is not consulted.
 *
 * @param registry the coder registry forwarded to {@code fn}
 * @param accumulatorCoder ignored by this implementation
 * @return whatever {@code fn.getDefaultOutputCoder} returns
 * @throws CannotProvideCoderException propagated from the delegate call
 */
@Override
public Coder<OutputT> getDefaultOutputCoder(
    CoderRegistry registry, Coder<InputOrAccum<InputT, AccumT>> accumulatorCoder)
    throws CannotProvideCoderException {
  return fn.getDefaultOutputCoder(
      registry,
      inputCoder.getValueCoder());
}
/**
 * Returns the {@code Coder} for the values that are paired with the secondary keys.
 *
 * <p>Obtained by asking {@code getSecondaryKeyValueCoder(inputCoder)} for its value coder.
 *
 * @param inputCoder coder of the primary-key / iterable-of-secondary-KV elements
 * @return the value coder of the secondary key-value coder
 */
private static <PrimaryKeyT, SecondaryKeyT, ValueT> Coder<ValueT> getValueCoder(
    Coder<KV<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>> inputCoder) {
  return getSecondaryKeyValueCoder(inputCoder)
      .getValueCoder();
}
/**
 * Resolves the default output coder by delegating to {@code fnWithContext}, passing it the value
 * coder obtained from {@code inputCoder}.
 *
 * <p>The {@code accumulatorCoder} argument is not consulted.
 *
 * @param registry the coder registry forwarded to {@code fnWithContext}
 * @param accumulatorCoder ignored by this implementation
 * @return whatever {@code fnWithContext.getDefaultOutputCoder} returns
 * @throws CannotProvideCoderException propagated from the delegate call
 */
@Override
public Coder<OutputT> getDefaultOutputCoder(
    CoderRegistry registry, Coder<InputOrAccum<InputT, AccumT>> accumulatorCoder)
    throws CannotProvideCoderException {
  return fnWithContext.getDefaultOutputCoder(
      registry,
      inputCoder.getValueCoder());
}
/**
 * Returns the {@code Coder} for the values of this transform's input.
 *
 * <p>Obtained by asking {@code getInputKvCoder(inputCoder)} for its value coder.
 *
 * @param inputCoder the coder of the transform's key-value input
 * @return the value component of the input key-value coder
 */
public static <K, V> Coder<V> getInputValueCoder(Coder<KV<K, V>> inputCoder) {
  return getInputKvCoder(inputCoder)
      .getValueCoder();
}
/**
 * Lists the component coders of a {@code KvCoder}: the key coder first, then the value coder.
 *
 * @param from the key-value coder to decompose
 * @return a two-element immutable list {@code [keyCoder, valueCoder]}
 */
@Override
public List<? extends Coder<?>> getComponents(KvCoder<?, ?> from) {
  Coder<?> keyComponent = from.getKeyCoder();
  Coder<?> valueComponent = from.getValueCoder();
  return ImmutableList.<Coder<?>>of(keyComponent, valueComponent);
}
/**
 * Checks determinism of this coder by checking its two components in turn: the key coder first,
 * then the value coder.
 *
 * @throws NonDeterministicException propagated from the per-component check
 */
@Override
public void verifyDeterministic() throws NonDeterministicException {
  // Key is checked before value, so a failure on the key is reported first.
  verifyDeterministic(
      this, "Key coder must be deterministic", getKeyCoder());
  verifyDeterministic(
      this, "Value coder must be deterministic", getValueCoder());
}
/**
 * Expands this transform into a per-key combine that uses an {@code ApproximateUniqueCombineFn}
 * built from {@code sampleSize} and the input's value coder.
 *
 * @param input the keyed input collection; its coder must be a {@code KvCoder}
 * @return the result of applying {@code Combine.perKey(...)} to {@code input}
 * @throws IllegalStateException if the input's coder is not a {@code KvCoder}
 */
@Override
public PCollection<KV<K, Long>> expand(PCollection<KV<K, V>> input) {
  Coder<KV<K, V>> elementCoder = input.getCoder();
  if (elementCoder instanceof KvCoder) {
    @SuppressWarnings("unchecked")
    final Coder<V> valueCoder = ((KvCoder<K, V>) elementCoder).getValueCoder();
    return input.apply(
        Combine.perKey(new ApproximateUniqueCombineFn<>(sampleSize, valueCoder)));
  }
  throw new IllegalStateException(
      "ApproximateUnique.PerKey requires its input to use KvCoder");
}
public static <K, InputT, AccumT, OutputT> AppliedCombineFn<K, InputT, AccumT, OutputT> withInputCoder( GlobalCombineFn<? super InputT, AccumT, OutputT> fn, CoderRegistry registry, KvCoder<K, InputT> kvCoder, Iterable<PCollectionView<?>> sideInputViews, WindowingStrategy<?, ?> windowingStrategy) { // Casting down the K and InputT is safe because they're only used as inputs. @SuppressWarnings("unchecked") GlobalCombineFn<InputT, AccumT, OutputT> clonedFn = (GlobalCombineFn<InputT, AccumT, OutputT>) SerializableUtils.clone(fn); try { Coder<AccumT> accumulatorCoder = clonedFn.getAccumulatorCoder(registry, kvCoder.getValueCoder()); return create(clonedFn, accumulatorCoder, sideInputViews, kvCoder, windowingStrategy); } catch (CannotProvideCoderException e) { throw new IllegalStateException("Could not determine coder for accumulator", e); } }
/**
 * Translates a {@code KvCoder} into a {@code CloudObject} of kind {@code KIND_PAIR}.
 *
 * <p>The object is flagged with {@code IS_PAIR_LIKE = true} and then populated with the key and
 * value coders (in that order) through {@code addComponents}.
 *
 * @param target the key-value coder to translate
 * @param sdkComponents forwarded to {@code addComponents}
 * @return the value returned by {@code addComponents}
 */
@Override
public CloudObject toCloudObject(KvCoder target, SdkComponents sdkComponents) {
  CloudObject pairObject = CloudObject.forClassName(CloudObjectKinds.KIND_PAIR);
  Structs.addBoolean(pairObject, PropertyNames.IS_PAIR_LIKE, true);
  ImmutableList<Coder<?>> componentCoders =
      ImmutableList.<Coder<?>>of(target.getKeyCoder(), target.getValueCoder());
  return addComponents(pairObject, componentCoders, sdkComponents);
}
/**
 * Constructs a {@link SingletonAssert} over the contents of the provided {@link PCollection},
 * viewed as a {@link Map}, with the specified reason. The {@link PCollection} must have at most
 * one value per key.
 *
 * <p>Note that the collection must be coded by a {@link KvCoder}, not just any coder of
 * {@code KV<K, V>}; the coder is cast unconditionally, so any other coder fails with a
 * {@link ClassCastException}.
 *
 * @param reason the reason passed to {@code PAssertionSite.capture}
 * @param actual the key-value collection under assertion
 * @return an assert over the map view of {@code actual}
 */
public static <K, V> SingletonAssert<Map<K, V>> thatMap(
    String reason, PCollection<KV<K, V>> actual) {
  @SuppressWarnings("unchecked")
  KvCoder<K, V> entryCoder = (KvCoder<K, V>) actual.getCoder();
  MapCoder<K, V> mapCoder = MapCoder.of(entryCoder.getKeyCoder(), entryCoder.getValueCoder());
  return new PCollectionViewAssert<>(
      actual, View.asMap(), mapCoder, PAssertionSite.capture(reason));
}
/**
 * Returns a structural value for the given key-value pair.
 *
 * <p>When {@code consistentWithEquals()} holds, the pair itself is returned. Otherwise a new
 * {@code KV} is built from the structural values that the key coder and the value coder produce
 * for the pair's key and value.
 *
 * @param kv the pair to convert
 * @return {@code kv} itself, or a {@code KV} of the component structural values
 */
@Override
public Object structuralValue(KV<K, V> kv) {
  if (consistentWithEquals()) {
    return kv;
  }
  Object keyStructure = getKeyCoder().structuralValue(kv.getKey());
  Object valueStructure = getValueCoder().structuralValue(kv.getValue());
  return KV.of(keyStructure, valueStructure);
}
/**
 * Creates the primitive output collection for this transform.
 *
 * <p>The output is bounded, uses the global default windowing strategy, and is coded as a
 * {@code KvCoder} of the input's key coder and an {@code IterableCoder} over the input's value
 * coder. The input's coder is cast to {@code KvCoder} without a prior type check.
 *
 * @param input the collection of primary-key to (secondary-key, value) pairs
 * @return the primitive output collection
 */
@Override
public PCollection<KV<K1, Iterable<KV<K2, V>>>> expand(PCollection<KV<K1, KV<K2, V>>> input) {
  @SuppressWarnings("unchecked")
  KvCoder<K1, KV<K2, V>> inputKvCoder = (KvCoder<K1, KV<K2, V>>) input.getCoder();
  KvCoder<K1, Iterable<KV<K2, V>>> groupedCoder =
      KvCoder.of(inputKvCoder.getKeyCoder(), IterableCoder.of(inputKvCoder.getValueCoder()));
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.BOUNDED, groupedCoder);
}
}
/**
 * Constructs a {@link SingletonAssert} over the contents of the provided {@link PCollection},
 * viewed as a multimap from each key to an {@link Iterable} of its values, with the specified
 * reason.
 *
 * <p>Note that the collection must be coded by a {@link KvCoder}, not just any coder of
 * {@code KV<K, V>}; the coder is cast unconditionally, so any other coder fails with a
 * {@link ClassCastException}.
 *
 * @param reason the reason passed to {@code PAssertionSite.capture}
 * @param actual the key-value collection under assertion
 * @return an assert over the multimap view of {@code actual}
 */
public static <K, V> SingletonAssert<Map<K, Iterable<V>>> thatMultimap(
    String reason, PCollection<KV<K, V>> actual) {
  @SuppressWarnings("unchecked")
  KvCoder<K, V> entryCoder = (KvCoder<K, V>) actual.getCoder();
  MapCoder<K, Iterable<V>> multimapCoder =
      MapCoder.of(entryCoder.getKeyCoder(), IterableCoder.of(entryCoder.getValueCoder()));
  return new PCollectionViewAssert<>(
      actual, View.asMultimap(), multimapCoder, PAssertionSite.capture(reason));
}
/**
 * Expands this transform in three chained steps: applies {@code GBKIntoKeyedWorkItems} to the
 * input, sets the coder of that intermediate result, and then applies {@code ProcessElements}
 * constructed from {@code original}.
 *
 * <p>The intermediate coder is a {@code KeyedWorkItemCoder} built from {@code ByteArrayCoder},
 * the value coder of the input's {@code KvCoder}, and the window coder of the input's window
 * function. The input's coder is cast to {@code KvCoder} without a prior type check, and that
 * cast is evaluated only after the first {@code apply} has already run.
 *
 * <p>NOTE(review): the final closing brace on the code line closes an enclosing scope whose
 * header is not visible here.
 *
 * @param input collection of byte-array keys paired with (element, restriction) pairs
 * @return the result of applying {@code ProcessElements}
 */
@Override public PCollectionTuple expand(PCollection<KV<byte[], KV<InputT, RestrictionT>>> input) { return input .apply(new GBKIntoKeyedWorkItems<>()) .setCoder( KeyedWorkItemCoder.of( ByteArrayCoder.of(), ((KvCoder<byte[], KV<InputT, RestrictionT>>) input.getCoder()).getValueCoder(), input.getWindowingStrategy().getWindowFn().windowCoder())) .apply(new ProcessElements<>(original)); } }
/**
 * Creates the primitive output collection of keyed work items for the given key-value input.
 *
 * <p>The output keeps the input's boundedness, uses the global default windowing strategy, and
 * is coded with a {@code KeyedWorkItemCoder} built from the input's key coder, value coder and
 * the window coder of the input's window function. The input's coder is cast to
 * {@code KvCoder} without a prior type check.
 *
 * @param input the key-value collection to convert
 * @return the primitive output collection of keyed work items
 */
@Override
public PCollection<KeyedWorkItem<KeyT, InputT>> expand(PCollection<KV<KeyT, InputT>> input) {
  KvCoder<KeyT, InputT> inputKvCoder = (KvCoder<KeyT, InputT>) input.getCoder();
  KeyedWorkItemCoder<KeyT, InputT> workItemCoder =
      KeyedWorkItemCoder.of(
          inputKvCoder.getKeyCoder(),
          inputKvCoder.getValueCoder(),
          input.getWindowingStrategy().getWindowFn().windowCoder());
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), WindowingStrategy.globalDefault(), input.isBounded(), workItemCoder);
}
/**
 * Creates the operator from pipeline options, the keyed input collection and a state backend.
 *
 * <p>The constructor wraps the options in a {@code SerializablePipelineOptions}, captures the
 * input's windowing strategy, takes the key and value coders from the input's {@code KvCoder},
 * asks the state backend for a state-internals factory keyed by the key coder, and creates the
 * timer internals from a timer-data coder based on the window coder.
 *
 * @param pipelineOptions the pipeline options; checked with {@code checkNotNull}
 * @param input the key-value input; its coder is cast to {@code KvCoder} without a type check
 * @param stateBackend supplies the state-internals factory
 */
@SuppressWarnings("unchecked")
public ApexGroupByKeyOperator(
    ApexPipelineOptions pipelineOptions,
    PCollection<KV<K, V>> input,
    ApexStateBackend stateBackend) {
  checkNotNull(pipelineOptions);
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.windowingStrategy = (WindowingStrategy<V, BoundedWindow>) input.getWindowingStrategy();

  // Both component coders come from the same KvCoder, so cast it once.
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) input.getCoder();
  this.keyCoder = inputKvCoder.getKeyCoder();
  this.valueCoder = inputKvCoder.getValueCoder();

  this.stateInternalsFactory = stateBackend.newStateInternalsFactory(keyCoder);
  TimerInternals.TimerDataCoder timerDataCoder =
      TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
  this.timerInternals = new ApexTimerInternals<>(timerDataCoder);
}
/**
 * Transforms the input {@link PCollection} into a singleton map per window.
 *
 * <p>The final value coder is a {@code TransformedMapCoder} that pairs a serializable coder for
 * {@code WindowedValueToValue} with a map coder from the input's key coder to windowed values
 * of the input's value type. The work itself is delegated to
 * {@code BatchViewAsSingleton.applyForSingleton} with a {@code ToMapDoFn}.
 *
 * @param input the key-value collection to turn into a map view
 * @return the collection produced by {@code BatchViewAsSingleton.applyForSingleton}
 */
private <W extends BoundedWindow> PCollection<?> applyForSingletonFallback(
    PCollection<KV<K, V>> input) {
  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  @SuppressWarnings({"rawtypes", "unchecked"})
  KvCoder<K, V> inputKvCoder = (KvCoder) input.getCoder();

  @SuppressWarnings({"unchecked", "rawtypes"})
  Coder<Function<WindowedValue<V>, V>> unwrapFnCoder =
      (Coder) SerializableCoder.of(WindowedValueToValue.class);

  MapCoder<K, WindowedValue<V>> windowedMapCoder =
      MapCoder.of(
          inputKvCoder.getKeyCoder(),
          FullWindowedValueCoder.of(inputKvCoder.getValueCoder(), windowCoder));
  Coder<TransformedMap<K, WindowedValue<V>, V>> finalValueCoder =
      TransformedMapCoder.of(unwrapFnCoder, windowedMapCoder);

  return BatchViewAsSingleton.applyForSingleton(
      runner, input, new ToMapDoFn<>(windowCoder), finalValueCoder, view);
}
}
/**
 * Transforms the input {@link PCollection} into a singleton multimap per window.
 *
 * <p>The final value coder is a {@code TransformedMapCoder} that pairs a serializable coder for
 * {@code IterableWithWindowedValuesToIterable} with a map coder from the input's key coder to
 * iterables of windowed values of the input's value type. The work itself is delegated to
 * {@code BatchViewAsSingleton.applyForSingleton} with a {@code ToMultimapDoFn}.
 *
 * @param input the key-value collection to turn into a multimap view
 * @return the collection produced by {@code BatchViewAsSingleton.applyForSingleton}
 */
private <W extends BoundedWindow> PCollection<?> applyForSingletonFallback(
    PCollection<KV<K, V>> input) {
  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  @SuppressWarnings({"rawtypes", "unchecked"})
  KvCoder<K, V> inputKvCoder = (KvCoder) input.getCoder();

  @SuppressWarnings({"unchecked", "rawtypes"})
  Coder<Function<Iterable<WindowedValue<V>>, Iterable<V>>> unwrapFnCoder =
      (Coder) SerializableCoder.of(IterableWithWindowedValuesToIterable.class);

  MapCoder<K, Iterable<WindowedValue<V>>> windowedMultimapCoder =
      MapCoder.of(
          inputKvCoder.getKeyCoder(),
          IterableCoder.of(
              FullWindowedValueCoder.of(inputKvCoder.getValueCoder(), windowCoder)));
  Coder<TransformedMap<K, Iterable<WindowedValue<V>>, Iterable<V>>> finalValueCoder =
      TransformedMapCoder.of(unwrapFnCoder, windowedMultimapCoder);

  return BatchViewAsSingleton.applyForSingleton(
      runner, input, new ToMultimapDoFn<>(windowCoder), finalValueCoder, view);
}
/**
 * Keys every input element by an integer via {@code GroupByKeyHashAndSortByKeyAndWindowDoFn},
 * sets the coder of that intermediate collection, and then applies
 * {@code GroupByKeyAndSortValuesOnly}.
 *
 * <p>The intermediate coder pairs a {@code VarIntCoder} with a nested key-value coder whose key
 * is (input key, window) and whose value is a fully windowed value of the input's value type.
 *
 * <p>NOTE(review): {@code coder} is not declared in this method; presumably it is a field of
 * the enclosing class — confirm.
 *
 * @param input the key-value collection to group and sort
 * @return the grouped collection produced by {@code GroupByKeyAndSortValuesOnly}
 */
@Override
public PCollection<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>> expand(
    PCollection<KV<K, V>> input) {
  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();
  @SuppressWarnings("unchecked")
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) input.getCoder();

  PCollection<KV<Integer, KV<KV<K, W>, WindowedValue<V>>>> hashed =
      input.apply(ParDo.of(new GroupByKeyHashAndSortByKeyAndWindowDoFn<K, V, W>(coder)));

  KvCoder<K, W> keyAndWindowCoder = KvCoder.of(inputKvCoder.getKeyCoder(), windowCoder);
  hashed.setCoder(
      KvCoder.of(
          VarIntCoder.of(),
          KvCoder.of(
              keyAndWindowCoder,
              FullWindowedValueCoder.of(inputKvCoder.getValueCoder(), windowCoder))));

  return hashed.apply(new GroupByKeyAndSortValuesOnly<>());
}
}