/**
 * Looks up the key {@code Coder} from the {@code Coder} of this transform's input. The same
 * {@code Coder} is also used for the keys of this transform's output.
 *
 * @param inputCoder the coder of the input key/value pairs
 * @return the coder for the keys
 */
public static <K, V> Coder<K> getKeyCoder(Coder<KV<K, V>> inputCoder) {
  Coder<K> keyCoder = getInputKvCoder(inputCoder).getKeyCoder();
  return keyCoder;
}
private static <K, V> Coder<K> getKeyCoder(PCollection<KV<K, V>> pc) { // TODO: This should already have run coder inference for output, but may not have been consumed // as input yet (and won't be fully specified); This is fine // Assumes that the PCollection uses a KvCoder. Coder<?> entryCoder = pc.getCoder(); if (!(entryCoder instanceof KvCoder<?, ?>)) { throw new IllegalArgumentException("PCollection does not use a KvCoder"); } @SuppressWarnings("unchecked") KvCoder<K, V> coder = (KvCoder<K, V>) entryCoder; return coder.getKeyCoder(); }
/**
 * Returns the key {@code Coder} of {@code coder}, which must be a {@code KvCoder}.
 *
 * <p>The type check is reported through {@code checkState}; the failure message describes a
 * non-{@code KvCoder} here as an internal error.
 */
private Coder<K> getKeyCoder(Coder<KV<K, V>> coder) {
  String ownerName = getClass().getSimpleName();
  String requiredCoderName = KvCoder.class.getSimpleName();
  checkState(
      coder instanceof KvCoder,
      "%s requires a coder of class %s."
          + " This is an internal error; this is checked during pipeline construction"
          + " but became corrupted.",
      ownerName,
      requiredCoderName);
  @SuppressWarnings("unchecked")
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) coder;
  return kvCoder.getKeyCoder();
}
/**
 * Looks up the {@link Coder} of the secondary keys nested inside {@code inputCoder}.
 *
 * @param inputCoder coder of the primary-key to secondary-key/value iterable pairs
 * @return the key coder obtained from {@code getSecondaryKeyValueCoder(inputCoder)}
 */
private static <PrimaryKeyT, SecondaryKeyT, ValueT> Coder<SecondaryKeyT> getSecondaryKeyCoder(
    Coder<KV<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>> inputCoder) {
  Coder<SecondaryKeyT> secondaryKeyCoder = getSecondaryKeyValueCoder(inputCoder).getKeyCoder();
  return secondaryKeyCoder;
}
/**
 * Returns the key {@code Coder} obtained from {@code getKvCoder(inputCoder)}.
 *
 * @param inputCoder coder of the key to windowed-value iterable pairs
 */
public Coder<K> getKeyCoder(Coder<KV<K, Iterable<WindowedValue<V>>>> inputCoder) {
  Coder<K> keyCoder = getKvCoder(inputCoder).getKeyCoder();
  return keyCoder;
}
/**
 * Validates that {@code input} is encoded in a way that permits state and timers.
 *
 * <p>Two conditions are checked: the input's coder must be a {@link KvCoder}, and that coder's
 * key coder must pass {@code verifyDeterministic()}.
 *
 * @param fn the {@link DoFn} being validated (not consulted by the checks in this method)
 * @param input the collection whose coder is inspected
 * @throws IllegalArgumentException if the input coder is not a {@link KvCoder}, or if its key
 *     coder is not deterministic; in the latter case the original {@link
 *     Coder.NonDeterministicException} is attached as the cause
 */
private static void validateStateApplicableForInput(DoFn<?, ?> fn, PCollection<?> input) {
  Coder<?> inputCoder = input.getCoder();
  checkArgument(
      inputCoder instanceof KvCoder,
      "%s requires its input to use %s in order to use state and timers.",
      ParDo.class.getSimpleName(),
      KvCoder.class.getSimpleName());

  KvCoder<?, ?> kvCoder = (KvCoder<?, ?>) inputCoder;
  try {
    kvCoder.getKeyCoder().verifyDeterministic();
  } catch (Coder.NonDeterministicException exc) {
    // Chain the original exception so the reason the key coder is non-deterministic is not lost.
    throw new IllegalArgumentException(
        String.format(
            "%s requires a deterministic key coder in order to use state and timers",
            ParDo.class.getSimpleName()),
        exc);
  }
}
/** Lists the component coders of {@code from}: the key coder first, then the value coder. */
@Override
public List<? extends Coder<?>> getComponents(KvCoder<?, ?> from) {
  Coder<?> keyComponent = from.getKeyCoder();
  Coder<?> valueComponent = from.getValueCoder();
  return ImmutableList.of(keyComponent, valueComponent);
}
/**
 * Checks the key coder and then the value coder for determinism.
 *
 * @throws NonDeterministicException if propagated from either {@code verifyDeterministic} call
 */
@Override
public void verifyDeterministic() throws NonDeterministicException {
  Coder<?> keyComponent = getKeyCoder();
  verifyDeterministic(this, "Key coder must be deterministic", keyComponent);

  Coder<?> valueComponent = getValueCoder();
  verifyDeterministic(this, "Value coder must be deterministic", valueComponent);
}
private static <K, V> PCollection<KV<K, V>> setValueCoder( PCollection<KV<K, V>> kvs, Coder<V> valueCoder) { // safe case because PCollection of KV always has KvCoder KvCoder<K, V> coder = (KvCoder<K, V>) kvs.getCoder(); return kvs.setCoder(KvCoder.of(coder.getKeyCoder(), valueCoder)); }
@Nullable @Override public <T> T get(PCollectionView<T> view, BoundedWindow window) { Iterable<?> elements = getIterable(view, window); // TODO: Add support for choosing which representation is contained based upon the // side input materialization. We currently can assume that we always have a multimap // materialization as that is the only supported type within the Java SDK. ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn(); Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder(); return (T) viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) elements)); }
/**
 * Translates {@code target} into a {@link CloudObject} of kind {@code KIND_PAIR}, flagged as
 * pair-like, with the key coder and value coder added as its components (in that order).
 */
@Override
public CloudObject toCloudObject(KvCoder target, SdkComponents sdkComponents) {
  CloudObject pairObject = CloudObject.forClassName(CloudObjectKinds.KIND_PAIR);
  Structs.addBoolean(pairObject, PropertyNames.IS_PAIR_LIKE, true);

  ImmutableList<Coder<?>> componentCoders =
      ImmutableList.<Coder<?>>of(target.getKeyCoder(), target.getValueCoder());
  return addComponents(pairObject, componentCoders, sdkComponents);
}
/**
 * Builds a {@link SingletonAssert} over the contents of {@code actual} viewed as a {@link Map},
 * tagged with the given reason. The {@link PCollection} must hold at most one value per key.
 *
 * <p>Note that {@code actual} must be coded by a {@link KvCoder}, not just any {@code
 * Coder<KV<K, V>>}: its coder is cast to {@link KvCoder} to obtain the key and value coders.
 *
 * @param reason the reason passed to {@code PAssertionSite.capture}
 * @param actual the key/value collection under assertion
 */
public static <K, V> SingletonAssert<Map<K, V>> thatMap(
    String reason, PCollection<KV<K, V>> actual) {
  @SuppressWarnings("unchecked")
  KvCoder<K, V> actualCoder = (KvCoder<K, V>) actual.getCoder();
  Coder<K> keyCoder = actualCoder.getKeyCoder();
  Coder<V> valueCoder = actualCoder.getValueCoder();
  return new PCollectionViewAssert<>(
      actual, View.asMap(), MapCoder.of(keyCoder, valueCoder), PAssertionSite.capture(reason));
}
/**
 * Returns {@code kv} itself when {@code consistentWithEquals()} holds; otherwise returns a new
 * {@link KV} built from the structural values of the key and value, each computed by the
 * corresponding component coder.
 */
@Override
public Object structuralValue(KV<K, V> kv) {
  if (!consistentWithEquals()) {
    Object structuralKey = getKeyCoder().structuralValue(kv.getKey());
    Object structuralVal = getValueCoder().structuralValue(kv.getValue());
    return KV.of(structuralKey, structuralVal);
  }
  return kv;
}
/**
 * Creates the primitive output collection for this transform.
 *
 * <p>The output is created in the input's pipeline with the global default windowing strategy,
 * marked bounded, and coded with the input key coder paired with an {@link IterableCoder} over
 * the input value coder.
 */
@Override
public PCollection<KV<K1, Iterable<KV<K2, V>>>> expand(PCollection<KV<K1, KV<K2, V>>> input) {
  @SuppressWarnings("unchecked")
  KvCoder<K1, KV<K2, V>> inputKvCoder = (KvCoder<K1, KV<K2, V>>) input.getCoder();
  Coder<K1> keyCoder = inputKvCoder.getKeyCoder();
  Coder<KV<K2, V>> valueCoder = inputKvCoder.getValueCoder();
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      IsBounded.BOUNDED,
      KvCoder.of(keyCoder, IterableCoder.of(valueCoder)));
}
} // closes the enclosing scope, which is opened outside this excerpt
/**
 * Creates the primitive output collection of {@link KeyedWorkItem}s.
 *
 * <p>The output is created in the input's pipeline with the global default windowing strategy
 * and the input's boundedness. Its coder is a {@link KeyedWorkItemCoder} built from the input's
 * key coder, value coder, and the window coder of the input's window function.
 */
@Override
public PCollection<KeyedWorkItem<KeyT, InputT>> expand(PCollection<KV<KeyT, InputT>> input) {
  KvCoder<KeyT, InputT> inputKvCoder = (KvCoder<KeyT, InputT>) input.getCoder();
  Coder<KeyT> keyCoder = inputKvCoder.getKeyCoder();
  Coder<InputT> valueCoder = inputKvCoder.getValueCoder();
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      input.isBounded(),
      KeyedWorkItemCoder.of(
          keyCoder, valueCoder, input.getWindowingStrategy().getWindowFn().windowCoder()));
}
/**
 * Creates the operator from the pipeline options, the keyed input collection, and a state
 * backend.
 *
 * <p>The windowing strategy and the key/value coders are taken from {@code input}; the state
 * internals factory is obtained from {@code stateBackend} using the key coder; the timer
 * internals use a timer-data coder built from the window coder.
 *
 * @param pipelineOptions the pipeline options; passed through {@code checkNotNull}
 * @param input the keyed input, whose coder is cast to {@link KvCoder}
 * @param stateBackend the source of the state internals factory
 */
@SuppressWarnings("unchecked")
public ApexGroupByKeyOperator(
    ApexPipelineOptions pipelineOptions,
    PCollection<KV<K, V>> input,
    ApexStateBackend stateBackend) {
  checkNotNull(pipelineOptions);
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.windowingStrategy = (WindowingStrategy<V, BoundedWindow>) input.getWindowingStrategy();

  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) input.getCoder();
  this.keyCoder = inputKvCoder.getKeyCoder();
  this.valueCoder = inputKvCoder.getValueCoder();
  this.stateInternalsFactory = stateBackend.newStateInternalsFactory(this.keyCoder);

  TimerInternals.TimerDataCoder timerDataCoder =
      TimerInternals.TimerDataCoder.of(this.windowingStrategy.getWindowFn().windowCoder());
  this.timerInternals = new ApexTimerInternals<>(timerDataCoder);
}
/**
 * Converts the input {@link PCollection} into a singleton {@link Map} per window.
 *
 * <p>The final value coder is a {@code TransformedMapCoder} wrapping a {@link MapCoder} from
 * the input key coder to a full windowed-value coder over the input value coder; the work is
 * delegated to {@code BatchViewAsSingleton.applyForSingleton} with a {@code ToMapDoFn}.
 */
private <W extends BoundedWindow> PCollection<?> applyForSingletonFallback(
    PCollection<KV<K, V>> input) {
  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  @SuppressWarnings({"rawtypes", "unchecked"})
  KvCoder<K, V> inputKvCoder = (KvCoder) input.getCoder();
  Coder<K> keyCoder = inputKvCoder.getKeyCoder();
  Coder<V> valueCoder = inputKvCoder.getValueCoder();

  @SuppressWarnings({"unchecked", "rawtypes"})
  Coder<Function<WindowedValue<V>, V>> unwrapFnCoder =
      (Coder) SerializableCoder.of(WindowedValueToValue.class);

  Coder<TransformedMap<K, WindowedValue<V>, V>> finalValueCoder =
      TransformedMapCoder.of(
          unwrapFnCoder,
          MapCoder.of(keyCoder, FullWindowedValueCoder.of(valueCoder, windowCoder)));

  return BatchViewAsSingleton.applyForSingleton(
      runner, input, new ToMapDoFn<>(windowCoder), finalValueCoder, view);
}
} // closes the enclosing scope, which is opened outside this excerpt
/**
 * Converts the input {@link PCollection} into a singleton {@link Map} per window, where each
 * map value is an iterable.
 *
 * <p>The final value coder is a {@code TransformedMapCoder} wrapping a {@link MapCoder} from
 * the input key coder to an {@link IterableCoder} of full windowed-value coders over the input
 * value coder; the work is delegated to {@code BatchViewAsSingleton.applyForSingleton} with a
 * {@code ToMultimapDoFn}.
 */
private <W extends BoundedWindow> PCollection<?> applyForSingletonFallback(
    PCollection<KV<K, V>> input) {
  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  @SuppressWarnings({"rawtypes", "unchecked"})
  KvCoder<K, V> inputKvCoder = (KvCoder) input.getCoder();
  Coder<K> keyCoder = inputKvCoder.getKeyCoder();
  Coder<V> valueCoder = inputKvCoder.getValueCoder();

  @SuppressWarnings({"unchecked", "rawtypes"})
  Coder<Function<Iterable<WindowedValue<V>>, Iterable<V>>> unwrapFnCoder =
      (Coder) SerializableCoder.of(IterableWithWindowedValuesToIterable.class);

  Coder<TransformedMap<K, Iterable<WindowedValue<V>>, Iterable<V>>> finalValueCoder =
      TransformedMapCoder.of(
          unwrapFnCoder,
          MapCoder.of(
              keyCoder, IterableCoder.of(FullWindowedValueCoder.of(valueCoder, windowCoder))));

  return BatchViewAsSingleton.applyForSingleton(
      runner, input, new ToMultimapDoFn<>(windowCoder), finalValueCoder, view);
}
/**
 * Configures {@code edge} with key-extractor and encoder/decoder properties, then connects it
 * through the builder.
 *
 * <p>Key encoder/decoder properties are set only when {@code elementCoder} is a {@link
 * KvCoder}. The element encoder/decoder properties always use a full windowed-value coder
 * built from {@code elementCoder} and {@code windowCoder}.
 *
 * @param edge IR edge to add.
 * @param elementCoder element coder.
 * @param windowCoder window coder.
 */
void addEdge(final IREdge edge, final Coder elementCoder, final Coder windowCoder) {
  edge.setProperty(KeyExtractorProperty.of(new BeamKeyExtractor()));

  if (elementCoder instanceof KvCoder) {
    final KvCoder kvElementCoder = (KvCoder) elementCoder;
    final Coder kvKeyCoder = kvElementCoder.getKeyCoder();
    edge.setProperty(KeyEncoderProperty.of(new BeamEncoderFactory(kvKeyCoder)));
    edge.setProperty(KeyDecoderProperty.of(new BeamDecoderFactory(kvKeyCoder)));
  }

  final WindowedValue.FullWindowedValueCoder windowedValueCoder =
      WindowedValue.getFullCoder(elementCoder, windowCoder);
  edge.setProperty(EncoderProperty.of(new BeamEncoderFactory<>(windowedValueCoder)));
  edge.setProperty(DecoderProperty.of(new BeamDecoderFactory<>(windowedValueCoder)));

  builder.connectVertices(edge);
}
/**
 * Applies {@code GroupByKeyHashAndSortByKeyAndWindowDoFn} to the input, sets the coder of the
 * resulting collection, and then applies {@code GroupByKeyAndSortValuesOnly}.
 *
 * <p>The intermediate coder pairs a {@link VarIntCoder} key with a nested {@link KvCoder} of
 * (key, window) to full windowed value, built from the input's key, value and window coders.
 */
@Override
public PCollection<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>> expand(
    PCollection<KV<K, V>> input) {
  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();
  @SuppressWarnings("unchecked")
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) input.getCoder();

  // `coder` is not declared in this method; presumably a field of the enclosing class.
  PCollection<KV<Integer, KV<KV<K, W>, WindowedValue<V>>>> keyedByHash =
      input.apply(ParDo.of(new GroupByKeyHashAndSortByKeyAndWindowDoFn<K, V, W>(coder)));

  Coder<K> keyCoder = inputKvCoder.getKeyCoder();
  Coder<V> valueCoder = inputKvCoder.getValueCoder();
  keyedByHash.setCoder(
      KvCoder.of(
          VarIntCoder.of(),
          KvCoder.of(
              KvCoder.of(keyCoder, windowCoder),
              FullWindowedValueCoder.of(valueCoder, windowCoder))));

  return keyedByHash.apply(new GroupByKeyAndSortValuesOnly<>());
}
} // closes the enclosing scope, which is opened outside this excerpt