/**
 * Reports whether a schema is attached to this {@link PCollection}.
 *
 * <p>A schema counts as attached exactly when the current coder is a {@link SchemaCoder}.
 *
 * @return {@code true} if {@code getCoder()} yields a {@link SchemaCoder}, {@code false} otherwise
 */
@Experimental(Kind.SCHEMAS)
public boolean hasSchema() {
  final boolean schemaAttached = getCoder() instanceof SchemaCoder;
  return schemaAttached;
}
/**
 * Creates an enforcement for one input bundle and the transform applied to it.
 *
 * <p>Remembers the transform, takes the coder from the input bundle's PCollection, and starts
 * with an empty identity-keyed map of mutation elements.
 *
 * @param input the committed bundle whose PCollection supplies the coder
 * @param transform the applied transform this enforcement is associated with
 */
private ImmutabilityCheckingEnforcement(
    CommittedBundle<T> input, AppliedPTransform<?, ?, ?> transform) {
  this.transform = transform;
  this.coder = input.getPCollection().getCoder();
  this.mutationElements = new IdentityHashMap<>();
}
/**
 * Collects the coder of every output of the current transform that is a {@link PCollection}.
 *
 * <p>Outputs that are not {@link PCollection}s are skipped.
 *
 * @return a map from each output's tag to that output's coder
 */
public Map<TupleTag<?>, Coder<?>> getOutputCoders() {
  return currentTransform.getOutputs().entrySet().stream()
      // Only PCollection outputs carry a coder.
      .filter(output -> output.getValue() instanceof PCollection)
      .collect(
          Collectors.toMap(
              output -> output.getKey(),
              output -> ((PCollection) output.getValue()).getCoder()));
}
/**
 * Returns the toRowFunction of the attached schema.
 *
 * @return the function obtained from this collection's {@link SchemaCoder}
 * @throws IllegalStateException if {@link #hasSchema()} is {@code false}
 */
@Experimental(Kind.SCHEMAS)
public SerializableFunction<T, Row> getToRowFunction() {
  if (hasSchema()) {
    return ((SchemaCoder<T>) getCoder()).getToRowFunction();
  }
  throw new IllegalStateException("Cannot call getToRowFunction when there is no schema");
}
/**
 * Applies a per-key combine built from an {@code ApproximateUniqueCombineFn} that uses the
 * configured sample size and the input's value coder.
 *
 * @param input the keyed input collection; its coder must be a {@link KvCoder}
 * @return the result of applying the per-key combine to {@code input}
 * @throws IllegalStateException if the input's coder is not a {@link KvCoder}
 */
@Override
public PCollection<KV<K, Long>> expand(PCollection<KV<K, V>> input) {
  Coder<KV<K, V>> kvCoder = input.getCoder();
  if (kvCoder instanceof KvCoder) {
    @SuppressWarnings("unchecked")
    final Coder<V> valueCoder = ((KvCoder<K, V>) kvCoder).getValueCoder();
    return input.apply(Combine.perKey(new ApproximateUniqueCombineFn<>(sampleSize, valueCoder)));
  }
  throw new IllegalStateException(
      "ApproximateUnique.PerKey requires its input to use KvCoder");
}
/**
 * Builds the full windowed-value coder for a collection.
 *
 * <p>Combines the collection's element coder with the window coder of the window function from
 * its windowing strategy.
 *
 * @param collection the collection whose elements will be encoded
 * @return the coder returned by {@code WindowedValue.getFullCoder} for those two coders
 */
public <T> Coder<WindowedValue<T>> getWindowedInputCoder(PCollection<T> collection) {
  return WindowedValue.getFullCoder(
      collection.getCoder(), collection.getWindowingStrategy().getWindowFn().windowCoder());
}
/**
 * Creates a primitive output collection that reuses the input's pipeline, windowing strategy,
 * boundedness and coder.
 *
 * @param input the collection whose properties are copied onto the output
 * @return the newly created primitive output collection
 */
@Override
public PCollection<ElemT> expand(PCollection<ElemT> input) {
  final PCollection<ElemT> output =
      PCollection.createPrimitiveOutputInternal(
          input.getPipeline(),
          input.getWindowingStrategy(),
          input.isBounded(),
          input.getCoder());
  return output;
}
/**
 * Turns every signal into a view and then passes the input through an identity map step that
 * requires all of those views as side inputs.
 *
 * @param input the collection whose elements are emitted unchanged by the "Wait" step
 * @return the output of the "Wait" step
 */
@Override
public PCollection<T> expand(PCollection<T> input) {
  List<PCollectionView<?>> waitViews = Lists.newArrayList();
  for (int signalIndex = 0; signalIndex < signals.size(); signalIndex++) {
    // Each signal gets its own, index-suffixed step name.
    String stepName = "To wait view " + signalIndex;
    waitViews.add(signals.get(signalIndex).apply(stepName, new ToWaitView()));
  }

  return input.apply(
      "Wait",
      MapElements.into(input.getCoder().getEncodedTypeDescriptor())
          .via(fn((element, context) -> element, requiresSideInputs(waitViews))));
}
}
/**
 * Creates a primitive output collection for the keyed input.
 *
 * <p>The output keeps the input's pipeline, windowing strategy and boundedness; its coder is
 * whatever {@code GroupByKey.getOutputKvCoder} derives from the input coder, passed through a
 * raw {@link Coder} cast.
 *
 * @param input the keyed input collection
 * @return the newly created primitive output collection
 */
@SuppressWarnings({"rawtypes", "unchecked"})
@Override
public PCollection<KV<K, Iterable<WindowedValue<V>>>> expand(PCollection<KV<K, V>> input) {
  PCollection<KV<K, Iterable<WindowedValue<V>>>> grouped =
      PCollection.createPrimitiveOutputInternal(
          input.getPipeline(),
          input.getWindowingStrategy(),
          input.isBounded(),
          (Coder) GroupByKey.getOutputKvCoder(input.getCoder()));
  return grouped;
}
}
/**
 * Produces type information for the windowed values of a collection.
 *
 * <p>The full windowed-value coder is built from the collection's element coder and the window
 * coder of its windowing strategy's window function, then wrapped in a {@code
 * CoderTypeInformation}.
 *
 * @param collection the collection to describe
 * @return coder-backed type information for the collection's windowed values
 */
@SuppressWarnings("unchecked")
public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
  WindowedValue.FullWindowedValueCoder<T> fullCoder =
      WindowedValue.getFullCoder(
          collection.getCoder(), collection.getWindowingStrategy().getWindowFn().windowCoder());
  return new CoderTypeInformation<>(fullCoder);
}
/**
 * Runs the input through {@code VoidKeyToMultimapMaterializationDoFn} and gives the result a
 * {@link KvCoder} made of a {@link VoidCoder} and the input's coder.
 *
 * @param input the collection to be keyed
 * @return the ParDo output with its coder explicitly set
 */
@Override
public PCollection<KV<Void, T>> expand(PCollection<T> input) {
  // Raw type kept on purpose: the coder is assigned explicitly right below.
  PCollection keyed = input.apply(ParDo.of(new VoidKeyToMultimapMaterializationDoFn<>()));
  KvCoder<Void, T> keyedCoder = KvCoder.of(VoidCoder.of(), input.getCoder());
  keyed.setCoder(keyedCoder);
  return keyed;
}
}
/**
 * Creates the primitive output collection for a collection of keyed work items.
 *
 * <p>The output uses the input's pipeline and boundedness together with {@code
 * outputWindowingStrategy}. Its coder pairs the work-item key coder with an iterable coder over
 * the work-item element coder.
 *
 * @param input the keyed work items
 * @return the newly created primitive output collection
 */
@Override
public PCollection<KV<K, Iterable<V>>> expand(PCollection<KeyedWorkItem<K, V>> input) {
  KeyedWorkItemCoder<K, V> workItemCoder = getKeyedWorkItemCoder(input.getCoder());
  KvCoder<K, Iterable<V>> groupedCoder =
      KvCoder.of(
          workItemCoder.getKeyCoder(), IterableCoder.of(workItemCoder.getElementCoder()));
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), outputWindowingStrategy, input.isBounded(), groupedCoder);
}
}
@Test public void testParDoWithNoOutputsErrorDoesNotMentionTupleTag() { PCollection<EmptyClass> input = p.apply(Create.of(1, 2, 3)).apply(ParDo.of(new EmptyClassDoFn())); thrown.expect(IllegalStateException.class); // Output specific to ParDo additional TupleTag outputs should not be present. thrown.expectMessage(not(containsString("erasure"))); thrown.expectMessage(not(containsString("see TupleTag Javadoc"))); // Instead, expect output suggesting other possible fixes. thrown.expectMessage("Building a Coder using a registered CoderProvider failed"); input.getCoder(); }
@Test public void testUntypedMainOutputTagTypedOutputTupleTag() { // mainOutputTag is allowed to be untyped because Coder can be inferred other ways. TupleTag<Integer> mainOutputTag = new TupleTag<>(); TupleTag<Integer> typedOutputTag = new TupleTag<Integer>() {}; PCollectionTuple tuple = buildPCollectionTupleWithTags(mainOutputTag, typedOutputTag); assertThat(tuple.get(typedOutputTag).getCoder(), instanceOf(VarIntCoder.class)); }
/**
 * Feeds the input's elements into a {@code WriteView} for {@code view} and returns the input
 * itself.
 *
 * @param input the collection whose elements are written to the view
 * @return the same {@code input} instance that was passed in
 */
@Override
public PCollection<ElemT> expand(final PCollection<ElemT> input) {
  input
      // Key every element with the same null Void key.
      .apply(WithKeys.of((Void) null))
      // Set the keyed collection's coder explicitly from VoidCoder and the input's coder.
      .setCoder(KvCoder.of(VoidCoder.of(), input.getCoder()))
      // Group on that key, then keep only the grouped values.
      .apply(GroupByKey.create())
      .apply(Values.create())
      .apply(new WriteView<>(view));
  // The result of the chain is discarded; the untouched input is what callers get back.
  return input;
}
}
/** A coder given via {@code withCoder} must be the coder of the collection Create produces. */
@Test
public void testCreateDefaultOutputCoderUsingCoder() throws Exception {
  Coder<Record> recordCoder = new RecordCoder();
  Create.Values<Record> createRecords =
      Create.of(new Record(), new Record2()).withCoder(recordCoder);

  assertThat(p.apply(createRecords).getCoder(), equalTo(recordCoder));
}
/** Create with an explicit schema must produce a collection whose coder is a SchemaCoder. */
@Test
public void testCreateExplicitSchema() {
  Create.Values<String> createWithSchema =
      Create.of("a", "b", "c", "d")
          .withSchema(
              STRING_SCHEMA,
              value -> Row.withSchema(STRING_SCHEMA).addValue(value).build(),
              row -> row.getString("field"));

  PCollection<String> out = p.apply(createWithSchema);

  assertThat(out.getCoder(), instanceOf(SchemaCoder.class));
}
/**
 * A coder given via {@code withCoder} must be the coder of the collection produced by a
 * timestamped Create.
 */
@Test
public void testCreateTimestampedDefaultOutputCoderUsingCoder() throws Exception {
  Coder<Record> recordCoder = new RecordCoder();
  TimestampedValue<Record> first = TimestampedValue.of(new Record(), new Instant(0));
  TimestampedValue<Record> second = TimestampedValue.of(new Record2(), new Instant(0));
  Create.TimestampedValues<Record> createRecords =
      Create.timestamped(first, second).withCoder(recordCoder);

  assertThat(p.apply(createRecords).getCoder(), equalTo(recordCoder));
}
/**
 * An empty Create built from a string coder must contain exactly {@code NO_LINES_ARRAY} and
 * carry that coder.
 */
@Test
@Category(ValidatesRunner.class)
public void testCreateEmpty() {
  PCollection<String> noLines = p.apply(Create.empty(StringUtf8Coder.of()));

  PAssert.that(noLines).containsInAnyOrder(NO_LINES_ARRAY);

  StringUtf8Coder expectedCoder = StringUtf8Coder.of();
  assertEquals(expectedCoder, noLines.getCoder());

  p.run();
}
/** The output of {@code Latest.perKey()} must carry the same coder as its input. */
@Test
public void testPerKeyOutputCoder() {
  p.enableAbandonedNodeEnforcement(false);

  KvCoder<String, Long> kvCoder =
      KvCoder.of(AvroCoder.of(String.class), AvroCoder.of(Long.class));
  Create.Values<KV<String, Long>> createInput =
      Create.of(KV.of("foo", 1L)).withCoder(kvCoder);

  PCollection<KV<String, Long>> latest = p.apply(createInput).apply(Latest.perKey());

  assertEquals("Should use input coder for outputs", kvCoder, latest.getCoder());
}