/**
 * Builds a {@link CoGbkResultSchema} holding {@code size} integer-typed tags whose ids are
 * {@code "tag0"}, {@code "tag1"}, ... in ascending order.
 *
 * @param size number of tags to generate; a value of zero or less yields a schema over an empty
 *     tag list
 * @return a schema wrapping the generated tags
 */
private CoGbkResultSchema createSchema(int size) {
  List<TupleTag<?>> generatedTags = new ArrayList<>();
  int index = 0;
  while (index < size) {
    generatedTags.add(new TupleTag<Integer>("tag" + index));
    index++;
  }
  TupleTagList tagList = TupleTagList.of(generatedTags);
  return new CoGbkResultSchema(tagList);
}
@Override public PCollectionTuple expand(PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>> input) { PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal( input.getPipeline(), TupleTagList.of(getMainOutputTag()).and(getAdditionalOutputTags().getAll()), // TODO Collections.emptyMap(), input.getWindowingStrategy(), input.isBounded()); return outputs; } }
/**
 * Creates a tuple over the given keyed collections.
 *
 * @param pipeline the pipeline stored on this tuple
 * @param keyedCollections the tagged keyed collections; the list reference is stored as given
 * @param tupleTagList the tags used to build this tuple's {@link CoGbkResultSchema}
 * @param keyCoder the key coder, or {@code null} if none is available yet
 */
KeyedPCollectionTuple(
    Pipeline pipeline,
    List<TaggedKeyedPCollection<K, ?>> keyedCollections,
    TupleTagList tupleTagList,
    @Nullable Coder<K> keyCoder) {
  CoGbkResultSchema resultSchema = new CoGbkResultSchema(tupleTagList);
  this.schema = resultSchema;
  this.keyCoder = keyCoder;
  this.keyedCollections = keyedCollections;
  this.pipeline = pipeline;
}
/**
 * Converts a {@link CoGbkResultSchema} into a {@link CloudObject}.
 *
 * <p>Each tag in the schema becomes its own {@link CloudObject} carrying the tag id under
 * {@link PropertyNames#VALUE}; the list of those objects is attached to the result under
 * {@link PropertyNames#TUPLE_TAGS}, in the schema's tag order.
 *
 * @param schema the schema to convert
 * @return the cloud-object form of {@code schema}
 */
private CloudObject toCloudObject(CoGbkResultSchema schema) {
  List<CloudObject> encodedTags = new ArrayList<>(schema.getTupleTagList().size());
  for (TupleTag<?> tupleTag : schema.getTupleTagList().getAll()) {
    CloudObject encodedTag = CloudObject.forClass(TupleTag.class);
    Structs.addString(encodedTag, PropertyNames.VALUE, tupleTag.getId());
    encodedTags.add(encodedTag);
  }

  CloudObject encodedSchema = CloudObject.forClass(CoGbkResultSchema.class);
  Structs.addList(encodedSchema, PropertyNames.TUPLE_TAGS, encodedTags);
  return encodedSchema;
}
/**
 * Applies a single {@link ParDo.MultiOutput} instance twice to the same input, under different
 * names, and checks that the two resulting tuples are not equal while each one exposes exactly
 * the main tag and both additional tags.
 */
@Test
public void testMultiOutputAppliedMultipleTimesDifferentOutputs() {
  pipeline.enableAbandonedNodeEnforcement(false);
  PCollection<Long> input = pipeline.apply(GenerateSequence.from(0));

  TupleTag<Long> mainTag = new TupleTag<>();
  final TupleTag<String> stringTag = new TupleTag<>();
  final TupleTag<Integer> intTag = new TupleTag<>();

  // Emits every element to the main output and a converted copy to each additional output.
  DoFn<Long, Long> fanOutFn =
      new DoFn<Long, Long>() {
        @ProcessElement
        public void processElement(ProcessContext context, @Element Long element) {
          context.output(context.element());
          context.output(stringTag, Long.toString(context.element()));
          context.output(intTag, element.intValue());
        }
      };

  ParDo.MultiOutput<Long, Long> multiOutput =
      ParDo.of(fanOutFn).withOutputTags(mainTag, TupleTagList.of(stringTag).and(intTag));

  // The same transform instance is applied twice under distinct names.
  PCollectionTuple firstResult = input.apply("first", multiOutput);
  PCollectionTuple secondResult = input.apply("second", multiOutput);

  assertThat(firstResult, not(equalTo(secondResult)));
  assertThat(
      firstResult.getAll().keySet(), Matchers.containsInAnyOrder(mainTag, stringTag, intTag));
  assertThat(
      secondResult.getAll().keySet(), Matchers.containsInAnyOrder(mainTag, stringTag, intTag));
}
/** Returns the label {@code "CoGbkResultSchema: "} followed by the schema's list of tags. */
@Override
public String toString() {
  String renderedTags = String.valueOf(tupleTagList.getAll());
  return "CoGbkResultSchema: ".concat(renderedTags);
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Supplies the {@link ParDo.MultiOutput} transforms used as test parameters, in a fixed order:
 * plain, with side inputs, with additional outputs and side inputs, with additional outputs
 * only, a splittable fn, and a state/timer fn.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<ParDo.MultiOutput<?, ?>> data() {
  ImmutableList.Builder<ParDo.MultiOutput<?, ?>> cases = ImmutableList.builder();

  // No additional outputs, no side inputs.
  cases.add(
      ParDo.of(new DropElementsFn()).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  // Side inputs only.
  cases.add(
      ParDo.of(new DropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty())
          .withSideInputs(singletonSideInput, multimapSideInput));

  // Additional outputs and side inputs.
  cases.add(
      ParDo.of(new DropElementsFn())
          .withOutputTags(
              new TupleTag<>(),
              TupleTagList.of(new TupleTag<byte[]>() {}).and(new TupleTag<Integer>() {}))
          .withSideInputs(singletonSideInput, multimapSideInput));

  // Additional outputs only.
  cases.add(
      ParDo.of(new DropElementsFn())
          .withOutputTags(
              new TupleTag<>(),
              TupleTagList.of(new TupleTag<byte[]>() {}).and(new TupleTag<Integer>() {})));

  // Splittable fn.
  cases.add(
      ParDo.of(new SplittableDropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  // State/timer fn.
  cases.add(
      ParDo.of(new StateTimerDropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  return cases.build();
}
/**
 * Produces a copy of this {@link CoGbkResult} extended with one more tag and its values.
 *
 * @param tag the tag appended to the schema's tag list
 * @param data the values appended for {@code tag}
 * @return a new result whose schema and value list each have one more entry than this one
 * @throws IllegalArgumentException if {@code nextTestUnionId} does not equal the schema size
 */
public <V> CoGbkResult and(TupleTag<V> tag, List<V> data) {
  if (nextTestUnionId != schema.size()) {
    throw new IllegalArgumentException(
        "Attempting to call and() on a CoGbkResult apparently not created by" + " of().");
  }

  CoGbkResultSchema extendedSchema = new CoGbkResultSchema(schema.getTupleTagList().and(tag));

  // Copy first so the values held by this instance are left untouched.
  List<Iterable<?>> extendedValues = new ArrayList<>(this.valueMap);
  extendedValues.add(data);

  return new CoGbkResult(extendedSchema, extendedValues, nextTestUnionId + 1);
}
/**
 * Produces a {@code KeyedPCollectionTuple<K>} equal to this one with the given tagged
 * PCollection appended at the end.
 *
 * @param tag the tag identifying {@code pc} in the resulting tuple
 * @param pc the keyed collection to append
 * @return a new tuple containing this tuple's collections followed by {@code pc}
 * @throws IllegalArgumentException if {@code pc} belongs to a different pipeline than this tuple
 */
public <V> KeyedPCollectionTuple<K> and(TupleTag<V> tag, PCollection<KV<K, V>> pc) {
  if (pc.getPipeline() != getPipeline()) {
    throw new IllegalArgumentException("PCollections come from different Pipelines");
  }

  TaggedKeyedPCollection<K, ?> tagged = new TaggedKeyedPCollection<>(tag, pc);

  // Reuse the key coder already held by this tuple; otherwise look it up from the new input.
  Coder<K> resolvedKeyCoder;
  if (keyCoder != null) {
    resolvedKeyCoder = keyCoder;
  } else {
    resolvedKeyCoder = getKeyCoder(pc);
  }

  List<TaggedKeyedPCollection<K, ?>> extendedCollections =
      copyAddLast(keyedCollections, tagged);
  TupleTagList extendedTags = schema.getTupleTagList().and(tag);

  return new KeyedPCollectionTuple<>(
      getPipeline(), extendedCollections, extendedTags, resolvedKeyCoder);
}
/**
 * Returns the additional (non-main) output tags of the given applied transform.
 *
 * <p>If the transform is a {@link ParDo.MultiOutput}, its own additional output tags are
 * returned. Otherwise the transform is converted to its proto form, its payload is parsed as a
 * {@link ParDoPayload}, and every proto output key other than the main output tag's id is
 * wrapped in a new {@link TupleTag}.
 *
 * @param application the applied transform to inspect
 * @return the additional output tags
 * @throws IOException declared because proto translation and payload parsing may fail
 */
public static TupleTagList getAdditionalOutputTags(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> appliedTransform = application.getTransform();
  if (appliedTransform instanceof ParDo.MultiOutput) {
    return ((ParDo.MultiOutput<?, ?>) appliedTransform).getAdditionalOutputTags();
  }

  RunnerApi.PTransform proto =
      PTransformTranslation.toProto(
          application, SdkComponents.create(application.getPipeline().getOptions()));
  ParDoPayload parDoPayload = ParDoPayload.parseFrom(proto.getSpec().getPayload());
  String mainOutputId = getMainOutputTag(parDoPayload).getId();

  // Keep every declared output except the main one, in the proto map's key order.
  ArrayList<TupleTag<?>> additionalTags = new ArrayList<>();
  for (String outputId : proto.getOutputsMap().keySet()) {
    if (outputId.equals(mainOutputId)) {
      continue;
    }
    additionalTags.add(new TupleTag<>(outputId));
  }
  return TupleTagList.of(additionalTags);
}
public static <OutputT> PCollectionTuple createPrimitiveOutputFor( PCollection<?> input, DoFn<?, OutputT> fn, TupleTag<OutputT> mainOutputTag, TupleTagList additionalOutputTags, Map<TupleTag<?>, Coder<?>> outputTagsToCoders, WindowingStrategy<?, ?> windowingStrategy) { DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass()); PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal( input.getPipeline(), TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll()), outputTagsToCoders, windowingStrategy, input.isBounded().and(signature.isBoundedPerElement())); // Set output type descriptor similarly to how ParDo.MultiOutput does it. outputs.get(mainOutputTag).setTypeDescriptor(fn.getOutputTypeDescriptor()); return outputs; }
/**
 * Creates a {@link CoGbkResult} built from an empty tag list and an empty, newly allocated
 * value list.
 *
 * @return a result with no tags and no values
 */
public static <V> CoGbkResult empty() {
  CoGbkResultSchema emptySchema = new CoGbkResultSchema(TupleTagList.empty());
  List<Iterable<?>> noValues = new ArrayList<Iterable<?>>();
  return new CoGbkResult(emptySchema, noValues);
}