/**
 * Constructs a {@link SingletonAssert} for the value of the provided {@code PCollection<T>},
 * which must be a singleton.
 */
public static <T> SingletonAssert<T> thatSingleton(PCollection<T> actual) {
  return thatSingleton(actual.getName(), actual);
}
/**
 * Constructs an {@link IterableAssert} for the value of the provided {@link PCollection} which
 * must contain a single {@code Iterable<T>} value.
 */
public static <T> IterableAssert<T> thatSingletonIterable(
    PCollection<? extends Iterable<T>> actual) {
  return thatSingletonIterable(actual.getName(), actual);
}
/**
 * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection}, which
 * must have at most one value per key.
 *
 * <p>Note that the actual value must be coded by a {@link KvCoder}, not just any
 * {@code Coder<K, V>}.
 */
public static <K, V> SingletonAssert<Map<K, V>> thatMap(PCollection<KV<K, V>> actual) {
  return thatMap(actual.getName(), actual);
}
/**
 * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection}.
 *
 * <p>Note that the actual value must be coded by a {@link KvCoder}, not just any
 * {@code Coder<K, V>}.
 */
public static <K, V> SingletonAssert<Map<K, Iterable<V>>> thatMultimap(
    PCollection<KV<K, V>> actual) {
  return thatMultimap(actual.getName(), actual);
}
/** Constructs an {@link IterableAssert} for the elements of the provided {@link PCollection}. */
public static <T> IterableAssert<T> that(PCollection<T> actual) {
  return that(actual.getName(), actual);
}
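A minimal usage sketch of these assertion factories (a hypothetical test fragment, not taken from the source; it assumes a TestPipeline named p and the illustrative values shown):

// Hypothetical fragment: "p" is assumed to be a TestPipeline.
PCollection<Integer> nums = p.apply(Create.of(1, 2, 3));

// IterableAssert over all elements of the PCollection.
PAssert.that(nums).containsInAnyOrder(3, 1, 2);

// SingletonAssert over a singleton PCollection (here, the global sum).
PAssert.thatSingleton(nums.apply(Sum.integersGlobally())).isEqualTo(6);

p.run();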
@Override
public void finishSpecifyingOutput(
    String transformName, PInput input, PTransform<?, ?> transform) {
  // All component PCollections will already have been finished. Update their names if
  // appropriate.
  int i = 0;
  for (Map.Entry<TupleTag<?>, PCollection<?>> entry : pcollectionMap.entrySet()) {
    TupleTag<?> tag = entry.getKey();
    PCollection<?> pc = entry.getValue();
    if (pc.getName().equals(PValueBase.defaultName(transformName))) {
      pc.setName(String.format("%s.%s", transformName, tag.getOutName(i)));
    }
    i++;
  }
}
@Override
public void finishSpecifyingOutput(
    String transformName, PInput input, PTransform<?, ?> transform) {
  // All component PCollections will have already been finished.
  int i = 0;
  for (TaggedPValue tpv : pcollections) {
    @SuppressWarnings("unchecked")
    PCollection<T> pc = (PCollection<T>) tpv.getValue();
    if (pc.getName().equals(PValueBase.defaultName(transformName))) {
      pc.setName(String.format("%s.%s%s", transformName, "out", i));
    }
    i++;
  }
}
public static PFeatureRows of(PCollection<FeatureRowExtended> input) {
  Pipeline pipeline = input.getPipeline();
  Create.Values<FeatureRowExtended> empty =
      Create.empty(ProtoCoder.of(FeatureRowExtended.class));
  return new PFeatureRows(
      input,
      pipeline.apply(input.getName() + "/empty.errors" + counter.incrementAndGet(), empty));
}
private <T> void pruneOutput(
    DataSet<WindowedValue<RawUnionValue>> taggedDataSet,
    FlinkBatchTranslationContext context,
    int integerTag,
    PCollection<T> collection) {
  TypeInformation<WindowedValue<T>> outputType = context.getTypeInfo(collection);

  FlinkMultiOutputPruningFunction<T> pruningFunction =
      new FlinkMultiOutputPruningFunction<>(integerTag);

  FlatMapOperator<WindowedValue<RawUnionValue>, WindowedValue<T>> pruningOperator =
      new FlatMapOperator<>(taggedDataSet, outputType, pruningFunction, collection.getName());

  context.setOutputDataSet(collection, pruningOperator);
}
}
/**
 * Registers the provided {@link PCollection} into this {@link SdkComponents}, returning a unique
 * ID for the {@link PCollection}. Multiple registrations of the same {@link PCollection} will
 * return the same unique ID.
 */
public String registerPCollection(PCollection<?> pCollection) throws IOException {
  String existing = pCollectionIds.get(pCollection);
  if (existing != null) {
    return existing;
  }
  String uniqueName = uniqify(pCollection.getName(), pCollectionIds.values());
  pCollectionIds.put(pCollection, uniqueName);
  componentsBuilder.putPcollections(
      uniqueName, PCollectionTranslation.toProto(pCollection, this));
  return uniqueName;
}
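A hedged sketch of the idempotent-registration contract described above (assumptions: the no-arg SdkComponents.create() factory from this codebase and a PCollection pc built elsewhere in a pipeline):

// Illustrative only; "pc" and the create() factory are assumptions, not taken from the source.
SdkComponents components = SdkComponents.create();
String first = components.registerPCollection(pc);
String second = components.registerPCollection(pc);
// Registering the same PCollection again returns the same unique ID.
assert first.equals(second);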
@Test
public void testReadNamed() throws Exception {
  File emptyFile = tempFolder.newFile();
  p.enableAbandonedNodeEnforcement(false);

  assertEquals("TextIO.Read/Read.out", p.apply(TextIO.read().from("somefile")).getName());
  assertEquals(
      "MyRead/Read.out", p.apply("MyRead", TextIO.read().from(emptyFile.getPath())).getName());
}
@Test
public void testParDoOutputNameBasedOnDoFnWithTrimmedSuffix() {
  pipeline.enableAbandonedNodeEnforcement(false);

  PCollection<String> output = pipeline.apply(Create.of(1)).apply(ParDo.of(new TestDoFn()));
  assertThat(output.getName(), containsString("ParDo(Test)"));
}
@Test
public void testParDoOutputNameBasedOnLabel() {
  pipeline.enableAbandonedNodeEnforcement(false);

  PCollection<String> output =
      pipeline.apply(Create.of(1)).apply("MyParDo", ParDo.of(new TestDoFn()));
  assertThat(output.getName(), containsString("MyParDo"));
}
@Test
public void testParDoOutputNameBasedDoFnWithoutMatchingSuffix() {
  pipeline.enableAbandonedNodeEnforcement(false);

  PCollection<String> output =
      pipeline.apply(Create.of(1)).apply(ParDo.of(new StrangelyNamedDoer()));
  assertThat(output.getName(), containsString("ParDo(StrangelyNamedDoer)"));
}
public static RunnerApi.PCollection toProto(PCollection<?> pCollection, SdkComponents components)
    throws IOException {
  String coderId = components.registerCoder(pCollection.getCoder());
  String windowingStrategyId =
      components.registerWindowingStrategy(pCollection.getWindowingStrategy());
  // TODO: Display Data
  return RunnerApi.PCollection.newBuilder()
      .setUniqueName(pCollection.getName())
      .setCoderId(coderId)
      .setIsBounded(toProto(pCollection.isBounded()))
      .setWindowingStrategyId(windowingStrategyId)
      .build();
}
@Test
public void testReadNamed() {
  writePipeline.enableAbandonedNodeEnforcement(false);

  assertEquals(
      "TFRecordIO.Read/Read.out",
      writePipeline.apply(TFRecordIO.read().from("foo.*").withoutValidation()).getName());
  assertEquals(
      "MyRead/Read.out",
      writePipeline
          .apply("MyRead", TFRecordIO.read().from("foo.*").withoutValidation())
          .getName());
}
@Override
public void translateNode(Window.Assign<T> transform, FlinkBatchTranslationContext context) {
  PValue input = context.getInput(transform);

  TypeInformation<WindowedValue<T>> resultTypeInfo =
      context.getTypeInfo(context.getOutput(transform));

  DataSet<WindowedValue<T>> inputDataSet = context.getInputDataSet(input);

  @SuppressWarnings("unchecked")
  final WindowingStrategy<T, ? extends BoundedWindow> windowingStrategy =
      (WindowingStrategy<T, ? extends BoundedWindow>)
          context.getOutput(transform).getWindowingStrategy();

  WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();

  FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
      new FlinkAssignWindows<>(windowFn);

  DataSet<WindowedValue<T>> resultDataSet =
      inputDataSet
          .flatMap(assignWindowsFunction)
          .name(context.getOutput(transform).getName())
          .returns(resultTypeInfo);

  context.setOutputDataSet(context.getOutput(transform), resultDataSet);
}
}
@Override
public void translateNode(Window.Assign<T> transform, FlinkStreamingTranslationContext context) {
  @SuppressWarnings("unchecked")
  WindowingStrategy<T, BoundedWindow> windowingStrategy =
      (WindowingStrategy<T, BoundedWindow>)
          context.getOutput(transform).getWindowingStrategy();

  TypeInformation<WindowedValue<T>> typeInfo = context.getTypeInfo(context.getOutput(transform));

  DataStream<WindowedValue<T>> inputDataStream =
      context.getInputDataStream(context.getInput(transform));

  WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();
  FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
      new FlinkAssignWindows<>(windowFn);

  SingleOutputStreamOperator<WindowedValue<T>> outputDataStream =
      inputDataStream
          .flatMap(assignWindowsFunction)
          .name(context.getOutput(transform).getName())
          .returns(typeInfo);

  context.setOutputDataStream(context.getOutput(transform), outputDataStream);
}
}
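For context, a minimal sketch (assumptions: a PCollection<String> named lines and one-minute fixed windows) of the user-facing windowing transform whose Window.Assign primitive the batch and streaming translators above handle:

// Hypothetical pipeline fragment: Window.into yields the Window.Assign primitive that the
// translators above lower to a Flink flatMap over FlinkAssignWindows.
PCollection<String> windowed =
    lines.apply(
        "FixedWindows", Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))));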