/**
 * Keys every element of {@code input} with the same {@code null} key, groups by that key, and
 * returns only the grouped values.
 *
 * @param input the collection whose elements are grouped
 * @return the value iterables emitted by the {@link GroupByKey}, with the constant key dropped
 */
@Override
public PCollection<Iterable<InputT>> expand(PCollection<InputT> input) {
  // All elements share one (null) key, so the key carries no information and is discarded again.
  return input
      .apply(WithKeys.<Void, InputT>of((Void) null))
      .apply(GroupByKey.<Void, InputT>create())
      .apply(Values.<Iterable<InputT>>create());
}
}
/** Checks that a constant-key {@link WithKeys} transform reports the name {@code "WithKeys"}. */
@Test
public void testWithKeysGetName() {
  String reportedName = WithKeys.<Integer, String>of(100).getName();
  assertEquals("WithKeys", reportedName);
}
/**
 * Keys every element with the constant {@code "DummyKey"}, sets the keyed collection's coder
 * explicitly, and runs the result through a {@code LimitFn} built from {@code getCount()}.
 *
 * @param input the collection to process; its coder is reused for the values of the keyed
 *     collection
 * @return the output of the {@code LimitFn} {@link ParDo}
 */
@Override
public PCollection<T> expand(PCollection<T> input) {
  // Read the element coder up front, before any transform is applied to the input.
  Coder<T> elementCoder = input.getCoder();
  return input
      .apply(WithKeys.of("DummyKey"))
      .setCoder(KvCoder.of(StringUtf8Coder.of(), elementCoder))
      .apply(ParDo.of(new LimitFn<T>(getCount())));
}
}
/**
 * Builds the fixture: a collection created from the integers 1, 2 and 3, and a downstream
 * collection that keys each of those elements with {@code "foo"}.
 */
@Before
public void setup() {
  this.created = p.apply(Create.<Integer>of(1, 2, 3));
  this.downstream = this.created.apply(WithKeys.of("foo"));
}
/**
 * Groups all elements of {@code input} under a single {@code null} key and hands the grouped
 * values to a {@code WriteView} built from {@code view}.
 *
 * @param input the collection whose contents are written to the view
 * @return {@code input} itself, unchanged; the grouped branch is only a side output of this
 *     expansion
 */
@Override
public PCollection<ElemT> expand(final PCollection<ElemT> input) {
  input
      .apply(WithKeys.<Void, ElemT>of((Void) null))
      // The keyed coder is set explicitly: Void keys paired with the input's own element coder.
      .setCoder(KvCoder.of(VoidCoder.of(), input.getCoder()))
      .apply(GroupByKey.<Void, ElemT>create())
      .apply(Values.<Iterable<ElemT>>create())
      .apply(new WriteView<>(view));
  return input;
}
}
@Override public POutput expand(PCollection<? extends T> input) { return input // assign a dummy key and global window, // this is needed to accumulate all observed events in the same state cell .apply(Window.into(new GlobalWindows())) .apply(WithKeys.of("dummyKey")) .apply( "checkAllEventsForSuccess", ParDo.of(new StatefulPredicateCheck<>(coder, formatter, successPredicate))) // signal the success/failure to the result topic .apply("publishSuccess", PubsubIO.writeStrings().to(resultTopicPath.getPath())); } }
/**
 * Initialises the mocks and the executor fixtures, builds a two-step pipeline (a created
 * collection of three strings, then the same elements keyed with {@code 3}), applies the direct
 * overrides, and records the producers of both collections from the resulting graph.
 */
@Before
public void setup() {
  MockitoAnnotations.initMocks(this);

  // Execution fixtures.
  bundleFactory = ImmutableListBundleFactory.create();
  transformEvaluationState =
      TransformExecutorServices.parallel(MoreExecutors.newDirectExecutorService());
  evaluatorCompleted = new CountDownLatch(1);
  completionCallback = new RegisteringCompletionCallback(evaluatorCompleted);

  // Pipeline under test: Create -> WithKeys.
  created = p.apply(Create.of("foo", "spam", "third"));
  PCollection<KV<Integer, String>> keyed = created.apply(WithKeys.of(3));

  // Overrides are applied before the graph is read.
  DirectGraphs.performDirectOverrides(p);
  DirectGraph directGraph = DirectGraphs.getGraph(p);
  createdProducer = directGraph.getProducer(created);
  downstreamProducer = directGraph.getProducer(keyed);

  when(evaluationContext.getMetrics()).thenReturn(metrics);
}
/**
 * Keys the elements of {@code COLLECTION} with a constant {@code null} key of type {@link Void}
 * and asserts that the output matches {@code WITH_CONST_NULL_KEYS} in any order.
 */
@Test
@Category(NeedsRunner.class)
public void testConstantVoidKeys() {
  PCollection<String> words =
      p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

  PCollection<KV<Void, String>> keyed = words.apply(WithKeys.<Void, String>of((Void) null));

  PAssert.that(keyed).containsInAnyOrder(WITH_CONST_NULL_KEYS);
  p.run();
}
/**
 * Keys every element with the empty string, groups by that key, drops the key, and flattens the
 * grouped iterables back into individual elements.
 *
 * @param input the collection to pass through the group-by-key
 * @return a collection of the elements emitted after grouping and flattening
 */
@Override
public PCollection<T> expand(PCollection<T> input) {
  return input
      .apply(WithKeys.<String, T>of(""))
      .apply(GroupByKey.<String, T>create())
      .apply(Values.<Iterable<T>>create())
      .apply(Flatten.<T>iterables());
}
}
/**
 * Keys the elements of {@code COLLECTION} with the constant integer {@code 100} and asserts that
 * the output matches {@code WITH_CONST_KEYS} in any order.
 */
@Test
@Category(NeedsRunner.class)
public void testConstantKeys() {
  PCollection<String> words =
      p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

  PCollection<KV<Integer, String>> keyed = words.apply(WithKeys.<Integer, String>of(100));

  PAssert.that(keyed).containsInAnyOrder(WITH_CONST_KEYS);
  p.run();
}
@Test @Category(NeedsRunner.class) public void testVoidValues() throws Exception { pipeline .apply(Create.of("hello")) .apply(WithKeys.of("k")) .apply(new VoidValues<String, String>() {}); // Make sure the pipeline runs pipeline.run(); }
@Test @Category(NeedsRunner.class) public void testVoidValues() throws Exception { pipeline .apply(Create.of("hello")) .apply(WithKeys.of("k")) .apply(new VoidValues<String, String>() {}); // Make sure the pipeline runs pipeline.run(); }
/**
 * Keys the elements of {@code COLLECTION} using a {@code LengthAsKey} function and asserts that
 * the output matches {@code WITH_KEYS} in any order.
 */
@Test
@Category(NeedsRunner.class)
public void testExtractKeys() {
  PCollection<String> words =
      p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

  LengthAsKey keyFn = new LengthAsKey();
  PCollection<KV<Integer, String>> keyed = words.apply(WithKeys.of(keyFn));

  PAssert.that(keyed).containsInAnyOrder(WITH_KEYS);
  p.run();
}
/**
 * Applies {@link WithKeys} with a method reference and no key type descriptor, and expects an
 * {@link IllegalStateException} reporting that no default coder can be returned for the step
 * named {@code ApplyKeysWithWithKeys}.
 */
@Test
@Category(NeedsRunner.class)
public void withLambdaAndNoTypeDescriptorShouldThrow() {
  PCollection<String> numerals = p.apply(Create.of("1234", "3210", "0", "-12"));

  numerals.apply("ApplyKeysWithWithKeys", WithKeys.of(Integer::valueOf));

  // The expectation is registered only after the transform has been applied, so it covers
  // p.run() and not the apply call above.
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Unable to return a default Coder for ApplyKeysWithWithKeys");

  p.run();
}
}
/**
 * Keys the elements of {@code COLLECTION} using a {@code LengthAsKey} function that is
 * additionally given an explicit {@link Integer} key type, and asserts that the output matches
 * {@code WITH_KEYS} in any order.
 */
@Test
@Category(NeedsRunner.class)
public void testWithKeysWithUnneededWithKeyTypeSucceeds() {
  PCollection<String> words =
      p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

  PCollection<KV<Integer, String>> keyed =
      words.apply(
          WithKeys.of(new LengthAsKey()).withKeyType(TypeDescriptor.of(Integer.class)));

  PAssert.that(keyed).containsInAnyOrder(WITH_KEYS);
  p.run();
}
/**
 * Initialises the mocks, derives a map view, a singleton view and an iterable view from one base
 * collection of four integers, and creates the {@code SideInputContainer} holding all three.
 */
@Before
public void setup() {
  MockitoAnnotations.initMocks(this);

  PCollection<Integer> baseCollection =
      pipeline.apply("forBaseCollection", Create.of(1, 2, 3, 4));

  // Map view: every element keyed with "foo".
  mapView =
      baseCollection.apply("forKeyTypes", WithKeys.of("foo")).apply("asMapView", View.asMap());
  // Singleton view: the global mean of the elements.
  singletonView =
      baseCollection.apply("forCombinedTypes", Mean.<Integer>globally().asSingletonView());
  // Iterable view over the raw elements.
  iterableView = baseCollection.apply("asIterableView", View.asIterable());

  container =
      SideInputContainer.create(
          context, ImmutableList.of(iterableView, mapView, singletonView));
}
@Override public PCollection<Iterable<ValueInSingleWindow<T>>> expand(PCollection<T> input) { WindowFn<?, ?> originalWindowFn = input.getWindowingStrategy().getWindowFn(); return input .apply(Reify.windows()) .apply( WithKeys.<Integer, ValueInSingleWindow<T>>of(0) .withKeyType(new TypeDescriptor<Integer>() {})) .apply( Window.into( new IdentityWindowFn<KV<Integer, ValueInSingleWindow<T>>>( originalWindowFn.windowCoder())) .triggering(Never.ever()) .withAllowedLateness(input.getWindowingStrategy().getAllowedLateness()) .discardingFiredPanes()) // all values have the same key so they all appear as a single output element .apply(GroupByKey.create()) .apply(Values.create()) .setWindowingStrategyInternal(input.getWindowingStrategy()); } }
/**
 * Applies {@link WithKeys} with {@code Integer::valueOf} and an explicit {@link Integer} key type
 * descriptor, and asserts that each numeral string is paired with its parsed integer value.
 */
@Test
@Category(NeedsRunner.class)
public void withLambdaAndTypeDescriptorShouldSucceed() {
  PCollection<String> numerals = p.apply(Create.of("1234", "3210", "0", "-12"));

  PCollection<KV<Integer, String>> parsed =
      numerals.apply(
          WithKeys.of((SerializableFunction<String, Integer>) Integer::valueOf)
              .withKeyType(TypeDescriptor.of(Integer.class)));

  PAssert.that(parsed)
      .containsInAnyOrder(
          KV.of(1234, "1234"), KV.of(0, "0"), KV.of(-12, "-12"), KV.of(3210, "3210"));

  p.run();
}
/**
 * Builds and runs a pipeline that reads the rows returned by the query in {@code options} from
 * BigQuery and writes them to the output table named in {@code options}, then blocks until the
 * pipeline has finished.
 *
 * <p>Standard SQL is enabled on the read when {@code options.getUsingStandardSql()} is true. When
 * {@code options.getReshuffle()} is true, the rows are keyed with {@code null}, passed through a
 * {@link Reshuffle}, and un-keyed again before the write.
 */
private void runBigQueryToTablePipeline() {
  Pipeline pipeline = Pipeline.create(options);

  BigQueryIO.Read queryRead = BigQueryIO.read().fromQuery(options.getQuery());
  if (options.getUsingStandardSql()) {
    queryRead = queryRead.usingStandardSql();
  }

  PCollection<TableRow> rows = pipeline.apply(queryRead);
  if (options.getReshuffle()) {
    rows =
        rows.apply(WithKeys.<Void, TableRow>of((Void) null))
            .setCoder(KvCoder.of(VoidCoder.of(), TableRowJsonCoder.of()))
            .apply(Reshuffle.<Void, TableRow>of())
            .apply(Values.<TableRow>create());
  }

  rows.apply(
      BigQueryIO.writeTableRows()
          .to(options.getOutput())
          .withSchema(options.getOutputSchema())
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));

  pipeline.run().waitUntilFinish();
}
/**
 * Feeds three timestamped longs through a {@link TestStream}, keys them with the empty string,
 * windows them into ten-minute fixed windows, applies {@link Reshuffle}, and asserts that the
 * values 0, 1 and 2 all come out.
 *
 * <p>The stream first advances the watermark to 48 days past the epoch, then adds elements
 * stamped at the epoch, at epoch plus 48 days, and at 48 days before
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE}, and finally advances the watermark to infinity.
 */
@Test
@Category({ValidatesRunner.class, UsesTestStream.class})
public void testReshuffleWithTimestampsStreaming() {
  Instant epoch = new Instant(0L);
  Duration offset = Duration.standardDays(48L);

  TestStream<Long> testStream =
      TestStream.create(VarLongCoder.of())
          .advanceWatermarkTo(epoch.plus(offset))
          .addElements(
              TimestampedValue.of(0L, epoch),
              TimestampedValue.of(1L, epoch.plus(offset)),
              TimestampedValue.of(2L, BoundedWindow.TIMESTAMP_MAX_VALUE.minus(offset)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Long>> windowed =
      pipeline
          .apply(testStream)
          .apply(WithKeys.of(""))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(10L))));

  PCollection<KV<String, Long>> afterReshuffle = windowed.apply(Reshuffle.of());

  PAssert.that(afterReshuffle.apply(Values.create())).containsInAnyOrder(0L, 1L, 2L);

  pipeline.run();
}
}