@Override public PCollection<T> expand(PCollection<ValueWithRecordId<T>> input) { return input .apply( WithKeys.of( (ValueWithRecordId<T> value) -> Arrays.hashCode(value.getId()) % NUM_RESHARD_KEYS) .withKeyType(TypeDescriptors.integers())) // Reshuffle will dedup based on ids in ValueWithRecordId by passing the data through // WindmillSink. .apply(Reshuffle.of()) .apply( "StripIds", ParDo.of( new DoFn<KV<Integer, ValueWithRecordId<T>>, T>() { @ProcessElement public void processElement(ProcessContext c) { c.output(c.element().getValue().getValue()); } })); } }
/**
 * Applies {@code WithKeys} built from a {@code LengthAsKey} function with an explicit
 * {@code Integer} key type and checks that the output matches {@code WITH_KEYS}.
 */
@Test
@Category(NeedsRunner.class)
public void testWithKeysWithUnneededWithKeyTypeSucceeds() {
  // Explicit key type supplied to withKeyType below.
  TypeDescriptor<Integer> integerKeyType = TypeDescriptor.of(Integer.class);

  PCollection<String> strings =
      p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

  PCollection<KV<Integer, String>> keyedStrings =
      strings.apply(WithKeys.of(new LengthAsKey()).withKeyType(integerKeyType));

  PAssert.that(keyedStrings).containsInAnyOrder(WITH_KEYS);

  p.run();
}
WithKeys<IdT, T> withKeys = WithKeys.of(fn); if (representativeType != null) { withKeys = withKeys.withKeyType(representativeType);
@Override public PCollection<Iterable<ValueInSingleWindow<T>>> expand(PCollection<T> input) { WindowFn<?, ?> originalWindowFn = input.getWindowingStrategy().getWindowFn(); return input .apply(Reify.windows()) .apply( WithKeys.<Integer, ValueInSingleWindow<T>>of(0) .withKeyType(new TypeDescriptor<Integer>() {})) .apply( Window.into( new IdentityWindowFn<KV<Integer, ValueInSingleWindow<T>>>( originalWindowFn.windowCoder())) .triggering(Never.ever()) .withAllowedLateness(input.getWindowingStrategy().getAllowedLateness()) .discardingFiredPanes()) // all values have the same key so they all appear as a single output element .apply(GroupByKey.create()) .apply(Values.create()) .setWindowingStrategyInternal(input.getWindowingStrategy()); } }
/**
 * Keys numeric strings by their parsed {@code Integer} value using a method reference plus an
 * explicit key type descriptor, and checks the resulting key/value pairs.
 */
@Test
@Category(NeedsRunner.class)
public void withLambdaAndTypeDescriptorShouldSucceed() {
  // Typed local gives the method reference the same target type the inline cast provided.
  SerializableFunction<String, Integer> parseKey = Integer::valueOf;

  PCollection<String> numerals = p.apply(Create.of("1234", "3210", "0", "-12"));

  PCollection<KV<Integer, String>> keyedNumerals =
      numerals.apply(WithKeys.of(parseKey).withKeyType(TypeDescriptor.of(Integer.class)));

  PAssert.that(keyedNumerals)
      .containsInAnyOrder(
          KV.of(1234, "1234"), KV.of(0, "0"), KV.of(-12, "-12"), KV.of(3210, "3210"));

  p.run();
}
.apply( WithKeys.<String, String>of(input12 -> input12) .withKeyType(TypeDescriptors.strings())) .apply("ReifyOriginalTimestamps", Reify.timestampsInValue());
/**
 * Windows a generated sequence into one-minute fixed windows that fire once at the end of the
 * window, groups it under a single null key, gathers all panes, and fails if any gathered
 * iterable holds more than one pane.
 */
@Test
@Category(NeedsRunner.class)
public void singlePaneSingleReifiedPane() {
  // One-minute fixed windows, fired at the watermark, no lateness, discarding fired panes.
  Window<Long> oneMinuteWindows =
      Window.<Long>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(AfterWatermark.pastEndOfWindow())
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes();

  PCollection<Iterable<ValueInSingleWindow<Iterable<Long>>>> gatheredPanes =
      p.apply(GenerateSequence.from(0).to(20000))
          .apply(WithTimestamps.of(value -> new Instant(value * 10)))
          .apply(oneMinuteWindows)
          .apply(WithKeys.<Void, Long>of((Void) null).withKeyType(new TypeDescriptor<Void>() {}))
          .apply(GroupByKey.create())
          .apply(Values.create())
          .apply(GatherAllPanes.globally());

  PAssert.that(gatheredPanes)
      .satisfies(
          allPanes -> {
            for (Iterable<ValueInSingleWindow<Iterable<Long>>> panesForWindow : allPanes) {
              if (Iterables.size(panesForWindow) <= 1) {
                continue;
              }
              // More than one pane was gathered for this window.
              fail("Expected all windows to have exactly one pane, got " + panesForWindow);
              return null;
            }
            return null;
          });

  p.run();
}
.withAllowedLateness(Duration.ZERO) .discardingFiredPanes()) .apply(WithKeys.<Void, Long>of((Void) null).withKeyType(new TypeDescriptor<Void>() {})) .apply(GroupByKey.create()) .apply(Values.create())