@Test
public void testDisplayData() {
  // Both anonymous sources publish a "foo" -> "bar" item so we can verify
  // that Read forwards the wrapped source's display data.
  SerializableBoundedSource sourceWithData =
      new SerializableBoundedSource() {
        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo", "bar"));
        }
      };
  SerializableUnboundedSource unboundedWithData =
      new SerializableUnboundedSource() {
        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo", "bar"));
        }
      };
  Duration readLimit = Duration.standardMinutes(2345);

  Read.Bounded<String> boundedRead = Read.from(sourceWithData);
  BoundedReadFromUnboundedSource<String> unboundedRead =
      Read.from(unboundedWithData).withMaxNumRecords(1234).withMaxReadTime(readLimit);

  // The bounded read reports the source class and includes the source's own items.
  DisplayData boundedData = DisplayData.from(boundedRead);
  assertThat(boundedData, hasDisplayItem("source", sourceWithData.getClass()));
  assertThat(boundedData, includesDisplayDataFor("source", sourceWithData));

  // The bounded-from-unbounded read additionally reports its record/time limits.
  DisplayData unboundedData = DisplayData.from(unboundedRead);
  assertThat(unboundedData, hasDisplayItem("source", unboundedWithData.getClass()));
  assertThat(unboundedData, includesDisplayDataFor("source", unboundedWithData));
  assertThat(unboundedData, hasDisplayItem("maxRecords", 1234));
  assertThat(unboundedData, hasDisplayItem("maxReadTime", readLimit));
}
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testUnboundedSourceSplits() throws Exception {
  long totalElements = 1000;
  int desiredSplits = 10;

  UnboundedSource<Long, ?> root = CountingSource.unbounded();
  List<? extends UnboundedSource<Long, ?>> shards = root.split(desiredSplits, p.getOptions());
  assertEquals("Expected exact splitting", desiredSplits, shards.size());

  // The element count must divide evenly so every shard reads the same amount.
  long perShard = totalElements / desiredSplits;
  assertEquals("Expected even splits", totalElements, perShard * desiredSplits);

  // Read a bounded slice from each shard, then flatten all slices back together.
  PCollectionList<Long> shardOutputs = PCollectionList.empty(p);
  int index = 0;
  for (UnboundedSource<Long, ?> shard : shards) {
    shardOutputs =
        shardOutputs.and(p.apply("split" + index, Read.from(shard).withMaxNumRecords(perShard)));
    index++;
  }
  PCollection<Long> merged = shardOutputs.apply(Flatten.pCollections());

  addCountingAsserts(merged, totalElements);
  p.run();
}
@Test @Category(NeedsRunner.class) public void testUnboundedSourceRateSplits() throws Exception { int elementsPerPeriod = 10; Duration period = Duration.millis(5); long numElements = 1000; int numSplits = 10; UnboundedCountingSource initial = CountingSource.createUnboundedFrom(0).withRate(elementsPerPeriod, period); List<? extends UnboundedSource<Long, ?>> splits = initial.split(numSplits, p.getOptions()); assertEquals("Expected exact splitting", numSplits, splits.size()); long elementsPerSplit = numElements / numSplits; assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits); PCollectionList<Long> pcollections = PCollectionList.empty(p); for (int i = 0; i < splits.size(); ++i) { pcollections = pcollections.and( p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit))); } PCollection<Long> input = pcollections.apply(Flatten.pCollections()); addCountingAsserts(input, numElements); Instant startTime = Instant.now(); p.run(); Instant endTime = Instant.now(); // 500 ms if the readers are all initialized in parallel; 5000 ms if they are evaluated serially long expectedMinimumMillis = (numElements * period.getMillis()) / elementsPerPeriod; assertThat(expectedMinimumMillis, lessThan(endTime.getMillis() - startTime.getMillis())); }
return input.apply(readUnbounded); } else { return input.apply(readUnbounded.withMaxNumRecords(getTo() - getFrom()));
@Test public void testUnboundedSourceSplits() throws Exception { int numElements = 1000; int numSplits = 10; // Coders must be specified explicitly here due to the way the transform // is used in the test. UnboundedSource<KafkaRecord<Integer, Long>, ?> initial = mkKafkaReadTransform(numElements, null) .withKeyDeserializerAndCoder(IntegerDeserializer.class, BigEndianIntegerCoder.of()) .withValueDeserializerAndCoder(LongDeserializer.class, BigEndianLongCoder.of()) .makeSource(); List<? extends UnboundedSource<KafkaRecord<Integer, Long>, ?>> splits = initial.split(numSplits, p.getOptions()); assertEquals("Expected exact splitting", numSplits, splits.size()); long elementsPerSplit = numElements / numSplits; assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits); PCollectionList<Long> pcollections = PCollectionList.empty(p); for (int i = 0; i < splits.size(); ++i) { pcollections = pcollections.and( p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit)) .apply("Remove Metadata " + i, ParDo.of(new RemoveKafkaMetadata<>())) .apply("collection " + i, Values.create())); } PCollection<Long> input = pcollections.apply(Flatten.pCollections()); addCountingAsserts(input, numElements); p.run(); }
@Test @Category(NeedsRunner.class) public void testUnboundedSourceWithRate() { Duration period = Duration.millis(5); long numElements = 1000L; PCollection<Long> input = p.apply( Read.from( CountingSource.createUnboundedFrom(0) .withTimestampFn(new ValueAsTimestampFn()) .withRate(1, period)) .withMaxNumRecords(numElements)); addCountingAsserts(input, numElements); PCollection<Long> diffs = input .apply("TimestampDiff", ParDo.of(new ElementValueDiff())) .apply("DistinctTimestamps", Distinct.create()); // This assert also confirms that diffs only has one unique value. PAssert.thatSingleton(diffs).isEqualTo(0L); Instant started = Instant.now(); p.run(); Instant finished = Instant.now(); Duration expectedDuration = period.multipliedBy((int) numElements); assertThat(started.plus(expectedDuration).isBefore(finished), is(true)); }
@Test @Category(NeedsRunner.class) public void testBoundedToUnboundedSourceAdapter() throws Exception { long numElements = 100; BoundedSource<Long> boundedSource = CountingSource.upTo(numElements); UnboundedSource<Long, Checkpoint<Long>> unboundedSource = new BoundedToUnboundedSourceAdapter<>(boundedSource); PCollection<Long> output = p.apply(Read.from(unboundedSource).withMaxNumRecords(numElements)); // Count == numElements PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numElements); // Unique count == numElements PAssert.thatSingleton(output.apply(Distinct.create()).apply("UniqueCount", Count.globally())) .isEqualTo(numElements); // Min == 0 PAssert.thatSingleton(output.apply("Min", Min.globally())).isEqualTo(0L); // Max == numElements-1 PAssert.thatSingleton(output.apply("Max", Max.globally())).isEqualTo(numElements - 1); p.run(); }
@Test @Category(NeedsRunner.class) public void testUnboundedSourceTimestamps() { long numElements = 1000; PCollection<Long> input = p.apply( Read.from(CountingSource.unboundedWithTimestampFn(new ValueAsTimestampFn())) .withMaxNumRecords(numElements)); addCountingAsserts(input, numElements); PCollection<Long> diffs = input .apply("TimestampDiff", ParDo.of(new ElementValueDiff())) .apply("DistinctTimestamps", Distinct.create()); // This assert also confirms that diffs only has one unique value. PAssert.thatSingleton(diffs).isEqualTo(0L); p.run(); }
private void test(boolean dedup, boolean timeBound) throws Exception { TestCountingSource source = new TestCountingSource(Integer.MAX_VALUE).withoutSplitting(); if (dedup) { source = source.withDedup(); } PCollection<KV<Integer, Integer>> output = timeBound ? p.apply(Read.from(source).withMaxReadTime(Duration.millis(200))) : p.apply(Read.from(source).withMaxNumRecords(NUM_RECORDS)); // Because some of the NUM_RECORDS elements read are dupes, the final output // will only have output from 0 to n where n < NUM_RECORDS. PAssert.that(output).satisfies(new Checker(dedup, timeBound)); p.run(); }
@Test
public void testForwardsDisplayData() {
  // A source whose display data contains a recognizable "foo" -> "bar" item.
  TestCountingSource source =
      new TestCountingSource(1234) {
        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo", "bar"));
        }
      };
  BoundedReadFromUnboundedSource<KV<Integer, Integer>> read =
      Read.from(source).withMaxNumRecords(5);
  // The bounded read must surface the wrapped source's display data.
  assertThat(DisplayData.from(read), includesDisplayDataFor("source", source));
}
@Test
@Category(NeedsRunner.class)
public void testUnboundedSource() {
  long totalElements = 1000;

  // Bounded read of the first totalElements values from the unbounded counter.
  PCollection<Long> values =
      p.apply(Read.from(CountingSource.unbounded()).withMaxNumRecords(totalElements));

  addCountingAsserts(values, totalElements);
  p.run();
}