/**
 * Applies {@code Sample.any(limit)} to {@code lines} and checks the result with
 * {@link VerifyAnySample}. Input lines must be distinct.
 */
void runPickAnyTest(final List<String> lines, int limit) {
  checkArgument(new HashSet<>(lines).size() == lines.size(), "Duplicates are unsupported.");

  // Build the source and sample it in one chain; the intermediate PCollection is not reused.
  PCollection<String> sampled =
      p.apply(Create.of(lines).withCoder(StringUtf8Coder.of())).apply(Sample.any(limit));

  PAssert.that(sampled).satisfies(new VerifyAnySample(lines, limit));
  p.run();
}
/** Runs {@link TestDoFn} over a small non-empty input and checks its output. */
@Test
@Category(ValidatesRunner.class)
public void testParDo() {
  List<Integer> inputs = Arrays.asList(3, -42, 666);

  PCollection<String> results = pipeline.apply(Create.of(inputs)).apply(ParDo.of(new TestDoFn()));

  PAssert.that(results).satisfies(ParDoTest.HasExpectedOutput.forInput(inputs));
  pipeline.run();
}
/** Runs {@link TestDoFn} over an empty input; output must match the (empty) expectation. */
@Test
@Category(ValidatesRunner.class)
public void testParDoEmpty() {
  List<Integer> inputs = Arrays.asList();

  // An explicit coder is required: an empty Create carries no element to infer a coder from.
  PCollection<String> results =
      pipeline
          .apply(Create.of(inputs).withCoder(VarIntCoder.of()))
          .apply("TestDoFn", ParDo.of(new TestDoFn()));

  PAssert.that(results).satisfies(ParDoTest.HasExpectedOutput.forInput(inputs));
  pipeline.run();
}
@Test @Category(NeedsRunner.class) public void testSampleMultiplicity() { PCollection<Integer> input = pipeline.apply( Create.of(ImmutableList.copyOf(REPEATED_DATA)).withCoder(BigEndianIntegerCoder.of())); // At least one value must be selected with multiplicity. PCollection<Iterable<Integer>> output = input.apply(Sample.fixedSizeGlobally(6)); PAssert.thatSingletonIterable(output).satisfies(new VerifyCorrectSample<>(6, REPEATED_DATA)); pipeline.run(); }
/** Exercises COVAR_SAMP, an aggregator that takes two arguments. */
@Test
public void testSampleVariance() {
  // a special case of aggregator with two parameters
  String sql = "SELECT COVAR_SAMP(f_int1, f_int2) FROM PCOLLECTION GROUP BY f_int3";
  // COVAR_SAMP(f_int1, f_int2) =
  //   (SUM(f_int1 * f_int2) - SUM(f_int1) * SUM(f_int2) / n) / (n-1) =
  //   (SUM([1 * 5, 2 * 1, 3 * 2]) - SUM([1, 2, 3]) * SUM([5, 1, 2]) / 3) / 2 =
  //   -1.5
  // NOTE(review): the exact math above gives -1.5, yet the assertion expects -1 —
  // presumably the integer operand type makes the result an integer truncated toward
  // zero; confirm against the SQL dialect's COVAR_SAMP return-type rules.
  PAssert.that(boundedInput.apply(SqlTransform.query(sql))).satisfies(matchesScalar(-1));
  pipeline.run();
}
/** Counts NULL rows by summing a CASE expression that maps NULL to 1 and non-NULL to 0. */
@Test
public void testCountNulls() {
  String query =
      "SELECT SUM(CASE f_int1 IS NULL WHEN TRUE THEN 1 ELSE 0 END) "
          + "FROM PCOLLECTION GROUP BY f_int3";

  PAssert.that(boundedInput.apply(SqlTransform.query(query))).satisfies(matchesScalar(4));
  pipeline.run();
}
/** Emulates COUNT of non-NULL rows via SUM over a CASE expression. */
@Test
public void testCountThroughSum() {
  String query =
      "SELECT SUM(CASE f_int1 IS NOT NULL WHEN TRUE THEN 1 ELSE 0 END) "
          + "FROM PCOLLECTION GROUP BY f_int3";

  PAssert.that(boundedInput.apply(SqlTransform.query(query))).satisfies(matchesScalar(3));
  pipeline.run();
}
/** VAR_SAMP over an integer column. */
@Test
public void testSampleVarianceInt() {
  PAssert.that(
          boundedInput.apply(
              SqlTransform.query("SELECT VAR_SAMP(f_int) FROM PCOLLECTION GROUP BY f_int2")))
      .satisfies(matchesScalar(30));
  pipeline.run().waitUntilFinish();
}
}
/** VAR_POP over a double column; floating-point result compared within PRECISION. */
@Test
public void testPopulationVarianceDouble() {
  String query = "SELECT VAR_POP(f_double) FROM PCOLLECTION GROUP BY f_int2";

  PAssert.that(boundedInput.apply(SqlTransform.query(query)))
      .satisfies(matchesScalar(26.40816326, PRECISION));
  pipeline.run().waitUntilFinish();
}
/** Test reading metadata by reading back the id of a document after writing it. */
void testReadWithMetadata() throws Exception {
  // Integration tests provision their own documents; unit tests seed one here.
  if (!useAsITests) {
    ElasticSearchIOTestUtils.insertTestDocuments(connectionConfiguration, 1, restClient);
  }

  PCollection<String> docs =
      pipeline.apply(
          ElasticsearchIO.read()
              .withConnectionConfiguration(connectionConfiguration)
              .withMetadata());

  // With metadata enabled, the document id must appear in the returned JSON.
  PAssert.that(docs).satisfies(new ContainsStringCheckerFn("\"_id\":\"0\""));
  pipeline.run();
}
/** Sample.any over an empty, windowed input must yield an empty sample. */
@Test
@Category(NeedsRunner.class)
public void testSampleAnyEmpty() {
  PCollection<Integer> empty = pipeline.apply(Create.empty(BigEndianIntegerCoder.of()));

  PCollection<Integer> sampled =
      empty
          .apply(Window.into(FixedWindows.of(Duration.standardSeconds(3))))
          .apply(Sample.any(2));

  PAssert.that(sampled).satisfies(new VerifyCorrectSample<>(0, EMPTY));
  pipeline.run();
}
/** COVAR_SAMP over two integer columns (despite the method name, this is covariance). */
@Test
public void testSampleVarianceInt() {
  String query = "SELECT COVAR_SAMP(f_int1, f_int2) FROM PCOLLECTION GROUP BY f_int3";

  PAssert.that(boundedInput.apply(SqlTransform.query(query))).satisfies(matchesScalar(2));
  pipeline.run().waitUntilFinish();
}
}
/** COVAR_SAMP over two double columns; result compared within PRECISION. */
@Test
public void testSampleVarianceDouble() {
  String query = "SELECT COVAR_SAMP(f_double1, f_double2) FROM PCOLLECTION GROUP BY f_int3";

  PAssert.that(boundedInput.apply(SqlTransform.query(query)))
      .satisfies(matchesScalar(2.3, PRECISION));
  pipeline.run().waitUntilFinish();
}
/** COVAR_POP over two double columns; result compared within PRECISION. */
@Test
public void testPopulationVarianceDouble() {
  String query = "SELECT COVAR_POP(f_double1, f_double2) FROM PCOLLECTION GROUP BY f_int3";

  PAssert.that(boundedInput.apply(SqlTransform.query(query)))
      .satisfies(matchesScalar(1.84, PRECISION));
  pipeline.run().waitUntilFinish();
}
/** A fixed-size sample of zero elements must still emit exactly one (empty) iterable. */
@Test
@Category(NeedsRunner.class)
public void testSampleZero() {
  PCollection<Integer> source =
      pipeline.apply(Create.of(ImmutableList.copyOf(DATA)).withCoder(BigEndianIntegerCoder.of()));

  PAssert.thatSingletonIterable(source.apply(Sample.fixedSizeGlobally(0)))
      .satisfies(new VerifyCorrectSample<>(0, DATA));
  pipeline.run();
}
/** Fixed-size sampling of fewer elements than the input contains. */
@Test
@Category(NeedsRunner.class)
public void testSample() {
  PCollection<Integer> source =
      pipeline.apply(Create.of(ImmutableList.copyOf(DATA)).withCoder(BigEndianIntegerCoder.of()));

  PAssert.thatSingletonIterable(source.apply(Sample.fixedSizeGlobally(3)))
      .satisfies(new VerifyCorrectSample<>(3, DATA));
  pipeline.run();
}
/** COVAR_SAMP with the argument order reversed — covariance is symmetric. */
@Test
public void testSampleVarianceReverse() {
  String query = "SELECT COVAR_SAMP(f_int2, f_int1) FROM PCOLLECTION GROUP BY f_int3";

  PAssert.that(boundedInput.apply(SqlTransform.query(query))).satisfies(matchesScalar(-1));
  pipeline.run();
}
}
/** SUM over an integer column, grouped. */
@Test
public void testSum() {
  PAssert.that(
          boundedInput.apply(
              SqlTransform.query("SELECT SUM(f_int1) FROM PCOLLECTION GROUP BY f_int3")))
      .satisfies(matchesScalar(6));
  pipeline.run();
}
/** COUNT over an integer column, grouped; note the expected value is a long. */
@Test
public void testCount() {
  PAssert.that(
          boundedInput.apply(
              SqlTransform.query("SELECT COUNT(f_int1) FROM PCOLLECTION GROUP BY f_int3")))
      .satisfies(matchesScalar(3L));
  pipeline.run();
}
/** VAR_SAMP over a double column; floating-point result compared within PRECISION. */
@Test
public void testSampleVarianceDouble() {
  String query = "SELECT VAR_SAMP(f_double) FROM PCOLLECTION GROUP BY f_int2";

  PAssert.that(boundedInput.apply(SqlTransform.query(query)))
      .satisfies(matchesScalar(30.80952381, PRECISION));
  pipeline.run().waitUntilFinish();
}