/**
 * Supplies the rows fed to this parameterized test.
 *
 * <p>Every row has four columns, in this order:
 * left-windowing, right-windowing, join-windowing, expected-failure.
 *
 * @return fixed-size list of parameter rows
 */
@Parameterized.Parameters
public static List<Object[]> testParameters() {
  return Arrays.asList(new Object[][] {
    // no join windowing; inputs are unwindowed or globally windowed -> no failure expected
    {null, null, null, false},
    {GlobalWindowing.get(), GlobalWindowing.get(), null, false},
    {GlobalWindowing.get(), null, null, false},
    {null, GlobalWindowing.get(), null, false},

    // no join windowing; at least one input carries time windowing -> failure expected
    {Time.of(Duration.ofMinutes(1)), null, null, true},
    {null, Time.of(Duration.ofMinutes(1)), null, true},
    {Time.of(Duration.ofMinutes(1)), Time.of(Duration.ofMinutes(1)), null, true},
    {GlobalWindowing.get(), Time.of(Duration.ofMinutes(1)), null, true},
    {Time.of(Duration.ofMinutes(1)), GlobalWindowing.get(), null, true},

    // explicit join windowing supplied -> no failure expected
    {Time.of(Duration.ofMinutes(1)), null, Time.of(Duration.ofHours(1)), false},
    {GlobalWindowing.get(), Time.of(Duration.ofMinutes(1)), Time.of(Duration.ofMinutes(1)), false},
    {null, Time.of(Duration.ofMinutes(1)), GlobalWindowing.get(), false},
    {Time.of(Duration.ofMinutes(1)), null, Count.of(10), false},
    {Time.of(Duration.ofMinutes(1)), Count.of(11), GlobalWindowing.get(), false},
    {Time.of(Duration.ofMinutes(1)), Count.of(11), Time.of(Duration.ofMinutes(1)), false}
  });
}
/**
 * Builds the parameter table for this parameterized test.
 *
 * <p>Column layout of each row:
 * <ol>
 *   <li>left-windowing</li>
 *   <li>right-windowing</li>
 *   <li>join-windowing</li>
 *   <li>expected-failure</li>
 * </ol>
 *
 * @return the table rows wrapped as a list
 */
@Parameterized.Parameters
public static List<Object[]> testParameters() {
  final Object[][] rows = new Object[][] {
    // --- join-windowing absent, expected-failure == false ---
    new Object[] {null, null, null, false},
    new Object[] {GlobalWindowing.get(), GlobalWindowing.get(), null, false},
    new Object[] {GlobalWindowing.get(), null, null, false},
    new Object[] {null, GlobalWindowing.get(), null, false},
    // --- join-windowing absent, expected-failure == true ---
    new Object[] {Time.of(Duration.ofMinutes(1)), null, null, true},
    new Object[] {null, Time.of(Duration.ofMinutes(1)), null, true},
    new Object[] {Time.of(Duration.ofMinutes(1)), Time.of(Duration.ofMinutes(1)), null, true},
    new Object[] {GlobalWindowing.get(), Time.of(Duration.ofMinutes(1)), null, true},
    new Object[] {Time.of(Duration.ofMinutes(1)), GlobalWindowing.get(), null, true},
    // --- join-windowing present, expected-failure == false ---
    new Object[] {Time.of(Duration.ofMinutes(1)), null, Time.of(Duration.ofHours(1)), false},
    new Object[] {
      GlobalWindowing.get(), Time.of(Duration.ofMinutes(1)), Time.of(Duration.ofMinutes(1)), false
    },
    new Object[] {null, Time.of(Duration.ofMinutes(1)), GlobalWindowing.get(), false},
    new Object[] {Time.of(Duration.ofMinutes(1)), null, Count.of(10), false},
    new Object[] {Time.of(Duration.ofMinutes(1)), Count.of(11), GlobalWindowing.get(), false},
    new Object[] {
      Time.of(Duration.ofMinutes(1)), Count.of(11), Time.of(Duration.ofMinutes(1)), false
    }
  };
  return Arrays.asList(rows);
}
/**
 * Builds the two-stage flow under test.
 *
 * <p>Stage one keys the integers by {@code e % 2}, sorts the values of each key with
 * {@code Integer::compare}, windows them by {@code Count.of(3)} and collects each
 * window into a list. Stage two gathers all resulting (key, list) pairs in a single
 * global window, ordered by key and then by the first element of the list.
 *
 * @param input the integers to process
 * @return dataset holding the sorted list of (key, values) pairs
 */
@Override
protected Dataset<List<Pair<Integer, List<Integer>>>> getOutput(Dataset<Integer> input) {
  // stage one: per-key, count-windowed lists of sorted values
  Dataset<Pair<Integer, List<Integer>>> perKeyWindows =
      ReduceByKey.of(input)
          .keyBy(e -> e % 2, Integer.class)
          .valueBy(e -> e)
          .reduceBy(s -> s.collect(Collectors.toList()))
          .withSortedValues(Integer::compare)
          .windowBy(Count.of(3))
          .output();

  // stage two: collect everything in one global window with a deterministic order
  return ReduceWindow.of(perKeyWindows)
      .reduceBy(s -> s.collect(Collectors.toList()))
      .withSortedValues((left, right) -> {
        int byKey = left.getFirst().compareTo(right.getFirst());
        if (byKey != 0) {
          return byKey;
        }
        // same key: break the tie on the head of each value list
        int leftHead = left.getSecond().get(0);
        int rightHead = right.getSecond().get(0);
        return Integer.compare(leftHead, rightHead);
      })
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Produces the dataset this test case verifies.
 *
 * <p>The input is first reduced by key ({@code e % 2}) into lists of values sorted via
 * {@code Integer::compare} within {@code Count.of(3)} windows. Those (key, list) pairs
 * are then reduced within the global window into one list, sorted by key and, for
 * equal keys, by the first value of each list.
 *
 * @param input source integers
 * @return dataset of the ordered (key, values) pairs
 */
@Override
protected Dataset<List<Pair<Integer, List<Integer>>>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<Integer, List<Integer>>> countWindowed = ReduceByKey.of(input)
      .keyBy(e -> e % 2, Integer.class)
      .valueBy(e -> e)
      .reduceBy(s -> s.collect(Collectors.toList()))
      .withSortedValues(Integer::compare)
      .windowBy(Count.of(3))
      .output();

  return ReduceWindow.of(countWindowed)
      .reduceBy(s -> s.collect(Collectors.toList()))
      .withSortedValues((a, b) -> {
        int keyOrder = a.getFirst().compareTo(b.getFirst());
        // fall back to the first list element only when the keys are equal
        return keyOrder != 0
            ? keyOrder
            : Integer.compare(a.getSecond().get(0), b.getSecond().get(0));
      })
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Runs {@code testJoin} over two small inputs under global windowing.
 *
 * <p>The expected strings are consistent with pairing every left and right element that
 * share a key and adding their numeric parts (e.g. "one 1" with "one 10" gives "one, 11").
 * Key "three" occurs on the right side only and has no expected output here.
 *
 * <p>NOTE(review): the meaning of the three boolean flags is defined by {@code testJoin},
 * which is not visible from this method - confirm there.
 */
@Test
public void testJoinOnBatch() throws Exception {
  testJoin(
      false,
      GlobalWindowing.get(),
      true,
      // left input
      asList(
          I.of("one 1"),
          I.of("two 1"),
          I.of("one 22"),
          I.of("one 44")),
      // right input
      asList(
          I.of("one 10"),
          I.of("two 20"),
          I.of("one 33"),
          I.of("three 55"),
          I.of("one 66")),
      // expected output
      asList(
          "one, 11", "one, 34", "one, 67",
          "one, 32", "one, 55", "one, 88",
          "one, 54", "one, 77", "one, 110",
          "two, 21"),
      false);
}
/**
 * Runs {@code testJoin} with its first flag set to {@code true} over two small inputs
 * under global windowing.
 *
 * <p>Besides one entry per left/right pair sharing a key (values consistent with the sum
 * of the numeric parts), the expected output also contains "three, 55", whose key is
 * present on the right side only.
 *
 * <p>NOTE(review): the meaning of the three boolean flags is defined by {@code testJoin},
 * which is not visible from this method - confirm there.
 */
@Test
public void testOuterJoinOnBatch() throws Exception {
  testJoin(
      true,
      GlobalWindowing.get(),
      true,
      // left input
      asList(
          I.of("one 1"),
          I.of("two 1"),
          I.of("one 22"),
          I.of("one 44")),
      // right input
      asList(
          I.of("one 10"),
          I.of("two 20"),
          I.of("one 33"),
          I.of("three 55"),
          I.of("one 66")),
      // expected output
      asList(
          "one, 11", "one, 34", "one, 67",
          "one, 32", "one, 55", "one, 88",
          "one, 54", "one, 77", "one, 110",
          "two, 21",
          "three, 55"),
      false);
}
/**
 * Runs {@code testJoin} under global windowing with a four-element left input and a
 * five-element right input, passing {@code true} as the final flag.
 *
 * <p>The expected strings are consistent with pairing every left and right element that
 * share a key and adding their numeric parts.
 *
 * <p>NOTE(review): the meaning of the three boolean flags is defined by {@code testJoin},
 * which is not visible from this method - confirm there.
 */
@Test
public void testOneArmLongerJoin() throws Exception {
  testJoin(
      false,
      GlobalWindowing.get(),
      true,
      // left input (4 elements)
      asList(
          I.of("one 1"),
          I.of("two 1"),
          I.of("one 22"),
          I.of("one 44")),
      // right input (5 elements)
      asList(
          I.of("one 10"),
          I.of("two 20"),
          I.of("one 33"),
          I.of("three 55"),
          I.of("one 66")),
      // expected output
      asList(
          "one, 11", "one, 34", "one, 67",
          "one, 32", "one, 55", "one, 88",
          "one, 54", "one, 77", "one, 110",
          "two, 21"),
      true);
}
} // closes the enclosing class, whose header lies outside this view
@Test public void testWrite() throws IOException { List<String> data = Arrays.asList("a", "b", "bbb", "bbbb", "c", "xy"); List<String> inputs = data.stream() .sorted(Comparator.reverseOrder()) .collect(Collectors.toList()); File tmp = folder.newFolder(); tmp.deleteOnExit(); ListDataSource<String> source = ListDataSource.unbounded(inputs); Dataset<String> input = flow.createInput(source); MapElements.of(input) .using(HBaseTestCase::kv) .output() .persist(traceLoading(HFileSink.newBuilder() .withTable(table.getNameAsString()) .withConfiguration(cluster.getConfiguration()) .withOutputPath(new Path("file://" + tmp.getPath())) .windowBy(GlobalWindowing.get(), w -> "global") .build())); new LocalExecutor().submit(flow).join(); // we should not have success marker assertFalse(new File(tmp, "_SUCCESS").exists()); assertEquals(Collections.singletonList("file:" + tmp.getPath() + "/global"), loadedPaths); // validate that the bulk load directory was deleted assertFalse(new File(tmp + "/global", "t").exists()); // validate that the data have been written to hbase data.forEach(s -> assertArrayEquals(b(s), get(s))); }
/**
 * Sums the input integers per parity key in the global window.
 *
 * <p>Elements are keyed by {@code e % 2} and folded starting from 0. For every folded
 * element the fold also increments the "evens" counter when the element is even and the
 * "odds" counter otherwise.
 *
 * @param input the integers to aggregate
 * @return dataset of (key, sum) pairs
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(e -> e % 2)
      .valueBy(e -> e)
      .reduceBy(
          Fold.of(0, (Integer acc, Integer element, Collector<Integer> collector) -> {
            // account the element under the counter matching its parity
            collector.getCounter(element % 2 == 0 ? "evens" : "odds").increment();
            collector.collect(acc + element);
          }))
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Builds a global-window reduce-by-key that adds up the values of each key.
 *
 * <p>The key is {@code e % 2}; the fold starts at 0 and emits the running sum. As a side
 * channel, each folded element bumps either the "evens" or the "odds" counter depending
 * on whether the element itself is divisible by two.
 *
 * @param input integers to reduce
 * @return dataset pairing each key with the sum of its values
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(e -> e % 2)
      .valueBy(e -> e)
      .reduceBy(Fold.of(0, (Integer sum, Integer next, Collector<Integer> out) -> {
        boolean isEven = next % 2 == 0;
        String counterName;
        if (isEven) {
          counterName = "evens";
        } else {
          counterName = "odds";
        }
        out.getCounter(counterName).increment();
        out.collect(sum + next);
      }))
      .windowBy(GlobalWindowing.get())
      .output();
}
.withOutputPath(new Path(tmpDir)) .applyIf(ds.isBounded(), b -> b.windowBy(GlobalWindowing.get(), w -> ""), b -> b.windowBy( Time.of(Duration.ofMinutes(5)), w -> String.valueOf(w.getStartMillis())))
.windowBy(GlobalWindowing.get()) .output();