/**
 * Runs the wrapped dataset through a {@link MapElements} operator using the
 * given function and wraps the operator's output in a new {@code Dataset}.
 *
 * @param <S> type of the elements produced by the function
 * @param f the mapping function; it is passed through {@code requireNonNull}
 *     before being handed to the operator
 * @return a new {@code Dataset} wrapping the operator's output
 */
public <S> Dataset<S> mapElements(UnaryFunction<T, S> f) {
  return new Dataset<>(
      MapElements.of(wrap)
          .using(requireNonNull(f))
          .output());
}
@Test public void testDatasetConsumers() throws Exception { Dataset<Object> input = flow.createInput(new MockStreamDataSource<>()); Dataset<Object> transformed = MapElements.of(input).using(e -> e).output(); Dataset<Object> transformed2 = Filter.of(transformed).by(e -> false).output(); Dataset<Object> union = Union.of(transformed, transformed2).output(); assertEquals(1, input.getConsumers().size()); assertEquals(2, transformed.getConsumers().size()); assertEquals(1, transformed2.getConsumers().size()); assertEquals(0, union.getConsumers().size()); // the 'transformed' data set is consumed by Filter and Union operators assertEquals(toSet(Arrays.asList(Filter.class, Union.class)), toSet(transformed.getConsumers().stream().map(Object::getClass))); }
/** Checks that an operator built without an explicit name reports the name "MapElements". */
@Test
public void testBuild_ImplicitName() {
  Flow flow = Flow.create("TEST");
  Dataset<String> source = Util.createMockDataset(flow, 1);

  // The resulting dataset itself is not inspected; the test only looks at the
  // first operator the flow reports afterwards.
  Dataset<String> identity =
      MapElements.of(source)
          .using(s -> s)
          .output();

  MapElements operator = (MapElements) flow.operators().iterator().next();
  assertEquals("MapElements", operator.getName());
}
/**
 * Builds a named {@link MapElements} operator and checks the flow, name,
 * mapper and output exposed by the operator the flow reports.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> source = Util.createMockDataset(flow, 1);

  Dataset<String> result =
      MapElements.named("Map1")
          .of(source)
          .using(s -> s)
          .output();

  // The output dataset belongs to the flow, which holds exactly one operator.
  assertEquals(flow, result.getFlow());
  assertEquals(1, flow.size());

  MapElements operator = (MapElements) flow.operators().iterator().next();
  assertEquals(flow, operator.getFlow());
  assertEquals("Map1", operator.getName());
  assertNotNull(operator.getMapper());
  assertEquals(result, operator.output());
}
/**
 * Finalizes the operator and returns a dataset holding only the second
 * component of each output pair. The extraction is done by appending a
 * {@link MapElements} operator named {@code "extract-values"} to this
 * operator's {@code output()}.
 *
 * @param outputHints hints passed to the appended operator's output
 * @return the dataset produced by the appended value-extracting operator
 */
default Dataset<V> outputValues(OutputHint... outputHints) {
  return MapElements.named("extract-values")
      .of(output())
      .using(Pair::getSecond)
      .output(outputHints);
}
}
/**
 * Finalizes the operator and exposes just the values of its output pairs.
 * A {@link MapElements} operator named {@code "extract-values"} is chained
 * onto {@code output()} and maps every pair to its second component.
 *
 * @param outputHints hints passed to the chained operator's output
 * @return the dataset produced by the chained value-extracting operator
 */
default Dataset<V> outputValues(OutputHint... outputHints) {
  return MapElements.named("extract-values")
      .of(output())
      .using(Pair::getSecond)
      .output(outputHints);
}
}
/**
 * Joins both inputs under a single constant key inside 10 ms session windows
 * and emits, for every joined pair, a triple of (window, left name, right name).
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {
  // The second component of every pair is used as the element's event time.
  Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  // Both key extractors return the same empty string, so all elements share one key.
  Dataset<Pair<String, Triple<TimeInterval, String, String>>> keyedTriples =
      Join.of(timedLeft, timedRight)
          .by(item -> "", item -> "", String.class)
          .using(
              (Pair<String, Long> leftItem,
                  Pair<String, Long> rightItem,
                  Collector<Triple<TimeInterval, String, String>> out) ->
                  out.collect(
                      Triple.of(
                          (TimeInterval) out.getWindow(),
                          leftItem.getFirst(),
                          rightItem.getFirst())))
          .windowBy(Session.of(Duration.ofMillis(10)))
          .output();

  // Drop the join key and keep only the collected triples.
  return MapElements.of(keyedTriples).using(Pair::getSecond).output();
}
/**
 * Assigns event time from each pair's second component, joins the two inputs
 * on a constant key within 10 ms session windows, and returns the resulting
 * (window, left name, right name) triples without the join key.
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {
  Dataset<Pair<String, Long>> leftWithTime =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  Dataset<Pair<String, Long>> rightWithTime =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  // Every element maps to the same empty-string key on both sides.
  Dataset<Pair<String, Triple<TimeInterval, String, String>>> joinResult =
      Join.of(leftWithTime, rightWithTime)
          .by(elem -> "", elem -> "", String.class)
          .using(
              (Pair<String, Long> lhs,
                  Pair<String, Long> rhs,
                  Collector<Triple<TimeInterval, String, String>> collector) ->
                  collector.collect(
                      Triple.of(
                          (TimeInterval) collector.getWindow(),
                          lhs.getFirst(),
                          rhs.getFirst())))
          .windowBy(Session.of(Duration.ofMillis(10)))
          .output();

  // Keep only the second component of each joined pair, i.e. the triple.
  return MapElements.of(joinResult).using(Pair::getSecond).output();
}
/**
 * Maps three strings through {@code HBaseTestCase::put}, persists the result
 * to {@code sink}, runs the flow on a {@code LocalExecutor} and then checks
 * that {@code get(v)} equals {@code b(v)} for every input value.
 */
@Test
public void testTestSimpleOutput() throws IOException {
  List<String> values = Arrays.asList("a", "b", "c");
  ListDataSource<String> source = ListDataSource.unbounded(values);
  Dataset<String> input = flow.createInput(source);

  MapElements.of(input)
      .using(HBaseTestCase::put)
      .output()
      .persist(sink);

  // Wait for the submitted flow to finish before reading anything back.
  new LocalExecutor().submit(flow).join();

  for (String value : values) {
    assertArrayEquals(b(value), get(value));
  }
}
/**
 * Passes every element through unchanged while adding it, with count 1, to
 * the histogram named "dist" obtained from the mapping context.
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Integer> input) {
  // Typed as UnaryFunctionEnv so the context-aware variant of using(...) is chosen.
  UnaryFunctionEnv<Integer, Integer> recordAndForward =
      (value, env) -> {
        env.getHistogram("dist").add(value, 1);
        return value;
      };
  return MapElements.of(input).using(recordAndForward).output();
}
/**
 * Maps the given dataset through {@code mapper}, persists the mapped dataset
 * to {@code sink} and then lets the sink prepare that mapped dataset.
 *
 * @param output the dataset to be converted and handed to the wrapped sink
 * @return always {@code true}; the value returned by the sink's own
 *     {@code prepareDataset} call is not used
 */
@Override
public boolean prepareDataset(Dataset<OUT> output) {
  Dataset<IN> converted = MapElements.of(output).using(mapper).output();
  converted.persist(sink);
  sink.prepareDataset(converted);
  return true;
}
/**
 * Identity mapping that also adds each element, with count 1, to the
 * "dist" histogram obtained from the mapping context.
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Integer> input) {
  // The explicit UnaryFunctionEnv type selects the context-aware using(...) variant.
  UnaryFunctionEnv<Integer, Integer> trackingIdentity =
      (element, ctx) -> {
        ctx.getHistogram("dist").add(element, 1);
        return element;
      };
  return MapElements.of(input).using(trackingIdentity).output();
}
/** Converts every integer of the input into its {@code String.valueOf} form. */
@Override
protected Dataset<String> getOutput(Dataset<Integer> input) {
  // Declared with an explicit type so the plain UnaryFunction variant of using(...) is chosen.
  UnaryFunction<Integer, String> toText = String::valueOf;
  return MapElements.of(input).using(toText).output();
}
/**
 * Turns every input element into a pair whose first component is
 * {@code EMPTY} and whose second component is the element, then persists
 * the resulting dataset to {@code wrapped}.
 *
 * @param input the dataset to be paired and persisted
 * @return always {@code true}
 */
@Override
public boolean prepareDataset(Dataset<T> input) {
  MapElements.of(input)
      .using(element -> Pair.of(EMPTY, element))
      .output()
      .persist(wrapped);
  return true;
}
/**
 * Checks that a hint given to {@code output(...)} is the only hint on the
 * producing operator, and that {@code output()} without arguments leaves
 * the producer with no hints.
 */
@Test
public void testBuild_Hints() {
  Flow flow = Flow.create("TEST");
  Dataset<String> source = Util.createMockDataset(flow, 1);

  Dataset<String> hinted =
      MapElements.of(source).using(i -> i).output(SizeHint.FITS_IN_MEMORY);
  assertTrue(hinted.getProducer().getHints().contains(SizeHint.FITS_IN_MEMORY));
  assertEquals(1, hinted.getProducer().getHints().size());

  Dataset<String> unhinted = MapElements.of(source).using(i -> i).output();
  assertEquals(0, unhinted.getProducer().getHints().size());
}
}
/**
 * Converts the input dataset with the given mapper and persists the
 * converted dataset into this sink.
 *
 * @param <T> type of the input elements
 * @param input the dataset to persist
 * @param mapper function turning each input element into a {@link Cell}
 */
public <T> void persist(Dataset<T> input, UnaryFunction<T, Cell> mapper) {
  Dataset<Cell> cells = MapElements.of(input).using(mapper).output();
  cells.persist(this);
}
/**
 * Pairs every element of the input with the window reported by the mapping
 * context for that element. The window is cast to {@code W} without a
 * runtime check, hence the suppressed warning.
 *
 * @param <T> type of the input elements
 * @param <W> window type the caller expects
 * @param input the dataset whose elements should be paired with their window
 * @return a dataset of (window, element) pairs
 */
@SuppressWarnings("unchecked")
static <T, W extends Window> Dataset<Pair<W, T>> extractWindow(Dataset<T> input) {
  return MapElements.of(input)
      .using((element, context) -> Pair.of((W) context.getWindow(), element))
      .output();
}
}
/** Maps each integer element to the string produced by {@code String.valueOf}. */
@Override
protected Dataset<String> getOutput(Dataset<Integer> input) {
  // The explicit UnaryFunction type picks the context-free using(...) variant.
  UnaryFunction<Integer, String> stringify = String::valueOf;
  return MapElements.of(input).using(stringify).output();
}
/**
 * Applies {@code mapper} to the given dataset, persists the mapped dataset
 * to {@code sink} and afterwards calls the sink's {@code prepareDataset}
 * with that mapped dataset.
 *
 * @param output the dataset to be mapped and passed on to the wrapped sink
 * @return always {@code true}; the result of the sink's own
 *     {@code prepareDataset} call is ignored
 */
@Override
public boolean prepareDataset(Dataset<OUT> output) {
  Dataset<IN> remapped = MapElements.of(output).using(mapper).output();
  remapped.persist(sink);
  sink.prepareDataset(remapped);
  return true;
}
/**
 * Maps every input element to a pair of (window, element), where the window
 * comes from the mapping context and is cast to {@code W} unchecked.
 *
 * @param <T> type of the input elements
 * @param <W> window type the caller expects
 * @param input the dataset to decorate with window information
 * @return a dataset of (window, element) pairs
 */
@SuppressWarnings("unchecked")
static <T, W extends Window> Dataset<Pair<W, T>> extractWindow(Dataset<T> input) {
  return MapElements.of(input)
      .using((item, env) -> Pair.of((W) env.getWindow(), item))
      .output();
}
}