/**
 * Produces the output dataset by converting every integer of the input into its
 * string form with a {@code MapElements} operator.
 *
 * @param input the dataset of integers to convert
 * @return the dataset of the converted strings
 */
@Override
protected Dataset<String> getOutput(Dataset<Integer> input) {
  // An explicitly typed function keeps the method reference bound to UnaryFunction.
  final UnaryFunction<Integer, String> toText = String::valueOf;
  return MapElements.of(input).using(toText).output();
}
/**
 * Finalizes the operator and retrieves only the values of its output pairs.
 *
 * <p>The result of {@code output()} (called without hints) is fed into a new
 * {@link MapElements} operator named {@code "extract-values"}, which maps every
 * pair to its second component via {@code Pair::getSecond}. That operator is
 * added to the flow.
 *
 * @param outputHints output dataset description, passed to the added
 *        {@link MapElements} operator
 * @return the dataset representing the new operator's output
 */
default Dataset<V> outputValues(OutputHint... outputHints) {
  return MapElements
      .named("extract-values")
      .of(output())
      .using(Pair::getSecond)
      .output(outputHints);
}
}
/** * This is not a basic operator. It can be straightforwardly implemented * by using {@code FlatMap} operator. * @return the operator chain representing this operation including FlatMap */ @Override public DAG<Operator<?, ?>> getBasicOps() { return DAG.of( // do not use the client API here, because it modifies the Flow! new FlatMap<IN, OUT>(getName(), getFlow(), input, (i, c) -> c.collect(mapper.apply(i, c.asContext())), null, getHints())); }
/**
 * Creates the {@code MapElements} operator from this object's name, input and
 * mapper, adds it to the flow of the input dataset and returns its output.
 *
 * @param outputHints hints handed to the new operator as a set
 * @return the output dataset of the newly added operator
 */
@Override
public Dataset<OUT> output(OutputHint... outputHints) {
  final Flow owningFlow = input.getFlow();
  final MapElements<IN, OUT> operator =
      new MapElements<>(name, owningFlow, input, mapper, Sets.newHashSet(outputHints));
  // Register the operator before handing out its output dataset.
  owningFlow.add(operator);
  return operator.output();
}
}
/**
 * Verifies that an operator built without an explicit name reports the name
 * {@code "MapElements"}.
 */
@Test
public void testBuild_ImplicitName() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 1);

  // The returned dataset is not needed here; the call is made only so that the
  // operator ends up in the flow, where it is inspected below.
  MapElements.of(dataset)
      .using(s -> s)
      .output();

  // Unbounded wildcards instead of a raw type: the test never relies on the
  // element types, and this avoids raw-type warnings.
  MapElements<?, ?> map = (MapElements<?, ?>) flow.operators().iterator().next();
  assertEquals("MapElements", map.getName());
}
/**
 * Verifies that building a named {@code MapElements} adds exactly one operator to
 * the flow and that the operator exposes the expected flow, name, mapper and output.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 1);

  Dataset<String> mapped = MapElements.named("Map1")
      .of(dataset)
      .using(s -> s)
      .output();

  assertEquals(flow, mapped.getFlow());
  assertEquals(1, flow.size());

  // Unbounded wildcards instead of a raw type: the assertions below do not depend
  // on the element types, and this avoids raw-type warnings.
  MapElements<?, ?> map = (MapElements<?, ?>) flow.operators().iterator().next();
  assertEquals(flow, map.getFlow());
  assertEquals("Map1", map.getName());
  assertNotNull(map.getMapper());
  assertEquals(mapped, map.output());
}
/**
 * Expands this operator into basic operators: a {@code ReduceByKey} over the input,
 * followed by a {@code MapElements} that keeps the first component of each pair
 * produced by the reduction.
 *
 * @return the DAG holding both operators
 */
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  final Flow owningFlow = input.getFlow();

  // Both lambdas yield null, in line with the Void type arguments of the reduction.
  final String reduceName = getName() + "::" + "ReduceByKey";
  final ReduceByKey<IN, ELEM, Void, Void, W> reduce =
      new ReduceByKey<>(
          reduceName,
          owningFlow,
          input,
          getKeyExtractor(),
          null, // NOTE(review): the meaning of this argument is not visible here
          e -> null,
          windowing,
          (CombinableReduceFunction<Void>) e -> null,
          Collections.emptySet());

  // This operator's own hints are attached to the final mapping step only.
  final String mapName = getName() + "::" + "Map";
  final MapElements keysOnly =
      new MapElements<>(mapName, owningFlow, reduce.output(), Pair::getFirst, getHints());

  final DAG<Operator<?, ?>> ops = DAG.of(reduce);
  // presumably registers keysOnly as a child of reduce — confirm against DAG.add
  ops.add(keysOnly, reduce);
  return ops;
}
}
@Test public void testBuild_WithCounters() { Flow flow = Flow.create("TEST"); Dataset<String> dataset = Util.createMockDataset(flow, 1); Dataset<String> mapped = MapElements.named("Map1") .of(dataset) .using((input, context) -> { // use simple counter context.getCounter("my-counter").increment(); return input.toLowerCase(); }) .output(); assertEquals(flow, mapped.getFlow()); assertEquals(1, flow.size()); MapElements map = (MapElements) flow.operators().iterator().next(); assertEquals(flow, map.getFlow()); assertEquals("Map1", map.getName()); assertNotNull(map.getMapper()); assertEquals(mapped, map.output()); }
/**
 * Creates the {@code MapElements} operator from this object's name, input and
 * mapper, adds it to the flow of the input dataset and returns its output.
 *
 * @param outputHints hints handed to the new operator as a set
 * @return the output dataset of the newly added operator
 */
@Override
public Dataset<OUT> output(OutputHint... outputHints) {
  final Flow owningFlow = input.getFlow();
  final MapElements<IN, OUT> operator =
      new MapElements<>(name, owningFlow, input, mapper, Sets.newHashSet(outputHints));
  // Register the operator before handing out its output dataset.
  owningFlow.add(operator);
  return operator.output();
}
}
/**
 * Expands this operator into basic operators: a {@code ReduceByKey} over the input,
 * followed by a {@code MapElements} that keeps the first component of each pair
 * produced by the reduction.
 *
 * @return the DAG holding both operators
 */
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  final Flow owningFlow = input.getFlow();

  // Both lambdas yield null, in line with the Void type arguments of the reduction.
  final String reduceName = getName() + "::" + "ReduceByKey";
  final ReduceByKey<IN, ELEM, Void, Void, W> reduce =
      new ReduceByKey<>(
          reduceName,
          owningFlow,
          input,
          getKeyExtractor(),
          null, // NOTE(review): the meaning of this argument is not visible here
          e -> null,
          windowing,
          (CombinableReduceFunction<Void>) e -> null,
          Collections.emptySet());

  // This operator's own hints are attached to the final mapping step only.
  final String mapName = getName() + "::" + "Map";
  final MapElements keysOnly =
      new MapElements<>(mapName, owningFlow, reduce.output(), Pair::getFirst, getHints());

  final DAG<Operator<?, ?>> ops = DAG.of(reduce);
  // presumably registers keysOnly as a child of reduce — confirm against DAG.add
  ops.add(keysOnly, reduce);
  return ops;
}
}
/**
 * Produces the output dataset by converting every integer of the input into its
 * string form with a {@code MapElements} operator.
 *
 * @param input the dataset of integers to convert
 * @return the dataset of the converted strings
 */
@Override
protected Dataset<String> getOutput(Dataset<Integer> input) {
  // An explicitly typed function keeps the method reference bound to UnaryFunction.
  final UnaryFunction<Integer, String> toText = String::valueOf;
  return MapElements.of(input).using(toText).output();
}
/** * This is not a basic operator. It can be straightforwardly implemented * by using {@code FlatMap} operator. * @return the operator chain representing this operation including FlatMap */ @Override public DAG<Operator<?, ?>> getBasicOps() { return DAG.of( // do not use the client API here, because it modifies the Flow! new FlatMap<IN, OUT>(getName(), getFlow(), input, (i, c) -> c.collect(mapper.apply(i, c.asContext())), null, getHints())); }
/**
 * Finalizes the operator and retrieves only the values of its output pairs.
 *
 * <p>The result of {@code output()} (called without hints) is fed into a new
 * {@link MapElements} operator named {@code "extract-values"}, which maps every
 * pair to its second component via {@code Pair::getSecond}. That operator is
 * added to the flow.
 *
 * @param outputHints output dataset description, passed to the added
 *        {@link MapElements} operator
 * @return the dataset representing the new operator's output
 */
default Dataset<V> outputValues(OutputHint... outputHints) {
  return MapElements
      .named("extract-values")
      .of(output())
      .using(Pair::getSecond)
      .output(outputHints);
}
}
final Flow flow = getFlow(); final MapElements<LEFT, Either<LEFT, RIGHT>> leftMap = new MapElements<>( getName() + "::Map-left", flow, left, Either::left); final MapElements<RIGHT, Either<LEFT, RIGHT>> rightMap = new MapElements<>( getName() + "::Map-right", flow, right, Either::right); Arrays.asList(leftMap.output(), rightMap.output()));
new MapElements<>(getName() + "::MapElements", flow, reduce.output(), e -> Triple.of( e.getFirst(),
/**
 * Maps every element of the wrapped dataset with the given function and wraps the
 * resulting dataset in a new {@code Dataset}.
 *
 * @param f the mapping function; passed through {@code requireNonNull}
 * @param <S> the element type produced by {@code f}
 * @return a new {@code Dataset} around the output of the {@code MapElements} operator
 */
public <S> Dataset<S> mapElements(UnaryFunction<T, S> f) {
  final Dataset<S> mapped =
      new Dataset<>(
          MapElements.of(this.wrap)
              .using(requireNonNull(f))
              .output());
  return mapped;
}
final Dataset<LogLine> parsed = MapElements.named("LOG-PARSER") .of(lines) .using(LogParser::parseLine)
final Flow flow = getFlow(); final MapElements<LEFT, Either<LEFT, RIGHT>> leftMap = new MapElements<>( getName() + "::Map-left", flow, left, Either::left); final MapElements<RIGHT, Either<LEFT, RIGHT>> rightMap = new MapElements<>( getName() + "::Map-right", flow, right, Either::right); Arrays.asList(leftMap.output(), rightMap.output()));
new MapElements<>(getName() + "::MapElements", flow, reduce.output(), e -> Triple.of( e.getFirst(),