/**
 * Translates a {@link FlatMap} operator into a Spark {@code flatMap} transformation applied to
 * the operator's single input RDD.
 *
 * <p>If the operator carries an event-time extractor, the user-defined functor is wrapped in an
 * {@code EventTimeAssigningUnaryFunctor}; otherwise it is wrapped in a plain
 * {@code UnaryFunctorWrapper}. In both cases the resulting RDD is named after the operator,
 * with a suffix identifying which of the two variants was applied.
 *
 * @param operator the flat-map operator to translate
 * @param context executor context supplying the input RDD, accumulator factory and settings
 * @return the named RDD produced by the {@code flatMap} call
 */
@Override
@SuppressWarnings("unchecked")
public JavaRDD<?> translate(FlatMap operator, SparkExecutorContext context) {
  final JavaRDD<?> source = context.getSingleInput(operator);
  final UnaryFunctor<?, ?> udf = operator.getFunctor();
  final ExtractEventTime<?> timeExtractor = operator.getEventTimeExtractor();
  final LazyAccumulatorProvider accumulatorProvider =
      new LazyAccumulatorProvider(context.getAccumulatorFactory(), context.getSettings());

  // No extractor configured: apply only the user-defined functor.
  if (timeExtractor == null) {
    return source
        .flatMap(new UnaryFunctorWrapper(udf, accumulatorProvider))
        .setName(operator.getName() + "::apply-udf");
  }

  // Extractor configured: use the functor variant that also receives the extractor.
  return source
      .flatMap(new EventTimeAssigningUnaryFunctor(udf, timeExtractor, accumulatorProvider))
      .setName(operator.getName() + "::event-time-and-apply-udf");
}
}
/** Checks that a {@code FlatMap} built without an explicit name reports the name "FlatMap". */
@Test
public void testBuild_ImplicitName() {
  final Flow pipeline = Flow.create("TEST");
  final Dataset<String> source = Util.createMockDataset(pipeline, 1);

  // The returned dataset is not inspected further; only the operator registered in the flow is.
  final Dataset<String> result =
      FlatMap.of(source)
          .using((String element, Collector<String> out) -> out.collect(element))
          .output();

  final FlatMap registered = (FlatMap) pipeline.operators().iterator().next();
  assertEquals("FlatMap", registered.getName());
}
/**
 * Builds a named {@code FlatMap} without an event-time extractor and checks the flow wiring,
 * the operator's name, functor and output, and that no event-time extractor is set.
 */
@Test
public void testBuild() {
  final Flow pipeline = Flow.create("TEST");
  final Dataset<String> source = Util.createMockDataset(pipeline, 1);

  final Dataset<String> result =
      FlatMap.named("FlatMap1")
          .of(source)
          .using((String element, Collector<String> out) -> out.collect(element))
          .output();

  // The output dataset belongs to the flow, which now holds exactly one operator.
  assertEquals(pipeline, result.getFlow());
  assertEquals(1, pipeline.size());

  // That single operator is the FlatMap configured above.
  final FlatMap registered = (FlatMap) pipeline.operators().iterator().next();
  assertEquals(pipeline, registered.getFlow());
  assertEquals("FlatMap1", registered.getName());
  assertNotNull(registered.getFunctor());
  assertEquals(result, registered.output());
  assertNull(registered.getEventTimeExtractor());
}
/**
 * Builds a named {@code FlatMap} whose functor increments a counter obtained from the collector
 * before emitting the element, then checks the flow wiring and the operator's properties.
 */
@Test
public void testBuild_WithCounters() {
  final Flow pipeline = Flow.create("TEST");
  final Dataset<String> source = Util.createMockDataset(pipeline, 1);

  final Dataset<String> result =
      FlatMap.named("FlatMap1")
          .of(source)
          .using(
              (String element, Collector<String> out) -> {
                out.getCounter("my-counter").increment();
                out.collect(element);
              })
          .output();

  // The output dataset belongs to the flow, which now holds exactly one operator.
  assertEquals(pipeline, result.getFlow());
  assertEquals(1, pipeline.size());

  // That single operator is the FlatMap configured above.
  final FlatMap registered = (FlatMap) pipeline.operators().iterator().next();
  assertEquals(pipeline, registered.getFlow());
  assertEquals("FlatMap1", registered.getName());
  assertNotNull(registered.getFunctor());
  assertEquals(result, registered.output());
}
@Test public void testBuild_EventTimeExtractor() { Flow flow = Flow.create("TEST"); Dataset<String> dataset = Util.createMockDataset(flow, 1); Dataset<BigDecimal> mapped = FlatMap.named("FlatMap2") .of(dataset) .using((String s, Collector<BigDecimal> c) -> c.collect(null)) .eventTimeBy(Long::parseLong) // ~ consuming the original input elements .output(); assertEquals(flow, mapped.getFlow()); assertEquals(1, flow.size()); FlatMap map = (FlatMap) flow.operators().iterator().next(); assertEquals(flow, map.getFlow()); assertEquals("FlatMap2", map.getName()); assertNotNull(map.getFunctor()); assertEquals(mapped, map.output()); assertNotNull(map.getEventTimeExtractor()); }