DataSource<String> datasource = new SimpleHadoopTextFileSource(uri.toString()); Dataset<String> in = flow.createInput(datasource); return FlatMap.named("PARSE-INPUT") .of(in) .using(new UnaryFunctor<String, Pair<Long, String>>() {
/**
 * Runs a flow named {@code "simple read avro"} that reads the records supplied by
 * {@code inSource}, converts each one to a string and persists the strings to
 * {@code outSink}.
 *
 * <p>The conversion is performed by a {@code FlatMap} operator named {@code "avro2csv"}
 * that delegates to {@code AvroSourceTest::apply}; the exact string format is decided by
 * that method, which is not shown here. The flow is submitted to a {@link LocalExecutor}
 * and {@code get()} is called on the result of the submission before returning.
 *
 * @param outSink sink the resulting strings are persisted to
 * @param inSource source of {@code (AvroKey<GenericData.Record>, NullWritable)} pairs
 * @throws Exception whatever is thrown while building, submitting or awaiting the flow
 */
public static void runFlow(
    DataSink<String> outSink,
    DataSource<Pair<AvroKey<GenericData.Record>, NullWritable>> inSource)
    throws Exception {

  Flow avroFlow = Flow.create("simple read avro");

  // Source -> string conversion, as one fluent pipeline.
  Dataset<Pair<AvroKey<GenericData.Record>, NullWritable>> records =
      avroFlow.createInput(inSource);
  final Dataset<String> converted =
      FlatMap.named("avro2csv")
          .of(records)
          .using(AvroSourceTest::apply)
          .output();
  converted.persist(outSink);

  // Execute locally and wait on the handle returned by submit().
  Executor localExecutor = new LocalExecutor();
  localExecutor.submit(avroFlow).get();
}
return FlatMap.named("FORMAT-OUTPUT") .of(aggregated) .using(((Pair<String, Long> elem, Collector<String> context) -> {
final Dataset<String> words = FlatMap.named("TOKENIZER") .of(lines) .using((String line, Collector<String> c) ->
/**
 * Builds a single {@code FlatMap} named {@code "FlatMap1"} with a pass-through functor
 * and no event-time extractor, then checks that the operator is registered in the flow
 * with the expected name, functor, output and a {@code null} event-time extractor.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> input = Util.createMockDataset(flow, 1);

  Dataset<String> output =
      FlatMap.named("FlatMap1")
          .of(input)
          .using((String element, Collector<String> collector) -> {
            collector.collect(element);
          })
          .output();

  // The output dataset belongs to the flow, which holds exactly one operator.
  assertEquals(flow, output.getFlow());
  assertEquals(1, flow.size());

  // Inspect that single operator.
  FlatMap operator = (FlatMap) flow.operators().iterator().next();
  assertEquals(flow, operator.getFlow());
  assertEquals("FlatMap1", operator.getName());
  assertNotNull(operator.getFunctor());
  assertEquals(output, operator.output());
  // eventTimeBy(...) was never called on the builder.
  assertNull(operator.getEventTimeExtractor());
}
/**
 * Builds a {@code FlatMap} named {@code "FlatMap1"} whose functor increments the
 * {@code "my-counter"} counter on the collector before emitting the element, then
 * checks that the operator is registered in the flow with the expected name, functor
 * and output.
 */
@Test
public void testBuild_WithCounters() {
  Flow flow = Flow.create("TEST");
  Dataset<String> input = Util.createMockDataset(flow, 1);

  Dataset<String> output =
      FlatMap.named("FlatMap1")
          .of(input)
          .using((String element, Collector<String> collector) -> {
            collector.getCounter("my-counter").increment();
            collector.collect(element);
          })
          .output();

  // The output dataset belongs to the flow, which holds exactly one operator.
  assertEquals(flow, output.getFlow());
  assertEquals(1, flow.size());

  // Inspect that single operator.
  FlatMap operator = (FlatMap) flow.operators().iterator().next();
  assertEquals(flow, operator.getFlow());
  assertEquals("FlatMap1", operator.getName());
  assertNotNull(operator.getFunctor());
  assertEquals(output, operator.output());
}
@Test public void testBuild_EventTimeExtractor() { Flow flow = Flow.create("TEST"); Dataset<String> dataset = Util.createMockDataset(flow, 1); Dataset<BigDecimal> mapped = FlatMap.named("FlatMap2") .of(dataset) .using((String s, Collector<BigDecimal> c) -> c.collect(null)) .eventTimeBy(Long::parseLong) // ~ consuming the original input elements .output(); assertEquals(flow, mapped.getFlow()); assertEquals(1, flow.size()); FlatMap map = (FlatMap) flow.operators().iterator().next(); assertEquals(flow, map.getFlow()); assertEquals("FlatMap2", map.getName()); assertNotNull(map.getFunctor()); assertEquals(mapped, map.output()); assertNotNull(map.getEventTimeExtractor()); }