/**
 * Runs a single-pipe flow whose source tap is built on a {@code SchemeWithProperties}
 * scheme, with "default" and "replace" set as flow-connector properties, and checks
 * that the sink resource exists after the flow completes.
 *
 * @throws IOException declared for the platform and flow I/O calls made by the test
 */
@Test
public void testSourceConfInit() throws IOException {
  getPlatform().copyFromLocal( inputFileNums20 );

  // The scheme under test sits on the source side of the flow.
  Tap input = getPlatform().getTap( new SchemeWithProperties( new Fields( "line" ) ), inputFileNums20, SinkMode.KEEP );
  Tap output = getPlatform().getTextFile( getOutputPath( "sourceconfinit" ), SinkMode.REPLACE );
  Pipe assembly = new Pipe( "test" );

  // Connector-level values; presumably inspected by SchemeWithProperties (defined outside this view).
  Properties connectorProps = new Properties();
  connectorProps.setProperty( "default", "connector-default" );
  connectorProps.setProperty( "replace", "connector-replace" );

  Flow flow = getPlatform().getFlowConnector( connectorProps ).connect( input, output, assembly );
  flow.complete();

  boolean sinkWritten = flow.resourceExists( output );
  assertTrue( sinkWritten );
}
/**
 * Runs a single-pipe flow whose sink tap is built on a {@code SchemeWithProperties}
 * scheme, with "default" and "replace" set as flow-connector properties, and checks
 * that the sink resource exists after the flow completes.
 *
 * @throws IOException declared for the platform and flow I/O calls made by the test
 */
@Test
public void testSinkConfInit() throws IOException {
  getPlatform().copyFromLocal( inputFileNums20 );

  Tap input = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20, SinkMode.KEEP );
  Pipe assembly = new Pipe( "test" );

  // The scheme under test sits on the sink side of the flow.
  Tap output = getPlatform().getTap( new SchemeWithProperties( new Fields( "line" ) ), getOutputPath( "sinkconfinit" ), SinkMode.REPLACE );

  // Connector-level values; presumably inspected by SchemeWithProperties (defined outside this view).
  Properties connectorProps = new Properties();
  connectorProps.setProperty( "default", "connector-default" );
  connectorProps.setProperty( "replace", "connector-replace" );

  Flow flow = getPlatform().getFlowConnector( connectorProps ).connect( input, output, assembly );
  flow.complete();

  boolean sinkWritten = flow.resourceExists( output );
  assertTrue( sinkWritten );
}
@Test public void testDupeConfigFromScheme() throws IOException { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = getPlatform().getTap( new DupeConfigScheme( new Fields( "offset", "line" ) ), inputFileUpper, SinkMode.KEEP ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "dupeconfig" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); // by default the source is decorated with a DistCacheTap which uses a Lfs tap to read the local file, so it is safe // to call #sourceConfInit a second time client side as we are leveraging a new tap instance Properties properties = flowConnectorProps() .setEnableDecorateAccumulatedTap( false ) .buildProperties( getProperties() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); }
@Test public void testDupeConfigFromScheme() throws IOException { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = getPlatform().getTap( new DupeConfigScheme( new Fields( "offset", "line" ) ), inputFileUpper, SinkMode.KEEP ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "dupeconfig" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); // by default the source is decorated with a DistCacheTap which uses a Lfs tap to read the local file, so it is safe // to call #sourceConfInit a second time client side as we are leveraging a new tap instance Properties properties = flowConnectorProps() .setEnableDecorateAccumulatedTap( false ) .buildProperties( getProperties() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); }
@Test public void testTrapTapSourceSink() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Scheme scheme = getPlatform().getTestFailScheme(); Tap source = getPlatform().getTap( scheme, inputFileApache, SinkMode.KEEP ); Pipe pipe = new Pipe( "map" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new GroupBy( pipe, new Fields( "ip" ) ); pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) ); Tap sink = getPlatform().getTap( scheme, getOutputPath( "trapsourcesink/sink" ), SinkMode.REPLACE ); Tap trap = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "trapsourcesink/trap" ), SinkMode.REPLACE ); Map<Object, Object> properties = getProperties(); // compensate for running in cluster mode getPlatform().setNumMapTasks( properties, 1 ); getPlatform().setNumReduceTasks( properties, 1 ); getPlatform().setNumGatherPartitionTasks( properties, 1 ); Flow flow = getPlatform().getFlowConnector( properties ).connect( "trap test", source, sink, trap, pipe ); flow.complete(); validateLength( flow.openTapForRead( getPlatform().getTextFile( sink.getIdentifier() ) ), 7 ); validateLength( flow.openTrap(), 2, Pattern.compile( "bad data" ) ); // confirm the payload is written }
@Test public void testTrapTapSourceSink() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Scheme scheme = getPlatform().getTestFailScheme(); Tap source = getPlatform().getTap( scheme, inputFileApache, SinkMode.KEEP ); Pipe pipe = new Pipe( "map" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new GroupBy( pipe, new Fields( "ip" ) ); pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) ); Tap sink = getPlatform().getTap( scheme, getOutputPath( "trapsourcesink/sink" ), SinkMode.REPLACE ); Tap trap = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "trapsourcesink/trap" ), SinkMode.REPLACE ); Map<Object, Object> properties = getProperties(); // compensate for running in cluster mode getPlatform().setNumMapTasks( properties, 1 ); getPlatform().setNumReduceTasks( properties, 1 ); getPlatform().setNumGatherPartitionTasks( properties, 1 ); Flow flow = getPlatform().getFlowConnector( properties ).connect( "trap test", source, sink, trap, pipe ); flow.complete(); validateLength( flow.openTapForRead( getPlatform().getTextFile( sink.getIdentifier() ) ), 7 ); validateLength( flow.openTrap(), 2, Pattern.compile( "bad data" ) ); // confirm the payload is written }
@Test public void testTapSourceConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Scheme scheme = getPlatform().getTestConfigDefScheme(); Tap source = getPlatform().getTap( scheme, inputFileNums20, SinkMode.KEEP ); // process -> after sink/sourceConfInit are called // default -> Wrapper for all cluster side calls source.getConfigDef().setProperty( Mode.DEFAULT, "default", "source-default" ); // steps on above value source.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); source.getConfigDef().setProperty( Mode.DEFAULT, "replace", "source-default" ); source.getConfigDef().setProperty( Mode.REPLACE, "replace", "source-replace" ); source.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); source.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); source.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); source.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Insert( new Fields( "value" ), "nada" ), Fields.ALL ); Tap sink = getPlatform().getTextFile( getOutputPath( "tapsourceconfigdef" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
@Test public void testTapSinkConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20, SinkMode.KEEP ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Insert( new Fields( "value" ), "nada" ), Fields.ALL ); Scheme scheme = getPlatform().getTestConfigDefScheme(); Tap sink = getPlatform().getTap( scheme, getOutputPath( "tapsinkconfigdef" ), SinkMode.REPLACE ); // process -> after sink/sourceConfInit are called // default -> Wrapper for all cluster side calls sink.getConfigDef().setProperty( Mode.DEFAULT, "default", "sink-default" ); // steps on above value sink.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); sink.getConfigDef().setProperty( Mode.DEFAULT, "replace", "sink-default" ); sink.getConfigDef().setProperty( Mode.REPLACE, "replace", "sink-replace" ); sink.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); sink.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); sink.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); sink.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
@Test public void testTapSourceConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Scheme scheme = getPlatform().getTestConfigDefScheme(); Tap source = getPlatform().getTap( scheme, inputFileNums20, SinkMode.KEEP ); // process -> after sink/sourceConfInit are called // default -> Wrapper for all cluster side calls source.getConfigDef().setProperty( Mode.DEFAULT, "default", "source-default" ); // steps on above value source.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); source.getConfigDef().setProperty( Mode.DEFAULT, "replace", "source-default" ); source.getConfigDef().setProperty( Mode.REPLACE, "replace", "source-replace" ); source.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); source.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); source.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); source.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Insert( new Fields( "value" ), "nada" ), Fields.ALL ); Tap sink = getPlatform().getTextFile( getOutputPath( "tapsourceconfigdef" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
@Test public void testTapSinkConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20, SinkMode.KEEP ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Insert( new Fields( "value" ), "nada" ), Fields.ALL ); Scheme scheme = getPlatform().getTestConfigDefScheme(); Tap sink = getPlatform().getTap( scheme, getOutputPath( "tapsinkconfigdef" ), SinkMode.REPLACE ); // process -> after sink/sourceConfInit are called // default -> Wrapper for all cluster side calls sink.getConfigDef().setProperty( Mode.DEFAULT, "default", "sink-default" ); // steps on above value sink.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); sink.getConfigDef().setProperty( Mode.DEFAULT, "replace", "sink-default" ); sink.getConfigDef().setProperty( Mode.REPLACE, "replace", "sink-replace" ); sink.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); sink.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); sink.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); sink.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
/**
 * Builds a two-stage assembly ("firstPipe" feeding "secondPipe") with a text-file trap on
 * the first stage and a trap backed by the platform's test-fail scheme on the second, then
 * runs the flow. The test passes only if a {@code CascadingException} is thrown.
 *
 * @throws Exception the expected {@code CascadingException}, or any other failure
 */
@Test(expected = CascadingException.class)
public void testTrapFailure() throws Exception {
  getPlatform().copyFromLocal( inputFileApache );

  Tap input = getPlatform().getTextFile( inputFileApache );

  // Trap for "secondPipe" uses the test-fail scheme; presumably this is what triggers the expected exception.
  Tap failingTrap = getPlatform().getTap( getPlatform().getTestFailScheme(), getOutputPath( "trapFailure/badTrap" ), SinkMode.REPLACE );

  Tap output = getPlatform().getTextFile( getOutputPath( "trapFailure/tap" ), SinkMode.REPLACE );

  // Stage one: pull the leading non-space token of each line into "ip".
  Pipe firstStage = new Each(
    new Pipe( "firstPipe" ),
    new Fields( "line" ),
    new RegexParser( new Fields( "ip" ), "^[^ ]*" ),
    new Fields( "ip" ) );

  // Stage two: apply TestFunction (constructed with a null value) to "ip".
  Pipe tail = new Each(
    new Pipe( "secondPipe", firstStage ),
    new Fields( "ip" ),
    new TestFunction( new Fields( "test" ), null ),
    Fields.ALL );

  Tap plainTrap = getPlatform().getTextFile( getOutputPath( "trapFailure/firstTrap" ), SinkMode.REPLACE );

  FlowDef definition = FlowDef.flowDef();
  definition = definition.addSource( "firstPipe", input );
  definition = definition.addTrap( "firstPipe", plainTrap );
  definition = definition.addTrap( "secondPipe", failingTrap );
  definition = definition.addTail( tail );
  definition = definition.addSink( tail, output );

  getPlatform().getFlowConnector().connect( definition ).complete();
}
}
/**
 * Builds a two-stage assembly ("firstPipe" feeding "secondPipe") with a text-file trap on
 * the first stage and a trap backed by the platform's test-fail scheme on the second, then
 * runs the flow. The test passes only if a {@code CascadingException} is thrown.
 *
 * @throws Exception the expected {@code CascadingException}, or any other failure
 */
@Test(expected = CascadingException.class)
public void testTrapFailure() throws Exception {
  getPlatform().copyFromLocal( inputFileApache );

  Tap input = getPlatform().getTextFile( inputFileApache );

  // Trap for "secondPipe" uses the test-fail scheme; presumably this is what triggers the expected exception.
  Tap failingTrap = getPlatform().getTap( getPlatform().getTestFailScheme(), getOutputPath( "trapFailure/badTrap" ), SinkMode.REPLACE );

  Tap output = getPlatform().getTextFile( getOutputPath( "trapFailure/tap" ), SinkMode.REPLACE );

  // Stage one: pull the leading non-space token of each line into "ip".
  Pipe firstStage = new Each(
    new Pipe( "firstPipe" ),
    new Fields( "line" ),
    new RegexParser( new Fields( "ip" ), "^[^ ]*" ),
    new Fields( "ip" ) );

  // Stage two: apply TestFunction (constructed with a null value) to "ip".
  Pipe tail = new Each(
    new Pipe( "secondPipe", firstStage ),
    new Fields( "ip" ),
    new TestFunction( new Fields( "test" ), null ),
    Fields.ALL );

  Tap plainTrap = getPlatform().getTextFile( getOutputPath( "trapFailure/firstTrap" ), SinkMode.REPLACE );

  FlowDef definition = FlowDef.flowDef();
  definition = definition.addSource( "firstPipe", input );
  definition = definition.addTrap( "firstPipe", plainTrap );
  definition = definition.addTrap( "secondPipe", failingTrap );
  definition = definition.addTail( tail );
  definition = definition.addSink( tail, output );

  getPlatform().getFlowConnector().connect( definition ).complete();
}
}