@Test public void testResolvedSinkFields() throws IOException { getPlatform().copyFromLocal( inputFileLower ); Tap source = new Hfs( new TextLine( new Fields( "line" ) ), inputFileLower ); Pipe pipe = new Pipe( "test" ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); pipe = new Each( pipe, new Fields( "line" ), splitter ); Tap sink = new Hfs( new ResolvedScheme( new Fields( "num", "char" ) ), getOutputPath( "resolvedfields" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector( getProperties() ).connect( source, sink, pipe ); flow.complete(); List<Tuple> tuples = asList( flow, sink ); List<Object> values = new ArrayList<Object>(); for( Tuple tuple : tuples ) values.add( tuple.getObject( 1 ) ); assertTrue( values.contains( "1\ta" ) ); assertTrue( values.contains( "2\tb" ) ); assertTrue( values.contains( "3\tc" ) ); assertTrue( values.contains( "4\td" ) ); assertTrue( values.contains( "5\te" ) ); assertEquals( 5, tuples.size() ); // confirm the tuple iterator can handle nulls from the source assertEquals( 5, asList( flow, source ).size() ); }
@Test public void testResolvedSinkFields() throws IOException { getPlatform().copyFromLocal( inputFileLower ); Tap source = new Hfs( new TextLine( new Fields( "line" ) ), inputFileLower ); Pipe pipe = new Pipe( "test" ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); pipe = new Each( pipe, new Fields( "line" ), splitter ); Tap sink = new Hfs( new ResolvedScheme( new Fields( "num", "char" ) ), getOutputPath( "resolvedfields" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector( getProperties() ).connect( source, sink, pipe ); flow.complete(); List<Tuple> tuples = asList( flow, sink ); List<Object> values = new ArrayList<Object>(); for( Tuple tuple : tuples ) values.add( tuple.getObject( 1 ) ); assertTrue( values.contains( "1\ta" ) ); assertTrue( values.contains( "2\tb" ) ); assertTrue( values.contains( "3\tc" ) ); assertTrue( values.contains( "4\td" ) ); assertTrue( values.contains( "5\te" ) ); assertEquals( 5, tuples.size() ); // confirm the tuple iterator can handle nulls from the source assertEquals( 5, asList( flow, source ).size() ); }
/**
 * Checks how {@code Lfs} deals with path schemes: the full identifier of a scheme-less relative
 * path must carry the "file" scheme, an explicit {@code file://} url must be accepted by the
 * constructor, and constructing with an {@code s3://} url must throw.
 *
 * @throws URISyntaxException declared by the test signature
 * @throws IOException        declared by the test signature
 */
@Test
public void testLfs() throws URISyntaxException, IOException
  {
  Tap tap = new Lfs( new SequenceFile( new Fields( "foo" ) ), "some/path" );

  String fullIdentifier = tap.getFullIdentifier( getPlatform().getFlowProcess() );
  String uriScheme = new Path( fullIdentifier ).toUri().getScheme();

  assertTrue( "wrong scheme", uriScheme.equalsIgnoreCase( "file" ) );

  // must construct without throwing
  new Lfs( new SequenceFile( new Fields( "foo" ) ), "file:///some/path" );

  try
    {
    new Lfs( new SequenceFile( new Fields( "foo" ) ), "s3://localhost:5001/some/path" );
    fail( "not valid url" );
    }
  catch( Exception exception )
    {
    // expected: the constructor is required to reject the s3 url
    }
  }
/**
 * Checks how {@code Lfs} deals with path schemes: the full identifier of a scheme-less relative
 * path must carry the "file" scheme, an explicit {@code file://} url must be accepted by the
 * constructor, and constructing with an {@code s3://} url must throw.
 *
 * @throws URISyntaxException declared by the test signature
 * @throws IOException        declared by the test signature
 */
@Test
public void testLfs() throws URISyntaxException, IOException
  {
  Tap tap = new Lfs( new SequenceFile( new Fields( "foo" ) ), "some/path" );

  String fullIdentifier = tap.getFullIdentifier( getPlatform().getFlowProcess() );
  String uriScheme = new Path( fullIdentifier ).toUri().getScheme();

  assertTrue( "wrong scheme", uriScheme.equalsIgnoreCase( "file" ) );

  // must construct without throwing
  new Lfs( new SequenceFile( new Fields( "foo" ) ), "file:///some/path" );

  try
    {
    new Lfs( new SequenceFile( new Fields( "foo" ) ), "s3://localhost:5001/some/path" );
    fail( "not valid url" );
    }
  catch( Exception exception )
    {
    // expected: the constructor is required to reject the s3 url
    }
  }
@Test public void testDupeConfigFromScheme() throws IOException { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = getPlatform().getTap( new DupeConfigScheme( new Fields( "offset", "line" ) ), inputFileUpper, SinkMode.KEEP ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "dupeconfig" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); // by default the source is decorated with a DistCacheTap which uses a Lfs tap to read the local file, so it is safe // to call #sourceConfInit a second time client side as we are leveraging a new tap instance Properties properties = flowConnectorProps() .setEnableDecorateAccumulatedTap( false ) .buildProperties( getProperties() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); }
@Test public void testDupeConfigFromScheme() throws IOException { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = getPlatform().getTap( new DupeConfigScheme( new Fields( "offset", "line" ) ), inputFileUpper, SinkMode.KEEP ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "dupeconfig" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); // by default the source is decorated with a DistCacheTap which uses a Lfs tap to read the local file, so it is safe // to call #sourceConfInit a second time client side as we are leveraging a new tap instance Properties properties = flowConnectorProps() .setEnableDecorateAccumulatedTap( false ) .buildProperties( getProperties() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); }
@Test public void testHfsAsterisk() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Hfs sourceExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "*" ); assertTrue( sourceExists.resourceExists( getPlatform().getFlowProcess() ) ); TupleEntryIterator iterator = sourceExists.openForRead( getPlatform().getFlowProcess() ); assertTrue( iterator.hasNext() ); iterator.close(); try { Hfs sourceNotExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "/blah/" ); iterator = sourceNotExists.openForRead( getPlatform().getFlowProcess() ); fail(); } catch( IOException exception ) { // do nothing } }
@Test public void testHfsAsterisk() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Hfs sourceExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "*" ); assertTrue( sourceExists.resourceExists( getPlatform().getFlowProcess() ) ); TupleEntryIterator iterator = sourceExists.openForRead( getPlatform().getFlowProcess() ); assertTrue( iterator.hasNext() ); iterator.close(); try { Hfs sourceNotExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "/blah/" ); iterator = sourceNotExists.openForRead( getPlatform().getFlowProcess() ); fail(); } catch( IOException exception ) { // do nothing } }
@Test public void testHfsBracketAsterisk() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Hfs sourceExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "{*}" ); assertTrue( sourceExists.resourceExists( getPlatform().getFlowProcess() ) ); TupleEntryIterator iterator = sourceExists.openForRead( getPlatform().getFlowProcess() ); assertTrue( iterator.hasNext() ); iterator.close(); try { Hfs sourceNotExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "/blah/" ); iterator = sourceNotExists.openForRead( getPlatform().getFlowProcess() ); fail(); } catch( IOException exception ) { // do nothing } }
@Test public void testHfsBracketAsterisk() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Hfs sourceExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "{*}" ); assertTrue( sourceExists.resourceExists( getPlatform().getFlowProcess() ) ); TupleEntryIterator iterator = sourceExists.openForRead( getPlatform().getFlowProcess() ); assertTrue( iterator.hasNext() ); iterator.close(); try { Hfs sourceNotExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "/blah/" ); iterator = sourceNotExists.openForRead( getPlatform().getFlowProcess() ); fail(); } catch( IOException exception ) { // do nothing } }