/**
 * Runs a flow into a partition tap and asserts that the temporary path
 * ({@code Hadoop18TapUtil.TEMPORARY_PATH}) under the output directory no longer exists
 * after the flow has completed.
 */
@Test
public void testTemporarySinkPathIsDeleted() throws Exception {
  getPlatform().copyFromLocal( inputFileLowerOffset );

  Fields sourceFields = new Fields( "a", "b" );
  Tap source = getPlatform().getDelimitedFile( sourceFields, " ", inputFileLowerOffset );

  Pipe pipe = new Pipe( "test" );

  String outputPath = getOutputPath( "partition-tap-sink" );

  // parent tap writes field "a"; the partition tap wraps it and partitions on field "b"
  Tap parentSink = getPlatform().getDelimitedFile( new Fields( "a" ), " ", outputPath );
  DelimitedPartition partition = new DelimitedPartition( new Fields( "b" ) );
  Tap sink = getPlatform().getPartitionTap( parentSink, partition, 1 );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );
  flow.complete();

  Path tempPath = new Path( outputPath, Hadoop18TapUtil.TEMPORARY_PATH );
  Configuration configuration = (Configuration) flow.getConfigCopy();
  FileSystem fileSystem = tempPath.getFileSystem( configuration );

  boolean tempPathExists = fileSystem.exists( tempPath );
  assertFalse( tempPathExists );
}
/**
 * Connects and completes a flow whose source uses {@code SchemeWithProperties}, passing
 * "default" and "replace" properties to the flow connector, then asserts that the sink
 * resource exists.
 */
@Test
public void testSourceConfInit() throws IOException {
  getPlatform().copyFromLocal( inputFileNums20 );

  Scheme scheme = new SchemeWithProperties( new Fields( "line" ) );
  Tap source = getPlatform().getTap( scheme, inputFileNums20, SinkMode.KEEP );

  Pipe pipe = new Pipe( "test" );

  String sinkPath = getOutputPath( "sourceconfinit" );
  Tap sink = getPlatform().getTextFile( sinkPath, SinkMode.REPLACE );

  // connector-level properties handed to the flow connector
  Properties connectorProperties = new Properties();
  connectorProperties.setProperty( "default", "connector-default" );
  connectorProperties.setProperty( "replace", "connector-replace" );

  Flow flow = getPlatform().getFlowConnector( connectorProperties ).connect( source, sink, pipe );
  flow.complete();

  boolean sinkExists = flow.resourceExists( sink );
  assertTrue( sinkExists );
}
/**
 * Logs every path recorded in {@code currentPaths}, copies each one to local when the
 * platform runs on a cluster and the remote path exists, clears the list, and finally
 * tears the platform down. The platform tear-down runs even if the copy loop throws.
 */
@After
public void tearDown() throws Exception {
  try {
    for( String currentPath : currentPaths ) {
      LOG.info( "copying to local {}", currentPath );

      if( !getPlatform().isUseCluster() )
        continue;

      if( !getPlatform().remoteExists( currentPath ) )
        continue;

      getPlatform().copyToLocal( currentPath );
    }

    currentPaths.clear();
  } finally {
    getPlatform().tearDown();
  }
}
}
/**
 * Opens the {@code .tcsv} file for the given table under {@code TEST_ROOT} as a
 * comma-delimited, double-quote-quoted tap using a {@code SQLTypeResolver}, retrieves its
 * source fields, and returns an iterator over its entries.
 *
 * @param tableName table name; the file read is {@code TEST_ROOT + tableName + ".tcsv"}
 * @return the iterator returned by {@code openForRead} on the tap
 * @throws IOException declared by the method
 */
protected TupleEntryIterator getTable( String tableName ) throws IOException {
  String tablePath = TEST_ROOT + tableName + ".tcsv";

  Tap tap = getPlatform().getDelimitedFile( ",", "\"", new SQLTypeResolver(), tablePath, SinkMode.KEEP );

  // fields are retrieved from the source before the tap is opened for reading
  tap.retrieveSourceFields( getPlatform().getFlowProcess() );

  TupleEntryIterator entries = tap.openForRead( getPlatform().getFlowProcess() );

  return entries;
}
/**
 * Combines the lower and upper text-file taps in a {@code MultiSourceTap} and validates that
 * reading the combined tap yields a length of 10.
 */
@Test
public void testMultiSourceIterator() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  Fields lowerFields = new Fields( "offset", "line" );
  Tap sourceLower = getPlatform().getTextFile( lowerFields, inputFileLower );

  Fields upperFields = new Fields( "offset", "line" );
  Tap sourceUpper = getPlatform().getTextFile( upperFields, inputFileUpper );

  Tap combined = new MultiSourceTap( sourceLower, sourceUpper );

  validateLength( combined.openForRead( getPlatform().getFlowProcess() ), 10 );
}
// Excerpt from a join test body; the declarations of `sources`, `pipes`, `fields` and `join` are not visible here.
// Returns early when the platform is MapReduce and running on a cluster. Otherwise copies the three input files
// from local, creates ("offset", "line") text-file taps over them, registers the upper tap under "upper",
// creates a REPLACE-mode "line" text sink at the "joinmixed" output path, and connects a HashJoin
// (declared with Fields.size( 6 )) from `sources` to that sink.
if( getPlatform().isMapReduce() && getPlatform().isUseCluster() ) return; getPlatform().copyFromLocal( inputFileLowerOffset ); getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLowerOffset = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLowerOffset ); Tap sourceUpper = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "joinmixed" ), SinkMode.REPLACE ); Pipe splice = new HashJoin( pipes, fields, Fields.size( 6 ), join ); Flow flow = getPlatform().getFlowConnector().connect( sources, sink, splice );
/**
 * Copies the joined input file through a pass-through pipe into a REPLACE-mode text sink at
 * the "copy" output path and validates the flow with length 5.
 */
@Test
public void testCopy() throws Exception {
  getPlatform().copyFromLocal( inputFileJoined );

  Tap source = getPlatform().getTextFile( inputFileJoined );

  String copyPath = getOutputPath( "copy" );
  Tap sink = getPlatform().getTextFile( copyPath, SinkMode.REPLACE );

  // a bare pipe with no operations attached
  Pipe passThrough = new Pipe( "test" );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, passThrough );
  flow.complete();

  validateLength( flow, 5, null );
}
/**
 * Streams the entries of the 20-number input file into a delimited sink via
 * {@code TupleEntryStream.writeEntry} and asserts that reading the resulting tap back
 * yields 20 entries.
 * <p>
 * Both entry streams are opened in try-with-resources so they are closed as soon as they
 * have been consumed, instead of being left open for the rest of the test run.
 */
@Test
public void testTupleEntryWriter() throws Exception {
  getPlatform().copyFromLocal( inputFileNums20 );

  FlowProcess flowProcess = getPlatform().getFlowProcess();

  Tap source = getPlatform().getDelimitedFile( new Fields( "num", Integer.class ), " ", inputFileNums20 );
  Tap sink = getPlatform().getDelimitedFile( new Fields( "num", Integer.class ), " ", getOutputPath() );

  Tap result;

  // the source stream is only needed while writeEntry drains it
  try( Stream<TupleEntry> stream = TupleEntryStream.entryStream( source, flowProcess ) ) {
    result = TupleEntryStream.writeEntry( stream::iterator, sink, flowProcess );
  }

  long writtenCount;

  // re-open the written tap and count what was actually persisted
  try( Stream<TupleEntry> written = TupleEntryStream.entryStream( result, flowProcess ) ) {
    writtenCount = written.count();
  }

  assertEquals( 20, writtenCount );
}
/**
 * Runs one grouping/sorting combination over the cross-nulls input and validates the sink.
 * <p>
 * The input lines are split on whitespace into "num", "lower" and "upper", "num" is coerced
 * to {@code Long}, and the stream is grouped on "num" with an optional secondary sort on
 * "upper". The output is written as a space-delimited file under
 * {@code comprehensive/<test>} with a single sink part.
 *
 * @param testCase three flags: {@code [0]} true installs a comparator on the grouping
 *                 field; {@code [1]} null disables the secondary sort, true additionally
 *                 installs a comparator on the sort field; {@code [2]} is passed as the
 *                 last argument of {@code GroupBy}
 * @param useCollectionsComparator when true the comparator is a
 *                 {@code NullSafeReverseComparator}, otherwise the platform's
 *                 Long/String comparator obtained with argument {@code true}
 * @throws IOException declared by the method
 */
private void runComprehensiveCase( Boolean[] testCase, boolean useCollectionsComparator ) throws IOException {
  getPlatform().copyFromLocal( inputFileCrossNulls );

  String test = Util.join( testCase, "_", true ) + "_" + useCollectionsComparator;
  String path = "comprehensive/" + test;

  Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileCrossNulls );
  Tap sink = getPlatform().getDelimitedFile(
    new Fields( "num", "lower", "upper" ).applyTypes( Long.class, String.class, String.class ),
    " ", getOutputPath( path ), SinkMode.REPLACE );

  sink.getScheme().setNumSinkParts( 1 );

  Pipe pipe = new Pipe( "comprehensivesort" );
  pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ), "\\s" ) );
  pipe = new Each( pipe, new Fields( "num" ), new Identity( Long.class ), Fields.REPLACE );

  Fields groupFields = new Fields( "num" );

  if( testCase[ 0 ] )
    groupFields.setComparator( "num", useCollectionsComparator ? new NullSafeReverseComparator() : getPlatform().getLongComparator( true ) );

  // a null second flag means "no secondary sort at all"
  Fields sortFields = null;

  if( testCase[ 1 ] != null ) {
    sortFields = new Fields( "upper" );

    if( testCase[ 1 ] )
      sortFields.setComparator( "upper", useCollectionsComparator ? new NullSafeReverseComparator() : getPlatform().getStringComparator( true ) );
  }

  pipe = new GroupBy( pipe, groupFields, sortFields, testCase[ 2 ] );

  Map<Object, Object> properties = getProperties();

  if( getPlatform().isMapReduce() && getPlatform().getNumMapTasks( properties ) != null )
    getPlatform().setNumMapTasks( properties, 13 );

  // hand the adjusted properties to the connector so the map-task setting above is actually applied
  Flow flow = getPlatform().getFlowConnector( properties ).connect( source, sink, pipe );

  flow.complete();

  validateCase( test, testCase, sink );
}
@Test public void testTrapTapSourceSink() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Scheme scheme = getPlatform().getTestFailScheme(); Tap source = getPlatform().getTap( scheme, inputFileApache, SinkMode.KEEP ); Pipe pipe = new Pipe( "map" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new GroupBy( pipe, new Fields( "ip" ) ); pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) ); Tap sink = getPlatform().getTap( scheme, getOutputPath( "trapsourcesink/sink" ), SinkMode.REPLACE ); Tap trap = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "trapsourcesink/trap" ), SinkMode.REPLACE ); Map<Object, Object> properties = getProperties(); // compensate for running in cluster mode getPlatform().setNumMapTasks( properties, 1 ); getPlatform().setNumReduceTasks( properties, 1 ); getPlatform().setNumGatherPartitionTasks( properties, 1 ); Flow flow = getPlatform().getFlowConnector( properties ).connect( "trap test", source, sink, trap, pipe ); flow.complete(); validateLength( flow.openTapForRead( getPlatform().getTextFile( sink.getIdentifier() ) ), 7 ); validateLength( flow.openTrap(), 2, Pattern.compile( "bad data" ) ); // confirm the payload is written }
/**
 * Excerpted helper: the method braces and the declarations of {@code groupApache},
 * {@code groupIP}, {@code properties}, {@code sources} and {@code pipe} are not visible here.
 * The visible statements copy {@code inputFileApache200} and {@code inputFileIps} from local,
 * create text-file taps over them and a REPLACE-mode text sink, set Long comparators on the
 * "octet" and "rawoctet" fields, set 13 map tasks when the platform is MapReduce and a
 * map-task count is already present in {@code properties}, and connect the flow.
 * <p>
 * NOTE(review): {@code properties} is adjusted but the visible connect call uses the no-arg
 * {@code getFlowConnector()} -- confirm the map-task setting is actually picked up.
 *
 * @param path        output sub-path passed to {@code getOutputPath} for the sink
 * @param reverseSort passed to {@code getPlatform().getLongComparator} for both comparators
 */
private void runCoGroupComparatorTest( String path, boolean reverseSort ) throws IOException, ParseException getPlatform().copyFromLocal( inputFileApache200 ); getPlatform().copyFromLocal( inputFileIps ); Tap sourceApache = getPlatform().getTextFile( inputFileApache200 ); Tap sourceIP = getPlatform().getTextFile( inputFileIps ); Tap sink = getPlatform().getTextFile( getOutputPath( path ), SinkMode.REPLACE ); groupApache.setComparator( "octet", getPlatform().getLongComparator( reverseSort ) ); groupIP.setComparator( "rawoctet", getPlatform().getLongComparator( reverseSort ) ); if( getPlatform().isMapReduce() && getPlatform().getNumMapTasks( properties ) != null ) getPlatform().setNumMapTasks( properties, 13 ); sources.put( "ip", sourceIP ); Flow flow = getPlatform().getFlowConnector().connect( sources, sink, pipe );
/**
 * Verifies that a flow built from a {@code TestAssembly} followed by a {@code GroupBy} on
 * "ip" can be planned without an internal error and results in exactly one flow step.
 */
@Test
public void testPipeAssembly() {
  Pipe assembly = new TestAssembly( "test" );
  Pipe grouped = new GroupBy( assembly, new Fields( "ip" ) );

  Tap source = getPlatform().getTextFile( "foo" );
  Tap sink = getPlatform().getTextFile( "foo/split1", SinkMode.REPLACE );

  List<FlowStep> steps = getPlatform().getFlowConnector().connect( source, sink, grouped ).getFlowSteps();

  int stepCount = steps.size();
  assertEquals( "not equal: steps.size()", 1, stepCount );
}
/**
 * Excerpted helper: the method braces and the declarations of {@code properties},
 * {@code sources}, {@code sink} and {@code splice} are not visible here. The visible
 * statements copy the lower and upper input files from local, set the map, reduce and
 * gather-partition task counts to 1 on {@code properties}, and connect the flow with those
 * properties.
 * <p>
 * NOTE(review): none of the four boolean parameters is used in the visible statements; their
 * effect is in the elided part of the method -- confirm against the full source.
 */
private void invokeRawAsKeyValue( boolean useDefaultComparator, boolean secondarySortOnValue, boolean ignoreSerializationToken, boolean compositeGrouping ) throws IOException getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); getPlatform().setNumMapTasks( properties, 1 ); getPlatform().setNumReduceTasks( properties, 1 ); getPlatform().setNumGatherPartitionTasks( properties, 1 ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice );
/**
 * Builds, without running it, the "first" flow: reads {@code inputFileIps} as a text file,
 * passes the "line" field through {@code Identity} declared as "ip", and writes the "ip"
 * field to a tab-delimited, REPLACE-mode sink at {@code <path>/first}.
 *
 * @param path output sub-path; "/first" is appended before it is passed to {@code getOutputPath}
 * @return the connected flow
 */
private Flow firstFlow( String path ) {
  Tap source = getPlatform().getTextFile( inputFileIps );

  Pipe head = new Pipe( "first" );
  Identity renameToIp = new Identity( new Fields( "ip" ) );
  Pipe pipe = new Each( head, new Fields( "line" ), renameToIp, new Fields( "ip" ) );

  String sinkPath = getOutputPath( path + "/first" );
  Tap sink = getPlatform().getTabDelimitedFile( new Fields( "ip" ), sinkPath, SinkMode.REPLACE );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

  return flow;
}
// Excerpt from a test body; the declarations of `sources`, `sink` and `splice` are not visible here.
// Returns early unless the platform is running on a cluster, then copies the lower and upper input files
// from local and connects the flow using the properties returned by getProperties().
if( !getPlatform().isUseCluster() ) return; getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); final Flow flow = getPlatform().getFlowConnector( getProperties() ).connect( sources, sink, splice );
/**
 * Excerpted test: the method braces and the declarations of {@code tap} and {@code jobConf}
 * are not visible here. The visible statements return early unless the platform is running
 * on a cluster, copy the lower and upper input files from local, delete the tap's resource,
 * assert that {@code getChildIdentifiers( jobConf, 0, false )} has size 0 (message "missing"),
 * and then create the resource again.
 */
@Test public void testChildIdentifiers() throws Exception if( !getPlatform().isUseCluster() ) return; getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); tap.deleteResource( getPlatform().getFlowProcess() ); assertEqualsSize( "missing", 0, tap.getChildIdentifiers( jobConf, 0, false ) ); tap.createResource( getPlatform().getFlowProcess() );
/**
 * Reads the delimited test file with five declared fields and writes it through a bare pipe
 * to a {@code Fields.ALL} delimited sink at the "headerall" output path, then validates the
 * flow with length 11 and size 5.
 */
@Test
public void testHeaderAll() throws IOException {
  Fields declared = new Fields( "first", "second", "third", "fourth", "fifth" );

  Tap input = getPlatform().getDelimitedFile( declared, true, true, ",", "\"", null, testDelimited, SinkMode.KEEP );

  String headerAllPath = getOutputPath( "headerall" );
  Tap output = getPlatform().getDelimitedFile( Fields.ALL, true, true, ",", "\"", null, headerAllPath, SinkMode.REPLACE );

  Pipe passThrough = new Pipe( "pipe" );

  Flow flow = getPlatform().getFlowConnector().connect( input, output, passThrough );
  flow.complete();

  validateLength( flow, 11, 5 );
}
// Tail of an anonymous supplier passed as an argument (the enclosing call and the declaration of `outputPath`
// are not visible here). supply() builds a "+"-delimited file tap for field "upper" at outputPath in the given
// sink mode and wraps it in a partition tap using a DelimitedPartition over fields "lower" and "number".
// The enclosing call receives a FlowSkipIfSinkExists instance as its following argument.
@Override public Tap supply( SinkMode mode ) { Tap partitionTap = getPlatform().getDelimitedFile( new Fields( "upper" ), "+", outputPath, mode ); Partition partition = new DelimitedPartition( new Fields( "lower", "number" ) ); partitionTap = getPlatform().getPartitionTap( partitionTap, partition, 1 ); return partitionTap; } }, new FlowSkipIfSinkExists() );
/**
 * When running on a cluster, plans a simple Hfs-to-Hfs flow and asserts that it has a single
 * step whose initialized configuration is not reported as local by {@code HadoopUtil.isLocal}.
 * Does nothing when the platform is not using a cluster.
 */
@Test
public void testNotLocalMode() throws Exception {
  if( !getPlatform().isUseCluster() )
    return;

  Tap source = new Hfs( new TextLine(), "input/path" );
  Tap sink = new Hfs( new TextLine(), "output/path", SinkMode.REPLACE );

  Pipe pipe = new Pipe( "test" );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

  List<FlowStep> steps = flow.getFlowSteps();
  assertEquals( "wrong size", 1, steps.size() );

  // initialize the single step's configuration on top of the platform's configuration
  BaseFlowStep onlyStep = (BaseFlowStep) steps.get( 0 );
  BaseHadoopPlatform hadoopPlatform = (BaseHadoopPlatform) getPlatform();
  Configuration stepConfig = (Configuration) onlyStep.createInitializedConfig( flow.getFlowProcess(), hadoopPlatform.getConfiguration() );

  assertFalse( "is local", HadoopUtil.isLocal( stepConfig ) );
}
/**
 * Reads the lower and upper input files through {@code GlobHfs} taps, first as two separate
 * globs combined in a {@code MultiSourceTap} and then as one combined glob, validating a
 * length of 10 in both cases.
 */
@Test
public void testMultiSourceIterator() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  // two separate glob taps combined into one multi-source tap
  String firstGlob = InputData.inputPath + "?{ppe[_r]}.txt";
  String secondGlob = InputData.inputPath + "?{owe?}.txt";

  GlobHfs source1 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), firstGlob );
  GlobHfs source2 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), secondGlob );

  MultiSourceTap separateGlobs = new MultiSourceTap( source1, source2 );

  validateLength( separateGlobs.openForRead( getPlatform().getFlowProcess() ), 10 );

  // a single glob tap whose pattern lists both alternatives
  String combinedGlob = InputData.inputPath + "?{ppe[_r],owe?}.txt";
  GlobHfs sourceMulti = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), combinedGlob );

  MultiSourceTap singleGlob = new MultiSourceTap( sourceMulti );

  validateLength( singleGlob.openForRead( getPlatform().getFlowProcess() ), 10, null );
}