/**
 * Factory hook creating a new {@link FileTap} bound to the given scheme, path, and sink mode.
 *
 * @param scheme     the scheme the tap will read/write with
 * @param identifier the file path backing the tap
 * @param mode       the sink mode to apply
 * @return a newly constructed FileTap
 */
@Override
protected Tap newTap( Scheme scheme, String identifier, SinkMode mode )
  {
  return new FileTap( scheme, identifier, mode );
  }
/**
 * Returns a new tap addressing the given child identifier.
 * <p>
 * If the identifier is not already beneath this tap's path, it is resolved
 * relative to this tap's path before the child tap is created.
 *
 * @param identifier the child path, absolute or relative to this tap
 * @return a new tap sharing this tap's scheme and sink mode
 */
@Override
public TapWith<Properties, InputStream, OutputStream> withChildIdentifier( String identifier )
  {
  Path child = Paths.get( identifier );

  if( !child.startsWith( getPath() ) )
    child = getPath().resolve( child );

  return create( getScheme(), child, getSinkMode() );
  }
/**
 * Returns the immediate (depth 1), non-qualified child identifiers of this tap.
 *
 * @param conf the configuration to consult
 * @return the child identifiers, relative to this tap
 * @throws IOException if the children cannot be listed
 */
@Override
public String[] getChildIdentifiers( Properties conf ) throws IOException
  {
  // defaults: only direct children, identifiers left unqualified
  return getChildIdentifiers( conf, 1, false );
  }
/**
 * Returns a new tap identical to this one but using the given scheme.
 *
 * @param scheme the scheme the new tap should use
 * @return a new tap with this tap's path and sink mode
 */
@Override
public TapWith<Properties, InputStream, OutputStream> withScheme( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme )
  {
  return create( scheme, getPath(), getSinkMode() );
  }
@Override public TupleEntryCollector openForWrite( FlowProcess<? extends Properties> flowProcess, OutputStream output ) throws IOException { if( output == null ) output = new TapFileOutputStream( getOutputIdentifier( flowProcess ), isUpdate() ); // append if we are in update mode return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, this, getScheme(), output, getIdentifier() ); }
/**
 * Opens this tap for reading, wrapping the given stream in a scheme-aware iterator.
 * <p>
 * When {@code input} is null a {@link FileInputStream} over this tap's
 * identifier is opened. The flow process context is updated with the fully
 * qualified source path before the iterator is returned.
 *
 * @param flowProcess the current flow process
 * @param input       an optional pre-opened input stream, may be null
 * @return an iterator reading through this tap's scheme
 * @throws IOException if the input stream cannot be opened
 */
@Override
public TupleEntryIterator openForRead( FlowProcess<? extends Properties> flowProcess, InputStream input ) throws IOException
  {
  if( input == null )
    input = new FileInputStream( getIdentifier() );

  flowProcess.getFlowProcessContext().setSourcePath( getFullIdentifier( flowProcess ) );

  return new TupleEntrySchemeIterator<Properties, InputStream>( flowProcess, this, getScheme(), input, getIdentifier() );
  }
/**
 * Returns the identifier to write to; only used by
 * {@link #openForWrite(FlowProcess, OutputStream)} calls.
 *
 * @param flowProcess the current flow process
 * @return this tap's identifier
 */
protected String getOutputIdentifier( FlowProcess<? extends Properties> flowProcess )
  {
  return getIdentifier();
  }
/**
 * Recursively collects paths beneath the given identifier into {@code results},
 * descending at most {@code depth} levels.
 *
 * @param results    the set receiving discovered paths
 * @param identifier the path to inspect
 * @param depth      remaining levels to descend; 0 stops recursion
 * @return true if at least one path was added
 */
private boolean getChildPaths( Set<String> results, String identifier, int depth )
  {
  File current = new File( identifier );

  // stop at the depth limit or on a regular file and record the path
  if( depth == 0 || current.isFile() )
    {
    results.add( identifier );
    return true;
    }

  String[] children = current.list();

  if( children == null ) // not a directory, or the listing failed
    return false;

  boolean found = false;

  for( String child : children )
    found = getChildPaths( results, new File( current, child ).getPath(), depth - 1 ) || found;

  return found;
  }
}
/**
 * Returns the size in bytes of the file backing this tap.
 *
 * @param conf the configuration to consult
 * @return the file length, or 0 when the identifier is a directory
 * @throws IOException never thrown by this implementation
 */
@Override
public long getSize( Properties conf ) throws IOException
  {
  File file = new File( getIdentifier() );

  // directories report no meaningful size
  return file.isDirectory() ? 0 : file.length();
  }
/**
 * Creates a {@link FileTap} for the given scheme, filename, and sink mode.
 *
 * @param scheme   the scheme the tap will read/write with
 * @param filename the file path backing the tap
 * @param mode     the sink mode to apply
 * @return a newly constructed FileTap
 */
@Override
public Tap getTap( Scheme scheme, String filename, SinkMode mode )
  {
  return new FileTap( scheme, filename, mode );
  }
/**
 * Returns this tap's child identifiers using the configuration held by the
 * given flow process.
 *
 * @param flowProcess the flow process supplying the configuration
 * @return the child identifiers
 * @throws IOException if the children cannot be listed
 */
@Override
public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess ) throws IOException
  {
  return getChildIdentifiers( flowProcess.getConfig() );
  }
@Override public boolean createResource( Properties conf ) throws IOException { File parentFile = new File( getIdentifier() ).getParentFile(); // parent dir return parentFile.exists() || parentFile.mkdirs(); }
/**
 * Returns a {@link FileType} view over the given identifier.
 *
 * @param identifier the path to wrap
 * @return a FileTap over the identifier using a TextLine scheme in KEEP mode
 */
@Override
public FileType getFileTypeFor( String identifier )
  {
  return new FileTap( new TextLine(), identifier, SinkMode.KEEP );
  }
/**
 * Returns this tap's child identifiers to the given depth using the
 * configuration held by the given flow process.
 *
 * @param flowProcess    the flow process supplying the configuration
 * @param depth          the maximum directory depth to descend
 * @param fullyQualified whether identifiers should be fully qualified
 * @return the child identifiers
 * @throws IOException if the children cannot be listed
 */
@Override
public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess, int depth, boolean fullyQualified ) throws IOException
  {
  return getChildIdentifiers( flowProcess.getConfig(), depth, fullyQualified );
  }
/**
 * Deletes the resource backing this tap, first removing any child resources
 * beneath the wrapped parent tap.
 *
 * @param conf the configuration to consult
 * @return true if the parent resource was deleted
 * @throws IOException if listing or deleting children fails
 */
@Override
public boolean deleteResource( Properties conf ) throws IOException
  {
  FileTap fileTap = (FileTap) parent;
  String[] childIdentifiers = fileTap.getChildIdentifiers( conf, Integer.MAX_VALUE, false );

  // remove all descendants first, then the parent resource itself
  if( childIdentifiers.length != 0 )
    DirTap.deleteChildren( Paths.get( parent.getIdentifier() ), childIdentifiers );

  return deleteParent( conf );
  }
/**
 * Verifies a round trip: a local file is written into HDFS through the
 * adaptor, then read back out of HDFS into a local file, validating the
 * record count after each leg.
 */
@Test
public void testWriteReadHDFS() throws Exception
  {
  copyFromLocal( inputFileApache );

  Tap source = new FileTap( new cascading.scheme.local.TextLine( new Fields( "offset", "line" ) ), inputFileApache );
  Tap intermediate = new LocalHfsAdaptor( new Hfs( new cascading.scheme.hadoop.TextLine(), getOutputPath( "/intermediate" ), SinkMode.REPLACE ) );
  Tap sink = new FileTap( new cascading.scheme.local.TextLine(), getOutputPath( "/final" ), SinkMode.REPLACE );

  Pipe pipe = new Pipe( "test" );

  // leg 1: local source -> HDFS intermediate
  Flow firstFlow = new LocalFlowConnector( getPlatform().getProperties() ).connect( source, intermediate, pipe );
  firstFlow.complete();
  validateLength( firstFlow, 10 );

  // leg 2: HDFS intermediate -> local sink
  Flow secondFlow = new LocalFlowConnector( getPlatform().getProperties() ).connect( intermediate, sink, pipe );
  secondFlow.complete();
  validateLength( secondFlow, 10 );
  }
/**
 * Verifies a partitioned round trip: a local delimited file is written into a
 * partitioned HDFS tap (partitioned on "num") through the adaptor, then read
 * back into a local file, validating the record count after each leg.
 */
@Test
public void testPartitionedWriteReadHDFS() throws Exception
  {
  copyFromLocal( inputFileLhs );

  Tap source = new FileTap( new cascading.scheme.local.TextDelimited( new Fields( "num", "char" ), " " ), inputFileLhs );

  Hfs original = new Hfs( new TextDelimited( new Fields( "num", "char" ), " " ), getOutputPath( "/intermediate" ), SinkMode.REPLACE );
  Tap intermediate = new LocalHfsAdaptor( new PartitionTap( original, new DelimitedPartition( new Fields( "num" ), "/" ) ) );

  Tap sink = new FileTap( new cascading.scheme.local.TextDelimited( new Fields( "num", "char" ), " " ), getOutputPath( "/final" ), SinkMode.REPLACE );

  Pipe pipe = new Pipe( "test" );

  // leg 1: local source -> partitioned HDFS intermediate
  Flow firstFlow = new LocalFlowConnector( getPlatform().getProperties() ).connect( source, intermediate, pipe );
  firstFlow.complete();
  validateLength( firstFlow, 13 );

  // leg 2: partitioned HDFS intermediate -> local sink
  Flow secondFlow = new LocalFlowConnector( getPlatform().getProperties() ).connect( intermediate, sink, pipe );
  secondFlow.complete();
  validateLength( secondFlow, 13 );
  }
}