/**
 * Method deleteDirTap deletes the children of the given DirTap and then the
 * DirTap's own path.
 * <p>
 * The child identifiers are obtained from {@code dirTap.getChildIdentifiers( conf )}
 * and handed to {@code deleteChildren} together with the tap's path; afterwards the
 * path itself is removed with {@link Files#deleteIfExists}.
 *
 * @param dirTap the directory to delete
 * @param conf   the configuration used to look up the child identifiers
 * @return always {@code true} when no exception is thrown
 * @throws IOException if deleting a child or the directory path fails
 */
public static boolean deleteDirTap( DirTap dirTap, Properties conf ) throws IOException {
  String[] childIdentifiers = dirTap.getChildIdentifiers( conf );

  // children first, then the path itself
  deleteChildren( dirTap.getPath(), childIdentifiers );
  Files.deleteIfExists( dirTap.getPath() );

  return true;
}
/**
 * Deletes the resource behind this tap by delegating to
 * {@code deleteDirTap( this, conf )}.
 *
 * @param conf the configuration forwarded to {@code deleteDirTap}
 * @return the value returned by {@code deleteDirTap}
 * @throws IOException if the delegate fails while deleting
 */
@Override
public boolean deleteResource( Properties conf ) throws IOException {
  return deleteDirTap( this, conf );
}
/**
 * Builds the name of the file this tap writes to.
 * <p>
 * Starts from {@code getOutputFileBasename()}. When the flow process carries a
 * {@code PartitionTap.PART_NUM_PROPERTY} other than {@code -1}, the value is appended
 * as a dot followed by a zero-padded, five digit number. When the scheme is a
 * {@code FileFormat}, its extension is appended after another dot.
 *
 * @param flowProcess the current flow process, queried for the part number
 * @return the output file name
 */
protected String getOutputFilename( FlowProcess<? extends Properties> flowProcess ) {
  int partition = flowProcess.getIntegerProperty( PartitionTap.PART_NUM_PROPERTY, -1 );
  String filename = getOutputFileBasename();

  // -1 is the default passed above, i.e. no part number was set
  if( partition != -1 ) {
    filename = String.format( "%s.%05d", filename, partition );
  }

  Object scheme = getScheme();

  if( scheme instanceof FileFormat ) {
    return filename + "." + ( (FileFormat) scheme ).getExtension();
  }

  return filename;
}
/**
 * Creates the matcher used to select paths.
 * <p>
 * With a pattern set, the matcher comes from the file system of {@code getPath()};
 * without one, every path is accepted.
 *
 * @return a matcher for the configured pattern, or an accept-all matcher when
 * {@code getPattern()} is {@code null}
 */
protected PathMatcher getPathMatcher() {
  String pattern = getPattern();

  if( pattern != null ) {
    return getPath().getFileSystem().getPathMatcher( pattern );
  }

  // no pattern configured: accept everything
  return path -> true;
}
/**
 * Returns the identifier of the output file: the output file name resolved
 * against {@code getPath()}, rendered as a string.
 *
 * @param flowProcess the current flow process, forwarded to {@code getOutputFilename}
 * @return the resolved output path as a string
 */
@Override
protected String getOutputIdentifier( FlowProcess<? extends Properties> flowProcess ) {
  String filename = getOutputFilename( flowProcess );

  return getPath().resolve( filename ).toString();
}
/**
 * Creates a {@code DirTap} over the given file name using a {@code TextDelimited}
 * scheme built from the supplied arguments.
 *
 * @param fields    the fields handed to the scheme
 * @param hasHeader header flag handed to the scheme
 * @param delimiter the field delimiter handed to the scheme
 * @param quote     the quote string handed to the scheme
 * @param types     the field types handed to the scheme
 * @param filename  the path the tap points at
 * @param mode      the sink mode of the tap
 * @return the new tap
 */
@Override
public Tap getDelimitedFile( Fields fields, boolean hasHeader, String delimiter, String quote, Class[] types, String filename, SinkMode mode ) {
  TextDelimited scheme = new TextDelimited( fields, hasHeader, delimiter, quote, types );

  return new DirTap( scheme, filename, mode );
}
/**
 * Opens this tap for reading.
 * <p>
 * As far as the visible text shows: an {@code IllegalStateException} is thrown when a
 * pattern is set but {@code getPath()} is not a directory; a non-directory path without
 * a pattern is delegated to {@code super.openForRead}; otherwise the directory is walked
 * up to {@code maxDepth}, directories are filtered out, the remaining paths are filtered
 * through {@code getPathMatcher()}, and a {@code TupleEntrySchemeIterator} is returned.
 * <p>
 * NOTE(review): this definition looks truncated. There are no braces around the body,
 * {@code iterator} is never declared in the visible text, and {@code stream} is never
 * consumed. Confirm against the complete source before changing any code here.
 */
@Override public TupleEntryIterator openForRead( FlowProcess<? extends Properties> flowProcess, InputStream input ) throws IOException if( !Files.isDirectory( getPath() ) && getPattern() != null ) throw new IllegalStateException( "a file pattern was provided and given path is not a directory: " + getPath() ); if( !Files.isDirectory( getPath() ) ) return super.openForRead( flowProcess, input ); PathMatcher pathMatcher = getPathMatcher(); Stream<Path> stream = Files.walk( getPath(), maxDepth ) .filter( path -> !Files.isDirectory( path ) ) .filter( pathMatcher::matches ); return new TupleEntrySchemeIterator<Properties, InputStream>( flowProcess, this, getScheme(), iterator, () -> flowProcess.getFlowProcessContext().getSourcePath() );
/**
 * Validates this tap's configuration after the parent's own checks.
 * <p>
 * Rejects a negative {@code maxDepth} and makes sure the configured pattern can be
 * turned into a path matcher by calling {@code getPathMatcher()} once.
 *
 * @throws IllegalArgumentException if {@code maxDepth} is negative, or if creating the
 *                                  path matcher fails with a runtime exception (the
 *                                  original exception is kept as the cause)
 */
protected void verify() {
  super.verify();

  // zero passes this check, so the message must not claim "greater than 0"
  if( maxDepth < 0 ) {
    throw new IllegalArgumentException( "maxDepth must be greater than or equal to 0, given: " + maxDepth );
  }

  try {
    // called only for its validation side effect; the result is discarded
    getPathMatcher();
  } catch( RuntimeException exception ) {
    throw new IllegalArgumentException( "could not parse pattern: " + getPattern(), exception );
  }
}
/**
 * Deletes the children reported by the parent tap and then the parent itself.
 * <p>
 * The child identifiers are requested from {@code parent} (cast to {@code FileTap}) with
 * a depth of {@code Integer.MAX_VALUE}. If any are returned they are passed to
 * {@code DirTap.deleteChildren} together with the parent's identifier as a path. In
 * either case the method finishes by calling {@code deleteParent( conf )}.
 *
 * @param conf the configuration forwarded to the parent
 * @return the value returned by {@code deleteParent( conf )}
 * @throws IOException if looking up or deleting the children or the parent fails
 */
@Override
public boolean deleteResource( Properties conf ) throws IOException {
  String[] children = ( (FileTap) parent ).getChildIdentifiers( conf, Integer.MAX_VALUE, false );

  // with no children there is nothing to remove below the parent
  if( children.length != 0 ) {
    DirTap.deleteChildren( Paths.get( parent.getIdentifier() ), children );
  }

  return deleteParent( conf );
}
/**
 * Creates a {@code DirTap} over the given file name using a {@code TextDelimited}
 * scheme built from the supplied arguments.
 *
 * @param fields      the fields handed to the scheme
 * @param skipHeader  skip-header flag handed to the scheme
 * @param writeHeader write-header flag handed to the scheme
 * @param delimiter   the field delimiter handed to the scheme
 * @param quote       the quote string handed to the scheme
 * @param types       the field types handed to the scheme
 * @param filename    the path the tap points at
 * @param mode        the sink mode of the tap
 * @return the new tap
 */
@Override
public Tap getDelimitedFile( Fields fields, boolean skipHeader, boolean writeHeader, String delimiter, String quote, Class[] types, String filename, SinkMode mode ) {
  TextDelimited scheme = new TextDelimited( fields, skipHeader, writeHeader, delimiter, quote, types );

  return new DirTap( scheme, filename, mode );
}
/**
 * Creates a {@code DirTap} over the given file name using a {@code JSONTextLine}
 * scheme for the given fields.
 *
 * @param fields   the fields handed to the scheme
 * @param filename the path the tap points at
 * @param mode     the sink mode of the tap
 * @return the new tap
 */
@Override
public Tap getJSONFile( Fields fields, String filename, SinkMode mode ) {
  JSONTextLine scheme = new JSONTextLine( fields );

  return new DirTap( scheme, filename, mode );
}
/**
 * Creates a {@code DirTap} over the given file name using a {@code TextDelimited}
 * scheme that wraps a {@code DelimitedParser} built from the delimiter, quote and
 * field type resolver.
 *
 * @param delimiter         the field delimiter handed to the parser
 * @param quote             the quote string handed to the parser
 * @param fieldTypeResolver the resolver handed to the parser
 * @param filename          the path the tap points at
 * @param mode              the sink mode of the tap
 * @return the new tap
 */
@Override
public Tap getDelimitedFile( String delimiter, String quote, FieldTypeResolver fieldTypeResolver, String filename, SinkMode mode ) {
  DelimitedParser parser = new DelimitedParser( delimiter, quote, fieldTypeResolver );

  // NOTE(review): the leading `true` is presumably the header flag of TextDelimited — confirm
  TextDelimited scheme = new TextDelimited( true, parser );

  return new DirTap( scheme, filename, mode );
}
/**
 * Copies the text lines of every file under the input path that matches
 * {@code glob:**}{@code /*.txt} into a replaced output directory and checks the
 * number of tuples found in the sink.
 */
@Test
public void testDirTap() throws Exception {
  Tap input = new DirTap( new TextLine(), InputData.inputPath, "glob:**/*.txt" );
  DirTap output = new DirTap( new TextLine(), getOutputPath(), SinkMode.REPLACE );

  Flow copyFlow = getPlatform().getFlowConnector().connect( input, output, new Pipe( "copy" ) );

  copyFlow.complete();

  List<Tuple> results = getSinkAsList( copyFlow );

  assertEquals( 674, results.size() );
}
@Override public Fields retrieveSourceFields( FlowProcess<? extends Properties> process, Tap tap ) { if( !skipHeader || !getSourceFields().isUnknown() ) return getSourceFields(); // no need to open them all if( tap instanceof CompositeTap ) tap = (Tap) ( (CompositeTap) tap ).getChildTaps().next(); // can read either a single file, or search the directory for a file if( tap instanceof TapWith ) tap = ( (TapWith) tap ).withScheme( new TextLine( new Fields( "line" ), compressor, charsetName ) ).asTap(); else tap = new DirTap( new TextLine( new Fields( "line" ), compressor, charsetName ), tap.getIdentifier() ); setSourceFields( delimitedParser.parseFirstLine( process, tap ) ); return getSourceFields(); }
/**
 * Runs two chained copies: the first writes the matched {@code .txt} input through a
 * {@code TextLine} scheme configured with {@code Compressors.GZIP}, the second reads
 * that output back and writes it with a plain {@code TextLine} scheme. The number of
 * tuples in the final sink is then checked.
 */
@Test
public void testSchemeCompression() throws Exception {
  Tap input = new DirTap( new TextLine(), InputData.inputPath, "glob:**/*.txt" );
  DirTap gzipped = new DirTap( new TextLine( Compressors.GZIP ), getOutputPath( "compressed" ), SinkMode.REPLACE );
  DirTap plain = new DirTap( new TextLine(), getOutputPath( "uncompressed" ), SinkMode.REPLACE );

  // input -> compressed
  Flow compressFlow = getPlatform().getFlowConnector().connect( "first", input, gzipped, new Pipe( "copy" ) );
  compressFlow.complete();

  // compressed -> uncompressed
  Flow expandFlow = getPlatform().getFlowConnector().connect( "second", gzipped, plain, new Pipe( "copy" ) );
  expandFlow.complete();

  List<Tuple> results = getSinkAsList( expandFlow );

  assertEquals( 674, results.size() );
}
}