/**
 * Creates a segmented stream over the given input stream and registers a single block,
 * described by the given delimiters.
 *
 * <p>This constructor first delegates to the one-argument constructor and then passes
 * {@code delimiter} unchanged to {@code addBlock}.
 *
 * @param in the underlying input stream.
 * @param delimiter an array of segment delimiters.
 * @throws NullPointerException declared by this constructor; the triggering condition is not
 *     visible from this definition (presumably a {@code null} argument).
 * @throws IOException declared by this constructor; presumably raised while accessing the
 *     underlying stream.
 * @throws IllegalStateException declared by this constructor; the triggering condition is not
 *     visible from this definition.
 */
public SegmentedInputStream( final InputStream in, final long... delimiter )
		throws NullPointerException, IOException, IllegalStateException {
	this( in );
	// The delimiters are handed over as they are: they make up the one and only initial block.
	this.addBlock( delimiter );
}
/**
 * Creates a segmented stream over the given input stream and registers a single block,
 * described by the given delimiters.
 *
 * <p>This constructor first delegates to the one-argument constructor and then passes
 * {@code delimiter} unchanged to {@code addBlock}.
 *
 * @param in the underlying input stream.
 * @param delimiter an array of segment delimiters.
 * @throws NullPointerException declared by this constructor; the triggering condition is not
 *     visible from this definition (presumably a {@code null} argument).
 * @throws IOException declared by this constructor; presumably raised while accessing the
 *     underlying stream.
 * @throws IllegalStateException declared by this constructor; the triggering condition is not
 *     visible from this definition.
 */
public SegmentedInputStream( final InputStream in, final long... delimiter )
		throws NullPointerException, IOException, IllegalStateException {
	this( in );
	// The delimiters are handed over as they are: they make up the one and only initial block.
	this.addBlock( delimiter );
}
/**
 * Creates a segmented stream over the given input stream and registers a single block,
 * described by the given delimiters.
 *
 * <p>This constructor first delegates to the one-argument constructor and then passes
 * {@code delimiter} unchanged to {@code addBlock}.
 *
 * @param in the underlying input stream.
 * @param delimiter an array of segment delimiters.
 * @throws NullPointerException declared by this constructor; the triggering condition is not
 *     visible from this definition (presumably a {@code null} argument).
 * @throws IOException declared by this constructor; presumably raised while accessing the
 *     underlying stream.
 * @throws IllegalStateException declared by this constructor; the triggering condition is not
 *     visible from this definition.
 */
public SegmentedInputStream( final InputStream in, final long... delimiter )
		throws NullPointerException, IOException, IllegalStateException {
	this( in );
	// The delimiters are handed over as they are: they make up the one and only initial block.
	this.addBlock( delimiter );
}
private boolean nextFile() throws FileNotFoundException, IOException { if ( size() == 0 ) return false; IOUtils.closeQuietly( siStream ); if ( ! descriptorIterator.hasNext() ) return false; /* * We assume documents contained in the same gzip file are * contiguous so we collect all of them until we find a different * file index. */ TRECDocumentDescriptor currentDescriptor = firstNextDescriptor != null ? firstNextDescriptor : descriptorIterator.next(); int currentFileIndex = currentDescriptor.fileIndex; if ( DEBUG ) LOGGER.debug( "Skipping to contents file " + currentFileIndex + " (" + file[ currentFileIndex ] + ")" ); /* * We create the segmented input stream with all just collected * descriptors */ siStream = new SegmentedInputStream( openFileStream( file[ currentFileIndex ] ) ); do { siStream.addBlock( currentDescriptor.toSegments() ); if ( ! descriptorIterator.hasNext() ) break; currentDescriptor = descriptorIterator.next(); } while ( currentDescriptor.fileIndex == currentFileIndex ); firstNextDescriptor = currentDescriptor; // The last assignment will be meaningless, but it won't be used anyway return true; }
private boolean nextFile() throws FileNotFoundException, IOException { if ( size() == 0 ) return false; IOUtils.closeQuietly( siStream ); if ( ! descriptorIterator.hasNext() ) return false; /* * We assume documents contained in the same gzip file are * contiguous so we collect all of them until we find a different * file index. */ TRECDocumentDescriptor currentDescriptor = firstNextDescriptor != null ? firstNextDescriptor : descriptorIterator.next(); int currentFileIndex = currentDescriptor.fileIndex; if ( DEBUG ) LOGGER.debug( "Skipping to contents file " + currentFileIndex + " (" + file[ currentFileIndex ] + ")" ); /* * We create the segmented input stream with all just collected * descriptors */ siStream = new SegmentedInputStream( openFileStream( file[ currentFileIndex ] ) ); do { siStream.addBlock( currentDescriptor.toSegments() ); if ( ! descriptorIterator.hasNext() ) break; currentDescriptor = descriptorIterator.next(); } while ( currentDescriptor.fileIndex == currentFileIndex ); firstNextDescriptor = currentDescriptor; // The last assignment will be meaningless, but it won't be used anyway return true; }
private boolean nextFile() throws FileNotFoundException, IOException { if ( size() == 0 ) return false; IOUtils.closeQuietly( siStream ); if ( ! descriptorIterator.hasNext() ) return false; /* * We assume documents contained in the same gzip file are * contiguous so we collect all of them until we find a different * file index. */ TRECDocumentDescriptor currentDescriptor = firstNextDescriptor != null ? firstNextDescriptor : descriptorIterator.next(); int currentFileIndex = currentDescriptor.fileIndex; if ( DEBUG ) LOGGER.debug( "Skipping to contents file " + currentFileIndex + " (" + file[ currentFileIndex ] + ")" ); /* * We create the segmented input stream with all just collected * descriptors */ siStream = new SegmentedInputStream( openFileStream( file[ currentFileIndex ] ) ); do { siStream.addBlock( currentDescriptor.toSegments() ); if ( ! descriptorIterator.hasNext() ) break; currentDescriptor = descriptorIterator.next(); } while ( currentDescriptor.fileIndex == currentFileIndex ); firstNextDescriptor = currentDescriptor; // The last assignment will be meaningless, but it won't be used anyway return true; }
private boolean nextFile() throws FileNotFoundException, IOException { if ( size() == 0 ) return false; IOUtils.closeQuietly( siStream ); if ( ! descriptorIterator.hasNext() ) return false; /* * We assume documents contained in the same gzip file are * contiguous so we collect all of them until we find a different * file index. */ TRECDocumentDescriptor currentDescriptor = firstNextDescriptor != null ? firstNextDescriptor : descriptorIterator.next(); int currentFileIndex = currentDescriptor.fileIndex; if ( DEBUG ) LOGGER.debug( "Skipping to contents file " + currentFileIndex + " (" + file[ currentFileIndex ] + ")" ); /* * We create the segmented input stream with all just collected * descriptors */ siStream = new SegmentedInputStream( openFileStream( file[ currentFileIndex ] ) ); do { siStream.addBlock( currentDescriptor.toSegments() ); if ( ! descriptorIterator.hasNext() ) break; currentDescriptor = descriptorIterator.next(); } while ( currentDescriptor.fileIndex == currentFileIndex ); firstNextDescriptor = currentDescriptor; // The last assignment will be meaningless, but it won't be used anyway return true; }