/**
 * Creates a segmented stream over the given input stream and immediately registers one block.
 *
 * <p>The single-argument constructor is invoked first; {@code delimiter} is then handed to
 * {@code addBlock()}.
 *
 * @param in the underlying input stream.
 * @param delimiter an array of segment delimiters describing the block to add.
 */
public SegmentedInputStream(final InputStream in, final long... delimiter)
		throws NullPointerException, IOException, IllegalStateException {
	this(in);
	addBlock(delimiter);
}
/**
 * Advances to the next block if one is available; otherwise marks this stream as closed and
 * closes the underlying input stream.
 *
 * <p>{@code ensureBlocksNotEmpty()} is always invoked first. If this stream has already been
 * marked as closed, nothing else happens.
 */
public void close() throws IOException {
	ensureBlocksNotEmpty();
	if (!closed) {
		if (hasMoreBlocks()) {
			nextBlock();
		}
		else {
			if (DEBUG) {
				LOGGER.debug("Closing the underlying input stream of this segmented input stream");
			}
			closed = true;
			in.close();
		}
	}
}
/**
 * Reads a single byte from the current segment.
 *
 * @return the byte read from the underlying stream, or -1 if {@code eofInBlock()} reports that
 * the current segment is exhausted or the underlying stream itself has no more data.
 */
@Override
public int read() throws IOException {
	ensureNotClosed();
	if (eofInBlock()) return -1;
	final int r = in.read();
	/* Advance the position only when a byte was actually consumed: the underlying stream
	 * may end before the segment does, and in that case nothing has been read. */
	if (r != -1) relativePos++;
	return r;
}
/**
 * Tells whether the current segment has been read completely.
 *
 * <p>{@code ensureBlocksNotEmpty()} and {@code ensureNotClosed()} are invoked, in this order,
 * before the comparison.
 *
 * @return true if {@code relativePos} has reached (or passed) {@code segmentLen}, false otherwise.
 */
private boolean eofInBlock() {
	ensureBlocksNotEmpty();
	ensureNotClosed();
	final boolean segmentExhausted = relativePos >= segmentLen;
	return segmentExhausted;
}
/**
 * Moves into the next segment of the current block.
 *
 * <p>After {@code ensureNotClosed()} has been invoked, the work is delegated to
 * {@code nextSegment()}.
 */
@Override
public void reset() throws IOException {
	ensureNotClosed();
	nextSegment();
}
}
public Document nextDocument() throws IOException { /* If necessary, skip to the next segment, else, try skipping to the next gzip file. */ if ( DEBUG ) LOGGER.debug( "nextDocument() has been called " ); if ( last != null ) { last.close(); if ( ! siStream.hasMoreBlocks() ) { if ( ! nextFile() ) return last = null; } else siStream.nextBlock(); } else if ( ! nextFile() ) return null; // First call return last = factory.getDocument( siStream, metadata( currentDocument++ ) ); }
private boolean nextFile() throws FileNotFoundException, IOException { if ( size() == 0 ) return false; IOUtils.closeQuietly( siStream ); if ( ! descriptorIterator.hasNext() ) return false; /* * We assume documents contained in the same gzip file are * contiguous so we collect all of them until we find a different * file index. */ TRECDocumentDescriptor currentDescriptor = firstNextDescriptor != null ? firstNextDescriptor : descriptorIterator.next(); int currentFileIndex = currentDescriptor.fileIndex; if ( DEBUG ) LOGGER.debug( "Skipping to contents file " + currentFileIndex + " (" + file[ currentFileIndex ] + ")" ); /* * We create the segmented input stream with all just collected * descriptors */ siStream = new SegmentedInputStream( openFileStream( file[ currentFileIndex ] ) ); do { siStream.addBlock( currentDescriptor.toSegments() ); if ( ! descriptorIterator.hasNext() ) break; currentDescriptor = descriptorIterator.next(); } while ( currentDescriptor.fileIndex == currentFileIndex ); firstNextDescriptor = currentDescriptor; // The last assignment will be meaningless, but it won't be used anyway return true; }
/**
 * Skips to the first segment of the next block.
 *
 * <p>The current block index is incremented and {@code nextSegment()} is invoked.
 *
 * @throws NoSuchElementException if {@code hasMoreBlocks()} returns false.
 */
public void nextBlock() throws IOException {
	if (!hasMoreBlocks()) {
		throw new NoSuchElementException();
	}
	currentBlock++;
	if (DEBUG) {
		LOGGER.debug("Moving to block # " + currentBlock);
	}
	nextSegment();
}
/**
 * Appends a new block, described by its array of segment delimiters, to this stream.
 *
 * <p>The block has length defined by the difference between the last and first delimiter.
 *
 * <p>If no block is current yet (the current block index is still -1), this method also
 * performs the initial call to {@link #nextBlock()}.
 *
 * @param delimiter a list of segment delimiters.
 * @throws IllegalArgumentException if the elements of <code>delimiter</code> are negative or not increasing.
 */
public void addBlock(final long... delimiter) throws IllegalArgumentException, IOException {
	ensureNotClosed();
	final SegmentBlock block = new SegmentBlock(delimiter);
	blocks.add(block);
	if (DEBUG) {
		LOGGER.debug("Adding a new block with delimiters " + Arrays.toString(delimiter));
	}
	final boolean noCurrentBlockYet = currentBlock == -1;
	if (noCurrentBlockYet) {
		nextBlock();
	}
}
/**
 * Closes this object: the superclass is closed first, then {@code lastStream} (if any) is
 * closed, and finally the reference to the descriptors is dropped.
 *
 * <p>Each step runs even if the previous one throws, so that a failure in the superclass
 * does not leave {@code lastStream} open.
 *
 * @throws IOException if closing the superclass or {@code lastStream} fails.
 */
public void close() throws IOException {
	try {
		super.close();
	}
	finally {
		try {
			if (lastStream != null) lastStream.close();
		}
		finally {
			descriptors = null;
		}
	}
}
/* Wraps the stream returned by openFileStream() for file[ currentFileIndex ] in a new SegmentedInputStream and stores it in siStream. */
siStream = new SegmentedInputStream( openFileStream( file[ currentFileIndex ] ) );
/**
 * Returns the length of the current segment.
 *
 * <p>{@code ensureNotClosed()} is invoked before the value is read.
 *
 * @return the current value of {@code segmentLen}.
 */
@Override
public long length() throws IOException {
	ensureNotClosed();
	final long currentSegmentLength = segmentLen;
	return currentSegmentLength;
}
public Document nextDocument() throws IOException { /* If necessary, skip to the next segment, else, try skipping to the next gzip file. */ if ( DEBUG ) LOGGER.debug( "nextDocument() has been called " ); if ( last != null ) { last.close(); if ( ! siStream.hasMoreBlocks() ) { if ( ! nextFile() ) return last = null; } else siStream.nextBlock(); } else if ( ! nextFile() ) return null; // First call return last = factory.getDocument( siStream, metadata( currentDocument++ ) ); }
private boolean nextFile() throws FileNotFoundException, IOException { if ( size() == 0 ) return false; IOUtils.closeQuietly( siStream ); if ( ! descriptorIterator.hasNext() ) return false; /* * We assume documents contained in the same gzip file are * contiguous so we collect all of them until we find a different * file index. */ TRECDocumentDescriptor currentDescriptor = firstNextDescriptor != null ? firstNextDescriptor : descriptorIterator.next(); int currentFileIndex = currentDescriptor.fileIndex; if ( DEBUG ) LOGGER.debug( "Skipping to contents file " + currentFileIndex + " (" + file[ currentFileIndex ] + ")" ); /* * We create the segmented input stream with all just collected * descriptors */ siStream = new SegmentedInputStream( openFileStream( file[ currentFileIndex ] ) ); do { siStream.addBlock( currentDescriptor.toSegments() ); if ( ! descriptorIterator.hasNext() ) break; currentDescriptor = descriptorIterator.next(); } while ( currentDescriptor.fileIndex == currentFileIndex ); firstNextDescriptor = currentDescriptor; // The last assignment will be meaningless, but it won't be used anyway return true; }
/**
 * Skips to the first segment of the next block.
 *
 * <p>The current block index is incremented and {@code nextSegment()} is invoked.
 *
 * @throws NoSuchElementException if {@code hasMoreBlocks()} returns false.
 */
public void nextBlock() throws IOException {
	if (!hasMoreBlocks()) {
		throw new NoSuchElementException();
	}
	currentBlock++;
	if (DEBUG) {
		LOGGER.debug("Moving to block # " + currentBlock);
	}
	nextSegment();
}
/**
 * Appends a new block, described by its array of segment delimiters, to this stream.
 *
 * <p>The block has length defined by the difference between the last and first delimiter.
 *
 * <p>If no block is current yet (the current block index is still -1), this method also
 * performs the initial call to {@link #nextBlock()}.
 *
 * @param delimiter a list of segment delimiters.
 * @throws IllegalArgumentException if the elements of <code>delimiter</code> are negative or not increasing.
 */
public void addBlock(final long... delimiter) throws IllegalArgumentException, IOException {
	ensureNotClosed();
	final SegmentBlock block = new SegmentBlock(delimiter);
	blocks.add(block);
	if (DEBUG) {
		LOGGER.debug("Adding a new block with delimiters " + Arrays.toString(delimiter));
	}
	final boolean noCurrentBlockYet = currentBlock == -1;
	if (noCurrentBlockYet) {
		nextBlock();
	}
}
/**
 * Closes this object: the superclass is closed first, then {@code lastStream} (if any) is
 * closed, and finally the reference to the descriptors is dropped.
 *
 * <p>Each step runs even if the previous one throws, so that a failure in the superclass
 * does not leave {@code lastStream} open.
 *
 * @throws IOException if closing the superclass or {@code lastStream} fails.
 */
public void close() throws IOException {
	try {
		super.close();
	}
	finally {
		try {
			if (lastStream != null) lastStream.close();
		}
		finally {
			descriptors = null;
		}
	}
}
/* Wraps the stream returned by openFileStream() for file[ currentFileIndex ] in a new SegmentedInputStream and stores it in siStream. */
siStream = new SegmentedInputStream( openFileStream( file[ currentFileIndex ] ) );
/**
 * Tells whether the current segment has been read completely.
 *
 * <p>{@code ensureBlocksNotEmpty()} and {@code ensureNotClosed()} are invoked, in this order,
 * before the comparison.
 *
 * @return true if {@code relativePos} has reached (or passed) {@code segmentLen}, false otherwise.
 */
private boolean eofInBlock() {
	ensureBlocksNotEmpty();
	ensureNotClosed();
	final boolean segmentExhausted = relativePos >= segmentLen;
	return segmentExhausted;
}
/**
 * Advances this stream to the next segment of the current block.
 *
 * <p>After {@code ensureNotClosed()} has been invoked, the work is delegated to
 * {@code nextSegment()}.
 */
@Override
public void reset() throws IOException {
	ensureNotClosed();
	nextSegment();
}
}