/**
 * Counts one written line and checks whether a timestamp entry is due.
 *
 * @throws IOException propagated from considerTimestamp()
 */
protected void noteLine() throws IOException {
    lines = lines + 1;
    considerTimestamp();
}
/**
 * Creates a new crawler journal backed by the given file.
 *
 * @param file path at which to make the journal
 * @throws IOException if the journal output stream cannot be opened
 */
public CrawlerJournal(File file) throws IOException {
    gzipFile = file;
    out = initialize(gzipFile);
}
/**
 * Skips over any property information that may precede a piece table.
 * These property structures contain stylesheet information that applies
 * to the piece table. Since we're only interested in the text itself,
 * we just ignore this property stuff. (I suppose a third buffered
 * stream could be used to add style information to {@link Piece}, but
 * we don't need it.)
 *
 * @param input the input stream containing the piece table
 * @throws IOException if an IO error occurs, or if the stream ends
 *         before the property block is fully skipped
 * @throws IllegalStateException if the piece-table marker (tag 2) does
 *         not follow the property blocks
 */
private static void skipProperties(SeekInputStream input) throws IOException {
    int tag = input.read();
    while (tag == 1) {
        int size = Endian.littleChar(input);
        while (size > 0) {
            long skipped = input.skip(size);
            if (skipped <= 0) {
                // skip() may legally return 0 even before EOF; fall back
                // on read() so a truncated stream raises an error instead
                // of looping forever.
                if (input.read() == -1) {
                    throw new IOException(
                            "Unexpected end of stream while skipping property block");
                }
                size--;
            } else {
                size -= skipped;
            }
        }
        tag = input.read();
    }
    if (tag != 2) {
        throw new IllegalStateException(
                "Expected piece table marker (tag 2) but found tag " + tag);
    }
}
/**
 * Wraps the raw table stream for use as the charPos or filePos stream.
 * The result is "safe" (its position() never clobbers the other wrapped
 * stream's), buffered (up to {@code cache} bytes are read before the
 * disk is touched again), and "origined" (position(0) addresses the
 * start of the piece table array rather than the start of the file).
 *
 * @param input the stream to wrap
 * @param pos the origin for the returned stream
 * @param cache the number of bytes for the returned stream to buffer
 * @return the wrapped stream
 * @throws IOException if an IO error occurs
 */
private SeekInputStream wrap(SeekInputStream input, long pos, int cache)
        throws IOException {
    input.position(pos);
    // Layer the decorators inside-out: safety, then origin shift, then buffering.
    return new BufferedSeekInputStream(
            new OriginSeekInputStream(new SafeSeekInputStream(input), pos),
            cache);
}
/** * Get size of data recorded (transferred) * * @return recorded data size */ public long getRecordedSize() { return (getRecorder() != null) ? getRecorder() .getRecordedInput().getSize() // if unavailable fall back on content-size : getContentSize(); }
/**
 * Closes the underlying document stream and drops the reference to the
 * piece table so it can be garbage collected.
 *
 * @throws IOException if closing the document stream fails
 */
public void close() throws IOException {
    doc.close();
    table = null;
}
/**
 * Reads one byte from the underlying raw stream, updating the logical
 * position and the per-block bookkeeping counters.
 *
 * @return the byte read, or -1 when ensureBuffer() reports no more data
 * @throws IOException if reading the raw stream fails
 */
public int read() throws IOException {
    if (!ensureBuffer()) {
        return -1;
    }
    int b = raw.read();
    // Advance logical position plus raw-stream/block bookkeeping in step.
    position++;
    expectedRawPosition++;
    blockBytesRead++;
    return b;
}
/**
 * Returns the text of the given .doc file.
 *
 * <p>NOTE(review): the RandomAccessFile opened here stays open for the
 * returned reader — presumably released when the reader is closed;
 * confirm against the stream-based getText overload.
 *
 * @param doc the .doc file whose text to return
 * @return the text of that file
 * @throws IOException if an IO error occurs
 */
public static SeekReader getText(File doc) throws IOException {
    return getText(new RandomAccessInputStream(new RandomAccessFile(doc, "r")));
}
/**
 * Obtains a reader over this source's text by delegating to the
 * underlying text source.
 *
 * @return a reader supplied by the wrapped text source
 */
public Reader obtainReader() {
    return textSource.obtainReader();
}
}
public void stop() { if(!isRunning()) { return; } // XXX happens at finish; move to teardown? log.close(); log = null; }
/**
 * Notes a serious error via a special log line.
 *
 * @param err description of the error to record
 */
public synchronized void seriousError(String err) {
    String line = LOG_ERROR + ArchiveUtils.getLog14Date() + " " + err + "\n";
    writeLine(line);
}
/**
 * Returns this class's source-control revision.
 *
 * <p>"$Revision$" is an SVN-style keyword presumably expanded in place
 * by version control; parseRevision extracts the usable portion.
 * NOTE(review): assumes keyword expansion is enabled for this file —
 * confirm, otherwise the literal is passed through unexpanded.
 *
 * @return the parsed revision string
 */
private static String getRevision() { return Warc2Arc.parseRevision("$Revision$"); }
/**
 * Participates in a checkpoint by rotating the underlying log file.
 *
 * @param checkpointInProgress the checkpoint being taken
 * @throws IOException if rotating the log fails
 */
public void doCheckpoint(Checkpoint checkpointInProgress) throws IOException {
    // rotate log
    log.rotateForCheckpoint(checkpointInProgress);
}
/**
 * Sets this editor's value from the given text by delegating directly
 * to setValue — presumably implementing the java.beans.PropertyEditor
 * contract; confirm against the enclosing class.
 *
 * @param text textual form of the value
 * @throws IllegalArgumentException declared per the PropertyEditor
 *         contract; NOTE(review): not seen thrown here — verify whether
 *         setValue can raise it
 */
public void setAsText(String text) throws IllegalArgumentException {
    setValue(text);
}
/**
 * Returns the shared serial-number counter held by this pool.
 *
 * @return the writer pool's serial-number counter
 */
protected AtomicInteger getSerialNo() {
    WriterPool writerPool = (WriterPool) getPool();
    return writerPool.getSerialNo();
}
public void stop() { if (!isRunning()) { return; } super.stop(); // XXX happens at finish; move to teardown? this.pool.close(); }
/**
 * Positions the raw stream at the given block and offset within it,
 * updating the bookkeeping counters to match.
 *
 * <p>NOTE(review): the {@code block + 1} looks like it skips a leading
 * header block before the data blocks — confirm against the format.
 *
 * @param block index of the target block
 * @param rem byte offset inside that block; must be less than BLOCK_SIZE
 * @throws IOException if repositioning the raw stream fails
 */
private void seek(long block, long rem) throws IOException {
    assert rem < BLOCK_SIZE;
    long target = BLOCK_SIZE * (block + 1) + rem;
    blockBytesRead = (int) rem;
    expectedRawPosition = target;
    raw.position(target);
}
/**
 * Creates a new crawler journal at the given location.
 *
 * @param path directory in which to make the journal
 * @param filename name to use for the journal file
 * @throws IOException if the journal output stream cannot be opened
 */
public CrawlerJournal(String path, String filename) throws IOException {
    File journalFile = new File(path, filename);
    this.gzipFile = journalFile;
    this.out = initialize(journalFile);
}
/**
 * Returns this class's source-control revision.
 *
 * <p>"$Revision$" is an SVN-style keyword presumably expanded in place
 * by version control; parseRevision extracts the usable portion.
 * NOTE(review): assumes keyword expansion is enabled for this file —
 * confirm, otherwise the literal is passed through unexpanded.
 *
 * @return the parsed revision string
 */
private static String getRevision() { return parseRevision("$Revision$"); }