/**
 * Counts one written line and checks whether a timestamp entry is due.
 *
 * @throws IOException propagated from considerTimestamp()
 */
protected void noteLine() throws IOException {
    lines = lines + 1;
    considerTimestamp();
}
/**
 * Creates a new crawler journal backed by the given file.
 *
 * @param file path at which to make the journal
 * @throws IOException if the journal output stream cannot be opened
 */
public CrawlerJournal(File file) throws IOException {
    gzipFile = file;
    out = initialize(gzipFile);
}
/**
 * Skips over any property information that may precede a piece table.
 * These property structures contain stylesheet information that applies
 * to the piece table. Since we're only interested in the text itself,
 * we just ignore this property stuff. (I suppose a third buffered
 * stream could be used to add style information to {@link Piece}, but
 * we don't need it.)
 *
 * @param input the input stream containing the piece table
 * @throws IOException if an IO error occurs, or if the stream ends
 *         before the property block is fully skipped
 * @throws IllegalStateException if the piece-table marker (tag 2) does
 *         not follow the property blocks
 */
private static void skipProperties(SeekInputStream input) throws IOException {
    int tag = input.read();
    while (tag == 1) {
        int size = Endian.littleChar(input);
        while (size > 0) {
            long skipped = input.skip(size);
            if (skipped <= 0) {
                // skip() may legally return 0 even before EOF; fall back
                // on read() so a truncated stream raises an error instead
                // of looping forever.
                if (input.read() == -1) {
                    throw new IOException(
                            "Unexpected end of stream while skipping property block");
                }
                size--;
            } else {
                size -= skipped;
            }
        }
        tag = input.read();
    }
    if (tag != 2) {
        throw new IllegalStateException(
                "Expected piece table marker (tag 2) but found tag " + tag);
    }
}
/**
 * Wraps the raw table stream for use as the charPos or filePos stream.
 * The result is "safe" (its position() never clobbers the other wrapped
 * stream's), buffered (up to {@code cache} bytes are read before the
 * disk is touched again), and "origined" (position(0) addresses the
 * start of the piece table array rather than the start of the file).
 *
 * @param input the stream to wrap
 * @param pos the origin for the returned stream
 * @param cache the number of bytes for the returned stream to buffer
 * @return the wrapped stream
 * @throws IOException if an IO error occurs
 */
private SeekInputStream wrap(SeekInputStream input, long pos, int cache)
        throws IOException {
    input.position(pos);
    // Layer the decorators inside-out: safety, then origin shift, then buffering.
    return new BufferedSeekInputStream(
            new OriginSeekInputStream(new SafeSeekInputStream(input), pos),
            cache);
}
/** * Get size of data recorded (transferred) * * @return recorded data size */ public long getRecordedSize() { return (getRecorder() != null) ? getRecorder() .getRecordedInput().getSize() // if unavailable fall back on content-size : getContentSize(); }
/**
 * Closes the underlying document stream and drops the reference to the
 * piece table so it can be garbage collected.
 *
 * @throws IOException if closing the document stream fails
 */
public void close() throws IOException {
    doc.close();
    table = null;
}
/**
 * Reads one byte from the underlying raw stream, updating the logical
 * position and the per-block bookkeeping counters.
 *
 * @return the byte read, or -1 when ensureBuffer() reports no more data
 * @throws IOException if reading the raw stream fails
 */
public int read() throws IOException {
    if (!ensureBuffer()) {
        return -1;
    }
    int b = raw.read();
    // Advance logical position plus raw-stream/block bookkeeping in step.
    position++;
    expectedRawPosition++;
    blockBytesRead++;
    return b;
}
/**
 * Returns the text of the given .doc file.
 *
 * <p>NOTE(review): the RandomAccessFile opened here stays open for the
 * returned reader — presumably released when the reader is closed;
 * confirm against the stream-based getText overload.
 *
 * @param doc the .doc file whose text to return
 * @return the text of that file
 * @throws IOException if an IO error occurs
 */
public static SeekReader getText(File doc) throws IOException {
    return getText(new RandomAccessInputStream(new RandomAccessFile(doc, "r")));
}
/**
 * Obtains a reader over this source's text by delegating to the
 * underlying text source.
 *
 * @return a reader supplied by the wrapped text source
 */
public Reader obtainReader() {
    return textSource.obtainReader();
}
}
public void stop() { if(!isRunning()) { return; } // XXX happens at finish; move to teardown? log.close(); log = null; }
/**
 * Notes a serious error via a special log line.
 *
 * @param err description of the error to record
 */
public synchronized void seriousError(String err) {
    String line = LOG_ERROR + ArchiveUtils.getLog14Date() + " " + err + "\n";
    writeLine(line);
}
/**
 * Returns this class's source-control revision.
 *
 * <p>"$Revision$" is an SVN-style keyword presumably expanded in place
 * by version control; parseRevision extracts the usable portion.
 * NOTE(review): assumes keyword expansion is enabled for this file —
 * confirm, otherwise the literal is passed through unexpanded.
 *
 * @return the parsed revision string
 */
private static String getRevision() { return Warc2Arc.parseRevision("$Revision$"); }
/**
 * Participates in a checkpoint by rotating the underlying log file.
 *
 * @param checkpointInProgress the checkpoint being taken
 * @throws IOException if rotating the log fails
 */
public void doCheckpoint(Checkpoint checkpointInProgress) throws IOException {
    // rotate log
    log.rotateForCheckpoint(checkpointInProgress);
}
/**
 * Sets this editor's value from the given text by delegating directly
 * to setValue — presumably implementing the java.beans.PropertyEditor
 * contract; confirm against the enclosing class.
 *
 * @param text textual form of the value
 * @throws IllegalArgumentException declared per the PropertyEditor
 *         contract; NOTE(review): not seen thrown here — verify whether
 *         setValue can raise it
 */
public void setAsText(String text) throws IllegalArgumentException {
    setValue(text);
}
/**
 * Returns the shared serial-number counter held by this pool.
 *
 * @return the writer pool's serial-number counter
 */
protected AtomicInteger getSerialNo() {
    WriterPool writerPool = (WriterPool) getPool();
    return writerPool.getSerialNo();
}
public void stop() { if (!isRunning()) { return; } super.stop(); // XXX happens at finish; move to teardown? this.pool.close(); }
/**
 * Positions the raw stream at the given block and offset within it,
 * updating the bookkeeping counters to match.
 *
 * <p>NOTE(review): the {@code block + 1} looks like it skips a leading
 * header block before the data blocks — confirm against the format.
 *
 * @param block index of the target block
 * @param rem byte offset inside that block; must be less than BLOCK_SIZE
 * @throws IOException if repositioning the raw stream fails
 */
private void seek(long block, long rem) throws IOException {
    assert rem < BLOCK_SIZE;
    long target = BLOCK_SIZE * (block + 1) + rem;
    blockBytesRead = (int) rem;
    expectedRawPosition = target;
    raw.position(target);
}
/**
 * Creates a new crawler journal at the given location.
 *
 * @param path directory in which to make the journal
 * @param filename name to use for the journal file
 * @throws IOException if the journal output stream cannot be opened
 */
public CrawlerJournal(String path, String filename) throws IOException {
    File journalFile = new File(path, filename);
    this.gzipFile = journalFile;
    this.out = initialize(journalFile);
}
/**
 * Returns this class's source-control revision.
 *
 * <p>"$Revision$" is an SVN-style keyword presumably expanded in place
 * by version control; parseRevision extracts the usable portion.
 * NOTE(review): assumes keyword expansion is enabled for this file —
 * confirm, otherwise the literal is passed through unexpanded.
 *
 * @return the parsed revision string
 */
private static String getRevision() { return parseRevision("$Revision$"); }