edu.stanford.nlp.process.DocumentPreprocessor$PlainTextIterator java code examples

/**
 * Buffers the next sentence into {@code nextSent}.
 *
 * Sentences are taken from the current {@code plainItr} while it has any; otherwise the next
 * block from {@code xmlItr} is wrapped in a fresh reader and a new {@code PlainTextIterator}.
 * When both sources are exhausted, {@code originalDocReader} is closed and {@code nextSent}
 * is left null.
 */
private void primeNext() {
 // Keep looping: a document containing <tag></tag> makes xmlItr yield an empty string,
 // which the plain iterator turns into no sentence at all. Stopping at the first such
 // block would end the iteration before later tags are examined.
 while (true) {
  if (plainItr != null && plainItr.hasNext()) {
   nextSent = plainItr.next();
  } else if (!xmlItr.hasNext()) {
   // Nothing left anywhere: release the underlying reader and report exhaustion.
   IOUtils.closeIgnoringExceptions(originalDocReader);
   nextSent = null;
   return;
  } else {
   String xmlText = xmlItr.next();
   // inputReader is replaced before the new PlainTextIterator is built
   // (presumably the iterator reads from inputReader -- confirm against its constructor).
   inputReader = new BufferedReader(new StringReader(xmlText));
   plainItr = new PlainTextIterator();
   nextSent = plainItr.hasNext() ? plainItr.next() : null;
  }
  if (nextSent != null) {
   return;
  }
 }
}

/**
 * Returns the buffered sentence, fetching one via {@code primeNext()} if none is buffered.
 *
 * @return the next sentence
 * @throws NoSuchElementException if no sentence is available after priming
 */
@Override
public List<HasWord> next() {
 if (nextSent == null) {
  primeNext();
  if (nextSent == null) {
   throw new NoSuchElementException();
  }
 }
 // Hand the sentence out and clear the buffer so the next call primes again.
 final List<HasWord> sentence = nextSent;
 nextSent = null;
 return sentence;
}

/**
 * Loads the next available sentence into {@code nextSent}, or sets it to null (after closing
 * {@code originalDocReader}) once neither {@code plainItr} nor {@code xmlItr} has more input.
 */
private void primeNext() {
 // The loop matters: for a pattern such as <tag></tag> the xmlItr returns an empty
 // string, from which the plain iterator produces nothing. Without retrying on the
 // following tag, iteration would stop prematurely.
 for (;;) {
  final boolean plainHasMore = plainItr != null && plainItr.hasNext();
  if (plainHasMore) {
   nextSent = plainItr.next();
  } else if (xmlItr.hasNext()) {
   final String xmlText = xmlItr.next();
   // Swap in a reader over this block before creating the plain-text iterator.
   inputReader = new BufferedReader(new StringReader(xmlText));
   plainItr = new PlainTextIterator();
   nextSent = plainItr.hasNext() ? plainItr.next() : null;
  } else {
   // Both sources are drained.
   IOUtils.closeIgnoringExceptions(originalDocReader);
   nextSent = null;
   return;
  }
  if (nextSent != null) {
   return;
  }
 }
}

/**
 * Returns sentences until the document is exhausted. Calls close() if the end of the document
 * is reached. Otherwise, the user is required to close the stream.
 *
 * If an IOException is raised while obtaining the reader for {@code inputPath}, or if
 * {@code docType} is neither {@code DocType.Plain} nor {@code DocType.XML}, an iterator
 * with no elements is returned instead.
 *
 * @return An Iterator over sentences (each a List of word tokens); never null.
 */
public Iterator<List<HasWord>> iterator() {
 try {
  if (inputReader == null) {
   inputReader = getReaderFromPath(inputPath);
  }
  //TODO: Add new document types here
  if (docType == DocType.Plain) {
   return new PlainTextIterator();
  } else if (docType == DocType.XML) {
   return new XMLIterator();
  }
 } catch (IOException e) {
  // Best effort: report the failure together with its cause, then fall through to
  // the empty iterator below rather than propagating the exception.
  System.err.printf("%s: Could not open path %s: %s\n", this.getClass().getName(), inputPath, e);
 }
 // Fallback: an iterator that yields nothing.
 return new Iterator<List<HasWord>>() {
  @Override
  public boolean hasNext() { return false; }
  @Override
  public List<HasWord> next() { throw new NoSuchElementException(); }
  // Kept as a no-op so that callers invoking remove() see no change in behavior.
  @Override
  public void remove() {}
 };
}

/**
 * Buffers the next sentence into {@code nextSent}.
 *
 * Sentences are taken from the current {@code plainItr} while it has any; otherwise the next
 * block from {@code xmlItr} is wrapped in a fresh reader and a new {@code PlainTextIterator}.
 * When both sources are exhausted, {@code originalDocReader} is closed and {@code nextSent}
 * is left null.
 */
private void primeNext() {
 // Keep looping: a document containing <tag></tag> makes xmlItr yield an empty string,
 // which the plain iterator turns into no sentence at all. Stopping at the first such
 // block would end the iteration before later tags are examined.
 while (true) {
  if (plainItr != null && plainItr.hasNext()) {
   nextSent = plainItr.next();
  } else if (!xmlItr.hasNext()) {
   // Nothing left anywhere: release the underlying reader and report exhaustion.
   IOUtils.closeIgnoringExceptions(originalDocReader);
   nextSent = null;
   return;
  } else {
   String xmlText = xmlItr.next();
   // inputReader is replaced before the new PlainTextIterator is built
   // (presumably the iterator reads from inputReader -- confirm against its constructor).
   inputReader = new BufferedReader(new StringReader(xmlText));
   plainItr = new PlainTextIterator();
   nextSent = plainItr.hasNext() ? plainItr.next() : null;
  }
  if (nextSent != null) {
   return;
  }
 }
}

/**
 * Loads the next available sentence into {@code nextSent}, or sets it to null (after closing
 * {@code originalDocReader}) once neither {@code plainItr} nor {@code xmlItr} has more input.
 */
private void primeNext() {
 // The loop matters: for a pattern such as <tag></tag> the xmlItr returns an empty
 // string, from which the plain iterator produces nothing. Without retrying on the
 // following tag, iteration would stop prematurely.
 for (;;) {
  final boolean plainHasMore = plainItr != null && plainItr.hasNext();
  if (plainHasMore) {
   nextSent = plainItr.next();
  } else if (xmlItr.hasNext()) {
   final String xmlText = xmlItr.next();
   // Swap in a reader over this block before creating the plain-text iterator.
   inputReader = new BufferedReader(new StringReader(xmlText));
   plainItr = new PlainTextIterator();
   nextSent = plainItr.hasNext() ? plainItr.next() : null;
  } else {
   // Both sources are drained.
   IOUtils.closeIgnoringExceptions(originalDocReader);
   nextSent = null;
   return;
  }
  if (nextSent != null) {
   return;
  }
 }
}

/**
 * Buffers the next sentence into {@code nextSent}.
 *
 * Sentences are taken from the current {@code plainItr} while it has any; otherwise the next
 * block from {@code xmlItr} is wrapped in a fresh reader and a new {@code PlainTextIterator}.
 * When both sources are exhausted, {@code originalDocReader} is closed and {@code nextSent}
 * is left null.
 */
private void primeNext() {
 // Keep looping: a document containing <tag></tag> makes xmlItr yield an empty string,
 // which the plain iterator turns into no sentence at all. Stopping at the first such
 // block would end the iteration before later tags are examined.
 while (true) {
  if (plainItr != null && plainItr.hasNext()) {
   nextSent = plainItr.next();
  } else if (!xmlItr.hasNext()) {
   // Nothing left anywhere: release the underlying reader and report exhaustion.
   IOUtils.closeIgnoringExceptions(originalDocReader);
   nextSent = null;
   return;
  } else {
   String xmlText = xmlItr.next();
   // inputReader is replaced before the new PlainTextIterator is built
   // (presumably the iterator reads from inputReader -- confirm against its constructor).
   inputReader = new BufferedReader(new StringReader(xmlText));
   plainItr = new PlainTextIterator();
   nextSent = plainItr.hasNext() ? plainItr.next() : null;
  }
  if (nextSent != null) {
   return;
  }
 }
}

/**
 * Produces an iterator that yields sentences until the document is exhausted. Calls close()
 * if the end of the document is reached. Otherwise, the user is required to close the stream.
 *
 * @return An Iterator over sentences (each a List of word tokens).
 * Although the type is given as {@code List<HasWord>}, in practice you get a List of CoreLabel,
 * and you can cast down to that. (Someday we might manage to fix the generic typing....)
 * @throws IllegalStateException if {@code docType} has no handler here
 */
@Override
public Iterator<List<HasWord>> iterator() {
 // Add new document types here
 if (docType == DocType.XML) {
  return new XMLIterator();
 }
 if (docType == DocType.Plain) {
  return new PlainTextIterator();
 }
 throw new IllegalStateException("Someone didn't add a handler for a new docType.");
}

/**
 * Returns sentences until the document is exhausted. Calls close() if the end of the document
 * is reached. Otherwise, the user is required to close the stream.
 *
 * @return An Iterator over sentences (each a List of word tokens).
 * Although the type is given as {@code List<HasWord>}, in practice you get a List of CoreLabel,
 * and you can cast down to that. (Someday we might manage to fix the generic typing....)
 * @throws IllegalStateException if {@code docType} is not one of the handled document types
 */
@Override
public Iterator<List<HasWord>> iterator() {
 final Iterator<List<HasWord>> sentences;
 // Add new document types here
 if (docType == DocType.Plain) {
  sentences = new PlainTextIterator();
 } else if (docType == DocType.XML) {
  sentences = new XMLIterator();
 } else {
  throw new IllegalStateException("Someone didn't add a handler for a new docType.");
 }
 return sentences;
}

/**
 * Produces an iterator that yields sentences until the document is exhausted. Calls close()
 * if the end of the document is reached. Otherwise, the user is required to close the stream.
 *
 * @return an iterator chosen according to {@code docType}
 * @throws IllegalStateException if {@code docType} has no handler here
 */
@Override
public Iterator<List<HasWord>> iterator() {
 // Add new document types here
 if (docType == DocType.XML) {
  return new XMLIterator();
 }
 if (docType == DocType.Plain) {
  return new PlainTextIterator();
 }
 throw new IllegalStateException("Someone didn't add a handler for a new docType.");
}

/**
 * Reports whether another sentence is available, priming the buffer if it is empty.
 *
 * @return true if {@code nextSent} is non-null, after calling {@code primeNext()} when needed
 */
public boolean hasNext() {
 if (nextSent != null) {
  return true;
 }
 primeNext();
 return nextSent != null;
}

/**
 * Checks for a buffered sentence, calling {@code primeNext()} first when the buffer is empty.
 *
 * @return whether {@code nextSent} holds a sentence afterwards
 */
@Override
public boolean hasNext() {
 boolean buffered = nextSent != null;
 if (!buffered) {
  primeNext();
  buffered = nextSent != null;
 }
 return buffered;
}

/**
 * Reports whether another sentence is available, priming the buffer if it is empty.
 *
 * @return true if {@code nextSent} is non-null, after calling {@code primeNext()} when needed
 */
@Override
public boolean hasNext() {
 if (nextSent != null) {
  return true;
 }
 primeNext();
 return nextSent != null;
}

/**
 * Returns the buffered sentence, fetching one via {@code primeNext()} if none is buffered.
 *
 * @return the next sentence
 * @throws NoSuchElementException if no sentence is available after priming
 */
public List<HasWord> next() {
 if (nextSent == null) {
  primeNext();
  if (nextSent == null) {
   throw new NoSuchElementException();
  }
 }
 // Hand the sentence out and clear the buffer so the next call primes again.
 final List<HasWord> sentence = nextSent;
 nextSent = null;
 return sentence;
}

/**
 * Checks for a buffered sentence, calling {@code primeNext()} first when the buffer is empty.
 *
 * @return whether {@code nextSent} holds a sentence afterwards
 */
public boolean hasNext() {
 boolean buffered = nextSent != null;
 if (!buffered) {
  primeNext();
  buffered = nextSent != null;
 }
 return buffered;
}

/**
 * Consumes and returns the pending sentence, priming the buffer first when it is empty.
 *
 * @return the sentence that was held in {@code nextSent}
 * @throws NoSuchElementException if {@code nextSent} is still null after {@code primeNext()}
 */
public List<HasWord> next() {
 if (nextSent == null) {
  primeNext();
 }
 final List<HasWord> result = nextSent;
 if (result == null) {
  throw new NoSuchElementException();
 }
 // Clear the buffer so that the following call fetches a fresh sentence.
 nextSent = null;
 return result;
}

/**
 * Consumes and returns the pending sentence, priming the buffer first when it is empty.
 *
 * @return the sentence that was held in {@code nextSent}
 * @throws NoSuchElementException if {@code nextSent} is still null after {@code primeNext()}
 */
@Override
public List<HasWord> next() {
 if (nextSent == null) {
  primeNext();
 }
 final List<HasWord> result = nextSent;
 if (result == null) {
  throw new NoSuchElementException();
 }
 // Clear the buffer so that the following call fetches a fresh sentence.
 nextSent = null;
 return result;
}

/**
 * Returns the buffered sentence, fetching one via {@code primeNext()} if none is buffered.
 *
 * @return the next sentence
 * @throws NoSuchElementException if no sentence is available after priming
 */
@Override
public List<HasWord> next() {
 if (nextSent == null) {
  primeNext();
  if (nextSent == null) {
   throw new NoSuchElementException();
  }
 }
 // Hand the sentence out and clear the buffer so the next call primes again.
 final List<HasWord> sentence = nextSent;
 nextSent = null;
 return sentence;
}

/**
 * Returns sentences until the document is exhausted. Calls close() if the end of the document
 * is reached. Otherwise, the user is required to close the stream.
 *
 * @return An Iterator over sentences (each a List of word tokens).
 * Although the type is given as {@code List<HasWord>}, in practice you get a List of CoreLabel,
 * and you can cast down to that. (Someday we might manage to fix the generic typing....)
 * @throws IllegalStateException if {@code docType} is not one of the handled document types
 */
@Override
public Iterator<List<HasWord>> iterator() {
 final Iterator<List<HasWord>> sentences;
 // Add new document types here
 if (docType == DocType.Plain) {
  sentences = new PlainTextIterator();
 } else if (docType == DocType.XML) {
  sentences = new XMLIterator();
 } else {
  throw new IllegalStateException("Someone didn't add a handler for a new docType.");
 }
 return sentences;
}

/**
 * Reports whether another sentence is available, priming the buffer if it is empty.
 *
 * @return true if {@code nextSent} is non-null, after calling {@code primeNext()} when needed
 */
@Override
public boolean hasNext() {
 if (nextSent != null) {
  return true;
 }
 primeNext();
 return nextSent != null;
}

Most used methods

Popular in Java

Running tasks concurrently on multiple threads
getSupportFragmentManager (FragmentActivity)
getResourceAsStream (ClassLoader)
getContentResolver (Context)
FileOutputStream (java.io)
An output stream that writes bytes to a file. If the output file exists, it can be replaced or appended to.
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass
Collection (java.util)
Collection is the root of the collection hierarchy. It defines operations on data collections and t
TreeMap (java.util)
Walk the nodes of the tree left-to-right or right-to-left. Note that in descending iterations, next
ExecutorService (java.util.concurrent)
An Executor that provides methods to manage termination and methods that can produce a Future for tr
HttpServlet (javax.servlet.http)
Provides an abstract class to be subclassed to create an HTTP servlet suitable for a Web site. A sub
Top Sublime Text plugins

How to use DocumentPreprocessor$PlainTextIterator in edu.stanford.nlp.process

Best Java code snippets using edu.stanford.nlp.process.DocumentPreprocessor$PlainTextIterator (Showing top 20 results out of 315)

How to use
DocumentPreprocessor$PlainTextIterator
in
edu.stanford.nlp.process