private void primeNext() { // It is necessary to loop because if a document has a pattern // that goes: <tag></tag> the xmlItr will return an empty // string, which the plainItr will process to null. If we // didn't loop to find the next tag, the iterator would stop. do { if (plainItr != null && plainItr.hasNext()) { nextSent = plainItr.next(); } else if (xmlItr.hasNext()) { String block = xmlItr.next(); inputReader = new BufferedReader(new StringReader(block)); plainItr = new PlainTextIterator(); if (plainItr.hasNext()) { nextSent = plainItr.next(); } else { nextSent = null; } } else { IOUtils.closeIgnoringExceptions(originalDocReader); nextSent = null; break; } } while (nextSent == null); }
/**
 * Returns the buffered sentence and clears the buffer, priming it first if it is empty.
 *
 * @return the next sentence
 * @throws NoSuchElementException if no sentence is available even after priming
 */
@Override
public List<HasWord> next() {
  if (nextSent == null) {
    primeNext();
    if (nextSent == null) {
      throw new NoSuchElementException();
    }
  }
  List<HasWord> sentence = nextSent;
  // Empty the buffer so the following call looks ahead again.
  nextSent = null;
  return sentence;
}
private void primeNext() { // It is necessary to loop because if a document has a pattern // that goes: <tag></tag> the xmlItr will return an empty // string, which the plainItr will process to null. If we // didn't loop to find the next tag, the iterator would stop. do { if (plainItr != null && plainItr.hasNext()) { nextSent = plainItr.next(); } else if (xmlItr.hasNext()) { String block = xmlItr.next(); inputReader = new BufferedReader(new StringReader(block)); plainItr = new PlainTextIterator(); if (plainItr.hasNext()) { nextSent = plainItr.next(); } else { nextSent = null; } } else { IOUtils.closeIgnoringExceptions(originalDocReader); nextSent = null; break; } } while (nextSent == null); }
/** * Returns sentences until the document is exhausted. Calls close() if the end of the document * is reached. Otherwise, the user is required to close the stream. */ public Iterator<List<HasWord>> iterator() { try { if (inputReader == null) inputReader = getReaderFromPath(inputPath); //TODO: Add new document types here if (docType == DocType.Plain) { return new PlainTextIterator(); } else if (docType == DocType.XML) { return new XMLIterator(); } } catch (IOException e) { System.err.printf("%s: Could not open path %s\n", this.getClass().getName(), inputPath); } return new Iterator<List<HasWord>>() { public boolean hasNext() { return false; } public List<HasWord> next() { throw new NoSuchElementException(); } public void remove() {} }; }
private void primeNext() { // It is necessary to loop because if a document has a pattern // that goes: <tag></tag> the xmlItr will return an empty // string, which the plainItr will process to null. If we // didn't loop to find the next tag, the iterator would stop. do { if (plainItr != null && plainItr.hasNext()) { nextSent = plainItr.next(); } else if (xmlItr.hasNext()) { String block = xmlItr.next(); inputReader = new BufferedReader(new StringReader(block)); plainItr = new PlainTextIterator(); if (plainItr.hasNext()) { nextSent = plainItr.next(); } else { nextSent = null; } } else { IOUtils.closeIgnoringExceptions(originalDocReader); nextSent = null; break; } } while (nextSent == null); }
private void primeNext() { // It is necessary to loop because if a document has a pattern // that goes: <tag></tag> the xmlItr will return an empty // string, which the plainItr will process to null. If we // didn't loop to find the next tag, the iterator would stop. do { if (plainItr != null && plainItr.hasNext()) { nextSent = plainItr.next(); } else if (xmlItr.hasNext()) { String block = xmlItr.next(); inputReader = new BufferedReader(new StringReader(block)); plainItr = new PlainTextIterator(); if (plainItr.hasNext()) { nextSent = plainItr.next(); } else { nextSent = null; } } else { IOUtils.closeIgnoringExceptions(originalDocReader); nextSent = null; break; } } while (nextSent == null); }
private void primeNext() { // It is necessary to loop because if a document has a pattern // that goes: <tag></tag> the xmlItr will return an empty // string, which the plainItr will process to null. If we // didn't loop to find the next tag, the iterator would stop. do { if (plainItr != null && plainItr.hasNext()) { nextSent = plainItr.next(); } else if (xmlItr.hasNext()) { String block = xmlItr.next(); inputReader = new BufferedReader(new StringReader(block)); plainItr = new PlainTextIterator(); if (plainItr.hasNext()) { nextSent = plainItr.next(); } else { nextSent = null; } } else { IOUtils.closeIgnoringExceptions(originalDocReader); nextSent = null; break; } } while (nextSent == null); }
/** * Returns sentences until the document is exhausted. Calls close() if the end of the document * is reached. Otherwise, the user is required to close the stream. * * @return An Iterator over sentences (each a List of word tokens). * Although the type is given as {@code List<HasWord>}, in practice you get a List of CoreLabel, * and you can cast down to that. (Someday we might manage to fix the generic typing....) */ @Override public Iterator<List<HasWord>> iterator() { // Add new document types here if (docType == DocType.Plain) { return new PlainTextIterator(); } else if (docType == DocType.XML) { return new XMLIterator(); } else { throw new IllegalStateException("Someone didn't add a handler for a new docType."); } }
/** * Returns sentences until the document is exhausted. Calls close() if the end of the document * is reached. Otherwise, the user is required to close the stream. * * @return An Iterator over sentences (each a List of word tokens). * Although the type is given as {@code List<HasWord>}, in practice you get a List of CoreLabel, * and you can cast down to that. (Someday we might manage to fix the generic typing....) */ @Override public Iterator<List<HasWord>> iterator() { // Add new document types here if (docType == DocType.Plain) { return new PlainTextIterator(); } else if (docType == DocType.XML) { return new XMLIterator(); } else { throw new IllegalStateException("Someone didn't add a handler for a new docType."); } }
/** * Returns sentences until the document is exhausted. Calls close() if the end of the document * is reached. Otherwise, the user is required to close the stream. */ @Override public Iterator<List<HasWord>> iterator() { // Add new document types here if (docType == DocType.Plain) { return new PlainTextIterator(); } else if (docType == DocType.XML) { return new XMLIterator(); } else { throw new IllegalStateException("Someone didn't add a handler for a new docType."); } }
/**
 * Reports whether another sentence is available, looking ahead via {@code primeNext()} when
 * nothing is currently buffered.
 *
 * @return {@code true} if a sentence is buffered after the look-ahead
 */
public boolean hasNext() {
  if (nextSent != null) {
    return true;
  }
  primeNext();
  return nextSent != null;
}
/**
 * Reports whether another sentence is available, looking ahead via {@code primeNext()} when
 * nothing is currently buffered.
 *
 * @return {@code true} if a sentence is buffered after the look-ahead
 */
@Override
public boolean hasNext() {
  if (nextSent != null) {
    return true;
  }
  primeNext();
  return nextSent != null;
}
/**
 * Tells the caller whether a further sentence can be returned. If the buffer is empty,
 * {@code primeNext()} is invoked to try to fill it.
 *
 * @return {@code true} when {@code nextSent} is non-null after the attempt
 */
@Override
public boolean hasNext() {
  boolean ready = nextSent != null;
  if (!ready) {
    primeNext();
    ready = nextSent != null;
  }
  return ready;
}
/**
 * Returns the buffered sentence and clears the buffer, priming it first if it is empty.
 *
 * @return the next sentence
 * @throws NoSuchElementException if no sentence is available even after priming
 */
public List<HasWord> next() {
  if (nextSent == null) {
    primeNext();
    if (nextSent == null) {
      throw new NoSuchElementException();
    }
  }
  List<HasWord> sentence = nextSent;
  // Empty the buffer so the following call looks ahead again.
  nextSent = null;
  return sentence;
}
/**
 * Tells the caller whether a further sentence can be returned. If the buffer is empty,
 * {@code primeNext()} is invoked to try to fill it.
 *
 * @return {@code true} when {@code nextSent} is non-null after the attempt
 */
public boolean hasNext() {
  boolean ready = nextSent != null;
  if (!ready) {
    primeNext();
    ready = nextSent != null;
  }
  return ready;
}
/**
 * Hands out the next sentence, consuming the look-ahead buffer. The buffer is primed on demand
 * when it is empty.
 *
 * @return the next sentence
 * @throws NoSuchElementException if priming produced no sentence
 */
public List<HasWord> next() {
  if (nextSent == null) {
    primeNext();
  }
  List<HasWord> result = nextSent;
  if (result == null) {
    throw new NoSuchElementException();
  }
  // Consume the buffered value.
  nextSent = null;
  return result;
}
/**
 * Hands out the next sentence, consuming the look-ahead buffer. The buffer is primed on demand
 * when it is empty.
 *
 * @return the next sentence
 * @throws NoSuchElementException if priming produced no sentence
 */
@Override
public List<HasWord> next() {
  if (nextSent == null) {
    primeNext();
  }
  List<HasWord> result = nextSent;
  if (result == null) {
    throw new NoSuchElementException();
  }
  // Consume the buffered value.
  nextSent = null;
  return result;
}
/**
 * Returns the buffered sentence and clears the buffer, priming it first if it is empty.
 *
 * @return the next sentence
 * @throws NoSuchElementException if no sentence is available even after priming
 */
@Override
public List<HasWord> next() {
  if (nextSent == null) {
    primeNext();
    if (nextSent == null) {
      throw new NoSuchElementException();
    }
  }
  List<HasWord> sentence = nextSent;
  // Empty the buffer so the following call looks ahead again.
  nextSent = null;
  return sentence;
}
/** * Returns sentences until the document is exhausted. Calls close() if the end of the document * is reached. Otherwise, the user is required to close the stream. * * @return An Iterator over sentences (each a List of word tokens). * Although the type is given as {@code List<HasWord>}, in practice you get a List of CoreLabel, * and you can cast down to that. (Someday we might manage to fix the generic typing....) */ @Override public Iterator<List<HasWord>> iterator() { // Add new document types here if (docType == DocType.Plain) { return new PlainTextIterator(); } else if (docType == DocType.XML) { return new XMLIterator(); } else { throw new IllegalStateException("Someone didn't add a handler for a new docType."); } }
/**
 * Reports whether another sentence is available, looking ahead via {@code primeNext()} when
 * nothing is currently buffered.
 *
 * @return {@code true} if a sentence is buffered after the look-ahead
 */
@Override
public boolean hasNext() {
  if (nextSent != null) {
    return true;
  }
  primeNext();
  return nextSent != null;
}