// Set up a PTB escaper and an empty list of trees, then walk the lines returned by
// IOUtils.readLines(parseFilename, "utf-8") (the second argument is presumably the charset name).
// NOTE(review): PTBEscapingProcessor is used as a raw type here; the intended type arguments
// cannot be determined from this fragment. The loop body continues past this line.
PTBEscapingProcessor escaper = new PTBEscapingProcessor(); List<Tree> trees = Generics.newArrayList(); for (String line : IOUtils.readLines(parseFilename, "utf-8")) {
// Initialize a BasicDocument from the file named by `filename`, using WhitespaceTokenizer.factory(),
// pass it through a PTBEscapingProcessor via processDocument, and iterate over the resulting
// document's elements as HasWord. The loop body continues past this line.
d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename)); DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<>(); Document<String, Word, HasWord> newD = proc.processDocument(d); for (HasWord word : newD) {
@SuppressWarnings("unchecked") public static <T extends HasWord> List<T> applyPtbEscaping(List<T> words, Collection<String> quoteBegin, Collection<String> quoteEnd) { PTBEscapingProcessor<T, String, Word> escaper = new PTBEscapingProcessor<T, String, Word>(); // Apply escaper to the whole sentence, not to each token individually. The // escaper takes context into account, e.g. when transforming regular double // quotes into PTB opening and closing quotes (`` and ''). words = (List<T>) escaper.apply(words); for (HasWord w : words) { if (quoteBegin != null && quoteBegin.contains(w.word())) { w.setWord("``"); } else if (quoteEnd != null && quoteEnd.contains(w.word())) { w.setWord("\'\'"); } } return words; } }
// Initialize a BasicDocument from the file named by `filename`, using WhitespaceTokenizer.factory(),
// pass it through a PTBEscapingProcessor via processDocument, and iterate over the resulting
// document's elements as HasWord. The loop body continues past this line.
d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename)); DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<>(); Document<String, Word, HasWord> newD = proc.processDocument(d); for (HasWord word : newD) {
// Initialize a BasicDocument from the file named by `filename`, using WhitespaceTokenizer.factory(),
// pass it through a PTBEscapingProcessor via processDocument, and iterate over the resulting
// document's elements as HasWord. The loop body continues past this line.
d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename)); DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<>(); Document<String, Word, HasWord> newD = proc.processDocument(d); for (HasWord word : newD) {
// Initialize a BasicDocument from the file named by `filename`, using WhitespaceTokenizer.factory(),
// pass it through a PTBEscapingProcessor<Word, String, Word> via processDocument, and iterate over
// the resulting document's elements as HasWord. The loop body continues past this line.
d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename)); DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<Word, String, Word>(); Document<String, Word, HasWord> newD = proc.processDocument(d); for (HasWord word : newD) {
// Set up a PTB escaper and an empty list of trees, then walk the lines returned by
// IOUtils.readLines(parseFilename, "utf-8") (the second argument is presumably the charset name).
// NOTE(review): PTBEscapingProcessor is used as a raw type here; the intended type arguments
// cannot be determined from this fragment. The loop body continues past this line.
PTBEscapingProcessor escaper = new PTBEscapingProcessor(); List<Tree> trees = Generics.newArrayList(); for (String line : IOUtils.readLines(parseFilename, "utf-8")) {
// Set up a PTB escaper and an empty list of trees, then walk the lines returned by
// IOUtils.readLines(parseFilename, "utf-8") (the second argument is presumably the charset name).
// NOTE(review): PTBEscapingProcessor is used as a raw type here; the intended type arguments
// cannot be determined from this fragment. The loop body continues past this line.
PTBEscapingProcessor escaper = new PTBEscapingProcessor(); List<Tree> trees = Generics.newArrayList(); for (String line : IOUtils.readLines(parseFilename, "utf-8")) {
// Initialize a BasicDocument from the file named by `filename`, using WhitespaceTokenizer.factory(),
// pass it through a PTBEscapingProcessor<Word, String, Word> via processDocument, and iterate over
// the resulting document's elements as HasWord. The loop body continues past this line.
d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename)); DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<Word, String, Word>(); Document<String, Word, HasWord> newD = proc.processDocument(d); for (HasWord word : newD) {
// Create a PTB escaper and apply it to tokensInDocument; the value returned by apply() is discarded.
// NOTE(review): PTBEscapingProcessor is used as a raw type, and the type of tokensInDocument is
// not visible here. Presumably apply() updates the tokens in place; confirm that ignoring its
// return value is intended.
PTBEscapingProcessor escaper = new PTBEscapingProcessor(); escaper.apply(tokensInDocument);