PTBEscapingProcessor escaper = new PTBEscapingProcessor(); List<Tree> trees = Generics.newArrayList(); for (String line : IOUtils.readLines(parseFilename, "utf-8")) {
/**
 * Escapes the word of every element of {@code input} in place and collects
 * those same elements into a new list.
 *
 * @param input the elements to escape; each element's word is overwritten
 *     with the result of {@code escapeString}
 * @return a new list holding the (mutated) input elements in iteration order,
 *     or whatever {@code fixQuotes(List)} returns for that list when the
 *     {@code fixQuotes} flag is set
 */
@Override
public List<HasWord> process(List<? extends IN> input) {
  List<HasWord> escapedWords = new ArrayList<>();
  for (IN element : input) {
    String escaped = escapeString(element.word());
    element.setWord(escaped);
    escapedWords.add(element);
  }
  // The flag and the helper method share the name fixQuotes.
  return fixQuotes ? fixQuotes(escapedWords) : escapedWords;
}
if (maybeAppendOneMore(i + 1, s, buff)) { i++;
/**
 * Escapes a list of words by delegating to {@link #process(List)}.
 * This is the function-style entry point: it maps a {@code List<IN>} to a
 * {@code List<HasWord>}.
 *
 * @param hasWordsList the words to escape
 * @return the list produced by {@code process(hasWordsList)}
 */
@Override
public List<HasWord> apply(List<IN> hasWordsList) {
  List<HasWord> escaped = process(hasWordsList);
  return escaped;
}
@SuppressWarnings("unchecked") public static <T extends HasWord> List<T> applyPtbEscaping(List<T> words, Collection<String> quoteBegin, Collection<String> quoteEnd) { PTBEscapingProcessor<T, String, Word> escaper = new PTBEscapingProcessor<T, String, Word>(); // Apply escaper to the whole sentence, not to each token individually. The // escaper takes context into account, e.g. when transforming regular double // quotes into PTB opening and closing quotes (`` and ''). words = (List<T>) escaper.apply(words); for (HasWord w : words) { if (quoteBegin != null && quoteBegin.contains(w.word())) { w.setWord("``"); } else if (quoteEnd != null && quoteEnd.contains(w.word())) { w.setWord("\'\'"); } } return words; } }
leaf.label().setValue(escaper.escapeString(leaf.label().value()));
/**
 * Function-style entry point for escaping a list of words; simply forwards
 * to {@link #process(List)}.
 *
 * @param hasWordsList the words to escape ({@code List<IN>})
 * @return the {@code List<HasWord>} returned by {@code process}
 */
@Override
public List<HasWord> apply(List<IN> hasWordsList) {
  final List<HasWord> result = process(hasWordsList);
  return result;
}
PTBEscapingProcessor escaper = new PTBEscapingProcessor(); escaper.apply(tokensInDocument);
leaf.label().setValue(escaper.escapeString(leaf.label().value()));
/**
 * Rewrites each element's word with its escaped form (via
 * {@code escapeString}) and gathers the elements into a fresh list.
 *
 * @param input the elements to escape; they are modified in place
 * @return the fresh list of the same elements, passed through
 *     {@code fixQuotes(List)} first when the {@code fixQuotes} flag is set
 */
@Override
public List<HasWord> process(List<? extends IN> input) {
  List<HasWord> result = new ArrayList<>();
  for (IN item : input) {
    item.setWord(escapeString(item.word()));
    result.add(item);
  }
  if (!fixQuotes) {
    return result;
  }
  return fixQuotes(result);
}
d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename)); DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<>(); Document<String, Word, HasWord> newD = proc.processDocument(d); for (HasWord word : newD) {
/**
 * Escapes a list of words by handing it to {@link #process(List)}.
 * Takes a {@code List<IN>} and yields a {@code List<HasWord>}.
 *
 * @param hasWordsList the words to escape
 * @return the list produced by {@code process(hasWordsList)}
 */
public List<HasWord> apply(List<IN> hasWordsList) {
  List<HasWord> escaped = process(hasWordsList);
  return escaped;
}
if (maybeAppendOneMore(i + 1, s, buff)) { i++;
leaf.label().setValue(escaper.escapeString(leaf.label().value()));
/**
 * Escapes the word of every element of {@code input} in place and returns the
 * elements in a new list.
 *
 * @param input the elements to escape; each element's word is replaced by
 *     the result of {@code escapeString}
 * @return a new list of the (mutated) input elements, or the result of
 *     {@code fixQuotes(List)} on that list when the {@code fixQuotes} flag
 *     is set
 */
public List<HasWord> process(List<? extends IN> input) {
  List<HasWord> escapedWords = new ArrayList<HasWord>();
  for (IN element : input) {
    element.setWord(escapeString(element.word()));
    escapedWords.add(element);
  }
  // The flag and the helper method share the name fixQuotes.
  return fixQuotes ? fixQuotes(escapedWords) : escapedWords;
}
d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename)); DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<>(); Document<String, Word, HasWord> newD = proc.processDocument(d); for (HasWord word : newD) {
/**
 * Escapes the given words; a thin wrapper that forwards to
 * {@link #process(List)} so the processor can be used as a function from
 * {@code List<IN>} to {@code List<HasWord>}.
 *
 * @param hasWordsList the words to escape
 * @return the list returned by {@code process(hasWordsList)}
 */
@Override
public List<HasWord> apply(List<IN> hasWordsList) {
  final List<HasWord> processed = process(hasWordsList);
  return processed;
}
if (maybeAppendOneMore(i + 1, s, buff)) { i++;
/**
 * Replaces each element's word with its escaped form (computed by
 * {@code escapeString}) and collects the elements into a new list.
 *
 * @param input the elements to escape; they are modified in place
 * @return the new list of the same elements, routed through
 *     {@code fixQuotes(List)} when the {@code fixQuotes} flag is set
 */
public List<HasWord> process(List<? extends IN> input) {
  List<HasWord> result = new ArrayList<HasWord>();
  for (IN item : input) {
    String escaped = escapeString(item.word());
    item.setWord(escaped);
    result.add(item);
  }
  if (!fixQuotes) {
    return result;
  }
  return fixQuotes(result);
}
d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename)); DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<>(); Document<String, Word, HasWord> newD = proc.processDocument(d); for (HasWord word : newD) {