// Visitor callback: adds the size of the visited tree's yield to the running total {@code num}.
@Override public void visitTree(final Tree t) { num += t.yield().size(); } });
/** * Gets the yield of the tree. The {@code Label} of all leaf nodes * is returned * as a list ordered by the natural left to right order of the * leaves. Null values, if any, are inserted into the list like any * other value. * * @return a {@code List} of the data in the tree's leaves. */ public ArrayList<Label> yield() { return yield(new ArrayList<>()); }
/**
 * Accepts a tree only when its yield is no longer than {@code length}.
 *
 * @param tree the tree to check
 * @return {@code true} if the size of the tree's yield does not exceed {@code length}
 */
public boolean test(Tree tree) {
  final int yieldSize = tree.yield().size();
  return yieldSize <= length;
}
/**
 * Get a string representation of the immediate phrase which contains the given node.
 * The phrase is built from the values of the labels in the parent's yield,
 * separated by single spaces.
 *
 * @param t the node of interest (not consulted when building the string)
 * @param parent the node whose yield forms the phrase; may be {@code null}
 * @return the space-separated phrase, the empty string if the parent's yield
 *         is empty, or {@code null} if {@code parent} is {@code null}
 */
private static String getContainingPhrase(Tree t, Tree parent) {
  if (parent == null) {
    return null;
  }
  StringBuilder containingPhrase = new StringBuilder();
  // Emit the separator before every element except the first.  Trimming a
  // trailing space afterwards would call substring(0, -1) on an empty yield.
  String separator = "";
  for (Label l : parent.yield()) {
    containingPhrase.append(separator).append(l.value());
    separator = " ";
  }
  return containingPhrase.toString();
}
/**
 * Prunes each tree of a list in turn, passing every tree the offset at which
 * its yield begins.
 *
 * @param treeList the trees to prune, in left-to-right order
 * @param start the offset handed to the first tree; each later tree gets the
 *        previous offset plus the size of the previous tree's yield
 * @return a new list holding the result of {@code prune(tree, offset)} for each tree
 */
private List<Tree> helper(List<Tree> treeList, int start) {
  final List<Tree> pruned = new ArrayList<>(treeList.size());
  int offset = start;
  for (Tree subtree : treeList) {
    // Measure the yield before pruning, exactly as the offset bookkeeping requires.
    final int width = subtree.yield().size();
    pruned.add(prune(subtree, offset));
    offset += width;
  }
  return pruned;
}
/**
 * Stamps the given sentence index onto every leaf label of the tree.
 * Every label in the yield must be a {@code CoreLabel}.
 *
 * @param tree the tree whose leaf labels are updated
 * @param sentIndex the sentence index to store on each leaf label
 * @throws IllegalArgumentException when a leaf label that is not a
 *         {@code CoreLabel} is reached (labels visited before it have
 *         already been updated)
 */
public static void setSentIndex(Tree tree, int sentIndex) {
  for (Label leafLabel : tree.yield()) {
    if (leafLabel instanceof CoreLabel) {
      ((CoreLabel) leafLabel).setSentIndex(sentIndex);
    } else {
      throw new IllegalArgumentException("Only works on CoreLabel");
    }
  }
}
}
/**
 * Evaluates a guess tree against a gold tree by delegating to the superclass.
 * If either tree is {@code null} a message is written to standard error and
 * nothing is evaluated.  If the two yields differ in size a warning with both
 * yields is logged and evaluation still proceeds.
 *
 * @param guess the hypothesized tree
 * @param gold the reference tree
 * @param pw writer handed through to the superclass evaluation
 */
@Override
public void evaluate(Tree guess, Tree gold, PrintWriter pw) {
  if (gold == null || guess == null) {
    System.err.printf("%s: Cannot compare against a null gold or guess tree!\n",this.getClass().getName());
    return;
  }

  final boolean sameYieldSize = guess.yield().size() == gold.yield().size();
  if (!sameYieldSize) {
    log.info("Warning: yield differs:");
    log.info("Guess: " + SentenceUtils.listToString(guess.yield()));
    log.info("Gold: " + SentenceUtils.listToString(gold.yield()));
  }

  super.evaluate(guess, gold, pw);
}
/**
 * Delegates evaluation of a guess/gold pair to the superclass after two
 * sanity checks: a {@code null} tree aborts with a message on standard error,
 * and yields of different size trigger a logged warning (evaluation continues).
 *
 * @param guess the hypothesized tree
 * @param gold the reference tree
 * @param pw writer handed through to the superclass evaluation
 */
@Override
public void evaluate(Tree guess, Tree gold, PrintWriter pw) {
  if (gold == null || guess == null) {
    System.err.printf("%s: Cannot compare against a null gold or guess tree!\n", this.getClass().getName());
    return;
  }

  final int guessSize = guess.yield().size();
  final int goldSize = gold.yield().size();
  if (guessSize != goldSize) {
    log.info("Warning: yield differs:");
    log.info("Guess: " + SentenceUtils.listToString(guess.yield()));
    log.info("Gold: " + SentenceUtils.listToString(gold.yield()));
  }

  super.evaluate(guess, gold, pw);
}
/**
 * Returns the string associated with the input parse tree.  The tree is
 * first pruned with {@code emptyFilter} and {@code tf}; the yield of the
 * result is then rendered as a single string.  (Per the original
 * documentation this is what removes traces and ATB-specific escape
 * sequences such as "-RRB-" for ")".)
 *
 * @param t - A parse tree
 * @return The yield of the pruned parse tree as a string
 */
public static String flattenTree(Tree t) {
  final Tree pruned = t.prune(emptyFilter, tf);
  return SentenceUtils.listToString(pruned.yield());
}
/**
 * Collects the words at the leaves of a tree when context words are enabled.
 *
 * @param tree the tree whose leaf values are collected
 * @return {@code null} if {@code op.trainOptions.useContextWords} is off,
 *         otherwise a list of the values of the labels in the tree's yield
 */
private List<String> getContextWords(Tree tree) {
  if (!op.trainOptions.useContextWords) {
    return null;
  }
  final List<String> contextWords = Generics.newArrayList();
  for (Label leaf : tree.yield()) {
    contextWords.add(leaf.value());
  }
  return contextWords;
}
/** * Returns the positional index of the right edge of a tree * <i>t</i> within a given root, as defined by the size of the yield * of all material preceding <i>t</i> plus all the material * contained in <i>t</i>. */ public static int rightEdge(Tree t, Tree root) { MutableInteger i = new MutableInteger(root.yield().size()); if (rightEdge(t, root, i)) { return i.intValue(); } else { throw new RuntimeException("Tree is not a descendant of root."); // return root.yield().size() + 1; } }
/**
 * Returns the indices of all imperative verbs in the tree t.  For every
 * match of {@code IMPERATIVE_PATTERN}, the index of the first label in the
 * matched subtree's yield is recorded.
 *
 * @param t the tree to search
 * @return the set of collected indices (empty when nothing matches)
 */
private static Set<Integer> getImperatives(Tree t) {
  final Set<Integer> indices = new HashSet<>();
  for (TregexMatcher m = IMPERATIVE_PATTERN.matcher(t); m.find(); ) {
    // The first leaf label of the match is expected to be a CoreLabel.
    final CoreLabel firstLeaf = (CoreLabel) m.getMatch().yield().get(0);
    indices.add(firstLeaf.index());
  }
  return indices;
}
@Override public void evaluate(Tree guess, Tree gold, PrintWriter pw, double weight) { List<Label> words = guess.yield(); int pos = 0; for (Label word : words) { ++pos; SimpleMatrix wv = model.getWordVector(word.value()); // would be faster but more implementation-specific if we // removed wv.equals if (wv == unk || wv.equals(unk)) { pw.printf(" Unknown word in position %d: %s%n", pos, word.value()); unkWords.add(word.value()); } } }
/**
 * Post-processes a multi-word-expression subtree.  The yield is joined into
 * one string with all whitespace removed.  If that string consists solely of
 * digits and punctuation, the subtree is replaced by a node with the same
 * value and a single leaf holding the joined string; otherwise the node's
 * value is prefixed with {@code MWE_PHRASAL}.
 *
 * @param t the subtree to process
 * @return the replacement node, or {@code t} itself after relabeling
 */
private Tree postProcessMWE(Tree t) {
  final String spaced = SentenceUtils.listToString(t.yield());
  final String tYield = spaced.replaceAll("\\s+", "");

  if (!tYield.matches("[\\d\\p{Punct}]*")) {
    t.setValue(MWE_PHRASAL + t.value());
    return t;
  }

  // Collapse the whole expression into one leaf under a node with the same value.
  final List<Tree> kids = new ArrayList<>();
  kids.add(treeFactory.newLeaf(tYield));
  return treeFactory.newTreeNode(t.value(), kids);
}
/**
 * Reads every tree of a treebank and accumulates corpus statistics for it,
 * printing progress marks to standard output along the way.
 *
 * @param tb the treebank to iterate over
 * @param name the name given to the resulting statistics object
 * @return the statistics object after {@code computeFinalValues()} has been called
 */
private ObservedCorpusStats gatherStats(DiskTreebank tb, String name) {
  final ObservedCorpusStats stats = new ObservedCorpusStats(name);

  if (makeVocab) {
    trainVocab = Generics.newHashSet();
  }

  System.out.println("Reading treebank:");
  for (Tree t : tb) {
    final Pair<Integer,Integer> treeFacts = dissectTree(t, stats, makeVocab);
    stats.addStatsForTree(t.yield().size(), treeFacts.first(), treeFacts.second());

    // Progress output: a dot when the tree count is a multiple of 100,
    // otherwise a line break when it is a multiple of 8001.
    if (stats.numTrees % 100 == 0) {
      System.out.print(".");
    } else if (stats.numTrees % 8001 == 0) {
      System.out.println();
    }
  }

  stats.computeFinalValues();
  System.out.println("done!");
  return stats;
}
/**
 * Fills in the lemma of every leaf label that does not have one yet, using
 * {@code MORPH} on the label's word and tag.  Each label in the yield is
 * cast to {@code CoreLabel}.
 *
 * @param tree the tree whose leaf labels are lemmatized in place
 */
private static void addLemmata(Tree tree) {
  tree.yield().forEach(label -> {
    final CoreLabel token = (CoreLabel) label;
    if (token.lemma() != null) {
      // Already lemmatized; leave the existing value untouched.
      return;
    }
    token.setLemma(MORPH.lemma(token.word(), token.tag()));
  });
}
public TreeFromFile(Tree t) { this.treeString = t.toString(); sentence = SentenceUtils.listToString(t.yield()); if(t.label() instanceof HasIndex) { sentId = ((CoreLabel)t.label()).sentIndex(); filename = ((CoreLabel)t.label()).docID(); if(sentId != -1 && filename != null && !filename.equals("")) sentence = String.format("%s-%d %s", filename,sentId,sentence); } }
/**
 * Loads trees from the files under a path and keeps those whose yield size
 * lies within the given bounds.
 *
 * @param path the path handed to the treebank loader
 * @param low lower bound passed to the {@code NumberRangeFileFilter}
 * @param high upper bound passed to the {@code NumberRangeFileFilter}
 * @param minLength smallest yield size (inclusive) a tree may have to be kept
 * @param maxLength largest yield size (inclusive) a tree may have to be kept
 * @return the trees that satisfy the length bounds, in treebank iteration order
 */
public static List<Tree> getTrees(String path, int low, int high, int minLength, int maxLength) {
  final Treebank treebank = new DiskTreebank(
      in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new WordFactory()), new BobChrisTreeNormalizer()));
  treebank.loadPath(path, new NumberRangeFileFilter(low, high, true));

  final List<Tree> kept = new ArrayList<>();
  for (Tree candidate : treebank) {
    final int sentenceLength = candidate.yield().size();
    if (sentenceLength > maxLength || sentenceLength < minLength) {
      continue;
    }
    kept.add(candidate);
  }
  return kept;
}
static boolean leftEdge(Tree t, Tree t1, MutableInteger i) { if (t == t1) { return true; } else if (t1.isLeaf()) { int j = t1.yield().size(); // so that empties don't add size i.set(i.intValue() + j); return false; } else { for (Tree kid : t1.children()) { if (leftEdge(t, kid, i)) { return true; } } return false; } }
Tree prune(Tree tree, int start) { if (tree.isLeaf() || tree.isPreTerminal()) { return tree; } // check each node's children for deletion List<Tree> children = helper(tree.getChildrenAsList(), start); children = prune(children, tree.label(), start, start + tree.yield().size()); return tree.treeFactory().newTreeNode(tree.label(), children); }