/**
 * Normalizes a complete tree; the argument can be assumed to be the root.
 * <p>
 * Two passes are applied in order: first every node rejected by
 * {@code emptyFilter} is pruned (empty elements, i.e. those carrying the
 * nonterminal tag label '-NONE-'), then the nodes selected by
 * {@code aOverAFilter} (unary A-over-A nodes) are spliced out.
 *
 * @param tree the tree to normalize; must not be {@code null}
 * @param tf   the factory used to build the nodes of the resulting tree
 * @return the normalized tree, or {@code null} when pruning removes the
 *         whole tree (for example, when it contains no real words)
 */
@Override
public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
  final Tree pruned = tree.prune(emptyFilter, tf);
  // Nothing survived pruning: there is nothing left to splice.
  return (pruned == null) ? null : pruned.spliceOut(aOverAFilter, tf);
}
/**
 * Builds a filtered deep copy of this tree using this tree's own
 * {@code treeFactory()}.
 * <p>
 * Every node the filter rejects is dropped together with all of its
 * children. A node that loses all of its children this way is dropped as
 * well. The filter is never invoked with a {@code null} argument.
 * <p>
 * Example that removes every PP node from a tree:
 * <pre>{@code
 * Tree withoutPPs = tree.prune(t -> !t.label().value().equals("PP"));
 * }</pre>
 *
 * @param filter decides which nodes are kept; nodes for which it returns
 *               {@code false} are removed
 * @return the filtered copy, or {@code null} if the root itself ends up
 *         being pruned
 */
public Tree prune(final Predicate<Tree> filter) {
  return this.prune(filter, this.treeFactory());
}
/**
 * Returns the string associated with the input parse tree: the tree is
 * pruned with {@code emptyFilter} (removing traces) and the remaining yield
 * is joined into one string.
 * <p>
 * NOTE(review): earlier documentation also promised removal of ATB-specific
 * escape sequences (e.g., "-RRB-" for ")"), but no unescaping is performed
 * in this method itself -- confirm whether callers rely on that.
 *
 * @param t a parse tree; must not be {@code null}
 * @return the yield of the pruned tree, or the empty string when pruning
 *         removes the whole tree
 */
public static String flattenTree(Tree t) {
  t = t.prune(emptyFilter, tf);
  if (t == null) {
    // prune() returns null when the root (or every descendant) is filtered
    // out, so there is no yield to print.
    return "";
  }
  return SentenceUtils.listToString(t.yield());
}
public void visitTree(Tree t) { if(t == null || t.value().equals("X")) return; t = t.prune(nullFilter, new LabeledScoredTreeFactory()); //Do *not* strip traces here. The ArabicTreeReader will do that if needed for(Tree node : t) if(node.isPreTerminal()) processPreterminal(node); treesVisited++; String flatString = (makeFlatFile) ? ATBTreeUtils.flattenTree(t) : null; //Do the decimation if(treesVisited % 9 == 0) { write(t, outFiles.get(devExtension)); if(makeFlatFile) outFiles.get(devExtension + flatExtension).println(flatString); } else if(treesVisited % 10 == 0) { write(t, outFiles.get(testExtension)); if(makeFlatFile) outFiles.get(testExtension + flatExtension).println(flatString); } else { write(t, outFiles.get(trainExtension)); if(makeFlatFile) outFiles.get(trainExtension + flatExtension).println(flatString); } }
/**
 * Processes one tree and prints its tagged form to {@code outfile}.
 * <p>
 * Trees that are {@code null}, whose root value is "X", or that are removed
 * entirely by pruning with {@code nullFilter} are skipped. Every preterminal
 * of the pruned tree is passed to {@code processPreterminal}. When
 * {@code flatFile} is available, the flattened tree is printed there too.
 *
 * @param t the tree to process; may be {@code null}
 */
public void visitTree(Tree t) {
  // "X".equals(...) also tolerates a root with a null value.
  if (t == null || "X".equals(t.value())) {
    return;
  }

  t = t.prune(nullFilter, new LabeledScoredTreeFactory());
  if (t == null) {
    // The whole tree was pruned away; there is nothing to print.
    return;
  }

  for (Tree node : t) {
    if (node.isPreTerminal()) {
      processPreterminal(node);
    }
  }

  outfile.println(ATBTreeUtils.taggedStringFromTree(t, removeEscapeTokens, wordTagDelim));
  if (flatFile != null) {
    flatFile.println(ATBTreeUtils.flattenTree(t));
  }
}
}
@Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { tree = tree.prune(hebrewEmptyFilter, tf).spliceOut(aOverAFilter, tf); //Add start symbol so that the root has only one sub-state. Escape any enclosing brackets. //If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method //will return null. In this case, readers e.g. PennTreeReader will try to read the next tree. while(tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1) tree = tree.firstChild(); if(tree != null && !tree.value().equals(tlp.startSymbol())) tree = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(tree)); return tree; }
Tree[] kids = children(); for (Tree kid : kids) { Tree prunedChild = kid.prune(filter, tf); if (prunedChild != null) { l.add(prunedChild);
/**
 * Split the given tree based on a split point such that the
 * terminals leading up to the split point are in the left returned
 * tree and those following the split point are in the right returned
 * tree.
 *
 * AnCora contains a nontrivial amount of trees with multiple
 * sentences in them. This method is used to break apart these
 * sentences into separate trees.
 *
 * @param t Tree from which to extract a subtree. This may be
 *          modified during processing.
 * @param splitPoint Point up to which to extract. If {@code null},
 *                   {@code t} is returned unchanged as the left
 *                   tree and the right tree is {@code null}.
 * @return A pair where the left tree contains every terminal leading
 *         up to and including {@code splitPoint} and the right tree
 *         contains every terminal following {@code splitPoint}.
 *         Both trees may be normalized before return. A side is
 *         {@code null} when no part of {@code t} falls on that side
 *         of the split point.
 */
static Pair<Tree, Tree> split(Tree t, Tree splitPoint) {
  if (splitPoint == null) {
    return new Pair<>(t, null);
  }

  Tree left = t.prune(new LeftOfFilter(splitPoint, t));
  Tree right = t.prune(new RightOfExclusiveFilter(splitPoint, t));

  // prune() returns null when everything on one side is filtered out, and
  // the normalizer is not meant to be handed a null tree.
  if (left != null) {
    left = splittingNormalizer.normalizeWholeTree(left, splittingTreeFactory);
  }
  if (right != null) {
    right = splittingNormalizer.normalizeWholeTree(right, splittingTreeFactory);
  }

  return new Pair<>(left, right);
}
@Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { tree = tree.prune(emptyFilter, tf).spliceOut(aOverAFilter, tf);
t = t.prune(nullFilter, tf); t = arabicAoverAFilter(t);
@Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { tree = tree.prune(emptyFilter, tf).spliceOut(aOverAFilter, tf);
/**
 * Converts a parse tree into a string of tokens. Each token is a word and
 * its POS tag separated by the delimiter specified by {@code separator}.
 * The tree is first pruned with {@code emptyFilter}.
 *
 * @param t a parse tree; must not be {@code null}
 * @param removeEscaping if true, each word is passed through
 *                       {@code unEscape} (removing LDC escape characters);
 *                       otherwise words are left as they are
 * @param separator word/tag separator
 * @return a string of tagged words, or the empty string when pruning
 *         removes the whole tree
 */
public static String taggedStringFromTree(Tree t, boolean removeEscaping, String separator) {
  t = t.prune(emptyFilter, tf);
  if (t == null) {
    // prune() returns null when the whole tree is filtered out, so there
    // are no tokens to print.
    return "";
  }

  List<CoreLabel> taggedSentence = t.taggedLabeledYield();
  for (CoreLabel token : taggedSentence) {
    String word = removeEscaping ? unEscape(token.word()) : token.word();
    // Set both fields so the label's value matches the (possibly unescaped) word.
    token.setWord(word);
    token.setValue(word);
  }

  return SentenceUtils.listToString(taggedSentence, false, separator);
}
tree = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(tree)); tree = tree.prune(emptyFilter, tf).spliceOut(aOverAFilter, tf);
outputTree = outputTree.prune(new BobChrisTreeNormalizer.EmptyFilter());
@Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { tree = tree.prune(emptyFilter).spliceOut(aOverAFilter) .transform(constituentRenamer);
tree = tree.prune(subtreeFilter, tf); if (tree == null) { return null; } tree = tree.spliceOut(nodeFilter, tf);
@Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { Tree newTree = tree.prune(chineseEmptyFilter, tf).spliceOut(aOverAFilter);
/**
 * Returns the string associated with the input parse tree: the tree is
 * pruned with {@code emptyFilter} (removing traces) and the remaining yield
 * is joined into one string.
 * <p>
 * NOTE(review): earlier documentation also promised removal of ATB-specific
 * escape sequences (e.g., "-RRB-" for ")"), but no unescaping is performed
 * in this method itself -- confirm whether callers rely on that.
 *
 * @param t a parse tree; must not be {@code null}
 * @return the yield of the pruned tree, or the empty string when pruning
 *         removes the whole tree
 */
public static String flattenTree(Tree t) {
  t = t.prune(emptyFilter, tf);
  if (t == null) {
    // prune() returns null when the root (or every descendant) is filtered
    // out, so there is no yield to print.
    return "";
  }
  return SentenceUtils.listToString(t.yield());
}
@Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { tree = tree.prune(hebrewEmptyFilter, tf).spliceOut(aOverAFilter, tf); //Add start symbol so that the root has only one sub-state. Escape any enclosing brackets. //If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method //will return null. In this case, readers e.g. PennTreeReader will try to read the next tree. while(tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1) tree = tree.firstChild(); if(tree != null && !tree.value().equals(tlp.startSymbol())) tree = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(tree)); return tree; }
@Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { tree = tree.prune(hebrewEmptyFilter, tf).spliceOut(aOverAFilter, tf); //Add start symbol so that the root has only one sub-state. Escape any enclosing brackets. //If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method //will return null. In this case, readers e.g. PennTreeReader will try to read the next tree. while(tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1) tree = tree.firstChild(); if(tree != null && !tree.value().equals(tlp.startSymbol())) tree = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(tree)); return tree; }