/** * Replace the labels of the leaves with the given leaves. */ public static void setLeafLabels(Tree tree, List<Label> labels) { Iterator<Tree> leafIterator = tree.getLeaves().iterator(); Iterator<Label> labelIterator = labels.iterator(); while (leafIterator.hasNext() && labelIterator.hasNext()) { Tree leaf = leafIterator.next(); Label label = labelIterator.next(); leaf.setLabel(label); //leafIterator.next().setLabel(labelIterator.next()); } if (leafIterator.hasNext()) { throw new IllegalArgumentException("Tree had more leaves than the labels provided"); } if (labelIterator.hasNext()) { throw new IllegalArgumentException("More labels provided than tree had leaves"); } }
/**
 * Puts the words of {@code originalSentence} back onto the leaves of the
 * given tree. Does nothing when either the stored sentence or the tree is
 * {@code null}.
 *
 * <p>When {@code addedPunct} is set the tree is expected to have exactly one
 * more leaf than the sentence has words; that trailing leaf keeps its label.
 * Words that are not {@link Label} instances are skipped, leaving the
 * matching leaf unchanged.
 *
 * @param tree the tree whose leaves are relabeled in place; may be null
 * @throws IllegalStateException if the leaf count does not match the
 *     expected word count
 */
@Override
public void restoreOriginalWords(Tree tree) {
  if (originalSentence == null || tree == null) {
    return;
  }

  final List<Tree> treeLeaves = tree.getLeaves();

  int expectedLeafCount;
  if (addedPunct) {
    // one extra leaf is expected beyond the original words
    expectedLeafCount = originalSentence.size() + 1;
  } else {
    expectedLeafCount = originalSentence.size();
  }

  if (treeLeaves.size() != expectedLeafCount) {
    throw new IllegalStateException("originalWords and sentence of different sizes: "
        + expectedLeafCount + " vs. " + treeLeaves.size()
        + "\n Orig: " + SentenceUtils.listToString(originalSentence)
        + "\n Pars: " + SentenceUtils.listToString(treeLeaves));
  }

  // Walk words and leaves in lock-step; the leaf cursor always advances,
  // even for words that cannot be used as a label.
  final Iterator<Tree> leafCursor = treeLeaves.iterator();
  for (HasWord original : originalSentence) {
    Tree target = leafCursor.next();
    if (original instanceof Label) {
      target.setLabel((Label) original);
    }
  }
}
/** * TODO: clearly this should be a default method in ParserQuery once Java 8 comes out */ @Override public void restoreOriginalWords(Tree tree) { if (originalSentence == null || tree == null) { return; } List<Tree> leaves = tree.getLeaves(); if (leaves.size() != originalSentence.size()) { throw new IllegalStateException("originalWords and sentence of different sizes: " + originalSentence.size() + " vs. " + leaves.size() + "\n Orig: " + SentenceUtils.listToString(originalSentence) + "\n Pars: " + SentenceUtils.listToString(leaves)); } // TODO: get rid of this cast Iterator<? extends Label> wordsIterator = (Iterator<? extends Label>) originalSentence.iterator(); for (Tree leaf : leaves) { leaf.setLabel(wordsIterator.next()); } }
/** * Sets the label of the leaf nodes of a Tree to be the CoreLabels in the given sentence. * The original value() of the Tree nodes is preserved, and otherwise the label of tree * leaves becomes the label from the List. */ // todo [cdm 2015]: This clearly shouldn't be here! Maybe it's not needed at all now since parsing code does this? public static void mergeLabels(Tree tree, List<CoreLabel> sentence) { int idx = 0; for (Tree t : tree.getLeaves()) { CoreLabel cl = sentence.get(idx ++); String value = t.value(); cl.set(CoreAnnotations.ValueAnnotation.class, value); t.setLabel(cl); } tree.indexLeaves(); }
t.setLabel(new CategoryWordTag(catSB.toString(), word, tag)); return t;
t.setLabel(new CategoryWordTag(catSB.toString(), word, tag)); return t;
/**
 * Recursively ensures that every node of {@code tree} carries a CoreLabel.
 * A node whose label is not already a CoreLabel gets a fresh CoreLabel
 * holding only the old label's value; nodes that already have a CoreLabel
 * are left untouched. Nodes are processed parent-first.
 *
 * @param tree root of the (sub)tree to convert in place
 */
private static void convertToCoreLabels(Tree tree) {
  final Label existing = tree.label();
  final boolean alreadyCoreLabel = existing instanceof CoreLabel;
  if (!alreadyCoreLabel) {
    final CoreLabel replacement = new CoreLabel();
    replacement.setValue(existing.value());
    tree.setLabel(replacement);
  }

  final Tree[] daughters = tree.children();
  for (int i = 0; i < daughters.length; i++) {
    convertToCoreLabels(daughters[i]);
  }
}
/**
 * Walks the tree top-down and swaps any label that is not a CoreLabel for a
 * new CoreLabel whose value is copied from the label it replaces. Only the
 * value is carried over.
 *
 * @param tree root of the (sub)tree to convert in place
 */
private static void convertToCoreLabels(Tree tree) {
  Label nodeLabel = tree.label();
  if (nodeLabel instanceof CoreLabel) {
    // already in the desired form; nothing to replace on this node
  } else {
    CoreLabel coreLabel = new CoreLabel();
    coreLabel.setValue(nodeLabel.value());
    tree.setLabel(coreLabel);
  }

  for (Tree child : tree.children()) {
    convertToCoreLabels(child);
  }
}
/** * Changes the ROOT label, and adds a Lexicon.BOUNDARY daughter to it. * This is needed for the dependency parser. * <i>Note:</i> This is a destructive operation on the tree passed in!! * * @param t The current tree into which a boundary is inserted */ public void addRoot(Tree t) { if (t.isLeaf()) { log.info("Warning: tree is leaf: " + t); t = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(t)); } t.setLabel(new CategoryWordTag(tlp.startSymbol(), Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG)); List<Tree> preTermChildList = new ArrayList<>(); Tree boundaryTerm = tf.newLeaf(new Word(Lexicon.BOUNDARY));//CategoryWordTag(Lexicon.BOUNDARY,Lexicon.BOUNDARY,"")); preTermChildList.add(boundaryTerm); Tree boundaryPreTerm = tf.newTreeNode(new CategoryWordTag(Lexicon.BOUNDARY_TAG, Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG), preTermChildList); List<Tree> childList = t.getChildrenAsList(); childList.add(boundaryPreTerm); t.setChildren(childList); }
public Tree treeSkeletonConstituentCopy(TreeFactory tf, LabelFactory lf) { if (isLeaf()) { // Reuse the current label for a leaf. This way, trees which // are based on tokens in a sentence can have the same tokens // even after a "deep copy". // TODO: the LabeledScoredTreeFactory copies the label for a new // leaf. Perhaps we could add a newLeafNoCopy or something like // that for efficiency. Tree newLeaf = tf.newLeaf(label()); newLeaf.setLabel(label()); return newLeaf; } Label label = lf.newLabel(label()); Tree[] kids = children(); List<Tree> newKids = new ArrayList<>(kids.length); for (Tree kid : kids) { newKids.add(kid.treeSkeletonConstituentCopy(tf, lf)); } return tf.newTreeNode(label, newKids); }
/**
 * Converts the labels of the whole tree to CoreLabels, in place.
 * CoreLabels are required because extra information, such as the token
 * span, is stored on them. A label that is already a CoreLabel is kept;
 * any other label is replaced by a new CoreLabel carrying only its value.
 *
 * @param tree root of the (sub)tree to convert
 */
public static void convertToCoreLabels(Tree tree) {
  final Label original = tree.label();
  final boolean needsConversion = !(original instanceof CoreLabel);
  if (needsConversion) {
    final CoreLabel converted = new CoreLabel();
    converted.setValue(original.value());
    tree.setLabel(converted);
  }

  final Tree[] subtrees = tree.children();
  for (int k = 0; k < subtrees.length; k++) {
    convertToCoreLabels(subtrees[k]);
  }
}
/**
 * Replaces, throughout the tree, every label that is not a CoreLabel with a
 * new CoreLabel holding the same value. This is needed because additional
 * information (for example the token span) is kept on the CoreLabel.
 * The node itself is handled before its children.
 *
 * @param tree root of the (sub)tree to convert in place
 */
public static void convertToCoreLabels(Tree tree) {
  Label currentLabel = tree.label();
  if (currentLabel instanceof CoreLabel) {
    // keep the existing CoreLabel as it is
  } else {
    CoreLabel freshLabel = new CoreLabel();
    freshLabel.setValue(currentLabel.value());
    tree.setLabel(freshLabel);
  }

  for (Tree daughter : tree.children()) {
    convertToCoreLabels(daughter);
  }
}
subtree.setLabel(new StringLabel("")); } else if (lab.value() == null) { subtree.label().setValue("");
t.setLabel(lf.newLabel(t.label().value() + "-G")); ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); oldT = ht; } while (!ht.isPreTerminal()); ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); ht = headFinder.determineHead(ht); kid.setLabel(lf.newLabel(kid.value() + "-TMP")); ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); oldT = ht; ht.setLabel(lf.newLabel(ht.value() + "-TMP")); oldT.setLabel(lf.newLabel(tlp.basicCategory(oldT.value()))); ht.setLabel(lf.newLabel(ht.value() + "-TMP")); ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); ht.setLabel(lf.newLabel(ht.label().value() + "-ADV")); oldT = ht;
if (translation == null) translation = "[UNK]"; Label newLabel = new StringLabel(oldLabel.value() + ':' + translation); subtree.setLabel(newLabel);
Tree binarizeLocalTree(Tree t, int headNum, TaggedWord head) { //System.out.println("Working on: "+headNum+" -- "+t.label()); if (markovFactor) { String topCat = t.label().value(); Label newLabel = new CategoryWordTag(topCat, head.word(), head.tag()); t.setLabel(newLabel); Tree t2; if (insideFactor) { t2 = markovInsideBinarizeLocalTreeNew(t, headNum, 0, t.numChildren() - 1, true); // t2 = markovInsideBinarizeLocalTree(t, head, headNum, topCat, false); } else { t2 = markovOutsideBinarizeLocalTree(t, head, headNum, topCat, new LinkedList<>(), false); } if (DEBUG) { CategoryWordTag.printWordTag = false; StringBuilder sb1 = new StringBuilder(); localTreeString(t, sb1, 0); StringBuilder sb2 = new StringBuilder(); localTreeString(t2, sb2, 0); System.out.println("Old Local Tree: " + sb1); System.out.println("New Local Tree: " + sb2); CategoryWordTag.printWordTag = true; } return t2; } if (insideFactor) { return insideBinarizeLocalTree(t, headNum, head, 0, 0); } return outsideBinarizeLocalTree(t, t.label().value(), t.label().value(), headNum, head, 0, "", 0, ""); }
private static int markStrahler(Tree t) { if (t.isLeaf()) { // don't annotate the words at leaves! return 1; } else { String cat = t.label().value(); int maxStrahler = -1; int maxMultiplicity = 0; for (int i = 0; i < t.numChildren(); i++) { int strahler = markStrahler(t.getChild(i)); if (strahler > maxStrahler) { maxStrahler = strahler; maxMultiplicity = 1; } else if (strahler == maxStrahler) { maxMultiplicity++; } } if (maxMultiplicity > 1) { maxStrahler++; // this is the one case where it grows } cat = cat + '~' + maxStrahler; Label label = t.label().labelFactory().newLabel(t.label()); label.setValue(cat); t.setLabel(label); return maxStrahler; } }