/**
 * Rewrites the label {@code "sa"} to {@code "s.a"} on a non-leaf node.
 * Leaves, nodes without a label, and nodes carrying any other label are
 * returned unchanged.
 *
 * @param t the node to inspect; it is modified in place when its label is {@code "sa"}
 * @return the same node instance that was passed in
 */
@Override
public Tree transformTree(Tree t) {
  if (!t.isLeaf()) {
    String label = t.value();
    // Constant-first equals doubles as the null check on the label.
    if ("sa".equals(label)) {
      t.setValue("s.a");
    }
  }
  return t;
}
}; // end of the enclosing declaration (opened above this excerpt)
/**
 * Replaces the word under a preterminal with a segmentation marker.
 * <ul>
 *   <li>{@code -NONE-} preterminals are left untouched.</li>
 *   <li>{@code XSEG}: the tag is {@code PUNC}, or neither clitic pattern is found.</li>
 *   <li>{@code SEGC}: both clitic patterns are found.</li>
 *   <li>{@code SEGL}: only {@code rightClitic} is found.</li>
 *   <li>{@code SEGR}: only {@code leftClitic} is found.</li>
 * </ul>
 *
 * @param node the preterminal whose single child (the word) is relabeled in place
 */
@Override
protected void processPreterminal(Tree node) {
  final String rawTag = node.value();
  if (rawTag.equals("-NONE-")) {
    return;
  }

  final String rawWord = node.firstChild().value().trim();
  final boolean hasLeft = leftClitic.matcher(rawWord).find();
  final boolean hasRight = rightClitic.matcher(rawWord).find();

  final String marker;
  if (rawTag.equals("PUNC") || (!hasRight && !hasLeft)) {
    marker = "XSEG";
  } else if (hasRight && hasLeft) {
    marker = "SEGC";
  } else if (hasRight) {
    marker = "SEGL";
  } else if (hasLeft) {
    marker = "SEGR";
  } else {
    // Defensive guard: the branches above already cover every combination.
    throw new RuntimeException("Messy token: " + rawWord);
  }
  node.firstChild().setValue(marker);
}
} // end of the enclosing class (opened above this excerpt)
/**
 * Post-processes a multi-word-expression subtree.
 *
 * <p>The yield of {@code t} is joined and stripped of all whitespace. If the
 * result consists solely of digits and punctuation (this includes the empty
 * string), the subtree is replaced by a fresh node with the same label and a
 * single leaf holding that compacted string. Otherwise the label of {@code t}
 * is prefixed with {@code MWE_PHRASAL} in place.
 *
 * @param t the MWE subtree
 * @return either {@code t} itself (relabeled) or the newly built replacement node
 */
private Tree postProcessMWE(Tree t) {
  String compactYield = SentenceUtils.listToString(t.yield()).replaceAll("\\s+", "");
  boolean digitsAndPunctOnly = compactYield.matches("[\\d\\p{Punct}]*");

  if (!digitsAndPunctOnly) {
    t.setValue(MWE_PHRASAL + t.value());
    return t;
  }

  // Collapse the whole expression into one token under the original label.
  List<Tree> singleLeaf = new ArrayList<>();
  singleLeaf.add(treeFactory.newLeaf(compactYield));
  return treeFactory.newTreeNode(t.value(), singleLeaf);
}
private static boolean includesEmptyNPSubj(Tree t) { if (t == null) { return false; } Tree[] kids = t.children(); if (kids == null) { return false; } boolean foundNullSubj = false; for (Tree kid : kids) { Tree[] kidkids = kid.children(); if (NPSbjPattern.matcher(kid.value()).matches()) { kid.setValue("NP"); if (kidkids != null && kidkids.length == 1 && kidkids[0].value().equals("-NONE-")) { // only set flag, since there are 2 a couple of times (errors) foundNullSubj = true; } } } return foundNullSubj; }
@Override public Tree transformTree(Tree t) { //deal with empty root t.setValue(cleanUpRoot(t.value())); //strips tags stripTag(t); // strip empty nodes return stripEmptyNode(t); }
public static void traverseAndFix(Tree t, Tree parent, TwoDimensionalCounter<String, String> unigramTagger, boolean retainNER) { if(t.isPreTerminal()) { if(t.value().equals(SpanishTreeNormalizer.MW_TAG)) { nMissingPOS++; String pos = inferPOS(t, parent, unigramTagger); if (pos != null) { t.setValue(pos); nFixedPOS++; } } return; } for(Tree kid : t.children()) traverseAndFix(kid, t, unigramTagger, retainNER); // Post-order visit if(t.value().startsWith(SpanishTreeNormalizer.MW_PHRASE_TAG)) { nMissingPhrasal++; String phrasalCat = inferPhrasalCategory(t, retainNER); if (phrasalCat != null) { t.setValue(phrasalCat); nFixedPhrasal++; } } }
/**
 * Repairs a root node that has more than one child.
 *
 * <p>If the root has exactly two children, the first is phrasal and the
 * second carries a sentence-final punctuation tag, the punctuation is moved
 * under the first child and the root is relabeled with the start symbol.
 * In every other case the root is relabeled {@code nonUnaryRoot} and wrapped
 * in a new unary node labeled with the start symbol.
 *
 * @param t  the root to repair; modified in place
 * @param tf factory used to build the wrapping node in the fallback case
 * @return {@code t} itself in the punctuation case, otherwise the new wrapper node
 */
private Tree fixNonUnaryRoot(Tree t, TreeFactory tf) {
  List<Tree> rootKids = t.getChildrenAsList();
  boolean clausePlusFinalPunct = rootKids.size() == 2
      && t.firstChild().isPhrasal()
      && tlp.isSentenceFinalPunctuationTag(t.lastChild().value());

  if (!clausePlusFinalPunct) {
    // Fallback: mark the old root and put a fresh start-symbol node on top of it.
    t.setValue(nonUnaryRoot);
    return tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(t));
  }

  // Re-attach the trailing punctuation as the last child of the clause.
  Tree clause = t.firstChild();
  List<Tree> clauseKids = clause.getChildrenAsList();
  clauseKids.add(t.lastChild());
  clause.setChildren(clauseKids);

  // The root keeps only the clause and becomes the start symbol.
  rootKids.remove(rootKids.size() - 1);
  t.setChildren(rootKids);
  t.setValue(tlp.startSymbol());
  return t;
}
public static void traverseAndFix(Tree t, TwoDimensionalCounter<String, String> pretermLabel, TwoDimensionalCounter<String, String> unigramTagger) { if(t.isPreTerminal()) { if(t.value().equals(FrenchXMLTreeReader.MISSING_POS)) { nMissingPOS++; String word = t.firstChild().value(); String tag = (unigramTagger.firstKeySet().contains(word)) ? Counters.argmax(unigramTagger.getCounter(word)) : ManualUWModel.getTag(word); t.setValue(tag); } return; } for(Tree kid : t.children()) traverseAndFix(kid,pretermLabel,unigramTagger); //Post-order visit if(t.value().equals(FrenchXMLTreeReader.MISSING_PHRASAL)) { nMissingPhrasal++; StringBuilder sb = new StringBuilder(); for(Tree kid : t.children()) sb.append(kid.value()).append(" "); String posSequence = sb.toString().trim(); if(pretermLabel.firstKeySet().contains(posSequence)) { String phrasalCat = Counters.argmax(pretermLabel.getCounter(posSequence)); t.setValue(phrasalCat); } else { System.out.println("No phrasal cat for: " + posSequence); } } }
protected void processPreterminal(Tree node) { String rawTag = node.value(); String posTag = (posMapper == null) ? rawTag : posMapper.map(rawTag,node.firstChild().value()); String rawWord = node.firstChild().value(); //Hack for LDC2008E22 idiosyncrasy in which (NOUN.VN F) is a pre-terminal/word //This is a bare fathatan that bears no semantic content. Replacing it with the //conjunction ف / f . if(rawWord.equals("F")) { posTag = posTag.equals("NOUN.VN") ? "CONJ" : "CC"; rawWord = "f"; } // Hack for annotation error in ATB if (rawWord.startsWith("MERGE_with_previous_token:")) { rawWord = rawWord.replace("MERGE_with_previous_token:", ""); } // Hack for annotation error in ATB if (rawWord.contains("e")) { rawWord = rawWord.replace("e", ""); } String finalWord = lexMapper.map(rawTag, rawWord); if(lexMapper.canChangeEncoding(rawTag, finalWord)) finalWord = encodingMap.apply(finalWord); node.setValue(posTag); if(morphDelim == null) { node.firstChild().setValue(finalWord); if (node.firstChild().label() instanceof CoreLabel) ((CoreLabel) node.firstChild().label()).setWord(finalWord); } else { node.firstChild().setValue(finalWord + morphDelim + rawTag); } }
/**
 * Recursively replaces the label of every non-leaf node in the subtree with
 * the result of {@code cleanUpLabel}. Leaves are left untouched.
 *
 * @param t root of the subtree to process; modified in place
 */
protected void stripTag(Tree t) {
  if (t.isLeaf()) {
    return;
  }
  // Clean this node first, then descend into its children.
  t.setValue(cleanUpLabel(t.value()));
  for (Tree child : t.getChildrenAsList()) {
    stripTag(child);
  }
}
t.setValue(posStr); if(t.label() instanceof HasTag) ((HasTag) t.label()).setTag(posStr);
node.setValue(s);
/**
 * Applies the language-specific annotation to a single tree node by appending
 * the string from {@code getAnnotationString} to the node's category.
 * Parameterizations of this kind belong in the concrete
 * TreebankLangParserParams class.
 *
 * <p>The method is invoked once for each node of the tree (depth first,
 * left to right), so an implementation must not itself recurse into the
 * node's descendants. It may destructively modify the input node {@code t},
 * changing labels as well as tree shape.
 *
 * @param t    The input node. Non-language-specific annotation has already
 *             been applied, so strip back to basic categories where needed.
 * @param root The root of the tree that contains {@code t} (can be null for words)
 * @return The fully annotated node, with its daughters as they should appear
 *         in the final result
 */
@Override
public Tree transformTree(Tree t, Tree root) {
  String annotatedCategory = t.value() + getAnnotationString(t, root);
  t.setValue(annotatedCategory);

  // Keep the tag of a preterminal in sync with its new category.
  boolean taggedPreterminal = t.isPreTerminal() && t.label() instanceof HasTag;
  if (taggedPreterminal) {
    ((HasTag) t.label()).setTag(annotatedCategory);
  }
  return t;
}
@Override public Tree transformTree(Tree t, Tree root) { String baseCat = t.value(); StringBuilder newCategory = new StringBuilder(); //Add manual state splits for (Pair<TregexPattern,Function<TregexMatcher,String>> e : activeAnnotations) { TregexMatcher m = e.first().matcher(root); if (m.matchesAt(t)) newCategory.append(e.second().apply(m)); } // WSGDEBUG //Add morphosyntactic features if this is a POS tag if(t.isPreTerminal() && tagSpec != null) { if( !(t.firstChild().label() instanceof CoreLabel) || ((CoreLabel) t.firstChild().label()).originalText() == null ) throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s",this.getClass().getName(),t.toString())); String morphoStr = ((CoreLabel) t.firstChild().label()).originalText(); MorphoFeatures feats = tagSpec.strToFeatures(morphoStr); baseCat = feats.getTag(baseCat); } //Update the label(s) String newCat = baseCat + newCategory.toString(); t.setValue(newCat); if (t.isPreTerminal() && t.label() instanceof HasTag) ((HasTag) t.label()).setTag(newCat); return t; }
/** * Perform (possibly destructive) operations on the tree. Do a top-down DFS on the tree. */ public void visitTree(Tree tree) { if (tree == null) return; String yield = SentenceUtils.listToString(tree.yield()); if (mweDictionary.contains(yield)) { List<Tree> children = getPreterminalSubtrees(tree); String newLabel = "MW" + tree.value(); tree.setValue(newLabel); tree.setChildren(children); // Bottom out of the recursion return; } else { for (Tree subTree : tree.children()) { if (subTree.isPhrasal()) { // Only phrasal trees can have yields > 1!! visitTree(subTree); } } } }
node.setValue(tlp.basicCategory(node.value()));
@Override public Tree transformTree(Tree t, Tree root) { // Perform tregex-powered annotations t = super.transformTree(t, root); String cat = t.value(); //Add morphosyntactic features if this is a POS tag if(t.isPreTerminal() && tagSpec != null) { if( !(t.firstChild().label() instanceof CoreLabel) || ((CoreLabel) t.firstChild().label()).originalText() == null ) throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s",this.getClass().getName(),t.toString())); String morphoStr = ((CoreLabel) t.firstChild().label()).originalText(); Pair<String,String> lemmaMorph = MorphoFeatureSpecification.splitMorphString("", morphoStr); MorphoFeatures feats = tagSpec.strToFeatures(lemmaMorph.second()); cat = feats.getTag(cat); } //Update the label(s) t.setValue(cat); if (t.isPreTerminal() && t.label() instanceof HasTag) ((HasTag) t.label()).setTag(cat); return t; }
if(t.value() == null || t.value().equals("")) t.setValue("DUMMY");
node.setValue(s); node.setScore(tree.score()); if(node.label() instanceof HasTag)
t.setValue(phraseValue); } else { Tree newHead = tf.newTreeNode(phraseValue, newNodes);