Refine search
/**
 * Accepts every tree except a preterminal whose value is exactly {@code "-NONE-"}.
 *
 * @param t the tree node to examine
 * @return {@code false} only when {@code t} is a preterminal valued {@code "-NONE-"}
 */
public boolean test(Tree t) {
  if (!t.isPreTerminal()) {
    return true;
  }
  return !t.value().equals("-NONE-");
}
}
/**
 * Renders a local tree (a node plus its immediate children) as a rule string of the form
 * {@code "PARENT -> CHILD1 CHILD2 ..."}, with child values separated by single spaces.
 *
 * <p>A node without children yields {@code "PARENT -> "} rather than failing with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param localTree the node whose value and child values are rendered
 * @return the rule string
 */
private static String localTreeToRule(Tree localTree) {
  // Fetch the child array once instead of on every loop iteration.
  Tree[] kids = localTree.children();
  StringBuilder sb = new StringBuilder();
  sb.append(localTree.value()).append(" -> ");
  for (int i = 0; i < kids.length; i++) {
    if (i > 0) {
      sb.append(' ');
    }
    sb.append(kids[i].value());
  }
  return sb.toString();
}
/**
 * Builds an annotation string from the tag sequence of the matched node's children.
 *
 * <p>Every child of the match must be a preterminal. The child tags (reduced with
 * {@code tlp.basicCategory} when {@code doBasicCat} is set) are joined with spaces and looked up
 * in {@code mwCounter} under the matched node's value. When that count exceeds {@code cutoff},
 * the result is {@code annotationMark} followed by the tag sequence with all whitespace removed
 * and lower-cased; otherwise the empty string is returned.
 *
 * @param m the matcher whose current match is inspected
 * @return the annotation string, or {@code ""} when the count does not exceed the cutoff
 * @throws RuntimeException if {@code mwCounter} is {@code null} or a child is not a preterminal
 */
public String apply(TregexMatcher m) {
  if (mwCounter == null) {
    throw new RuntimeException("Cannot enable POSSequence features without POS sequence map. Use option -frenchMWMap.");
  }

  Tree t = m.getMatch();
  StringBuilder sb = new StringBuilder();
  for (Tree kid : t.children()) {
    if (!kid.isPreTerminal()) {
      throw new RuntimeException("Not POS sequence for tree: " + t.toString());
    }
    String tag = doBasicCat ? tlp.basicCategory(kid.value()) : kid.value();
    sb.append(tag).append(" ");
  }

  // Materialize the sequence once; it is needed for both the lookup and the result.
  String posSequence = sb.toString();
  if (mwCounter.getCount(t.value(), posSequence.trim()) > cutoff) {
    // Locale.ROOT keeps the annotation identical regardless of the JVM's default locale
    // (e.g. a Turkish default would otherwise lower-case "I" to a dotless i).
    return annotationMark + posSequence.replaceAll("\\s+", "").toLowerCase(java.util.Locale.ROOT);
  }
  return "";
}
public Tree arabicAoverAFilter(Tree t) { if(t == null || t.isLeaf() || t.isPreTerminal()) return t; //Specific nodes to filter out if(t.numChildren() == 1) { final Tree fc = t.firstChild(); //A over A nodes i.e. from BobChrisTreeNormalizer if(t.label() != null && fc.label() != null && t.value().equals(fc.value())) { t.setChildren(fc.children()); } } for(Tree kid : t.getChildrenAsList()) arabicAoverAFilter(kid); return t; }
/**
 * Rejects nodes that are A over A nodes (perhaps due to empty removal or being EDITED nodes),
 * and also rejects every node whose value is {@code "w"}.
 *
 * <p>Leaves and preterminals (other than {@code "w"} nodes) are always accepted. For any other
 * node, the node's label value is compared with the label value of its first child; the node is
 * rejected when the two are equal.
 *
 * @param t the tree node to examine
 * @return {@code true} if the node is accepted, {@code false} otherwise
 */
public boolean test(Tree t) {
  if ("w".equals(t.value())) {
    return false;
  }
  if (t.isLeaf() || t.isPreTerminal()) {
    return true;
  }
  if (t.label() == null) {
    return true;
  }
  if (t.label().value() == null) {
    return true;
  }
  boolean sameAsFirstChild = t.label().value().equals(t.getChild(0).label().value());
  return !sameAsFirstChild;
}
}
/**
 * Recursively walks the tree and, for every node, records that node's value on each of its leaf
 * grandchildren under {@code CoreAnnotations.GrandparentAnnotation}.
 *
 * <p>Leaf labels are cast to {@code CoreLabel} without a type check.
 *
 * @param tree the subtree to process
 */
private static void inferParentParseTag(Tree tree) {
  final String ancestorValue = tree.value();
  for (Tree middle : tree.children()) {
    for (Tree lower : middle.children()) {
      if (!lower.isLeaf()) {
        continue;
      }
      CoreLabel leafLabel = (CoreLabel) lower.label();
      leafLabel.set(CoreAnnotations.GrandparentAnnotation.class, ancestorValue);
    }
    inferParentParseTag(middle);
  }
}
private static Tree funkyFindLeafWithApproximateSpan(Tree root, String token, int index, int approximateness) { // log.info("Searching " + root + "\n for " + token + " at position " + index + " (plus up to " + approximateness + ")"); List<Tree> leaves = root.getLeaves(); for (Tree leaf : leaves) { CoreLabel label = CoreLabel.class.cast(leaf.label()); Integer indexInteger = label.get(CoreAnnotations.IndexAnnotation.class); if (indexInteger == null) continue; int ind = indexInteger - 1; if (token.equals(leaf.value()) && ind >= index && ind <= index + approximateness) { return leaf; } } // this shouldn't happen // throw new RuntimeException("RuleBasedCorefMentionFinder: ERROR: Failed to find head token"); SieveCoreferenceSystem.logger.warning("RuleBasedCorefMentionFinder: Failed to find head token:\n" + "Tree is: " + root + "\n" + "token = |" + token + "|" + index + "|, approx=" + approximateness); for (Tree leaf : leaves) { if (token.equals(leaf.value())) { //log.info("Found something: returning " + leaf); return leaf; } } int fallback = Math.max(0, leaves.size() - 2); SieveCoreferenceSystem.logger.warning("RuleBasedCorefMentionFinder: Last resort: returning as head: " + leaves.get(fallback)); return leaves.get(fallback); // last except for the added period. }
/**
 * Reports whether any preterminal child of {@code t} is tagged {@code VBN}, {@code VBG} or
 * {@code VBD}.
 *
 * <p>The tag is read from the child's label when it implements {@code HasTag} and returns a
 * non-null tag; otherwise the child's value is used. Only direct children are inspected.
 *
 * @param t the node whose children are examined
 * @return {@code true} as soon as a matching preterminal child is found, {@code false} otherwise
 */
private static boolean vpContainsParticiple(Tree t) {
  for (Tree child : t.children()) {
    if (DEBUG) {
      log.info("vpContainsParticiple examining " + child);
    }
    if (!child.isPreTerminal()) {
      continue;
    }

    Label childLabel = child.label();
    String pos = (childLabel instanceof HasTag) ? ((HasTag) childLabel).tag() : null;
    if (pos == null) {
      pos = child.value();
    }

    boolean participleLike = "VBN".equals(pos) || "VBG".equals(pos) || "VBD".equals(pos);
    if (participleLike) {
      if (DEBUG) {
        log.info("vpContainsParticiple found VBN/VBG/VBD VP");
      }
      return true;
    }
  }
  return false;
}
/**
 * Predicatively marked elements in a sentence should be noted as heads.
 *
 * <p>When {@code t} has the value {@code "S"}, returns its first child whose value fully matches
 * {@code predPattern}.
 *
 * @param t the node whose head is sought
 * @return the first matching child, or {@code null} if {@code t} is not an {@code "S"} node or
 *     no child matches
 */
@Override
protected Tree findMarkedHead(Tree t) {
  String cat = t.value();
  if (!cat.equals("S")) {
    return null;
  }
  for (Tree daughter : t.children()) {
    boolean markedPredicative = predPattern.matcher(daughter.value()).matches();
    if (markedPredicative) {
      return daughter;
    }
  }
  return null;
}
/**
 * Performs language-specific tree transformations such as annotating particular nodes with
 * language-relevant features. Such parameterizations should live inside the specific
 * TreebankLangParserParams class. This method is applied recursively to each node in the tree
 * (depth first, left-to-right), so it should not itself recurse over tree members. It is allowed
 * to (and in some cases does) destructively change the input tree {@code t}, both its labels and
 * its shape.
 *
 * <p>This implementation appends the result of {@code getAnnotationString(t, root)} to the
 * node's value and stores the result back on the node. For a preterminal whose label implements
 * {@code HasTag}, the tag is set to the same string.
 *
 * @param t The input tree (with non-language specific annotation already done, so you need to
 *     strip back to basic categories)
 * @param root The root of the current tree (can be null for words)
 * @return The fully annotated tree node (with daughters still as you want them in the final
 *     result)
 */
@Override
public Tree transformTree(Tree t, Tree root) {
  final String annotatedCat = t.value() + getAnnotationString(t, root);
  t.setValue(annotatedCat);

  if (!t.isPreTerminal()) {
    return t;
  }
  if (t.label() instanceof HasTag) {
    HasTag taggedLabel = (HasTag) t.label();
    taggedLabel.setTag(annotatedCat);
  }
  return t;
}
String motherCat = tlp.basicCategory(t.label().value()); Tree[] kids = parent.children(); if (!kid.value().equals("VP")) { List<Label> tags = kid.preTerminalYield(); for (Label tag : tags) { Tree[] kids = parent.children(); if (!kid.value().startsWith("VB")) {//not necessary to look into the verb List<Label> tags = kid.preTerminalYield(); for (Label tag : tags) {
/**
 * Collects the values of all internal nodes of {@code tree} into {@code knownStates}.
 *
 * <p>Leaves and preterminals are ignored, along with everything beneath them. An internal node
 * contributes its value unless {@code ShiftReduceUtils.isTemporary} reports it as temporary; its
 * children are visited either way.
 *
 * @param tree the subtree to scan
 * @param knownStates the set that receives the node values
 */
public static void findKnownStates(Tree tree, Set<String> knownStates) {
  boolean internalNode = !tree.isLeaf() && !tree.isPreTerminal();
  if (internalNode) {
    if (!ShiftReduceUtils.isTemporary(tree)) {
      knownStates.add(tree.value());
    }
    for (Tree subtree : tree.children()) {
      findKnownStates(subtree, knownStates);
    }
  }
}
private Tree funkyFindLeafWithApproximateSpan(Tree root, String token, int index, int approximateness) { logger.fine("Looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString()); List<Tree> leaves = root.getLeaves(); for (Tree leaf : leaves) { CoreLabel label = CoreLabel.class.cast(leaf.label()); int ind = label.get(CoreAnnotations.BeginIndexAnnotation.class); // log.info("Token #" + ind + ": " + leaf.value()); if (token.equals(leaf.value()) && ind >= index && ind <= index + approximateness) { return leaf; } } // this shouldn't happen // but it does happen (VERY RARELY) on some weird web text that includes SGML tags with spaces // TODO: does this mean that somehow tokenization is different for the parser? check this by throwing an Exception in KBP logger.severe("GenericDataSetReader: WARNING: Failed to find head token"); logger.severe(" when looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString()); return null; }
/**
 * Decides whether node {@code t} should be skipped.
 *
 * <p>A node is skipped when it is a preterminal whose value is a punctuation tag according to
 * {@code tlp}. When {@code origWasInterjection} is {@code false}, a preterminal valued
 * {@code "UH"} and any node valued {@code "INTJ"} are skipped as well.
 *
 * @param t the node under consideration
 * @param origWasInterjection when {@code true}, {@code "UH"} and {@code "INTJ"} nodes are kept
 * @return {@code true} if the node should be skipped
 */
private boolean shouldSkip(Tree t, boolean origWasInterjection) {
  if (t.isPreTerminal()) {
    if (tlp.isPunctuationTag(t.value())) {
      return true;
    }
    if (!origWasInterjection && "UH".equals(t.value())) {
      return true;
    }
  }
  return "INTJ".equals(t.value()) && !origWasInterjection;
}
/**
 * Given a tree, sets the tags on the leaf nodes if they are not already set, using the value of
 * each leaf's preterminal as the tag.
 *
 * <p>Only the first child of a preterminal is considered, and only when its label implements
 * {@code HasTag}; a non-null tag is left alone.
 *
 * @param tree the subtree to process
 */
public static void setLeafTagsIfUnset(Tree tree) {
  if (!tree.isPreTerminal()) {
    for (Tree subtree : tree.children()) {
      setLeafTagsIfUnset(subtree);
    }
    return;
  }

  Tree word = tree.children()[0];
  if (word.label() instanceof HasTag) {
    HasTag taggedWord = (HasTag) word.label();
    if (taggedWord.tag() == null) {
      taggedWord.setTag(tree.value());
    }
  }
}
if(t.isPreTerminal()) { String posStr = normalizePreterminal(t); t.setValue(posStr); if(t.label() instanceof HasTag) ((HasTag) t.label()).setTag(posStr); if(t.value().contains(MorphoFeatureSpecification.MORPHO_MARK)) { String[] toks = t.value().split(MorphoFeatureSpecification.MORPHO_MARK); if(toks.length != 2) System.err.printf("%s: Word contains malformed morph annotation: %s%n",this.getClass().getName(),t.value()); else if(t.label() instanceof CoreLabel) { ((CoreLabel) t.label()).setValue(toks[0].trim().intern()); ((CoreLabel) t.label()).setWord(toks[0].trim().intern()); while(tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1) tree = tree.firstChild(); if(tree != null && !tree.value().equals(rootLabel)) tree = tf.newTreeNode(rootLabel, Collections.singletonList(tree));
String motherCat = tlp.basicCategory(t.label().value()); Tree[] kids = parent.children(); if (!kid.value().equals("VP")) { List<Label> tags = kid.preTerminalYield(); for (Label tag : tags) { Tree[] kids = parent.children(); if (!kid.value().startsWith("VB")) {//not necessary to look into the verb List<Label> tags = kid.preTerminalYield(); for (Label tag : tags) {
/**
 * Walks a binarized tree and records the rules and words it uses.
 *
 * <p>For each preterminal, the vocabulary form of its word (via {@code getVocabWord}) is added
 * to {@code words}. For each unary node, the basic category of its child is added to
 * {@code unaryRules}; for each binary node, the pair of its children's basic categories is added
 * to {@code binaryRules}. Leaves are ignored.
 *
 * @param binaryRules receives (left child category, right child category) pairs
 * @param unaryRules receives child categories of unary nodes
 * @param words receives vocabulary words found under preterminals
 * @param tree the subtree to scan
 * @throws AssertionError if a non-preterminal internal node does not have one or two children
 */
private void searchRulesForBatch(TwoDimensionalSet<String, String> binaryRules,
                                 Set<String> unaryRules, Set<String> words,
                                 Tree tree) {
  if (tree.isLeaf()) {
    return;
  }
  if (tree.isPreTerminal()) {
    words.add(getVocabWord(tree.children()[0].value()));
    return;
  }

  Tree[] children = tree.children();
  switch (children.length) {
    case 1: {
      Tree only = children[0];
      unaryRules.add(basicCategory(only.value()));
      searchRulesForBatch(binaryRules, unaryRules, words, only);
      break;
    }
    case 2: {
      Tree left = children[0];
      Tree right = children[1];
      binaryRules.add(basicCategory(left.value()), basicCategory(right.value()));
      searchRulesForBatch(binaryRules, unaryRules, words, left);
      searchRulesForBatch(binaryRules, unaryRules, words, right);
      break;
    }
    default:
      throw new AssertionError("Expected a binarized tree");
  }
}