Refine search
public Tree arabicAoverAFilter(Tree t) { if(t == null || t.isLeaf() || t.isPreTerminal()) return t; //Specific nodes to filter out if(t.numChildren() == 1) { final Tree fc = t.firstChild(); //A over A nodes i.e. from BobChrisTreeNormalizer if(t.label() != null && fc.label() != null && t.value().equals(fc.value())) { t.setChildren(fc.children()); } } for(Tree kid : t.getChildrenAsList()) arabicAoverAFilter(kid); return t; }
/**
 * Counts the nodes whose value is {@code "NP"} among the leaf at position
 * {@code headIndex} and the nodes reached from it by repeatedly calling
 * {@code ancestor(1, tree)} until that call yields {@code null}.
 *
 * @param tree      the tree whose leaves are indexed by {@code headIndex}
 * @param headIndex position of the starting leaf in {@code tree.getLeaves()}
 * @return the number of {@code "NP"} nodes seen on the walk, or {@code -1} if
 *         any exception is thrown along the way (bad index, a label that is
 *         not a {@code CoreLabel}, a missing value, ...)
 */
private int headEmbeddingLevel(Tree tree, int headIndex) {
  int npCount = 0;
  try {
    for (Tree node = tree.getLeaves().get(headIndex);
         node != null;
         node = node.ancestor(1, tree)) {
      final CoreLabel nodeLabel = (CoreLabel) node.label();
      final String category = nodeLabel.get(CoreAnnotations.ValueAnnotation.class);
      // Deliberately not null-safe: a missing value falls through to the catch below.
      if (category.equals("NP")) {
        npCount++;
      }
    }
  } catch (Exception e) {
    // Any failure is reported to the caller as -1.
    return -1;
  }
  return npCount;
}
/**
 * Records the tagged yield of {@code tree} as one more entry in
 * {@code trainingSentences}.
 *
 * TODO: make this method do something with the weight
 *
 * @param tree   the tree whose {@code taggedYield()} is stored
 * @param weight currently ignored (see TODO above)
 */
@Override
public void train(Tree tree, double weight) {
  trainingSentences.add(tree.taggedYield());
}
boolean suppressIndent = (parentLabelNull || (firstSibling && isPreTerminal()) || (leftSiblingPreTerminal && isPreTerminal() && (label() == null || !label().value().startsWith("CC")))); if (suppressIndent) { pw.print(" "); pw.println(); pw.print(" "); if (isLeaf() || isPreTerminal()) { String terminalString = toStringBuilder(new StringBuilder(), labelFormatter).toString(); pw.print(terminalString); pw.flush(); pw.print("("); pw.print(labelFormatter.apply(label())); boolean parentIsNull = label() == null || label().value() == null; displayChildren(children(), indent + 1, parentIsNull, labelFormatter, pw); pw.print(")"); pw.flush();
for(Tree t : tb) { if(removeBracket) { if(t.value().equals(startSymbol)) { t = t.firstChild(); } else if( ! t.value().equals(startSymbol)) { //Add a bracket if it isn't already there t = tf.newTreeNode(startSymbol, Collections.singletonList(t)); pwo.println(t.toString()); nTrees++; pwo.close(); System.err.printf("Processed %d trees.%n", nTrees);
/**
 * Tallies how often each word occurs with each tag across the treebank and
 * writes one line per word: the word, a tab, and then for every tag seen with
 * that word {@code tag<TAB>count<TAB>}.
 *
 * @param tb the treebank whose tagged yields are counted
 * @param pw destination for the report; it is not flushed or closed here
 */
private static void countTaggings(Treebank tb, final PrintWriter pw) {
  final TwoDimensionalCounter<String,String> wordTagCounts = new TwoDimensionalCounter<>();

  // First pass: accumulate (word, tag) counts over every tree.
  tb.apply(tree -> {
    for (TaggedWord tw : tree.taggedYield()) {
      wordTagCounts.incrementCount(tw.word(), tw.tag());
    }
  });

  // Second pass: dump one tab-separated line per word.
  for (String word : wordTagCounts.firstKeySet()) {
    pw.print(word);
    pw.print('\t');
    final Counter<String> tagCounts = wordTagCounts.getCounter(word);
    for (String tag : tagCounts.keySet()) {
      pw.print(tag);
      pw.print('\t');
      pw.print(tagCounts.getCount(tag));
      pw.print('\t');
    }
    pw.println();
  }
}
List<CoreLabel> sentenceAnno = sentence.get(CoreAnnotations.TokensAnnotation.class); Tree sentenceTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); Map<Pair<Integer,Integer>,String> sentenceInfo = Generics.newHashMap(); Set<Tree> sentenceSubTrees = sentenceTree.subTrees(); sentenceTree.setSpans(); Map<Pair<Integer,Integer>,Tree> treeSpanMap = Generics.newHashMap(); Map<Pair<Integer,Integer>,List<Tree>> wordSpanMap = Generics.newHashMap(); IntPair span = ctree.getSpan(); if (span != null) { treeSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree); wordSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree.getLeaves()); String head = null; if (mentionTree != null) { head = mentionTree.headTerminal(headFinder).nodeString(); } else if (mention.first.equals(mention.second)) { head = word; pw.println(wordInfo[0] + "\t" + wordInfo[1] + "\t" + wordInfo[2] + "\t" + wordInfo[3]); pw.println("");
/**
 * Processes one tree: prunes it with {@code nullFilter}, hands every
 * preterminal node to {@code processPreterminal}, then writes the tagged
 * string of the tree to {@code outfile} and, when {@code flatFile} is set,
 * the flattened tree to {@code flatFile}.
 *
 * <p>Trees that are {@code null}, whose root value is {@code "X"}, or that
 * are pruned away entirely are skipped without producing output.
 *
 * @param t the tree to process; may be {@code null}
 */
public void visitTree(Tree t) {
  if (t == null || t.value().equals("X")) {
    return;
  }

  t = t.prune(nullFilter, new LabeledScoredTreeFactory());
  // prune() yields null when the root itself does not survive the filter;
  // iterating or printing that would throw a NullPointerException.
  if (t == null) {
    return;
  }

  for (Tree node : t) {
    if (node.isPreTerminal()) {
      processPreterminal(node);
    }
  }

  outfile.println(ATBTreeUtils.taggedStringFromTree(t, removeEscapeTokens, wordTagDelim));

  if (flatFile != null) {
    flatFile.println(ATBTreeUtils.flattenTree(t));
  }
}
} // closes the enclosing type, whose opening lies outside this excerpt
/**
 * Prints this tree as an indented list: one node per line, each line made of
 * the current indent, the node label (if any) and, optionally, a space and the
 * node score. Children are printed recursively with {@code pad} appended to
 * the indent. Strings rather than integer depth levels are passed down for
 * efficiency.
 *
 * @param indent The {@code String} (normally just spaces) printed before this
 *               node's line
 * @param pad The extra {@code String} (normally just more spaces) appended to
 *            the indent for each deeper level of {@code Tree}
 * @param pw The PrintWriter to print the tree to
 * @param printScores Whether to print the scores (log probs) of tree nodes
 */
private void indentedListPrint(String indent, String pad, PrintWriter pw, boolean printScores) {
  StringBuilder line = new StringBuilder(indent);

  Label nodeLabel = label();
  if (nodeLabel != null) {
    line.append(nodeLabel);
  }
  if (printScores) {
    line.append(" ").append(score());
  }
  pw.println(line);

  // Every child is printed one level deeper.
  Tree[] kids = children();
  String childIndent = indent + pad;
  for (int i = 0; i < kids.length; i++) {
    kids[i].indentedListPrint(childIndent, pad, pw, printScores);
  }
}
public void visitTree(Tree t) { if(t == null || t.value().equals("X")) return; if(t.yield().size() > maxLen) return; t = t.prune(nullFilter, tf); t = arabicAoverAFilter(t); if(node.isPreTerminal()) { processPreterminal(node); if(removeDashTags && !node.isLeaf()) node.setValue(tlp.basicCategory(node.value())); if (addRoot && t.value() != null && !t.value().equals("ROOT")) { t = tf.newTreeNode("ROOT", Collections.singletonList(t)); outfile.println(t.toString()); if(flatFile != null) { String flatString = (removeEscapeTokens) ? ATBTreeUtils.unEscape(ATBTreeUtils.flattenTree(t)) : ATBTreeUtils.flattenTree(t); flatFile.println(flatString);
/**
 * Prints every tagged word of the treebank whose tag is accepted by the
 * language pack's punctuation-tag filter, one per line.
 *
 * @param treebank the trees to scan
 * @param tlp      supplies the punctuation-tag filter; when {@code null} an
 *                 informational message is logged and nothing is printed
 * @param pw       destination for the matching tagged words
 */
private static void printPunct(Treebank treebank, TreebankLanguagePack tlp, PrintWriter pw) {
  if (tlp == null) {
    log.info("The -punct option requires you to specify -tlp");
    return;
  }

  final Predicate<String> isPunctTag = tlp.punctuationTagAcceptFilter();
  for (Tree tree : treebank) {
    for (TaggedWord tw : tree.taggedYield()) {
      if (!isPunctTag.test(tw.tag())) {
        continue;
      }
      pw.println(tw);
    }
  }
}
protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) { List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class); Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class); tree.indexLeaves(); SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class); TregexPattern tgrepPattern = npOrPrpMentionPattern; TregexMatcher matcher = tgrepPattern.matcher(tree); while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves(); int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1; int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class); if (",".equals(sent.get(endIdx-1).word())) { endIdx--; } // try not to have span that ends with , IntPair mSpan = new IntPair(beginIdx, endIdx); if(!mentionSpanSet.contains(mSpan) && !insideNE(mSpan, namedEntitySpanSet)) { int dummyMentionId = -1; Mention m = new Mention(dummyMentionId, beginIdx, endIdx, dependency, new ArrayList<>(sent.subList(beginIdx, endIdx)), t); mentions.add(m); mentionSpanSet.add(mSpan); } } } /** Extract enumerations (A, B, and C) */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  // For every match of enumerationsMentionPattern, the nodes bound to "m1" and
  // "m2" are collected by token span (begin = IndexAnnotation of first leaf - 1,
  // end = IndexAnnotation of last leaf). Each collected span that is neither in
  // mentionSpanSet nor inside a named-entity span becomes a new Mention.
  // NOTE(review): unlike extractNPorPRP this method does not call indexLeaves();
  // presumably the leaves are already indexed by the time it runs - confirm.
  final List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  final Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  final SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);

  final TregexMatcher matcher = enumerationsMentionPattern.matcher(parse);
  final Map<IntPair, Tree> subtreeBySpan = Generics.newHashMap();

  while (matcher.find()) {
    matcher.getMatch(); // result unused; call retained from the original
    final Tree[] namedNodes = { matcher.getNode("m1"), matcher.getNode("m2") };
    for (Tree namedNode : namedNodes) {
      final List<Tree> leaves = namedNode.getLeaves();
      final int begin = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
      final int end = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
      subtreeBySpan.put(new IntPair(begin, end), namedNode);
    }
  }

  for (Map.Entry<IntPair, Tree> entry : subtreeBySpan.entrySet()) {
    final IntPair span = entry.getKey();
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }

    final int dummyMentionId = -1;
    mentions.add(new Mention(dummyMentionId, span.get(0), span.get(1), dependency,
        new ArrayList<>(tokens.subList(span.get(0), span.get(1))), entry.getValue()));
    mentionSpanSet.add(span);
  }
}
/**
 * Fills in {@code headIndex}, {@code headWord} and {@code headString} for each
 * mention, using the node returned by {@code findSyntacticHead}.
 *
 * <p>If the resulting head position falls outside the mention's
 * {@code originalSpan}, two warnings are logged and the head falls back to the
 * start of the mention, with the head string set to the string form of the
 * whole span.
 *
 * @param s        the sentence; its parse gets {@code indexSpans(0)} applied
 * @param mentions the mentions to update in place
 */
protected void findHead(CoreMap s, List<Mention> mentions) {
  final Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  final List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  parse.indexSpans(0);

  for (Mention m : mentions) {
    final Tree headNode = findSyntacticHead(m, parse, tokens);
    final CoreLabel headLabel = (CoreLabel) headNode.label();
    m.headIndex = headLabel.get(CoreAnnotations.IndexAnnotation.class) - 1;
    m.headWord = tokens.get(m.headIndex);
    m.headString = m.headWord.get(CoreAnnotations.TextAnnotation.class).toLowerCase(Locale.ENGLISH);

    // Position of the head relative to the mention's first token.
    final int start = m.headIndex - m.startIndex;
    final boolean headInsideSpan = start >= 0 && start < m.originalSpan.size();
    if (headInsideSpan) {
      continue;
    }

    SieveCoreferenceSystem.logger.warning("Invalid index for head " + start + "=" + m.headIndex + "-" + m.startIndex
        + ": originalSpan=[" + StringUtils.joinWords(m.originalSpan, " ") + "], head=" + m.headWord);
    SieveCoreferenceSystem.logger.warning("Setting head string to entire mention");
    m.headIndex = m.startIndex;
    m.headWord = m.originalSpan.isEmpty() ? tokens.get(m.startIndex) : m.originalSpan.get(0);
    m.headString = m.originalSpan.toString();
  }
}
for (int i = 0; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); tree.setSpans(); List<String[]> sentWords = document.sentenceWordLists.get(i); String label = nerSpan.third(); CoreMap nerChunk = ChunkAnnotationUtils.getAnnotatedChunk(sentence, startToken, endToken+1); nerChunk.set(CoreAnnotations.NamedEntityTagAnnotation.class, label); nerChunk.set(CoreAnnotations.SentenceIndexAnnotation.class, sentence.get(CoreAnnotations.SentenceIndexAnnotation.class)); nerChunks.add(nerChunk); Tree t = getTreeNonTerminal(tree, startToken, endToken, true); if (t.getSpan().getSource() == startToken && t.getSpan().getTarget() == endToken) { nerChunk.set(TreeCoreAnnotations.TreeAnnotation.class, t); if (options.annotateTreeNer) { Label tlabel = t.label(); if (tlabel instanceof CoreLabel) { ((CoreLabel) tlabel).set(NamedEntityAnnotation.class, nerChunk); mention.set(TreeCoreAnnotations.TreeAnnotation.class, t); if (options.annotateTreeCoref) { Label tlabel = t.label(); if (tlabel instanceof CoreLabel) { ((CoreLabel) tlabel).set(CorefMentionAnnotation.class, mention);
/**
 * Builds a new {@code LabeledScoredTreeNode} whose value is {@code label} and
 * whose head-word and head-tag label annotations are copied from the label of
 * {@code top}; the given children are attached in order.
 *
 * @param top      node whose label (cast to {@code CoreLabel}) supplies the
 *                 head annotations
 * @param label    value for the new node's label
 * @param children children to add to the new node, in the order given
 * @return the newly created node
 */
static Tree createNode(Tree top, String label, Tree ... children) {
  final CoreLabel source = (CoreLabel) top.label();

  final CoreLabel fresh = new CoreLabel();
  fresh.setValue(label);
  fresh.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class,
      source.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
  fresh.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class,
      source.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));

  final Tree node = new LabeledScoredTreeNode(fresh);
  for (int i = 0; i < children.length; i++) {
    node.addChild(children[i]);
  }
  return node;
}