/**
 * Renders a parse tree as a Penn treebank string and strips annotation
 * clutter from it so that the output is easier to read.
 *
 * @param parseTree the tree to render
 * @return the Penn treebank string after the three clean-up substitutions
 */
private static String formatPennTree(Tree parseTree) {
  // The substitutions are applied in sequence; each one works on the output of the previous one.
  return parseTree.pennString()
      // drop the literal "[TextAnnotation=" prefix
      .replaceAll("\\[TextAnnotation=", "")
      // collapse "<Name>Annotation ... )" down to the closing parenthesis
      .replaceAll("(NamedEntityTag|Value|Index|PartOfSpeech)Annotation.+?\\)", ")")
      // remove whatever bracketed spans are still left
      .replaceAll("\\[.+?\\]", "");
}
/**
 * Produces the text to export for a component.
 *
 * @param c the component being exported
 * @return the Penn treebank string of the tree held by {@code c} when it is a
 *         {@code ScrollableTreeJPanel}; the empty string for any other component
 */
protected static String exportString(JComponent c) {
  if (!(c instanceof ScrollableTreeJPanel)) {
    return "";
  }
  ScrollableTreeJPanel panel = (ScrollableTreeJPanel) c;
  return panel.getTree().pennString();
}
/**
 * Collects every entry currently held by the matches list model and renders
 * each one in Penn treebank form (suitable for writing out, for instance).
 *
 * @return the Penn treebank forms of all listed trees, each followed by a blank line
 */
public String getMatches() {
  StringBuilder out = new StringBuilder();
  final int count = list.getModel().getSize();
  int idx = 0;
  while (idx < count) {
    TreeFromFile entry = (TreeFromFile) list.getModel().getElementAt(idx);
    out.append(entry.getTree().pennString()).append("\n\n");
    idx++;
  }
  return out.toString();
}
public static boolean isPleonasticDebug(Mention m, Tree tree, StringBuilder sbLog) { if ( ! m.spanToString().equalsIgnoreCase("it")) return false; boolean isPleonastic = false; int patternIdx = -1; int matchedPattern = -1; for (TregexPattern p : pleonasticPatterns) { patternIdx++; if (checkPleonastic(m, tree, p)) { // SieveCoreferenceSystem.logger.fine("RuleBasedCorefMentionFinder: matched pleonastic pattern '" + p + "' for " + tree); isPleonastic = true; matchedPattern = patternIdx; } } sbLog.append("PLEONASTIC IT: mention ID: "+m.mentionID +"\thastwin: "+m.hasTwin+"\tpleonastic it? "+isPleonastic+"\tcorrect? "+(m.hasTwin!=isPleonastic)+"\tmatched pattern: "+matchedPattern+"\n"); sbLog.append(m.contextParseTree.pennString()).append("\n"); sbLog.append("PLEONASTIC IT END\n"); return isPleonastic; }
private Tree funkyFindLeafWithApproximateSpan(Tree root, String token, int index, int approximateness) { logger.fine("Looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString()); List<Tree> leaves = root.getLeaves(); for (Tree leaf : leaves) { CoreLabel label = CoreLabel.class.cast(leaf.label()); int ind = label.get(CoreAnnotations.BeginIndexAnnotation.class); // log.info("Token #" + ind + ": " + leaf.value()); if (token.equals(leaf.value()) && ind >= index && ind <= index + approximateness) { return leaf; } } // this shouldn't happen // but it does happen (VERY RARELY) on some weird web text that includes SGML tags with spaces // TODO: does this mean that somehow tokenization is different for the parser? check this by throwing an Exception in KBP logger.severe("GenericDataSetReader: WARNING: Failed to find head token"); logger.severe(" when looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString()); return null; }
if(g.sentenceWords!=null) if(g.sentenceWords.size() > g.endIndex) sbLog.append("\tnextword: ").append(g.sentenceWords.get(g.endIndex)).append("\t").append(g.sentenceWords.get(g.endIndex).tag()).append("\n"); if(g.contextParseTree!=null) sbLog.append(g.contextParseTree.pennString()).append("\n\n"); else sbLog.append("\n\n"); if(sentences.get(i).get(TreeAnnotation.class)!=null) sbLog.append("\n\tparse: \n").append(sentences.get(i).get(TreeAnnotation.class).pennString()); sbLog.append("\n\tcollapsedDependency: \n").append(sentences.get(i).get(BasicDependenciesAnnotation.class));
/**
 * Command-line entry point. Reads the first tree from each of the two given
 * files, calls {@code markDiff(t1, t2)}, prints the second tree in Penn
 * treebank form, then a blank line, then every constituent that
 * {@code markDiff} returned (one per line).
 *
 * <p>If the argument count is not exactly two, a usage line is printed and the
 * method returns. A missing or unreadable file is reported through the logger
 * rather than rethrown.
 *
 * @param args exactly two paths: the file holding the first tree and the file
 *             holding the second tree
 */
public static void main(String[] args) {
  if (args.length != 2) {
    System.out.println("Usage: java Tdiff tree1 tree2");
    return;
  }

  File tree1Path = new File(args[0]);
  File tree2Path = new File(args[1]);

  // try-with-resources closes both readers on every exit path, including the
  // case where the second file cannot be opened after the first already was.
  try (BufferedReader in1 = new BufferedReader(new FileReader(tree1Path));
       BufferedReader in2 = new BufferedReader(new FileReader(tree2Path))) {
    TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
    TreeReader tR1 = trf.newTreeReader(in1);
    TreeReader tR2 = trf.newTreeReader(in2);

    Tree t1 = tR1.readTree();
    Tree t2 = tR2.readTree();

    Set<Constituent> t1Diff = markDiff(t1, t2);

    System.out.println(t2.pennString());
    System.out.println();
    for (Constituent c : t1Diff) {
      System.out.println(c);
    }
  } catch (FileNotFoundException e) {
    log.info("File not found!");
  } catch (IOException e) {
    log.info("Unable to read file!");
  }
}
+"\t\tfoundAnt? "+foundCorefAnt+"\t\tcorrectDecision? "+correctDecision); sb.append("\n\ttype: "+m.mentionType+"\tHeadword: "+m.headWord.word()+"\tNEtype: "+m.nerString+"\tnumber: "+m.number+"\tgender: "+m.gender+"\tanimacy: "+m.animacy).append("\n"); if(m.contextParseTree!=null) sb.append(m.contextParseTree.pennString());
+"\t\tfoundAnt? "+foundCorefAnt+"\t\tcorrectDecision? "+correctDecision+"\tbarePlural? "+barePlural); sb.append("\n\ttype: "+m.mentionType+"\tHeadword: "+m.headWord.word()+"\tNEtype: "+m.nerString+"\tnumber: "+m.number+"\tgender: "+m.gender+"\tanimacy: "+m.animacy).append("\n"); if(m.contextParseTree!=null) sb.append(m.contextParseTree.pennString());
logger.fine("Index spans were NOT generated."); logger.fine("Parse tree using CoreLabel:\n" + tree.pennString());
+ "Tree: " + ((t == null) ? "null" : t.pennString()));
while (matcher.find()) { sentWriter.set(Integer.toString(i++), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> { matchWriter.set("match", matcher.getMatch().pennString()); matchWriter.set("namedNodes", matcher.getNodeNames().stream().map(nodeName -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer namedNodeWriter) -> namedNodeWriter.set(nodeName, matcher.getNode(nodeName).pennString()) )); });
List<ParserConstraint> constraints = Collections.singletonList(constraint); Tree tree = parse(extentTokens, constraints); logger.fine("No exact match found. Local parse:\n" + tree.pennString()); convertToCoreLabels(tree);
log.warn(String.format("%s: missing tag for %s",this.getClass().getName(),t.pennString())); } else if(t.label() instanceof HasTag) { ((HasTag) t.label()).setTag(t.value()); log.warn(String.format("%s: Bare tagged word being wrapped in FRAG %s", this.getClass().getName(),tree.pennString())); tree = tf.newTreeNode("FRAG", Collections.singletonList(tree)); } else { log.warn(String.format("%s: Bare tagged word %s", this.getClass().getName(), tree.pennString()));
logger.fine("No exact match found. Local parse:\n" + tree.pennString()); convertToCoreLabels(tree); tree.indexSpans(ent.getExtentTokenStart());
log.info("WARNING: CollinsDependency.extractFromTree() could not find root for:\n" + node.pennString()); log.info(t.pennString()); log.info(); int num = 0;
/**
 * Produces a more readable Penn treebank rendering of a parse tree by
 * removing annotation text from the raw {@code pennString()} output.
 *
 * @param parseTree the tree to render
 * @return the Penn treebank string after the three clean-up substitutions
 */
private static String formatPennTree(Tree parseTree) {
  final String raw = parseTree.pennString();
  // Step 1: remove the literal "[TextAnnotation=" prefix.
  final String withoutTextPrefix = raw.replaceAll("\\[TextAnnotation=", "");
  // Step 2: replace "<Name>Annotation ... )" with just the closing parenthesis.
  final String withoutNamedAnnotations =
      withoutTextPrefix.replaceAll("(NamedEntityTag|Value|Index|PartOfSpeech)Annotation.+?\\)", ")");
  // Step 3: delete any bracketed spans that remain.
  return withoutNamedAnnotations.replaceAll("\\[.+?\\]", "");
}
/**
 * Renders a parse tree as a Penn treebank string and strips annotation
 * clutter from it so that the output is easier to read.
 *
 * @param parseTree the tree to render
 * @return the Penn treebank string after the three clean-up substitutions
 */
public static String formatPennTree(Tree parseTree) {
  // The substitutions are applied in sequence; each one works on the output of the previous one.
  return parseTree.pennString()
      // drop the literal "[TextAnnotation=" prefix
      .replaceAll("\\[TextAnnotation=", "")
      // collapse "<Name>Annotation ... )" down to the closing parenthesis
      .replaceAll("(NamedEntityTag|Value|Index|PartOfSpeech)Annotation.+?\\)", ")")
      // remove whatever bracketed spans are still left
      .replaceAll("\\[.+?\\]", "");
}
/**
 * Returns the exportable text for a component: the Penn treebank string of
 * its tree when the component is a {@code ScrollableTreeJPanel}, and the
 * empty string for anything else.
 *
 * @param c the component being exported
 * @return the tree's Penn treebank string, or {@code ""} if {@code c} is not a tree panel
 */
protected static String exportString(JComponent c) {
  return (c instanceof ScrollableTreeJPanel)
      ? ((ScrollableTreeJPanel) c).getTree().pennString()
      : "";
}
public void tagPOS(List<CoreLabel> tokens, Tree tree) { try { List<TaggedWord> posList = tree.getChild(0).taggedYield(); for (int i = 0; i < tokens.size(); i++) { String pos = posList.get(i).tag(); tokens.get(i).setTag(pos); } } catch (Exception e) { tagPOS(tokens); // At least gives you something. LOG.warn("POS Failed:\n" + tree.pennString()); } }