/**
 * Writes this tree to {@code System.out} using the layout of Penn Treebank
 * merged files.
 * <p>
 * The output is meant to match that layout exactly, except that the trailing
 * whitespace found in Penn Treebank trees is not emitted. Compared with a
 * plain bracketed, indented tree, runs of adjacent preterminals are generally
 * collapsed onto a single line of tags and words. Two further wrinkles:
 * conjunctions (tag CC) are not collapsed that way, and the unlabeled outer
 * brackets share a line with the next bracket down.
 * <p>
 * This is a convenience overload that simply delegates to the
 * {@code PrintStream} variant.
 */
public void pennPrint() {
  this.pennPrint(System.out);
}
/**
 * Renders this tree with {@code pennPrint(PrintWriter)} into an in-memory
 * buffer and returns the accumulated text.
 *
 * @return The indented S-expression representation of this tree
 */
public String pennString() {
  StringWriter buffer = new StringWriter();
  PrintWriter bufferOut = new PrintWriter(buffer);
  pennPrint(bufferOut);
  return buffer.toString();
}
/**
 * Splits this treebank three ways. Trees are counted in consecutive groups
 * of ten: the 9th tree of each group is written to the dev set, the 10th to
 * the test set, and the remaining eight to the training set. (This mirrors
 * a split convention used by Penn people.)
 * <p>
 * Each supplied writer is wrapped in an auto-flushing {@code PrintWriter};
 * none of the writers is closed here.
 *
 * @param trainW Destination for the training trees
 * @param devW Destination for the development trees
 * @param testW Destination for the test trees
 */
public void decimate(Writer trainW, Writer devW, Writer testW) {
  PrintWriter trainOut = new PrintWriter(trainW, true);
  PrintWriter devOut = new PrintWriter(devW, true);
  PrintWriter testOut = new PrintWriter(testW, true);
  int slot = 0;  // position of the current tree within its group of ten (0-9)
  for (Tree tree : this) {
    PrintWriter target;
    switch (slot) {
      case 8:
        target = devOut;
        break;
      case 9:
        target = testOut;
        break;
      default:
        target = trainOut;
        break;
    }
    tree.pennPrint(target);
    slot = (slot + 1) % 10;
  }
}
/**
 * Prints this tree to a {@code PrintStream} by delegating to the
 * {@code PrintWriter} overload that takes a label formatter.
 * <p>
 * The stream is wrapped in an {@code OutputStreamWriter} (created without an
 * explicit charset) and then in an auto-flushing {@code PrintWriter}.
 *
 * @param ps The tree is printed to this {@code PrintStream}
 * @param labelFormatter Passed through unchanged to the delegate; presumably
 *     maps each node label to the text printed for it (confirm against the
 *     {@code PrintWriter} overload)
 */
public void pennPrint(PrintStream ps, Function<Label,String> labelFormatter) {
  OutputStreamWriter streamWriter = new OutputStreamWriter(ps);
  PrintWriter out = new PrintWriter(streamWriter, true);
  pennPrint(out, labelFormatter);
}
public static void printTrainTree(PrintWriter pw, String message, Tree t) { PrintWriter myPW; if (pw == null) { myPW = new PrintWriter(System.out, true); } else { myPW = pw; } if (message != null && pw == null) { // hard coded to not print message if using file output! myPW.println(message); } // TODO FIXME: wtf is this shit boolean previousState = CategoryWordTag.printWordTag; CategoryWordTag.printWordTag = false; t.pennPrint(myPW); CategoryWordTag.printWordTag = previousState; }
/**
 * Writes this tree to the given stream using the layout of Penn Treebank
 * merged files.
 * <p>
 * The output is meant to match that layout exactly, except that the trailing
 * whitespace found in Penn Treebank trees is not emitted. Compared with a
 * plain bracketed, indented tree, runs of adjacent preterminals are generally
 * collapsed onto a single line of tags and words. Two further wrinkles:
 * conjunctions (tag CC) are not collapsed that way, and the unlabeled outer
 * brackets share a line with the next bracket down.
 * <p>
 * The stream is wrapped in an {@code OutputStreamWriter} (created without an
 * explicit charset) and an auto-flushing {@code PrintWriter}, and the work is
 * delegated to the {@code PrintWriter} overload.
 *
 * @param ps The tree is printed to this {@code PrintStream}
 */
public void pennPrint(PrintStream ps) {
  OutputStreamWriter streamWriter = new OutputStreamWriter(ps);
  PrintWriter out = new PrintWriter(streamWriter, true);
  pennPrint(out);
}
/**
 * Writes one tree to the given writer in the configured output form.
 * <p>
 * When {@code taggedOutput} is off, the tree is printed with
 * {@code pennPrint}. Otherwise a single line is written, produced by
 * {@code ATBTreeUtils.taggedStringFromTree} using this object's
 * {@code removeEscapeTokens} and {@code wordTagDelim} settings.
 *
 * @param t The tree to write
 * @param pw The destination writer
 */
private void write(Tree t, PrintWriter pw) {
  if (!taggedOutput) {
    t.pennPrint(pw);
    return;
  }
  String taggedLine = ATBTreeUtils.taggedStringFromTree(t, removeEscapeTokens, wordTagDelim);
  pw.println(taggedLine);
}
/**
 * Round-trip check: for every tree in the treebank, applies the extender's
 * {@code transformTree} followed by {@code untransformTree} in place, and
 * compares the result with a skeleton copy taken beforehand. Whenever the
 * two differ, a report containing both trees is written to {@code pw}.
 *
 * @param e The tag extender whose transform/untransform pair is tested
 * @param tb The trees to test (each tree is passed to the extender directly,
 *     not a copy)
 * @param pw Where mismatch reports are written
 */
private static void testTransAndUntrans(CharacterLevelTagExtender e, Treebank tb, PrintWriter pw) {
  for (Tree current : tb) {
    Tree snapshot = current.treeSkeletonCopy();
    e.transformTree(current);
    e.untransformTree(current);
    if (current.equals(snapshot)) {
      continue;  // round trip preserved the tree; nothing to report
    }
    pw.println("NOT EQUAL AFTER UNTRANSFORMATION!!!");
    pw.println();
    snapshot.pennPrint(pw);
    pw.println();
    current.pennPrint(pw);
    pw.println("------------------");
  }
}
/**
 * Small demo: loads trees from the path given as the first argument, prints
 * the first one, annotates the trees with a {@code TreebankAnnotator},
 * strips dependency roots, and prints the first annotated tree.
 * <p>
 * {@code CategoryWordTag.printWordTag} is switched off globally first.
 * The numeric arguments to {@code getTrees} (200, 219, 0, 10) are presumably
 * file and length ranges; confirm against {@code getTrees}.
 *
 * @param args {@code args[0]} is the treebank path
 */
public static void main(String[] args) {
  CategoryWordTag.printWordTag = false;
  final String treebankPath = args[0];

  final List<Tree> rawTrees = getTrees(treebankPath, 200, 219, 0, 10);
  rawTrees.iterator().next().pennPrint();

  final Options options = new Options();
  final TreebankAnnotator annotator = new TreebankAnnotator(options, treebankPath);
  final List<Tree> strippedTrees =
      TreebankAnnotator.removeDependencyRoots(annotator.annotateTrees(rawTrees));
  strippedTrees.iterator().next().pennPrint();
}
/**
 * Writes this tree to the given writer using the layout of Penn Treebank
 * merged files.
 * <p>
 * The output is meant to match that layout exactly, except that the trailing
 * whitespace found in Penn Treebank trees is not emitted. Compared with a
 * plain bracketed, indented tree, runs of adjacent preterminals are generally
 * collapsed onto a single line of tags and words. Two further wrinkles:
 * conjunctions (tag CC) are not collapsed that way, and the unlabeled outer
 * brackets share a line with the next bracket down.
 * <p>
 * Delegates to the two-argument overload with a formatter that yields each
 * label's {@code value()}, or the empty string when that value is
 * {@code null}.
 *
 * @param pw The tree is printed to this {@code PrintWriter}
 */
public void pennPrint(PrintWriter pw) {
  pennPrint(pw, label -> {
    if (label.value() == null) {
      return "";
    }
    return label.value();
  });
}
/**
 * Small demo of the CNF round trip: loads and annotates trees from the path
 * given as the first argument, then for each annotated tree prints the tree
 * itself, the result of {@code ToCNFTransformer}, and the result of feeding
 * that back through {@code FromCNFTransformer}.
 * <p>
 * {@code CategoryWordTag.printWordTag} is switched off globally first.
 *
 * @param args {@code args[0]} is the treebank path
 */
public static void main(String[] args) {
  CategoryWordTag.printWordTag = false;
  final String treebankPath = args[0];
  final List<Tree> rawTrees = TreebankAnnotator.getTrees(treebankPath, 200, 219, 0, 10);
  final TreebankAnnotator annotator = new TreebankAnnotator(new Options(), treebankPath);
  final List<Tree> annotatedTrees = annotator.annotateTrees(rawTrees);

  for (Tree annotated : annotatedTrees) {
    System.out.println("ORIGINAL:\n");
    annotated.pennPrint();

    System.out.println("CNFed:\n");
    ToCNFTransformer toCnf = new ToCNFTransformer();
    Tree inCnf = toCnf.transformTree(annotated);
    inCnf.pennPrint();

    System.out.println("UnCNFed:\n");
    FromCNFTransformer fromCnf = new FromCNFTransformer();
    Tree restored = fromCnf.transformTree(inCnf);
    restored.pennPrint();

    System.out.println("\n\n");
  }
}
/**
 * Prints selected subtrees of a tree file.
 * <p>
 * Every code string must match {@code codePattern} in full; its first two
 * capture groups are parsed as integers, giving a 1-based tree index and a
 * node number. All codes are validated before the tree file is loaded. Each
 * selected node is then printed with {@code pennPrint()}.
 *
 * @param codeStrings Node codes to look up
 * @param treeFile Path of the tree file to load
 * @throws RuntimeException if a code string does not match
 *     {@code codePattern}
 */
private static void extractSubtrees(List<String> codeStrings, String treeFile) {
  List<Pair<Integer,Integer>> codes = new ArrayList<>();
  for (String codeString : codeStrings) {
    Matcher matcher = codePattern.matcher(codeString);
    if (!matcher.matches()) {
      throw new RuntimeException("Error: illegal node code " + codeString);
    }
    int treeIndex = Integer.parseInt(matcher.group(1));
    int nodeNumber = Integer.parseInt(matcher.group(2));
    codes.add(new Pair<>(treeIndex, nodeNumber));
  }

  TreeReaderFactory readerFactory = new TRegexTreeReaderFactory();
  MemoryTreebank treebank = new MemoryTreebank(readerFactory);
  treebank.loadPath(treeFile, null, true);

  for (Pair<Integer,Integer> code : codes) {
    // Tree indices in the codes are 1-based; the treebank is 0-based.
    Tree selectedTree = treebank.get(code.first() - 1);
    Tree selectedNode = selectedTree.getNodeNumber(code.second());
    selectedNode.pennPrint();
  }
}
/**
 * Loads the treebank at the path given as the first argument into memory,
 * using the treebank type supplied by {@code EnglishTreebankParserParams},
 * and prints every tree to standard output.
 *
 * @param args {@code args[0]} is the treebank path
 */
public static void main(String[] args) {
  TreebankLangParserParams params = new EnglishTreebankParserParams();
  Treebank treebank = params.memoryTreebank();
  treebank.loadPath(args[0]);
  treebank.forEach(tree -> tree.pennPrint());
}
public static void main(String[] args) { // simple testing code Treebank treebank = new DiskTreebank(); CategoryWordTag.suppressTerminalDetails = true; treebank.loadPath(args[0]); final HeadFinder chf = new NoPunctuationHeadFinder(); treebank.apply(pt -> { pt.percolateHeads(chf); pt.pennPrint(); System.out.println(); }); }
/**
 * Reads trees with a {@code NegraPennTreeReaderFactory} and prints each one
 * to standard output.
 * <p>
 * With no arguments, a usage line is printed and the method returns. I/O
 * failures are reported with a stack trace rather than propagated. The tree
 * reader is always closed once it has been opened, including when reading
 * or printing fails part-way through.
 *
 * @param args {@code args[0]} is the tree file to run on
 */
public static void main(String[] args) {
  if (args.length < 1) {
    System.out.printf("Usage: java %s tree_file%n", NegraPennTreeReaderFactory.class.getName());
    return;
  }

  TreebankLanguagePack tlp = new NegraPennLanguagePack();
  TreeReaderFactory trf = new NegraPennTreeReaderFactory(2, false, false, tlp);

  try {
    TreeReader tr = trf.newTreeReader(IOUtils.readerFromString(args[0], tlp.getEncoding()));
    try {
      for (Tree t; (t = tr.readTree()) != null; ) {
        t.pennPrint();
      }
    } finally {
      // Close even when readTree()/pennPrint() throws, so the underlying
      // reader is not leaked.
      tr.close();
    }
  } catch (IOException e) {
    // Also covers UnsupportedEncodingException and FileNotFoundException,
    // which are IOException subclasses and were handled identically.
    e.printStackTrace();
  }
}
/**
 * Debugging entry point: loads the treebank at the given path, percolates
 * heads through every tree with a {@code CollinsHeadFinder}, and prints each
 * tree followed by a blank line. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.CollinsHeadFinder treebankFilePath
 * </code>
 * <p>
 * {@code CategoryWordTag.suppressTerminalDetails} is switched on globally
 * before loading.
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank trees = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  trees.loadPath(args[0]);

  final HeadFinder headFinder = new CollinsHeadFinder();
  trees.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Debugging entry point: loads the treebank at the given path, percolates
 * heads through every tree with a {@code DybroFrenchHeadFinder}, and prints
 * each tree followed by a blank line. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.DybroFrenchHeadFinder treebankFilePath
 * </code>
 * <p>
 * {@code CategoryWordTag.suppressTerminalDetails} is switched on globally
 * before loading.
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank trees = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  trees.loadPath(args[0]);

  final HeadFinder headFinder = new DybroFrenchHeadFinder();
  trees.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Debugging entry point: loads the treebank at the given path, percolates
 * heads through every tree with a {@code FrenchHeadFinder}, and prints each
 * tree followed by a blank line. <br>
 * Usage: <code>
 * java edu.stanford.nlp.trees.FrenchHeadFinder treebankFilePath
 * </code>
 * <p>
 * {@code CategoryWordTag.suppressTerminalDetails} is switched on globally
 * before loading.
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank trees = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  trees.loadPath(args[0]);

  final HeadFinder headFinder = new FrenchHeadFinder();
  trees.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * Debugging entry point: loads the treebank at the given path, percolates
 * heads through every tree with an {@code AbishekFrenchHeadFinder}, and
 * prints each tree followed by a blank line. <br>
 * Usage: <code>
 * java AbishekFrenchHeadFinder treebankFilePath
 * </code>
 * (use the fully-qualified class name; NOTE(review): presumably this method
 * lives in {@code AbishekFrenchHeadFinder} -- confirm the class and package)
 * <p>
 * {@code CategoryWordTag.suppressTerminalDetails} is switched on globally
 * before loading.
 *
 * @param args The treebankFilePath
 */
public static void main(String[] args) {
  Treebank trees = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  trees.loadPath(args[0]);

  final HeadFinder headFinder = new AbishekFrenchHeadFinder();
  trees.apply(tree -> {
    tree.percolateHeads(headFinder);
    tree.pennPrint();
    System.out.println();
  });
}
/**
 * For testing: reports the default treebank encoding, then loads the trees
 * selected by a path and a file-number range, prints each one to the
 * params' own writer ({@code pw()}), and reports how many trees there were.
 * <p>
 * With fewer than two arguments, only the encoding line and a usage message
 * are printed.
 *
 * @param args {@code args[0]} is the trees path; {@code args[1]} is the
 *     file range passed to {@code NumberRangesFileFilter}
 */
public static void main(String[] args) {
  TreebankLangParserParams tlpp = new ChineseTreebankParserParams();
  System.out.println("Default encoding is: " + tlpp.diskTreebank().encoding());

  if (args.length < 2) {
    printlnErr("Usage: edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams treesPath fileRange");
    return;
  }

  Treebank treebank = tlpp.diskTreebank();
  treebank.loadPath(args[0], new NumberRangesFileFilter(args[1], false));
  for (Tree tree : treebank) {
    // pw() is asked for on every tree; not hoisted, in case each call matters.
    tree.pennPrint(tlpp.pw());
  }
  System.out.println("There were " + treebank.size() + " trees.");
}