/** * Verify that training and tagging does not cause * runtime problems. */ @Test public void testTreeInsertParserTraining() throws Exception { ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData(); HeadRules headRules = ParserTestUtil.createTestHeadRules(); ParserModel model = Parser.train("eng", parseSamples, headRules, 100, 0); opennlp.tools.parser.Parser parser = ParserFactory.create(model); // Tests parsing to make sure the code does not has // a bug which fails always with a runtime exception parser.parse(Parse.parseParse("She was just another freighter from the " + "States and she seemed as commonplace as her name .")); // Test serializing and de-serializing model ByteArrayOutputStream outArray = new ByteArrayOutputStream(); model.serialize(outArray); outArray.close(); new ParserModel(new ByteArrayInputStream(outArray.toByteArray())); // TODO: compare both models } }
/**
 * Creates the parser implementation that matches the parser type reported by
 * the given model.
 *
 * @param model the parser model; its {@code getParserType()} result selects
 *     the implementation
 * @param beamSize passed unchanged to the constructor of the selected parser
 * @param advancePercentage passed unchanged to the constructor of the selected parser
 * @return an {@code opennlp.tools.parser.chunking.Parser} for
 *     {@code ParserType.CHUNKING}, or an {@code opennlp.tools.parser.treeinsert.Parser}
 *     for {@code ParserType.TREEINSERT}
 * @throws IllegalStateException if the model reports any other parser type,
 *     including {@code null}
 */
public static Parser create(ParserModel model, int beamSize, double advancePercentage) {

  ParserType type = model.getParserType();

  if (ParserType.CHUNKING.equals(type)) {
    return new opennlp.tools.parser.chunking.Parser(model, beamSize, advancePercentage);
  } else if (ParserType.TREEINSERT.equals(type)) {
    return new opennlp.tools.parser.treeinsert.Parser(model, beamSize, advancePercentage);
  } else {
    // The equals checks above tolerate a null type, so the error path must
    // too: calling name() on null would mask the intended exception with a
    // NullPointerException.
    String typeName = (type == null) ? "null" : type.name();
    throw new IllegalStateException("Unexpected ParserType: " + typeName);
  }
}
Parse[] children = collapsePunctuation(originalChildren,punctSet); int numNodes = children.length; if (numNodes == 0) { if (!isBuilt(advanceNode)) { break; int originalZeroIndex = mapParseIndex(0,children,originalChildren); int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren); List<Parse> newParsesList = new ArrayList<>(); if (debugOn) System.out.println("building " + tag + " " + bprob + " c=" + cprobs[completeIndex]); if (cprobs[completeIndex] > probMass) { //just complete advances setComplete(newNode); newParse1.addProb(Math.log(cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing complete node"); setIncomplete(newNode); newParse1.addProb(Math.log(1 - cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing incomplete node"); setComplete(newNode); newParse1.addProb(Math.log(cprobs[completeIndex])); newParsesList.add(newParse2); newParse2.addProb(Math.log(1 - cprobs[completeIndex])); setIncomplete(newNode2); //set incomplete for non-clone if (isComplete(advanceNode)) {
Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet); Parse pstart = children[0]; Parse pend = children[children.length - 1]; rf = Parser.getRightFrontier(constituents[0], punctSet); if (trimFrontier) { int pi = rf.indexOf(parent);
/**
 * Locates {@code child} in the array that {@code Parser.collapsePunctuation}
 * returns for the children of {@code parent}.
 *
 * @param child the node to look for; matched by reference, not by {@code equals}
 * @param parent the node whose children are searched
 * @return the zero-based position of {@code child} in that array, or -1 if it
 *     is not contained in it
 */
private int indexOf(Parse child, Parse parent) {
  Parse[] collapsed = Parser.collapsePunctuation(parent.getChildren(), punctSet);
  int position = 0;
  for (Parse candidate : collapsed) {
    if (candidate == child) {
      return position;
    }
    position++;
  }
  return -1;
}
/**
 * Labels the given node as incomplete. A node for which {@code isBuilt}
 * returns true gets the combined label {@code BUILT + "." + INCOMPLETE};
 * any other node gets the plain {@code INCOMPLETE} label.
 *
 * @param p the node whose label is replaced
 */
private void setIncomplete(Parse p) {
  String incompleteLabel = isBuilt(p)
      ? Parser.BUILT + "." + Parser.INCOMPLETE
      : Parser.INCOMPLETE;
  p.setLabel(incompleteLabel);
}
rf = Parser.getRightFrontier(constituents[0], emptyPunctSet);
/**
 * Delegates chunk advancing to the superclass and then calls
 * {@code setComplete} on every child of every returned parse.
 *
 * @param p the parse handed to the superclass implementation
 * @param minChunkScore the score threshold handed to the superclass implementation
 * @return the array produced by the superclass, with the children of each
 *     element relabelled through {@code setComplete}
 */
@Override
protected Parse[] advanceChunks(Parse p, double minChunkScore) {
  Parse[] advanced = super.advanceChunks(p, minChunkScore);
  for (int pi = 0; pi < advanced.length; pi++) {
    for (Parse chunk : advanced[pi].getChildren()) {
      setComplete(chunk);
    }
  }
  return advanced;
}
/**
 * Labels the given node as built. A node without a label gets the plain
 * {@code BUILT} label; a labelled node gets {@code BUILT + "."} followed by
 * {@code COMPLETE} or {@code INCOMPLETE}, depending on {@code isComplete}.
 *
 * @param p the node whose label is replaced
 */
private void setBuilt(Parse p) {
  if (p.getLabel() == null) {
    p.setLabel(Parser.BUILT);
    return;
  }
  String completeness = isComplete(p) ? Parser.COMPLETE : Parser.INCOMPLETE;
  p.setLabel(Parser.BUILT + "." + completeness);
}
Dictionary mdict = buildDictionary(parseSamples, rules, mlParams);
Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet); Parse pstart = children[0]; Parse pend = children[children.length - 1]; rf = Parser.getRightFrontier(constituents[0], punctSet); if (trimFrontier) { int pi = rf.indexOf(parent);
/**
 * Returns the length of the array that {@code Parser.collapsePunctuation}
 * produces for the children of the given node.
 *
 * @param node the node whose children are counted
 * @return the number of entries left after collapsing punctuation
 */
private int nonPunctChildCount(Parse node) {
  Parse[] collapsedChildren = Parser.collapsePunctuation(node.getChildren(), punctSet);
  return collapsedChildren.length;
}
/*
/**
 * Labels the given node as complete. A node for which {@code isBuilt}
 * returns true gets the combined label {@code BUILT + "." + COMPLETE};
 * any other node gets the plain {@code COMPLETE} label.
 *
 * @param p the node whose label is replaced
 */
private void setComplete(Parse p) {
  // The previous label is not needed here: isBuilt(p) alone decides which
  // label is written, so no local copy of it is kept.
  if (!isBuilt(p)) {
    p.setLabel(Parser.COMPLETE);
  } else {
    p.setLabel(Parser.BUILT + "." + Parser.COMPLETE);
  }
}
List<Parse> currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet); if (currentRightFrontier.size() != rightFrontier.size()) { System.err.println("fontiers mis-aligned: " + currentRightFrontier.size() + " != "
/**
 * Delegates chunk advancing to the superclass and then calls
 * {@code setComplete} on every child of every returned parse.
 *
 * @param p the parse handed to the superclass implementation
 * @param minChunkScore the score threshold handed to the superclass implementation
 * @return the array produced by the superclass, with the children of each
 *     element relabelled through {@code setComplete}
 */
@Override
protected Parse[] advanceChunks(Parse p, double minChunkScore) {
  Parse[] advanced = super.advanceChunks(p, minChunkScore);
  for (int pi = 0; pi < advanced.length; pi++) {
    for (Parse chunk : advanced[pi].getChildren()) {
      setComplete(chunk);
    }
  }
  return advanced;
}
/**
 * Labels the given node as built. A node without a label gets the plain
 * {@code BUILT} label; a labelled node gets {@code BUILT + "."} followed by
 * {@code COMPLETE} or {@code INCOMPLETE}, depending on {@code isComplete}.
 *
 * @param p the node whose label is replaced
 */
private void setBuilt(Parse p) {
  if (p.getLabel() == null) {
    p.setLabel(Parser.BUILT);
    return;
  }
  String completeness = isComplete(p) ? Parser.COMPLETE : Parser.INCOMPLETE;
  p.setLabel(Parser.BUILT + "." + completeness);
}
Dictionary mdict = buildDictionary(parseSamples, rules, mlParams);
Parse[] children = collapsePunctuation(originalChildren,punctSet); int numNodes = children.length; if (numNodes == 0) { if (!isBuilt(advanceNode)) { break; int originalZeroIndex = mapParseIndex(0,children,originalChildren); int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren); List<Parse> newParsesList = new ArrayList<>(); if (debugOn) System.out.println("building " + tag + " " + bprob + " c=" + cprobs[completeIndex]); if (cprobs[completeIndex] > probMass) { //just complete advances setComplete(newNode); newParse1.addProb(Math.log(cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing complete node"); setIncomplete(newNode); newParse1.addProb(Math.log(1 - cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing incomplete node"); setComplete(newNode); newParse1.addProb(Math.log(cprobs[completeIndex])); newParsesList.add(newParse2); newParse2.addProb(Math.log(1 - cprobs[completeIndex])); setIncomplete(newNode2); //set incomplete for non-clone if (isComplete(advanceNode)) {
/**
 * Runs a cross validation over the given samples.
 *
 * <p>For every partition delivered by the {@code CrossValidationPartitioner}
 * a parser model of the configured {@code parserType} is trained on that
 * partition's training stream and then evaluated on the partition's test
 * stream. The F-measure of each evaluation is merged into {@code fmeasure}.
 *
 * @param samples the samples to partition
 * @param nFolds the fold count handed to the {@code CrossValidationPartitioner}
 * @throws IOException propagated from the partitioning, training or evaluation calls
 * @throws IllegalStateException if {@code parserType} is neither
 *     {@code ParserType.CHUNKING} nor {@code ParserType.TREEINSERT}
 */
public void evaluate(ObjectStream<Parse> samples, int nFolds) throws IOException {

  CrossValidationPartitioner<Parse> partitioner =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (partitioner.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream =
        partitioner.next();

    ParserModel model;

    // Train on the current partition's training stream. Training on the raw
    // samples stream would bypass the partitioner, so the model would not be
    // built from this fold's training portion.
    if (ParserType.CHUNKING.equals(parserType)) {
      model = opennlp.tools.parser.chunking.Parser.train(
          languageCode, trainingSampleStream, rules, params);
    } else if (ParserType.TREEINSERT.equals(parserType)) {
      model = opennlp.tools.parser.treeinsert.Parser.train(
          languageCode, trainingSampleStream, rules, params);
    } else {
      throw new IllegalStateException("Unexpected parser type: " + parserType);
    }

    ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);

    // Evaluate on the test portion that belongs to this partition.
    evaluator.evaluate(trainingSampleStream.getTestSampleStream());

    fmeasure.mergeInto(evaluator.getFMeasure());
  }
}
Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet); Parse pstart = children[0]; Parse pend = children[children.length - 1]; rf = Parser.getRightFrontier(constituents[0], punctSet); if (trimFrontier) { int pi = rf.indexOf(parent);