public static Parse[] parseLine(String line, Parser parser, Tokenizer tokenizer, int numParses) { // fix some parens patterns line = untokenizedParenPattern1.matcher(line).replaceAll("$1 $2"); line = untokenizedParenPattern2.matcher(line).replaceAll("$1 $2"); // tokenize List<String> tokens = Arrays.asList( tokenizer.tokenize(line)); String text = String.join(" ", tokens); Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0); int start = 0; int i = 0; for (Iterator<String> ti = tokens.iterator(); ti.hasNext(); i++) { String tok = ti.next(); p.insert(new Parse(text, new Span(start, start + tok.length()), AbstractBottomUpParser.TOK_NODE, 0, i)); start += tok.length() + 1; } Parse[] parses; if (numParses == 1) { parses = new Parse[]{parser.parse(p)}; } else { parses = parser.parse(p, numParses); } return parses; }
protected void process(CAS cas, AnnotationFS sentenceAnnotation) { FSIndex<AnnotationFS> allTokens = cas.getAnnotationIndex(mTokenType); ContainingConstraint containingConstraint = new ContainingConstraint(sentenceAnnotation); String sentence = sentenceAnnotation.getCoveredText(); Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator( allTokens.iterator(), containingConstraint); List<Span> tokenSpans = new LinkedList<>(); while (containingTokens.hasNext()) { AnnotationFS token = containingTokens.next(); tokenSpans.add(new Span(token.getBegin() - sentenceAnnotation.getBegin(), token.getEnd() - sentenceAnnotation.getBegin())); } ParseConverter converter = new ParseConverter(sentence, tokenSpans.toArray(new Span[tokenSpans.size()])); Parse unparsedTree = converter.getParseForTagger(); if (unparsedTree.getChildCount() > 0) { Parse parse = mParser.parse(unparsedTree); // TODO: We need a strategy to handle the case that a full // parse could not be found. What to do in this case? parse = converter.transformParseFromTagger(parse); if (mLogger.isLoggable(Level.INFO)) { StringBuffer parseString = new StringBuffer(); parse.show(parseString); mLogger.log(Level.INFO, parseString.toString()); } createAnnotation(cas, sentenceAnnotation.getBegin(), parse); } }
/** * Verify that training and tagging does not cause * runtime problems. */ @Test public void testTreeInsertParserTraining() throws Exception { ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData(); HeadRules headRules = ParserTestUtil.createTestHeadRules(); ParserModel model = Parser.train("eng", parseSamples, headRules, 100, 0); opennlp.tools.parser.Parser parser = ParserFactory.create(model); // Tests parsing to make sure the code does not has // a bug which fails always with a runtime exception parser.parse(Parse.parseParse("She was just another freighter from the " + "States and she seemed as commonplace as her name .")); // Test serializing and de-serializing model ByteArrayOutputStream outArray = new ByteArrayOutputStream(); model.serialize(outArray); outArray.close(); new ParserModel(new ByteArrayInputStream(outArray.toByteArray())); // TODO: compare both models } }
public static Parse[] parseLine(String line, Parser parser, Tokenizer tokenizer, int numParses) { // fix some parens patterns line = untokenizedParenPattern1.matcher(line).replaceAll("$1 $2"); line = untokenizedParenPattern2.matcher(line).replaceAll("$1 $2"); // tokenize List<String> tokens = Arrays.asList( tokenizer.tokenize(line)); String text = String.join(" ", tokens); Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0); int start = 0; int i = 0; for (Iterator<String> ti = tokens.iterator(); ti.hasNext(); i++) { String tok = ti.next(); p.insert(new Parse(text, new Span(start, start + tok.length()), AbstractBottomUpParser.TOK_NODE, 0, i)); start += tok.length() + 1; } Parse[] parses; if (numParses == 1) { parses = new Parse[]{parser.parse(p)}; } else { parses = parser.parse(p, numParses); } return parses; }
public static Parse[] parseLine(String line, Parser parser, Tokenizer tokenizer, int numParses) { // fix some parens patterns line = untokenizedParenPattern1.matcher(line).replaceAll("$1 $2"); line = untokenizedParenPattern2.matcher(line).replaceAll("$1 $2"); // tokenize List<String> tokens = Arrays.asList( tokenizer.tokenize(line)); String text = String.join(" ", tokens); Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0); int start = 0; int i = 0; for (Iterator<String> ti = tokens.iterator(); ti.hasNext(); i++) { String tok = ti.next(); p.insert(new Parse(text, new Span(start, start + tok.length()), AbstractBottomUpParser.TOK_NODE, 0, i)); start += tok.length() + 1; } Parse[] parses; if (numParses == 1) { parses = new Parse[]{parser.parse(p)}; } else { parses = parser.parse(p, numParses); } return parses; }
// Hand incompleteParse to the parser and keep the Parse it returns in p.
Parse p = parser.parse(incompleteParse);
// Hand incompleteParse to the parser and keep the Parse it returns in p.
Parse p = parser.parse(incompleteParse);
parse = parser.parse(parse); annotate(parse, aJCas, null); } catch (Exception e) {
protected void process(CAS cas, AnnotationFS sentenceAnnotation) { FSIndex<AnnotationFS> allTokens = cas.getAnnotationIndex(mTokenType); ContainingConstraint containingConstraint = new ContainingConstraint(sentenceAnnotation); String sentence = sentenceAnnotation.getCoveredText(); Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator( allTokens.iterator(), containingConstraint); List<Span> tokenSpans = new LinkedList<>(); while (containingTokens.hasNext()) { AnnotationFS token = containingTokens.next(); tokenSpans.add(new Span(token.getBegin() - sentenceAnnotation.getBegin(), token.getEnd() - sentenceAnnotation.getBegin())); } ParseConverter converter = new ParseConverter(sentence, tokenSpans.toArray(new Span[tokenSpans.size()])); Parse unparsedTree = converter.getParseForTagger(); if (unparsedTree.getChildCount() > 0) { Parse parse = mParser.parse(unparsedTree); // TODO: We need a strategy to handle the case that a full // parse could not be found. What to do in this case? parse = converter.transformParseFromTagger(parse); if (mLogger.isLoggable(Level.INFO)) { StringBuffer parseString = new StringBuffer(); parse.show(parseString); mLogger.log(Level.INFO, parseString.toString()); } createAnnotation(cas, sentenceAnnotation.getBegin(), parse); } }
// Fetch the resource held by modelProvider and call parse on it with parseInput.
// NOTE(review): presumably getResource() returns the loaded parser — confirm.
Parse parseOutput = modelProvider.getResource().parse(parseInput);