public static Parse[] parseLine(String line, Parser parser, Tokenizer tokenizer, int numParses) { // fix some parens patterns line = untokenizedParenPattern1.matcher(line).replaceAll("$1 $2"); line = untokenizedParenPattern2.matcher(line).replaceAll("$1 $2"); // tokenize List<String> tokens = Arrays.asList( tokenizer.tokenize(line)); String text = String.join(" ", tokens); Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0); int start = 0; int i = 0; for (Iterator<String> ti = tokens.iterator(); ti.hasNext(); i++) { String tok = ti.next(); p.insert(new Parse(text, new Span(start, start + tok.length()), AbstractBottomUpParser.TOK_NODE, 0, i)); start += tok.length() + 1; } Parse[] parses; if (numParses == 1) { parses = new Parse[]{parser.parse(p)}; } else { parses = parser.parse(p, numParses); } return parses; }
// Insert one token-level (TOK_NODE) child into the parse handed to the tagger.
// NOTE(review): the probability is fixed at 0f and the head index at 0 for every token —
// presumably the tagger ignores both for TOK_NODE leaves; confirm against the caller.
mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start, start + token.length()), opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
// Attach the constituent under the matching sub-parse and stop — only one
// insertion point is used per constituent.
subPart.insert(constituent);
return;
/** * Advances the parse by assigning it POS tags and returns multiple tag sequences. * @param p The parse to be tagged. * @return Parses with different POS-tag sequence assignments. */ protected Parse[] advanceTags(final Parse p) { Parse[] children = p.getChildren(); String[] words = new String[children.length]; double[] probs = new double[words.length]; for (int i = 0,il = children.length; i < il; i++) { words[i] = children[i].getCoveredText(); } Sequence[] ts = tagger.topKSequences(words); Parse[] newParses = new Parse[ts.length]; for (int i = 0; i < ts.length; i++) { String[] tags = ts[i].getOutcomes().toArray(new String[words.length]); ts[i].getProbs(probs); newParses[i] = (Parse) p.clone(); //copies top level if (createDerivationString) newParses[i].getDerivation().append(i).append("."); for (int j = 0; j < words.length; j++) { Parse word = children[j]; //System.err.println("inserting tag "+tags[j]); double prob = probs[j]; newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j)); newParses[i].addProb(Math.log(prob)); } } return newParses; }
/**
 * Recursively converts a parse produced by the tagger back into the offsets of
 * the original sentence.
 *
 * @param parseFromTagger the parse node (in tagger-text offsets) to convert
 * @return the final parse, re-spanned via {@code mIndexMap}
 */
Parse transformParseFromTagger(Parse parseFromTagger) {
  Span taggerSpan = parseFromTagger.getSpan();
  // map tagger offsets back to offsets in the original sentence text
  Span mappedSpan = new Span(mIndexMap.get(taggerSpan.getStart()),
      mIndexMap.get(taggerSpan.getEnd()));
  Parse transformed = new Parse(mSentence, mappedSpan,
      parseFromTagger.getType(),
      parseFromTagger.getProb(),
      parseFromTagger.getHeadIndex());
  // rebuild the tree bottom-up by transforming and re-attaching every child
  for (Parse child : parseFromTagger.getChildren()) {
    transformed.insert(transformParseFromTagger(child));
  }
  return transformed;
}
}
// NOTE(review): two consecutive inserts into newParse2 with the same type and
// probability — the first spans only p itself, the second spans from lastStartNode
// through advanceNode. Confirm both insertions are intended; in the upstream
// OpenNLP advance logic only the wider start..advance span is typically inserted.
newParse2.insert(new Parse(p.getText(), p.getSpan(), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType)));
newParse2.insert(new Parse(p.getText(), new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd()), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType)));
// Wrap the current chunk in a synthetic TOP node spanning the chunk's full text
// (probability 1, head index 0) so it can serve as a parse root.
Parse top = new Parse(currentChunks[ci].getText(), new Span(0, currentChunks[ci].getText().length()), AbstractBottomUpParser.TOP_NODE, 1, 0);
top.insert(currentChunks[ci]);
// First hypothesis: clone p and attach a new node over advanceNode's span with the
// given tag and probability bprob.
Parse newParse1 = (Parse) p.clone();
Parse newNode = new Parse(p.getText(), advanceNode.getSpan(), tag, bprob, advanceNode.getHead());
newParse1.insert(newNode);
newParse1.addProb(Math.log(bprob)); // probabilities are accumulated in log space
newParsesList.add(newParse1);
// Second hypothesis: attach newNode2 (constructed earlier, out of this view) to
// newParse2 with the same probability mass.
newParse2.insert(newNode2);
newParse2.addProb(Math.log(bprob));
newParsesList.add(newParse2);
// NOTE(review): fragment — this line continues a `new Parse(...)` expression begun
// out of view; the new chunk ends at p2's end, has type `type`, probability 1, and a
// head selected by the head rules.
p2.getSpan().getEnd()), type, 1, headRules.getHead(cons, type));
chunk.isChunk(true); // mark the new node as a chunk constituent
newParses[si].insert(chunk);
// NOTE(review): this span is garbled — it contains two `} else {` branches attached
// to a single `if`, `kids` is declared in a branch that does not use it while a later
// branch references it, and the braces are unbalanced. Reconstruct from the upstream
// source before editing; comments below describe the apparent intent only.
// Intent: compute the span covered by a detected name and insert a node of type `tag`
// for it at the appropriate level of the tree.
Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());
if (nameSpan.equals(commonParent.getSpan())) {
  commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex()));
} else {
  Parse[] kids = commonParent.getChildren();
  commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex()));
} else { // NOTE(review): invalid second `else` — see note above
  Parse[] grandKids = kids[0].getChildren();
  if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) {
    commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(), tag, 1.0, commonParent.getHeadIndex()));
/**
 * Rewrites possessive constructions: for each "POS" tag node, wraps the run of
 * following tokens that share a common parent into a new "NP" node inserted into
 * the parse.
 *
 * @param parse the parse tree to fix in place
 */
public static void fixPossesives(Parse parse) {
  Parse[] tags = parse.getTagNodes();
  for (int ti = 0; ti < tags.length; ti++) {
    // only possessive markers are of interest
    if (!tags[ti].getType().equals("POS")) {
      continue;
    }
    // the token after the POS must hang off the POS node's grandparent
    if (ti + 1 >= tags.length || tags[ti + 1].getParent() != tags[ti].getParent().getParent()) {
      continue;
    }
    int start = tags[ti + 1].getSpan().getStart();
    int end = tags[ti + 1].getSpan().getEnd();
    // extend the NP over consecutive tokens that share the same parent
    for (int npi = ti + 2;
         npi < tags.length && tags[npi].getParent() == tags[npi - 1].getParent();
         npi++) {
      end = tags[npi].getSpan().getEnd();
    }
    Parse npPos = new Parse(parse.getText(), new Span(start, end), "NP", 1, tags[ti + 1]);
    parse.insert(npPos);
  }
}
static Parse createIncompleteParse(String[] tokens) { // produce text Span[] tokenSpans = new Span[tokens.length]; StringBuilder textBuilder = new StringBuilder(); for (int i = 0; i < tokens.length; i++) { if (textBuilder.length() > 0) { textBuilder.append(' '); } int startOffset = textBuilder.length(); textBuilder.append(tokens[i]); tokenSpans[i] = new Span(startOffset, textBuilder.length()); } String text = textBuilder.toString(); Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0); for (int i = 0; i < tokenSpans.length; i++) { Span tokenSpan = tokenSpans[i]; p.insert(new Parse(text, new Span(tokenSpan.getStart(), tokenSpan.getEnd()), AbstractBottomUpParser.TOK_NODE, 0, i)); } return p; }
static Parse createIncompleteParse(String tokens[]) { // produce text Span tokenSpans[] = new Span[tokens.length]; StringBuilder textBuilder = new StringBuilder(); for (int i = 0; i < tokens.length; i++) { if (textBuilder.length() > 0) { textBuilder.append(' '); } int startOffset = textBuilder.length(); textBuilder.append(tokens[i]); tokenSpans[i] = new Span(startOffset, textBuilder.length()); } String text = textBuilder.toString(); Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0); for (int i = 0; i < tokenSpans.length; i++) { Span tokenSpan = tokenSpans[i]; p.insert(new Parse(text, new Span(tokenSpan.getStart(), tokenSpan.getEnd()), AbstractBottomUpParser.TOK_NODE, 0, i)); } return p; }
/** * @param sentenceOffset begin offest character index for sentence * @param text text of the sentence * @param terminalArray [token] terminals in the sentence * @return open nlp Parse */ public static Parse ctakesTokensToOpennlpTokens( final int sentenceOffset, final String text, final FSArray terminalArray ) { // based on the first part of parseLine in the opennlp libraries final Parse sentenceParse = new Parse( text, new Span( 0, text .length() ), AbstractBottomUpParser.INC_NODE, 0, 0 ); for ( int i = 0; i < terminalArray.size(); i++ ) { final TerminalTreebankNode token = (TerminalTreebankNode)terminalArray.get( i ); final Span span = new Span( token.getBegin() - sentenceOffset, token.getEnd() - sentenceOffset ); sentenceParse.insert( new Parse( text, span, AbstractBottomUpParser.TOK_NODE, 0, i ) ); } return sentenceParse; }
/**
 * Converts the parse from the tagger back.
 *
 * @param parseFromTagger the parse node (in tagger-text offsets) to convert
 * @return the final parse
 */
Parse transformParseFromTagger(Parse parseFromTagger) {
  int start = parseFromTagger.getSpan().getStart();
  int end = parseFromTagger.getSpan().getEnd();
  // re-span the node via mIndexMap, mapping tagger offsets back to the original sentence
  Parse transformedParse = new Parse(mSentence, new Span(
      mIndexMap.get(start), mIndexMap.get(end)),
      parseFromTagger.getType(),
      parseFromTagger.getProb(),
      parseFromTagger.getHeadIndex());
  Parse[] parseFromTaggerChildrens = parseFromTagger.getChildren();
  // recursively transform each child and re-attach it to the new node
  for (Parse child : parseFromTaggerChildrens) {
    transformedParse.insert(transformParseFromTagger(child));
  }
  return transformedParse;
}
}
/**
 * Builds an incomplete OpenNLP parse whose token children mirror the given cTAKES
 * terminal nodes.
 *
 * @param sentenceOffset begin offset character index for sentence
 * @param text           text of the sentence
 * @param terminalArray  [token] terminals in the sentence
 * @return open nlp Parse
 */
public static Parse ctakesTokensToOpennlpTokens(final int sentenceOffset,
                                                final String text,
                                                final FSArray terminalArray) {
  // based on the first part of parseLine in the opennlp libraries
  final Parse sentenceParse = new Parse(text, new Span(0, text.length()),
      AbstractBottomUpParser.INC_NODE, 0, 0);
  for (int i = 0; i < terminalArray.size(); i++) {
    final TerminalTreebankNode token = (TerminalTreebankNode) terminalArray.get(i);
    // shift document-level offsets into sentence-local offsets
    final Span span = new Span(token.getBegin() - sentenceOffset,
        token.getEnd() - sentenceOffset);
    sentenceParse.insert(new Parse(text, span, AbstractBottomUpParser.TOK_NODE, 0, i));
  }
  return sentenceParse;
}
/**
 * Rewrites possessive constructions: for each "POS" tag node, wraps the run of
 * following tokens sharing a common parent into a new "NP" node inserted into
 * the parse.
 *
 * @param parse the parse tree to fix in place
 */
public static void fixPossesives(Parse parse) {
  Parse[] tags = parse.getTagNodes();
  for (int ti = 0; ti < tags.length; ti++) {
    if (tags[ti].getType().equals("POS")) {
      // the token after the POS must hang off the POS node's grandparent
      if (ti + 1 < tags.length && tags[ti + 1].getParent() == tags[ti].getParent().getParent()) {
        int start = tags[ti + 1].getSpan().getStart();
        int end = tags[ti + 1].getSpan().getEnd();
        // extend the NP over consecutive tokens that share the same parent
        for (int npi = ti + 2; npi < tags.length; npi++) {
          if (tags[npi].getParent() == tags[npi - 1].getParent()) {
            end = tags[npi].getSpan().getEnd();
          } else {
            break;
          }
        }
        // head of the new NP is the first token after the POS marker
        Parse npPos = new Parse(parse.getText(), new Span(start, end), "NP", 1, tags[ti + 1]);
        parse.insert(npPos);
      }
    }
  }
}
/**
 * Rewrites possessive constructions: after each "POS" tag node, gathers the
 * following tokens that share a common parent and inserts a new "NP" node
 * covering them.
 *
 * @param parse the parse tree to fix in place
 */
public static void fixPossesives(Parse parse) {
  final Parse[] posTags = parse.getTagNodes();
  for (int i = 0; i < posTags.length; i++) {
    final Parse current = posTags[i];
    // trigger only on a POS marker whose following token hangs off its grandparent
    if (current.getType().equals("POS")
        && i + 1 < posTags.length
        && posTags[i + 1].getParent() == current.getParent().getParent()) {
      final int npStart = posTags[i + 1].getSpan().getStart();
      int npEnd = posTags[i + 1].getSpan().getEnd();
      // scan forward while consecutive tokens share the same parent
      int j = i + 2;
      while (j < posTags.length && posTags[j].getParent() == posTags[j - 1].getParent()) {
        npEnd = posTags[j].getSpan().getEnd();
        j++;
      }
      // head of the new NP is the first token after the POS marker
      parse.insert(new Parse(parse.getText(), new Span(npStart, npEnd), "NP", 1, posTags[i + 1]));
    }
  }
}