Refine search
/**
 * Inserts an {@code NP} node after each qualifying possessive marker.
 *
 * <p>For every tag node of type {@code "POS"} whose following tag node hangs
 * directly off the grandparent of the {@code POS} node, a span is built that
 * starts at that following node and is extended over each subsequent tag node
 * for as long as it shares its parent with the tag node before it. A new
 * {@code "NP"} parse covering that span is then inserted into {@code parse}.
 *
 * @param parse the parse whose tag nodes are scanned and into which the new
 *              {@code NP} nodes are inserted
 */
public static void fixPossesives(Parse parse) {
  final Parse[] tagNodes = parse.getTagNodes();
  for (int posIdx = 0; posIdx < tagNodes.length; posIdx++) {
    if (!tagNodes[posIdx].getType().equals("POS")) {
      continue;
    }

    final int firstIdx = posIdx + 1;
    if (firstIdx >= tagNodes.length
        || tagNodes[firstIdx].getParent() != tagNodes[posIdx].getParent().getParent()) {
      continue;
    }

    final Parse firstAfterPos = tagNodes[firstIdx];
    final int npStart = firstAfterPos.getSpan().getStart();
    int npEnd = firstAfterPos.getSpan().getEnd();

    // Grow the span while consecutive tag nodes are siblings.
    int next = firstIdx + 1;
    while (next < tagNodes.length
        && tagNodes[next].getParent() == tagNodes[next - 1].getParent()) {
      npEnd = tagNodes[next].getSpan().getEnd();
      next++;
    }

    // Last constructor argument is the node right after the POS tag
    // (presumably used as the head of the new NP -- confirm against Parse).
    parse.insert(new Parse(parse.getText(), new Span(npStart, npEnd), "NP", 1, firstAfterPos));
  }
}
/**
 * Collects a typed {@code Span} for every descendant of {@code parse} that
 * is not a POS-tag node.
 *
 * <p>The traversal is depth-first using an explicit stack. The root
 * {@code parse} itself is not reported, and the children of POS-tag nodes
 * are not visited.
 *
 * @param parse the parse from which to obtain the spans
 * @return an array with one span (start, end, node type) per visited
 *         non-POS-tag descendant, in traversal order
 */
private static Span[] getConstituencySpans(final Parse parse) {
  final Stack<Parse> pending = new Stack<>();
  if (parse.getChildCount() > 0) {
    final Parse[] topLevel = parse.getChildren();
    for (int i = 0; i < topLevel.length; i++) {
      pending.push(topLevel[i]);
    }
  }

  final List<Span> collected = new ArrayList<>();
  while (!pending.isEmpty()) {
    final Parse node = pending.pop();
    if (node.isPosTag()) {
      // POS-tag nodes are neither reported nor expanded.
      continue;
    }
    final Span nodeSpan = node.getSpan();
    collected.add(new Span(nodeSpan.getStart(), nodeSpan.getEnd(), node.getType()));
    final Parse[] nested = node.getChildren();
    for (int i = 0; i < nested.length; i++) {
      pending.push(nested[i]);
    }
  }

  final Span[] result = new Span[collected.size()];
  return collected.toArray(result);
}
start = span.getStart(); if (!type.equals(AbstractBottomUpParser.TOK_NODE)) { sb.append("("); Parse c = i.next(); Span s = c.span; if (start < s.getStart()) { sb.append(encodeToken(text.substring(start, s.getStart()))); c.show(sb); start = s.getEnd(); if (start < span.getEnd()) { sb.append(encodeToken(text.substring(start, span.getEnd())));
/**
 * Recursively prints one line per descendant of {@code p} to standard out.
 *
 * <p>Each line consists of the bracketed, dot-separated path of child
 * indexes leading to the node, followed by the node's type, its hash code,
 * its parent's hash code, its parent's type and its covered text.
 *
 * @param p      the parse whose children are printed
 * @param levels the child indexes on the path from the starting node to {@code p}
 */
private void codeTree(Parse p, int[] levels) {
  final Parse[] children = p.getChildren();

  // Path for the children: the current path plus one trailing slot.
  final int depth = levels.length;
  final int[] childPath = new int[depth + 1];
  System.arraycopy(levels, 0, childPath, 0, depth);

  final StringBuilder prefixBuilder = new StringBuilder();
  prefixBuilder.append("[");
  for (int level : levels) {
    prefixBuilder.append(level).append(".");
  }

  int index = 0;
  for (Parse child : children) {
    childPath[depth] = index;
    System.out.println(prefixBuilder.toString() + index + "] "
        + child.getType() + " "
        + child.hashCode() + " -> "
        + child.getParent().hashCode() + " "
        + child.getParent().getType() + " "
        + child.getCoveredText());
    codeTree(child, childPath);
    index++;
  }
}
/**
 * Walks the tree below {@code p} and points every node's parent reference
 * at the node that lists it as a child, so parent links agree with child
 * links.
 *
 * @param p The parse whose descendants get their parent references assigned.
 */
public static void setParents(Parse p) {
  for (Parse child : p.getChildren()) {
    child.setParent(p);
    // Descend so that grandchildren point back at this child, and so on.
    setParents(child);
  }
}
Parse startToken = tokens[nameTokenSpan.getStart()]; Parse endToken = tokens[nameTokenSpan.getEnd() - 1]; Parse commonParent = startToken.getCommonParent(endToken); Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd()); if (nameSpan.equals(commonParent.getSpan())) { commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex())); } else { Parse[] kids = commonParent.getChildren(); boolean crossingKids = false; for (Parse kid : kids) { if (nameSpan.crosses(kid.getSpan())) { crossingKids = true; commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex())); } else { if (commonParent.getType().equals("NP")) { Parse[] grandKids = kids[0].getChildren(); if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) { commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(), tag, 1.0, commonParent.getHeadIndex()));
/**
 * Rebuilds a parse produced by the tagger over {@code mSentence}.
 *
 * <p>The start and end offsets of the node's span are translated through
 * {@code mIndexMap}; type, probability and head index are copied over. The
 * same conversion is applied recursively to every child, and each converted
 * child is inserted into the converted node.
 *
 * @param parseFromTagger the node of the tagger's parse to convert
 * @return the converted parse, including its converted descendants
 */
Parse transformParseFromTagger(Parse parseFromTagger) {
  final int taggerStart = parseFromTagger.getSpan().getStart();
  final int taggerEnd = parseFromTagger.getSpan().getEnd();

  final Span mappedSpan = new Span(mIndexMap.get(taggerStart), mIndexMap.get(taggerEnd));
  final Parse converted = new Parse(
      mSentence,
      mappedSpan,
      parseFromTagger.getType(),
      parseFromTagger.getProb(),
      parseFromTagger.getHeadIndex());

  final Parse[] taggerChildren = parseFromTagger.getChildren();
  for (int i = 0; i < taggerChildren.length; i++) {
    converted.insert(transformParseFromTagger(taggerChildren[i]));
  }
  return converted;
}
} // closes the enclosing type, whose header is outside this excerpt
Parse[] children = p.getChildren(); String[] words = new String[children.length]; String[] ptags = new String[words.length]; words[i] = sp.getHead().getCoveredText(); ptags[i] = sp.getType(); Sequence[] cs = chunker.topKSequences(words, ptags,minChunkScore - p.getProb()); Parse[] newParses = new Parse[cs.length]; for (int si = 0, sl = cs.length; si < sl; si++) { newParses[si] = (Parse) p.clone(); //copies top level if (createDerivationString) newParses[si].getDerivation().append(si).append("."); String[] tags = cs[si].getOutcomes().toArray(new String[words.length]); cs[si].getProbs(probs); newParses[si].addProb(Math.log(probs[j])); if (type != null) { Parse p1 = p.getChildren()[start]; Parse p2 = p.getChildren()[end]; cons[ci] = p.getChildren()[ci + start]; Parse chunk = new Parse(p1.getText(), new Span(p1.getSpan().getStart(), p2.getSpan().getEnd()), type, 1, headRules.getHead(cons, type)); chunk.isChunk(true); newParses[si].insert(chunk);
/**
 * Replaces the child at {@code parseIndex} with a new node that has the old
 * child and {@code node} as its parts.
 *
 * <p>The new node takes the type of the replaced child, spans from that
 * child's start to {@code node}'s end, and gets its head from
 * {@code rules}. If {@code node} has a non-null {@code prevPunctSet}, its
 * members are added between the old child and {@code node}.
 *
 * @param node       the node to adjoin
 * @param rules      the head rules used to pick the head of the new node
 * @param parseIndex the index in {@code parts} of the child being replaced
 * @return the newly created node, now stored at {@code parseIndex}
 */
public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) {
  final Parse replaced = parts.get(parseIndex);

  final Span adjoinedSpan =
      new Span(replaced.getSpan().getStart(), node.getSpan().getEnd());
  final String adjoinedType = replaced.getType();
  final Parse adjoinedHead =
      rules.getHead(new Parse[] {replaced, node}, replaced.getType());
  final Parse adjoined = new Parse(this.text, adjoinedSpan, adjoinedType, 1, adjoinedHead);

  adjoined.parts.add(replaced);
  final boolean hasLeadingPunctuation = node.prevPunctSet != null;
  if (hasLeadingPunctuation) {
    adjoined.parts.addAll(node.prevPunctSet);
  }
  adjoined.parts.add(node);

  parts.set(parseIndex, adjoined);
  return adjoined;
}
Parse[] originalChildren = p.getChildren(); Parse[] children = collapsePunctuation(originalChildren,punctSet); int numNodes = children.length; if (advanceNode.getLabel() == null) { break; else if (startTypeMap.containsKey(advanceNode.getLabel())) { lastStartType = startTypeMap.get(advanceNode.getLabel()); lastStartNode = advanceNode; lastStartIndex = advanceNodeIndex; Parse newParse1 = (Parse) p.clone(); //clone parse if (createDerivationString) newParse1.getDerivation().append(max).append("-"); newParse1.setChild(originalAdvanceIndex,tag); newParse1.addProb(Math.log(bprob)); collapsePunctuation(newParse1.getChildren(),punctSet), lastStartType, lastStartIndex, advanceNodeIndex), cprobs); newParse2 = (Parse) newParse1.clone(); if (createDerivationString) newParse2.getDerivation().append(1).append("."); newParse2.addProb(Math.log(cprobs[completeIndex])); Parse[] cons = new Parse[advanceNodeIndex - lastStartIndex + 1]; boolean flat = true; flat &= cons[0].isPosTag();
/**
 * Appends {@code daughter} to this node's parts and updates span and head.
 *
 * <p>If {@code daughter} has a non-null {@code prevPunctSet}, its members
 * are appended first. The span is then stretched to end where
 * {@code daughter} ends, and the head and head index are recomputed from
 * the current children using {@code rules}.
 *
 * @param daughter the node to append
 * @param rules    the head rules used to recompute this node's head
 */
public void add(Parse daughter, HeadRules rules) {
  final boolean hasLeadingPunctuation = daughter.prevPunctSet != null;
  if (hasLeadingPunctuation) {
    parts.addAll(daughter.prevPunctSet);
  }
  parts.add(daughter);

  // Keep the current start, move the end to the new last child.
  final int spanStart = this.span.getStart();
  final int spanEnd = daughter.getSpan().getEnd();
  this.span = new Span(spanStart, spanEnd);

  final Parse recomputedHead = rules.getHead(getChildren(), this.type);
  this.head = recomputedHead;
  this.headIndex = recomputedHead.headIndex;
}
/**
 * Collects the covered text of the verb constituents found below {@code p}.
 *
 * <p>A child counts as a match when its type starts with {@code "VB"} and
 * {@code allChildNodesArePOSTags} accepts it; its text is cut from
 * {@code p.getText()} using the child's span. Any other child that is not a
 * POS-tag node is searched recursively.
 *
 * <p>The recursive step calls this method itself. It previously called
 * {@code getNounPhrases}, which put noun phrases from nested constituents
 * into the verb-phrase result and never found nested verbs.
 *
 * @param p the parse whose subtree is searched
 * @return the matching texts in traversal order; empty if none are found
 */
public ArrayList<String> getVerbPhrases(Parse p) {
  ArrayList<String> verbPhrases = new ArrayList<String>();
  for (Parse subparse : p.getChildren()) {
    if (subparse.getType().startsWith("VB") && allChildNodesArePOSTags(subparse)) {
      Span span = subparse.getSpan();
      verbPhrases.add(p.getText().substring(span.getStart(), span.getEnd()));
    } else if (!subparse.isPosTag()) {
      // Keep looking for verbs deeper in the tree.
      verbPhrases.addAll(getVerbPhrases(subparse));
    }
  }
  return verbPhrases;
}
String tokenString = tokens[i].getCoveredText(sentence).toString(); String escapedToken = escape(tokenString); tokenList[i] = escapedToken; int start = tokens[i].getStart(); mIndexMap.put(escapedStart, start); int end = tokens[i].getEnd(); mIndexMap.put(escapedEnd, end); mParseForTagger = new Parse(tokenizedSentence, new Span(0, tokenizedSentence.length()), "INC", 1, null); mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start, start + token.length()), opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
protected AnnotationFS createAnnotation(CAS cas, int offset, Parse parse) { Parse[] parseChildren = parse.getChildren(); AnnotationFS[] parseChildAnnotations = new AnnotationFS[parseChildren.length]; // do this for all children for (int i = 0; i < parseChildren.length; i++) { parseChildAnnotations[i] = createAnnotation(cas, offset, parseChildren[i]); } AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset + parse.getSpan().getStart(), offset + parse.getSpan().getEnd()); parseAnnotation.setStringValue(mTypeFeature, parse.getType()); if (probabilityFeature != null) { parseAnnotation.setDoubleValue(probabilityFeature, parse.getProb()); } ArrayFS childrenArray = cas.createArrayFS(parseChildAnnotations.length); childrenArray.copyFromArray(parseChildAnnotations, 0, 0, parseChildAnnotations.length); parseAnnotation.setFeatureValue(childrenFeature, childrenArray); cas.getIndexRepository().addFS(parseAnnotation); return parseAnnotation; }
currentChunks[ci] = (Parse) chunks[ci].clone(); currentChunks[ci].setPrevPunctuation(chunks[ci].getPreviousPunctuationSet()); currentChunks[ci].setNextPunctuation(chunks[ci].getNextPunctuationSet()); currentChunks[ci].setLabel(Parser.COMPLETE); chunks[ci].setLabel(Parser.COMPLETE); Parse parent = chunks[ci].getParent(); Parse prevParent = chunks[ci]; int off = 0; if (!chunks[ci].isPosTag()) { builtNodes.add(off++,chunks[ci]); while (!parent.getType().equals(AbstractBottomUpParser.TOP_NODE) && parent.getLabel() == null) { if (parent.getLabel() == null && !prevParent.getType().equals(parent.getType())) { if (debug) System.err.println("Build: " + parent.getType() + " for: " + currentChunks[ci]); if (etype == ParserEventTypeEnum.BUILD) { parseEvents.add(new Event(parent.getType(), buildContextGenerator.getContext(currentChunks, ci))); Parse newParent = new Parse(currentChunks[ci].getText(), currentChunks[ci].getSpan(),parent.getType(),1,0); newParent.add(currentChunks[ci],rules); newParent.setPrevPunctuation(currentChunks[ci].getPreviousPunctuationSet()); newParent.setNextPunctuation(currentChunks[ci].getNextPunctuationSet()); currentChunks[ci].setParent(newParent); currentChunks[ci] = newParent; newParent.setLabel(Parser.BUILT);
if (c == '(') { String rest = parse.substring(ci + 1); String type = getType(rest); if (type == null) { System.err.println("null type for: " + rest); String token = getToken(rest); stack.push(new Constituent(type, new Span(offset,offset))); if (token != null) { if (Objects.equals(type, "-NONE-") && gl != null) { new Span(offset, offset + token.length()))); text.append(token).append(" "); offset += token.length() + 1; Parse p = new Parse(txt, new Span(0, txt.length()), AbstractBottomUpParser.TOP_NODE, 1,0); for (int ci = 0; ci < cons.size(); ci++) { Constituent con = cons.get(ci); tokenIndex++; Parse c = new Parse(txt, con.getSpan(), type, 1,tokenIndex); p.insert(c);
Parse[] originalChildren = p.getChildren(); Parse[] children = collapsePunctuation(originalChildren,punctSet); int numNodes = children.length; if (children[0].isPosTag()) { return null; p.expandTopNode(children[0]); return new Parse[] { p }; double doneProb = bprobs[doneIndex]; if (debugOn) System.out.println("adi=" + advanceNodeIndex + " " + advanceNode.getType() + "." + advanceNode.getLabel() + " " + advanceNode + " choose build=" + (1 - doneProb) + " attach=" + doneProb); String tag = buildModel.getOutcome(max); if (!tag.equals(DONE)) { Parse newParse1 = (Parse) p.clone(); Parse newNode = new Parse(p.getText(),advanceNode.getSpan(),tag,bprob,advanceNode.getHead()); newParse1.insert(newNode); newParse1.addProb(Math.log(bprob)); newParsesList.add(newParse1); if (checkComplete) { if (cprobs[completeIndex] > probMass) { //just complete advances setComplete(newNode); newParse1.addProb(Math.log(cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing complete node");
new Span(offset, offset + token.length()))); int start = unfinishedCon.getSpan().getStart(); if (start < offset) { cons.add(new Constituent(unfinishedCon.getLabel(), new Span(start, offset - 1))); Parse p = new Parse(txt, new Span(0, txt.length()), AbstractBottomUpParser.TOP_NODE, 1,0); for (int ci = 0; ci < cons.size(); ci++) { Constituent con = cons.get(ci); tokenIndex++; Parse c = new Parse(txt, con.getSpan(), type, 1,tokenIndex); p.insert(c);
/** * Advances the parse by assigning it POS tags and returns multiple tag sequences. * @param p The parse to be tagged. * @return Parses with different POS-tag sequence assignments. */ protected Parse[] advanceTags(final Parse p) { Parse[] children = p.getChildren(); String[] words = new String[children.length]; double[] probs = new double[words.length]; for (int i = 0,il = children.length; i < il; i++) { words[i] = children[i].getCoveredText(); } Sequence[] ts = tagger.topKSequences(words); Parse[] newParses = new Parse[ts.length]; for (int i = 0; i < ts.length; i++) { String[] tags = ts[i].getOutcomes().toArray(new String[words.length]); ts[i].getProbs(probs); newParses[i] = (Parse) p.clone(); //copies top level if (createDerivationString) newParses[i].getDerivation().append(i).append("."); for (int j = 0; j < words.length; j++) { Parse word = children[j]; //System.err.println("inserting tag "+tags[j]); double prob = probs[j]; newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j)); newParses[i].addProb(Math.log(prob)); } } return newParses; }