opennlp.tools.parser.Parse java code examples

Refine search

Span

/**
 * Groups the tag nodes that follow a possessive marker under a new {@code NP} node.
 * <p>
 * For each tag node of type {@code POS} whose successor's parent is the grandparent
 * of the {@code POS} node, a span is built that starts at the successor and is
 * extended over every further tag node that shares a parent with the tag node
 * before it. A new {@code NP} parse covering that span, with the successor passed
 * as the last constructor argument, is then inserted into {@code parse}.
 *
 * @param parse the parse whose tag nodes are scanned and into which nodes are inserted
 */
public static void fixPossesives(Parse parse) {
 final Parse[] tagNodes = parse.getTagNodes();
 final int tagCount = tagNodes.length;
 for (int posIdx = 0; posIdx < tagCount; posIdx++) {
  final Parse posTag = tagNodes[posIdx];
  if (!posTag.getType().equals("POS")) {
   continue;
  }
  final int nextIdx = posIdx + 1;
  if (nextIdx >= tagCount
    || tagNodes[nextIdx].getParent() != posTag.getParent().getParent()) {
   continue;
  }
  final Parse possessed = tagNodes[nextIdx];
  final int npStart = possessed.getSpan().getStart();
  int npEnd = possessed.getSpan().getEnd();
  // Extend the span while consecutive tag nodes keep hanging off the same parent.
  int scan = nextIdx + 1;
  while (scan < tagCount && tagNodes[scan].getParent() == tagNodes[scan - 1].getParent()) {
   npEnd = tagNodes[scan].getSpan().getEnd();
   scan++;
  }
  parse.insert(new Parse(parse.getText(), new Span(npStart, npEnd), "NP", 1, possessed));
 }
}

/**
 * Collects one typed {@code Span} per non-POS-tag constituent found below
 * the given parse. The parse passed in is not itself included, and the
 * children of POS-tag nodes are not visited.
 *
 * @param parse the parse whose descendants are inspected
 * @return the spans of all visited constituents, each carrying the constituent's type
 */
private static Span[] getConstituencySpans(final Parse parse) {
 final Stack<Parse> pending = new Stack<>();
 if (parse.getChildCount() > 0) {
  final Parse[] topLevel = parse.getChildren();
  for (int i = 0; i < topLevel.length; i++) {
   pending.push(topLevel[i]);
  }
 }
 final List<Span> collected = new ArrayList<>();
 while (!pending.isEmpty()) {
  final Parse node = pending.pop();
  if (node.isPosTag()) {
   continue;
  }
  final Span covered = node.getSpan();
  collected.add(new Span(covered.getStart(), covered.getEnd(), node.getType()));
  final Parse[] below = node.getChildren();
  for (int i = 0; i < below.length; i++) {
   pending.push(below[i]);
  }
 }
 return collected.toArray(new Span[0]);
}

start = span.getStart();
if (!type.equals(AbstractBottomUpParser.TOK_NODE)) {
 sb.append("(");
 Parse c = i.next();
 Span s = c.span;
 if (start < s.getStart()) {
  sb.append(encodeToken(text.substring(start, s.getStart())));
 c.show(sb);
 start = s.getEnd();
if (start < span.getEnd()) {
 sb.append(encodeToken(text.substring(start, span.getEnd())));

/**
 * Prints one line per descendant of {@code p} to standard output, depth first.
 * Each line shows the node's path as dot-separated child indices in brackets,
 * followed by the node's type and hash code, its parent's hash code and type,
 * and the node's covered text.
 *
 * @param p the node whose children are printed
 * @param levels the child indices leading from the starting node down to {@code p}
 */
private void codeTree(Parse p,int[] levels) {
 final int depth = levels.length;
 // Path for the children: the incoming path plus one slot for the child index.
 final int[] childPath = new int[depth + 1];
 System.arraycopy(levels, 0, childPath, 0, depth);

 final StringBuilder pathText = new StringBuilder("[");
 for (int level : levels) {
  pathText.append(level).append(".");
 }
 final String prefix = pathText.toString();

 final Parse[] kids = p.getChildren();
 for (int ki = 0; ki < kids.length; ki++) {
  final Parse kid = kids[ki];
  childPath[depth] = ki;
  System.out.println(prefix + ki + "] " + kid.getType()
    + " " + kid.hashCode() + " -> " + kid.getParent().hashCode()
    + " " + kid.getParent().getType() + " " + kid.getCoveredText());
  codeTree(kid, childPath);
 }
}

/**
 * Walks the tree below the given parse and points every node's parent
 * reference at the node that lists it as a child.
 *
 * @param p the parse whose descendants get their parent references assigned
 */
public static void setParents(Parse p) {
 for (Parse child : p.getChildren()) {
  child.setParent(p);
  setParents(child);
 }
}

Parse startToken = tokens[nameTokenSpan.getStart()];
Parse endToken = tokens[nameTokenSpan.getEnd() - 1];
Parse commonParent = startToken.getCommonParent(endToken);
 Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());
 if (nameSpan.equals(commonParent.getSpan())) {
  commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex()));
 } else {
  Parse[] kids = commonParent.getChildren();
  boolean crossingKids = false;
  for (Parse kid : kids) {
   if (nameSpan.crosses(kid.getSpan())) {
    crossingKids = true;
   commonParent.insert(new Parse(commonParent.getText(), nameSpan,
     tag, 1.0, endToken.getHeadIndex()));
  } else {
   if (commonParent.getType().equals("NP")) {
    Parse[] grandKids = kids[0].getChildren();
    if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) {
     commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(),
       tag, 1.0, commonParent.getHeadIndex()));

 /**
  * Rebuilds a parse produced by the tagger as a parse over {@code mSentence}.
  * The node's start and end offsets are translated through {@code mIndexMap}
  * (presumably from tagger-text offsets to sentence offsets; confirm against
  * where the map is filled), while type, probability and head index are
  * carried over unchanged. Children are converted recursively and inserted
  * into the new node.
  *
  * @param parseFromTagger the tagger's parse node to convert
  * @return the final parse
  */
 Parse transformParseFromTagger(Parse parseFromTagger) {
  final Span taggerSpan = parseFromTagger.getSpan();
  final int from = taggerSpan.getStart();
  final int to = taggerSpan.getEnd();
  final Span mappedSpan = new Span(mIndexMap.get(from), mIndexMap.get(to));
  final Parse converted = new Parse(mSentence, mappedSpan, parseFromTagger.getType(),
    parseFromTagger.getProb(), parseFromTagger.getHeadIndex());
  for (Parse taggerChild : parseFromTagger.getChildren()) {
   converted.insert(transformParseFromTagger(taggerChild));
  }
  return converted;
 }
}

Parse[] children = p.getChildren();
String[] words = new String[children.length];
String[] ptags = new String[words.length];
 words[i] = sp.getHead().getCoveredText();
 ptags[i] = sp.getType();
Sequence[] cs = chunker.topKSequences(words, ptags,minChunkScore - p.getProb());
Parse[] newParses = new Parse[cs.length];
for (int si = 0, sl = cs.length; si < sl; si++) {
 newParses[si] = (Parse) p.clone(); //copies top level
 if (createDerivationString) newParses[si].getDerivation().append(si).append(".");
 String[] tags = cs[si].getOutcomes().toArray(new String[words.length]);
 cs[si].getProbs(probs);
   newParses[si].addProb(Math.log(probs[j]));
   if (type != null) {
    Parse p1 = p.getChildren()[start];
    Parse p2 = p.getChildren()[end];
      cons[ci] = p.getChildren()[ci + start];
    Parse chunk = new Parse(p1.getText(), new Span(p1.getSpan().getStart(),
      p2.getSpan().getEnd()), type, 1, headRules.getHead(cons, type));
    chunk.isChunk(true);
    newParses[si].insert(chunk);

/**
 * Replaces the child at {@code parseIndex} with a new node that adjoins
 * {@code node} to it. The new node has the replaced child's type, spans from
 * that child's start to {@code node}'s end, and contains, in order, the
 * replaced child, {@code node}'s preceding punctuation (if any) and {@code node}.
 *
 * @param node the node to adjoin
 * @param rules the head rules used to pick the head of the new node
 * @param parseIndex the index in this node's parts of the child to replace
 * @return the newly created adjunction node
 */
public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) {
 final Parse replaced = parts.get(parseIndex);
 final Span adjoinedSpan = new Span(replaced.getSpan().getStart(), node.getSpan().getEnd());
 final String adjoinedType = replaced.getType();
 final Parse adjoined = new Parse(this.text, adjoinedSpan, adjoinedType, 1,
   rules.getHead(new Parse[]{replaced, node}, adjoinedType));
 adjoined.parts.add(replaced);
 if (node.prevPunctSet != null) {
  adjoined.parts.addAll(node.prevPunctSet);
 }
 adjoined.parts.add(node);
 parts.set(parseIndex, adjoined);
 return adjoined;
}

Parse[] originalChildren = p.getChildren();
Parse[] children = collapsePunctuation(originalChildren,punctSet);
int numNodes = children.length;
 if (advanceNode.getLabel() == null) {
  break;
 else if (startTypeMap.containsKey(advanceNode.getLabel())) {
  lastStartType = startTypeMap.get(advanceNode.getLabel());
  lastStartNode = advanceNode;
  lastStartIndex = advanceNodeIndex;
 Parse newParse1 = (Parse) p.clone(); //clone parse
 if (createDerivationString) newParse1.getDerivation().append(max).append("-");
 newParse1.setChild(originalAdvanceIndex,tag);
 newParse1.addProb(Math.log(bprob));
   collapsePunctuation(newParse1.getChildren(),punctSet), lastStartType, lastStartIndex,
   advanceNodeIndex), cprobs);
  newParse2 = (Parse) newParse1.clone();
  if (createDerivationString) newParse2.getDerivation().append(1).append(".");
  newParse2.addProb(Math.log(cprobs[completeIndex]));
  Parse[] cons = new Parse[advanceNodeIndex - lastStartIndex + 1];
  boolean flat = true;
  flat &= cons[0].isPosTag();

/**
 * Appends {@code daughter} to this node's parts, preceded by the daughter's
 * previous-punctuation set when it has one. Afterwards this node's span is
 * stretched to end where the daughter ends, and its head and head index are
 * recomputed from the updated children.
 *
 * @param daughter the node to append
 * @param rules the head rules used to recompute this node's head
 */
public void add(Parse daughter, HeadRules rules) {
 if (daughter.prevPunctSet != null) {
  parts.addAll(daughter.prevPunctSet);
 }
 parts.add(daughter);

 final int unchangedStart = span.getStart();
 final int extendedEnd = daughter.getSpan().getEnd();
 this.span = new Span(unchangedStart, extendedEnd);

 final Parse[] currentChildren = getChildren();
 this.head = rules.getHead(currentChildren, type);
 this.headIndex = this.head.headIndex;
}

/**
 * Collects the text of the verb nodes found beneath the given parse.
 * <p>
 * A child counts as a verb node when its type starts with {@code "VB"} and
 * {@code allChildNodesArePOSTags} accepts it; its text is cut out of the
 * parse text using the child's span. Any other child that is not a POS tag
 * is searched recursively.
 *
 * @param p the parse whose children are searched
 * @return the matching verb texts in traversal order; empty when none are found
 */
public ArrayList<String> getVerbPhrases(Parse p)
{
  ArrayList<String> verbPhrases = new ArrayList<String>();
  for (Parse subparse : p.getChildren())
  {
    if (subparse.getType().startsWith("VB") && allChildNodesArePOSTags(subparse))
    {
      Span _span = subparse.getSpan();
      verbPhrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
    }
    else if (!subparse.isPosTag())
    {
      // Descend with this same method so nested verb nodes are collected;
      // delegating to getNounPhrases here would mix that method's results
      // into the verb list.
      verbPhrases.addAll(getVerbPhrases(subparse));
    }
  }
  return verbPhrases;
}

 String tokenString = tokens[i].getCoveredText(sentence).toString();
 String escapedToken = escape(tokenString);
 tokenList[i] = escapedToken;
 int start = tokens[i].getStart();
 mIndexMap.put(escapedStart, start);
 int end = tokens[i].getEnd();
 mIndexMap.put(escapedEnd, end);
mParseForTagger = new Parse(tokenizedSentence,
  new Span(0, tokenizedSentence.length()), "INC", 1, null);
 mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
   start + token.length()),
   opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));

/**
 * Creates an annotation for the given parse node and, recursively, for all of
 * its descendants. Child annotations are created first; the node's own
 * annotation is then created over the node's span shifted by {@code offset},
 * given the node's type (and probability, when a probability feature is
 * configured), linked to its child annotations and added to the CAS index.
 *
 * @param cas the CAS in which annotations are created and indexed
 * @param offset the value added to the parse span's start and end offsets
 * @param parse the parse node to annotate
 * @return the annotation created for {@code parse}
 */
protected AnnotationFS createAnnotation(CAS cas, int offset, Parse parse) {
 // Children are annotated before their parent.
 final Parse[] kids = parse.getChildren();
 final int kidCount = kids.length;
 final AnnotationFS[] kidAnnotations = new AnnotationFS[kidCount];
 for (int k = 0; k < kidCount; k++) {
  kidAnnotations[k] = createAnnotation(cas, offset, kids[k]);
 }

 final int begin = offset + parse.getSpan().getStart();
 final int end = offset + parse.getSpan().getEnd();
 final AnnotationFS annotation = cas.createAnnotation(mParseType, begin, end);
 annotation.setStringValue(mTypeFeature, parse.getType());
 if (probabilityFeature != null) {
  annotation.setDoubleValue(probabilityFeature, parse.getProb());
 }

 final ArrayFS kidArray = cas.createArrayFS(kidCount);
 kidArray.copyFromArray(kidAnnotations, 0, 0, kidCount);
 annotation.setFeatureValue(childrenFeature, kidArray);

 cas.getIndexRepository().addFS(annotation);
 return annotation;
}

if (span.contains(ic)) {
  if (sp.getStart() >= ic.getEnd()) {
   break;
   pi--;
   constituent.parts.add(subPart);
   subPart.setParent(constituent);
  else if (sp.contains(ic)) {
   subPart.insert(constituent);
   return;
 constituent.setParent(this);

currentChunks[ci] = (Parse) chunks[ci].clone();
currentChunks[ci].setPrevPunctuation(chunks[ci].getPreviousPunctuationSet());
currentChunks[ci].setNextPunctuation(chunks[ci].getNextPunctuationSet());
currentChunks[ci].setLabel(Parser.COMPLETE);
chunks[ci].setLabel(Parser.COMPLETE);
Parse parent = chunks[ci].getParent();
Parse prevParent = chunks[ci];
int off = 0;
if (!chunks[ci].isPosTag()) {
 builtNodes.add(off++,chunks[ci]);
while (!parent.getType().equals(AbstractBottomUpParser.TOP_NODE) && parent.getLabel() == null) {
 if (parent.getLabel() == null && !prevParent.getType().equals(parent.getType())) {
  if (debug) System.err.println("Build: " + parent.getType() + " for: " + currentChunks[ci]);
  if (etype == ParserEventTypeEnum.BUILD) {
   parseEvents.add(new Event(parent.getType(),
     buildContextGenerator.getContext(currentChunks, ci)));
  Parse newParent = new Parse(currentChunks[ci].getText(),
    currentChunks[ci].getSpan(),parent.getType(),1,0);
  newParent.add(currentChunks[ci],rules);
  newParent.setPrevPunctuation(currentChunks[ci].getPreviousPunctuationSet());
  newParent.setNextPunctuation(currentChunks[ci].getNextPunctuationSet());
  currentChunks[ci].setParent(newParent);
  currentChunks[ci] = newParent;
  newParent.setLabel(Parser.BUILT);

 if (c == '(') {
  String rest = parse.substring(ci + 1);
  String type = getType(rest);
  if (type == null) {
   System.err.println("null type for: " + rest);
  String token = getToken(rest);
  stack.push(new Constituent(type, new Span(offset,offset)));
  if (token != null) {
   if (Objects.equals(type, "-NONE-") && gl != null) {
      new Span(offset, offset + token.length())));
    text.append(token).append(" ");
    offset += token.length() + 1;
Parse p = new Parse(txt, new Span(0, txt.length()), AbstractBottomUpParser.TOP_NODE, 1,0);
for (int ci = 0; ci < cons.size(); ci++) {
 Constituent con = cons.get(ci);
   tokenIndex++;
  Parse c = new Parse(txt, con.getSpan(), type, 1,tokenIndex);
  p.insert(c);

Parse[] originalChildren = p.getChildren();
Parse[] children = collapsePunctuation(originalChildren,punctSet);
int numNodes = children.length;
 if (children[0].isPosTag()) {
  return null;
  p.expandTopNode(children[0]);
  return new Parse[] { p };
double doneProb = bprobs[doneIndex];
if (debugOn)
 System.out.println("adi=" + advanceNodeIndex + " " + advanceNode.getType() + "."
   + advanceNode.getLabel() + " " + advanceNode + " choose build=" + (1 - doneProb)
   + " attach=" + doneProb);
  String tag = buildModel.getOutcome(max);
  if (!tag.equals(DONE)) {
   Parse newParse1 = (Parse) p.clone();
   Parse newNode = new Parse(p.getText(),advanceNode.getSpan(),tag,bprob,advanceNode.getHead());
   newParse1.insert(newNode);
   newParse1.addProb(Math.log(bprob));
   newParsesList.add(newParse1);
   if (checkComplete) {
    if (cprobs[completeIndex] > probMass) { //just complete advances
     setComplete(newNode);
     newParse1.addProb(Math.log(cprobs[completeIndex]));
     if (debugOn) System.out.println("Only advancing complete node");

   new Span(offset, offset + token.length())));
int start = unfinishedCon.getSpan().getStart();
if (start < offset) {
 cons.add(new Constituent(unfinishedCon.getLabel(), new Span(start, offset - 1)));
Parse p = new Parse(txt, new Span(0, txt.length()), AbstractBottomUpParser.TOP_NODE, 1,0);
for (int ci = 0; ci < cons.size(); ci++) {
 Constituent con = cons.get(ci);
   tokenIndex++;
  Parse c = new Parse(txt, con.getSpan(), type, 1,tokenIndex);
  p.insert(c);

/**
 * Produces one tagged copy of the parse for every tag sequence the tagger
 * proposes for the parse's children.
 * <p>
 * Each copy receives a new node per word, built from the word's text and span,
 * the proposed tag, the tag's probability and the word's index; the log of
 * each tag probability is added to the copy's probability.
 *
 * @param p The parse to be tagged.
 * @return Parses with different POS-tag sequence assignments.
 */
protected Parse[] advanceTags(final Parse p) {
 final Parse[] tokens = p.getChildren();
 final int tokenCount = tokens.length;
 final String[] words = new String[tokenCount];
 for (int w = 0; w < tokenCount; w++) {
  words[w] = tokens[w].getCoveredText();
 }
 // Scratch buffer refilled with the per-tag probabilities of each sequence.
 final double[] tagProbs = new double[tokenCount];

 final Sequence[] sequences = tagger.topKSequences(words);
 final Parse[] tagged = new Parse[sequences.length];
 for (int s = 0; s < sequences.length; s++) {
  final String[] tags = sequences[s].getOutcomes().toArray(new String[tokenCount]);
  sequences[s].getProbs(tagProbs);
  final Parse candidate = (Parse) p.clone(); // copies top level
  tagged[s] = candidate;
  if (createDerivationString) {
   candidate.getDerivation().append(s).append(".");
  }
  for (int w = 0; w < tokenCount; w++) {
   final Parse token = tokens[w];
   final double tagProb = tagProbs[w];
   candidate.insert(new Parse(token.getText(), token.getSpan(), tags[w], tagProb, w));
   candidate.addProb(Math.log(tagProb));
  }
 }
 return tagged;
}

Javadoc

Data structure for holding parse constituents.

Most used methods

getType
getChildren
Returns the child constituents of this constituent.
<init>
Creates a new parse node for this specified text and span of the specified type with the specified p
getSpan
Returns the character offsets for this constituent.
insert
Inserts the specified constituent into this parse based on its text span. This method assumes that th
show
Appends the specified string buffer with a string representation of this parse.
isPosTag
Indicates whether this parse node is a pos-tag.
getTagNodes
Returns the parse nodes which are children of this node and which are pos tags.
getText
Returns the text of the sentence over which this parse was formed.
getChildCount
Returns the number of children for this parse node.
getHeadIndex
Returns the index within a sentence of the head token for this parse.
parseParse
Parses the specified tree-bank style parse string and return a Parse structure for that string.

Popular in Java

Start an intent from android
addToBackStack (FragmentTransaction)
scheduleAtFixedRate (ScheduledExecutorService)
putExtra (Intent)
SimpleDateFormat (java.text)
Formats and parses dates in a locale-sensitive manner. Formatting turns a Date into a String, and pa
ArrayList (java.util)
ArrayList is an implementation of List, backed by an array. All optional operations including adding
Hashtable (java.util)
A plug-in replacement for JDK1.5 java.util.Hashtable. This version is based on org.cliffc.high_scale
FileUtils (org.apache.commons.io)
General file manipulation utilities. Facilities are provided in the following areas: * writing to a
LoggerFactory (org.slf4j)
The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for lo
BorderLayout (java.awt)
A border layout lays out a container, arranging and resizing its components to fit in five regions:
Top Sublime Text plugins

How to use Parse in opennlp.tools.parser

Best Java code snippets using opennlp.tools.parser.Parse (Showing top 20 results out of 315)

Refine search

How to use
Parse
in
opennlp.tools.parser