/**
 * Re-parses the tokens of a reference parse and scores the best prediction against it.
 *
 * <p>The covered text of every token node of {@code reference} is joined with single
 * spaces and handed to {@code ParserTool.parseLine} asking for one parse. When a parse
 * comes back, the constituency spans of the reference and of that parse are passed to
 * {@code fmeasure.updateScores}.
 *
 * @param reference the reference parse to evaluate against
 * @return the first predicted parse, or {@code null} when no parse was produced
 */
@Override
protected final Parse processSample(final Parse reference) {
  final Parse[] tokenNodes = reference.getTokenNodes();
  final List<String> words = new ArrayList<>(tokenNodes.length);
  for (final Parse tokenNode : tokenNodes) {
    final CharSequence covered = tokenNode.getSpan().getCoveredText(reference.getText());
    words.add(covered.toString());
  }

  final String sentence = String.join(" ", words);
  final Parse[] candidates = ParserTool.parseLine(sentence, parser, 1);
  if (candidates.length == 0) {
    // Nothing to score; the caller receives null.
    return null;
  }

  final Parse best = candidates[0];
  fmeasure.updateScores(getConstituencySpans(reference), getConstituencySpans(best));
  return best;
}
/** * Advances the parse by assigning it POS tags and returns multiple tag sequences. * @param p The parse to be tagged. * @return Parses with different POS-tag sequence assignments. */ protected Parse[] advanceTags(final Parse p) { Parse[] children = p.getChildren(); String[] words = new String[children.length]; double[] probs = new double[words.length]; for (int i = 0,il = children.length; i < il; i++) { words[i] = children[i].getCoveredText(); } Sequence[] ts = tagger.topKSequences(words); Parse[] newParses = new Parse[ts.length]; for (int i = 0; i < ts.length; i++) { String[] tags = ts[i].getOutcomes().toArray(new String[words.length]); ts[i].getProbs(probs); newParses[i] = (Parse) p.clone(); //copies top level if (createDerivationString) newParses[i].getDerivation().append(i).append("."); for (int j = 0; j < words.length; j++) { Parse word = children[j]; //System.err.println("inserting tag "+tags[j]); double prob = probs[j]; newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j)); newParses[i].addProb(Math.log(prob)); } } return newParses; }
@Test public void testGetText() { Parse p = Parse.parseParse(PARSE_STRING); // TODO: Why does parse attaches a space to the end of the text ??? String expectedText = "She was just another freighter from the States , " + "and she seemed as commonplace as her name . "; Assert.assertEquals(expectedText, p.getText()); }
// Inserts two nodes of type lastStartType into newParse2. Both use probability cprobs[1]
// and the head returned by headRules.getHead(cons, lastStartType).
//   1st insert: the node covers p's own span.
//   2nd insert: the node covers the start of lastStartNode through the end of advanceNode.
// NOTE(review): the two inserts differ only in their span; confirm that inserting both
// is intended rather than one being a leftover of an earlier revision.
newParse2.insert(new Parse(p.getText(), p.getSpan(), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType))); newParse2.insert(new Parse(p.getText(), new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd()), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType)));
Parse newParent = new Parse(currentChunks[ci].getText(), currentChunks[ci].getSpan(),parent.getType(),1,0); newParent.add(currentChunks[ci],rules); int attachNodeIndex = -1; if (ci == 0) { Parse top = new Parse(currentChunks[ci].getText(), new Span(0,currentChunks[ci].getText().length()),AbstractBottomUpParser.TOP_NODE,1,0); top.insert(currentChunks[ci]);
if (!tag.equals(DONE)) { Parse newParse1 = (Parse) p.clone(); Parse newNode = new Parse(p.getText(),advanceNode.getSpan(),tag,bprob,advanceNode.getHead()); newParse1.insert(newNode); newParse1.addProb(Math.log(bprob)); Parse newNode2 = new Parse(p.getText(),advanceNode.getSpan(),tag,bprob,advanceNode.getHead()); newParse2.insert(newNode2); newParse2.addProb(Math.log(bprob));
// Builds a node over p1's text that spans from the start of p1 to the end of p2, with the
// given type, probability 1, and the head returned by headRules.getHead(cons, type).
// The node is then passed through isChunk(true) — presumably flagging it as a chunk;
// confirm against Parse.isChunk(boolean).
Parse chunk = new Parse(p1.getText(), new Span(p1.getSpan().getStart(), p2.getSpan().getEnd()), type, 1, headRules.getHead(cons, type)); chunk.isChunk(true);
Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd()); if (nameSpan.equals(commonParent.getSpan())) { commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex())); } else { Parse[] kids = commonParent.getChildren(); commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex())); } else { Parse[] grandKids = kids[0].getChildren(); if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) { commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(), tag, 1.0, commonParent.getHeadIndex()));
/**
 * Inserts an {@code "NP"} node after each {@code "POS"} tag node where the structure matches.
 *
 * <p>For every tag node of type {@code "POS"} whose following tag node has, as its parent,
 * the grandparent of the {@code "POS"} node, a span is built that starts at the following
 * tag node and is extended over each subsequent tag node for as long as it shares its
 * parent with the tag node before it. A new {@code "NP"} node with that span, probability 1
 * and the following tag node as head is then inserted into {@code parse}.
 *
 * @param parse the parse to modify in place
 */
public static void fixPossesives(Parse parse) {
  final Parse[] tagNodes = parse.getTagNodes();
  final int tagCount = tagNodes.length;

  for (int posIdx = 0; posIdx < tagCount; posIdx++) {
    if (!tagNodes[posIdx].getType().equals("POS")) {
      continue;
    }
    final int headIdx = posIdx + 1;
    if (headIdx >= tagCount) {
      continue;
    }
    final Parse headTag = tagNodes[headIdx];
    if (headTag.getParent() != tagNodes[posIdx].getParent().getParent()) {
      continue;
    }

    final int spanStart = headTag.getSpan().getStart();
    int spanEnd = headTag.getSpan().getEnd();

    // Grow the span while consecutive tag nodes are siblings.
    int scan = headIdx + 1;
    while (scan < tagCount && tagNodes[scan].getParent() == tagNodes[scan - 1].getParent()) {
      spanEnd = tagNodes[scan].getSpan().getEnd();
      scan++;
    }

    final Span npSpan = new Span(spanStart, spanEnd);
    parse.insert(new Parse(parse.getText(), npSpan, "NP", 1, headTag));
  }
}
/**
 * Re-parses the tokens of a reference parse and scores the best prediction against it.
 *
 * <p>The covered text of every token node of {@code reference} is joined with single
 * spaces and handed to {@code ParserTool.parseLine} asking for one parse. When a parse
 * comes back, the constituency spans of the reference and of that parse are passed to
 * {@code fmeasure.updateScores}.
 *
 * @param reference the reference parse to evaluate against
 * @return the first predicted parse, or {@code null} when no parse was produced
 */
@Override
protected final Parse processSample(final Parse reference) {
  final Parse[] tokenNodes = reference.getTokenNodes();
  final List<String> words = new ArrayList<>(tokenNodes.length);
  for (final Parse tokenNode : tokenNodes) {
    final CharSequence covered = tokenNode.getSpan().getCoveredText(reference.getText());
    words.add(covered.toString());
  }

  final String sentence = String.join(" ", words);
  final Parse[] candidates = ParserTool.parseLine(sentence, parser, 1);
  if (candidates.length == 0) {
    // Nothing to score; the caller receives null.
    return null;
  }

  final Parse best = candidates[0];
  fmeasure.updateScores(getConstituencySpans(reference), getConstituencySpans(best));
  return best;
}
/**
 * Re-parses the tokens of a reference parse and scores the best prediction against it.
 *
 * <p>The covered text of every token node of {@code reference} is joined with single
 * spaces and handed to {@code ParserTool.parseLine} asking for one parse. When a parse
 * comes back, the constituency spans of the reference and of that parse are passed to
 * {@code fmeasure.updateScores}.
 *
 * @param reference the reference parse to evaluate against
 * @return the first predicted parse, or {@code null} when no parse was produced
 */
@Override
protected final Parse processSample(final Parse reference) {
  final Parse[] tokenNodes = reference.getTokenNodes();
  final List<String> words = new ArrayList<>(tokenNodes.length);
  for (final Parse tokenNode : tokenNodes) {
    final CharSequence covered = tokenNode.getSpan().getCoveredText(reference.getText());
    words.add(covered.toString());
  }

  final String sentence = String.join(" ", words);
  final Parse[] candidates = ParserTool.parseLine(sentence, parser, 1);
  if (candidates.length == 0) {
    // Nothing to score; the caller receives null.
    return null;
  }

  final Parse best = candidates[0];
  fmeasure.updateScores(getConstituencySpans(reference), getConstituencySpans(best));
  return best;
}
/**
 * Collects the text of verb nodes found beneath the given parse.
 *
 * <p>Each child of {@code p} whose type starts with {@code "VB"} and for which
 * {@code allChildNodesArePOSTags} returns {@code true} contributes the substring of
 * {@code p.getText()} covered by its span. Any other child that is not a POS tag is
 * searched recursively.
 *
 * @param p the parse whose children are inspected
 * @return the collected verb texts, in traversal order; empty when none are found
 */
public ArrayList<String> getVerbPhrases(Parse p) {
  ArrayList<String> verbPhrases = new ArrayList<String>();
  for (Parse child : p.getChildren()) {
    if (child.getType().startsWith("VB") && allChildNodesArePOSTags(child)) {
      Span childSpan = child.getSpan();
      verbPhrases.add(p.getText().substring(childSpan.getStart(), childSpan.getEnd()));
    } else if (!child.isPosTag()) {
      // Recurse with getVerbPhrases (not getNounPhrases) so that nested constituents
      // contribute verb text only; the previous call mixed noun phrases into the result.
      verbPhrases.addAll(getVerbPhrases(child));
    }
  }
  return verbPhrases;
}
private void show(Parse p) { int start; start = p.getSpan().getStart(); if (!p.getType().equals(Parser.TOK_NODE)) { System.out.print("("); System.out.print(p.getType()); if (parseMap.containsKey(p)) { System.out.print("#"+parseMap.get(p)); } //System.out.print(p.hashCode()+"-"+parseMap.containsKey(p)); System.out.print(" "); } Parse[] children = p.getChildren(); for (int pi=0,pn=children.length;pi<pn;pi++) { Parse c = children[pi]; Span s = c.getSpan(); if (start < s.getStart()) { System.out.print(p.getText().substring(start, s.getStart())); } show(c); start = s.getEnd(); } System.out.print(p.getText().substring(start, p.getSpan().getEnd())); if (!p.getType().equals(Parser.TOK_NODE)) { System.out.print(")"); } } }
private void show(Parse p) { int start; start = p.getSpan().getStart(); if (!p.getType().equals(Parser.TOK_NODE)) { System.out.print("("); System.out.print(p.getType()); if (parseMap.containsKey(p)) { System.out.print("#"+parseMap.get(p)); } //System.out.print(p.hashCode()+"-"+parseMap.containsKey(p)); System.out.print(" "); } Parse[] children = p.getChildren(); for (int pi=0,pn=children.length;pi<pn;pi++) { Parse c = children[pi]; Span s = c.getSpan(); if (start < s.getStart()) { System.out.print(p.getText().substring(start, s.getStart())); } show(c); start = s.getEnd(); } System.out.print(p.getText().substring(start, p.getSpan().getEnd())); if (!p.getType().equals(Parser.TOK_NODE)) { System.out.print(")"); } } }
private void show(Parse p) { int start; start = p.getSpan().getStart(); if (!p.getType().equals(Parser.TOK_NODE)) { System.out.print("("); System.out.print(p.getType()); if (parseMap.containsKey(p)) { System.out.print("#" + parseMap.get(p)); } //System.out.print(p.hashCode()+"-"+parseMap.containsKey(p)); System.out.print(" "); } Parse[] children = p.getChildren(); for (int pi = 0, pn = children.length; pi < pn;pi++) { Parse c = children[pi]; Span s = c.getSpan(); if (start < s.getStart()) { System.out.print(p.getText().substring(start, s.getStart())); } show(c); start = s.getEnd(); } System.out.print(p.getText().substring(start, p.getSpan().getEnd())); if (!p.getType().equals(Parser.TOK_NODE)) { System.out.print(")"); } } }
/**
 * Collects the text of noun-phrase nodes found beneath the given parse.
 *
 * <p>Each child of {@code p} of type {@code "NP"} for which
 * {@code allChildNodesArePOSTags} returns {@code true} contributes the substring of
 * {@code p.getText()} covered by its span. Any other child that is not a POS tag is
 * searched recursively.
 *
 * @param p the parse whose children are inspected
 * @return the collected noun-phrase texts, in traversal order; empty when none are found
 */
public ArrayList<String> getNounPhrases(Parse p) {
  final ArrayList<String> collected = new ArrayList<String>();
  for (final Parse child : p.getChildren()) {
    final boolean flatNounPhrase = child.getType().equals("NP") && allChildNodesArePOSTags(child);
    if (flatNounPhrase) {
      final Span range = child.getSpan();
      collected.add(p.getText().substring(range.getStart(), range.getEnd()));
      continue;
    }
    if (!child.isPosTag()) {
      collected.addAll(getNounPhrases(child));
    }
  }
  return collected;
}
/** * Advances the parse by assigning it POS tags and returns multiple tag sequences. * @param p The parse to be tagged. * @return Parses with different POS-tag sequence assignments. */ protected Parse[] advanceTags(final Parse p) { Parse[] children = p.getChildren(); String[] words = new String[children.length]; double[] probs = new double[words.length]; for (int i = 0,il = children.length; i < il; i++) { words[i] = children[i].getCoveredText(); } Sequence[] ts = tagger.topKSequences(words); Parse[] newParses = new Parse[ts.length]; for (int i = 0; i < ts.length; i++) { String[] tags = ts[i].getOutcomes().toArray(new String[words.length]); ts[i].getProbs(probs); newParses[i] = (Parse) p.clone(); //copies top level if (createDerivationString) newParses[i].getDerivation().append(i).append("."); for (int j = 0; j < words.length; j++) { Parse word = children[j]; //System.err.println("inserting tag "+tags[j]); double prob = probs[j]; newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j)); newParses[i].addProb(Math.log(prob)); } } return newParses; }
/** * Advances the parse by assigning it POS tags and returns multiple tag sequences. * @param p The parse to be tagged. * @return Parses with different POS-tag sequence assignments. */ protected Parse[] advanceTags(final Parse p) { Parse[] children = p.getChildren(); String[] words = new String[children.length]; double[] probs = new double[words.length]; for (int i = 0,il = children.length; i < il; i++) { words[i] = children[i].getCoveredText(); } Sequence[] ts = tagger.topKSequences(words); Parse[] newParses = new Parse[ts.length]; for (int i = 0; i < ts.length; i++) { String[] tags = ts[i].getOutcomes().toArray(new String[words.length]); ts[i].getProbs(probs); newParses[i] = (Parse) p.clone(); //copies top level if (createDerivationString) newParses[i].getDerivation().append(i).append("."); for (int j = 0; j < words.length; j++) { Parse word = children[j]; //System.err.println("inserting tag "+tags[j]); double prob = probs[j]; newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j)); newParses[i].addProb(Math.log(prob)); } } return newParses; }
/**
 * Inserts an {@code "NP"} node after each {@code "POS"} tag node where the structure matches.
 *
 * <p>For every tag node of type {@code "POS"} whose following tag node has, as its parent,
 * the grandparent of the {@code "POS"} node, a span is built that starts at the following
 * tag node and is extended over each subsequent tag node for as long as it shares its
 * parent with the tag node before it. A new {@code "NP"} node with that span, probability 1
 * and the following tag node as head is then inserted into {@code parse}.
 *
 * @param parse the parse to modify in place
 */
public static void fixPossesives(Parse parse) {
  final Parse[] tagNodes = parse.getTagNodes();
  final int tagCount = tagNodes.length;

  for (int posIdx = 0; posIdx < tagCount; posIdx++) {
    if (!tagNodes[posIdx].getType().equals("POS")) {
      continue;
    }
    final int headIdx = posIdx + 1;
    if (headIdx >= tagCount) {
      continue;
    }
    final Parse headTag = tagNodes[headIdx];
    if (headTag.getParent() != tagNodes[posIdx].getParent().getParent()) {
      continue;
    }

    final int spanStart = headTag.getSpan().getStart();
    int spanEnd = headTag.getSpan().getEnd();

    // Grow the span while consecutive tag nodes are siblings.
    int scan = headIdx + 1;
    while (scan < tagCount && tagNodes[scan].getParent() == tagNodes[scan - 1].getParent()) {
      spanEnd = tagNodes[scan].getSpan().getEnd();
      scan++;
    }

    final Span npSpan = new Span(spanStart, spanEnd);
    parse.insert(new Parse(parse.getText(), npSpan, "NP", 1, headTag));
  }
}
/**
 * Inserts an {@code "NP"} node after each {@code "POS"} tag node where the structure matches.
 *
 * <p>For every tag node of type {@code "POS"} whose following tag node has, as its parent,
 * the grandparent of the {@code "POS"} node, a span is built that starts at the following
 * tag node and is extended over each subsequent tag node for as long as it shares its
 * parent with the tag node before it. A new {@code "NP"} node with that span, probability 1
 * and the following tag node as head is then inserted into {@code parse}.
 *
 * @param parse the parse to modify in place
 */
public static void fixPossesives(Parse parse) {
  final Parse[] tagNodes = parse.getTagNodes();
  final int tagCount = tagNodes.length;

  for (int posIdx = 0; posIdx < tagCount; posIdx++) {
    if (!tagNodes[posIdx].getType().equals("POS")) {
      continue;
    }
    final int headIdx = posIdx + 1;
    if (headIdx >= tagCount) {
      continue;
    }
    final Parse headTag = tagNodes[headIdx];
    if (headTag.getParent() != tagNodes[posIdx].getParent().getParent()) {
      continue;
    }

    final int spanStart = headTag.getSpan().getStart();
    int spanEnd = headTag.getSpan().getEnd();

    // Grow the span while consecutive tag nodes are siblings.
    int scan = headIdx + 1;
    while (scan < tagCount && tagNodes[scan].getParent() == tagNodes[scan - 1].getParent()) {
      spanEnd = tagNodes[scan].getSpan().getEnd();
      scan++;
    }

    final Span npSpan = new Span(spanStart, spanEnd);
    parse.insert(new Parse(parse.getText(), npSpan, "NP", 1, headTag));
  }
}