/**
 * Builds a new constituent over the same text annotation and token span as this one, but
 * registered under a different view name and label.
 *
 * <p>This constituent's {@code labelsToScores} and constituent score are handed to the new
 * constituent's constructor, and every attribute of this constituent is copied over.
 *
 * @param newViewName view name given to the new constituent
 * @param Dlabel label given to the new constituent
 * @return the newly created constituent
 */
public Constituent cloneForNewViewWithDestinationLabel(String newViewName, String Dlabel) {
    Constituent copy = new Constituent(
            this.labelsToScores,
            Dlabel,
            this.getConstituentScore(),
            newViewName,
            this.getTextAnnotation(),
            this.getStartSpan(),
            this.getEndSpan());

    // Attributes are not part of the constructor call, so they are transferred one by one.
    for (String attributeKey : this.getAttributeKeys()) {
        String attributeValue = this.getAttribute(attributeKey);
        copy.addAttribute(attributeKey, attributeValue);
    }
    return copy;
}
/**
 * Maps a constituent to its surface form.
 *
 * @param input the constituent to read
 * @return the value of {@code input.getSurfaceForm()}
 */
public String transform(Constituent input) {
    final String surfaceForm = input.getSurfaceForm();
    return surfaceForm;
}
};
/**
 * Creates the head constituent of a mention from the head span stored in its attributes.
 *
 * <p>The head span is read from the {@code "EntityHeadStartSpan"} and
 * {@code "EntityHeadEndSpan"} attributes of {@code c}; the result keeps the label and text
 * annotation of {@code c} and uses {@code "HEAD"} as its view name.
 *
 * @param c the mention carrying the head-span attributes
 * @return a new constituent covering the head span
 * @throws NumberFormatException if either attribute value cannot be parsed as an integer
 */
public static Constituent getPredictedMentionHead(Constituent c) {
    final int headStart = Integer.parseInt(c.getAttribute("EntityHeadStartSpan"));
    final int headEnd = Integer.parseInt(c.getAttribute("EntityHeadEndSpan"));
    return new Constituent(c.getLabel(), "HEAD", c.getTextAnnotation(), headStart, headEnd);
}
/**
 * Tests whether {@code input} ends at or before the start of the captured constituent
 * {@code c}.
 *
 * @param input the constituent to test
 * @return {@code true} when {@code input.getEndSpan() <= c.getStartSpan()}
 */
@Override
public Boolean transform(Constituent input) {
    final int inputEnd = input.getEndSpan();
    final int referenceStart = c.getStartSpan();
    return inputEnd <= referenceStart;
}
};
/**
 * Returns the sense stored on a predicate constituent.
 *
 * @param predicate the predicate constituent
 * @return the value of the {@code SenseIdentifer} attribute, or the empty string when the
 *     predicate has no such attribute
 */
public String getPredicateSense(Constituent predicate) {
    return predicate.hasAttribute(SenseIdentifer)
            ? predicate.getAttribute(SenseIdentifer)
            : "";
}
/**
 * Splits a constituent into one single-token constituent per token position it covers.
 *
 * <p>Each produced constituent has an empty label and an empty view name and spans
 * {@code [i, i + 1)} for every {@code i} from the start span (inclusive) to the end span
 * (exclusive) of {@code input}.
 *
 * @param input the constituent to split
 * @return the single-token constituents in token order; empty when the span is empty
 */
@Override
public List<Constituent> transform(Constituent input) {
    final TextAnnotation text = input.getTextAnnotation();
    final int end = input.getEndSpan();
    final List<Constituent> tokens = new ArrayList<>();

    int token = input.getStartSpan();
    while (token < end) {
        tokens.add(new Constituent("", "", text, token, token + 1));
        token++;
    }
    return tokens;
}
/**
 * Adds the view named {@code viewName} to the text annotation.
 *
 * <p>For every constituent of the SENTENCE view, the sentence's surface form is passed to
 * {@code ClausieSplitter.split}. A constituent labeled {@code "sent-" + sentenceId} is created
 * over the sentence's span, and each returned string is stored on it as the attribute
 * {@code "clauseIe:" + index}, with the index starting at 0.
 *
 * @param ta the text annotation to extend; the assertion expects it to have a SENTENCE view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    assert ta.hasView(ViewNames.SENTENCE): "Sentences view didn't find . . . ";
    List<Constituent> sentences = ta.getView(ViewNames.SENTENCE).getConstituents();
    View vu = new View(viewName, "ClausIEAnnotator", ta, 1.0);
    assert sentences.size() == ta.getNumberOfSentences();

    for (Constituent sentence : sentences) {
        String[] propositions = ClausieSplitter.split(sentence.getSurfaceForm());
        Constituent annotated = new Constituent(
                "sent-" + sentence.getSentenceId(),
                viewName,
                ta,
                sentence.getStartSpan(),
                sentence.getEndSpan());

        // The attribute key carries the position of the proposition in the splitter output.
        for (int propId = 0; propId < propositions.length; propId++) {
            annotated.addAttribute("clauseIe:" + propId, propositions[propId]);
        }
        vu.addConstituent(annotated);
    }
    ta.addView(viewName, vu);
}
}
/**
 * Attaches gazetteer and Brown-cluster attributes to a mention head.
 *
 * <p>For every token index {@code i} covered by {@code head}, the first TOKENS-view constituent
 * covering {@code i} is used to add:
 * <ul>
 *   <li>{@code "GAZ" + i}: the result of {@code annotateConstituent(token, false)};</li>
 *   <li>{@code "BC" + i}: the result of {@code getPrefixesCombined(token.toString())}.</li>
 * </ul>
 * Finally the attribute {@code "GAZ"} is set to the result of {@code annotatePhrase(head)}.
 *
 * @param head the head constituent that receives the attributes
 * @param gazetteers gazetteer resource; it is cast to {@code FlatGazetteers}
 * @param brownClusters Brown-cluster resource
 * @param wordnet not used by this method
 */
public static void addHeadAttributes(Constituent head, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    View tokenView = head.getTextAnnotation().getView(ViewNames.TOKENS);
    FlatGazetteers flatGazetteers = (FlatGazetteers) gazetteers;

    for (int i = head.getStartSpan(); i < head.getEndSpan(); i++) {
        Constituent token = tokenView.getConstituentsCoveringToken(i).get(0);
        head.addAttribute("GAZ" + i, flatGazetteers.annotateConstituent(token, false));
        head.addAttribute("BC" + i, brownClusters.getPrefixesCombined(token.toString()));
    }
    head.addAttribute("GAZ", flatGazetteers.annotatePhrase(head));
}
/**
 * Computes three boolean surface-pattern features for a text.
 *
 * <p>The returned array contains, in order:
 * <ol>
 *   <li>whether the first word contains "what" (case-insensitively) and the POS label of the
 *       second word contains {@code "NN"};</li>
 *   <li>whether the first word contains "where" or "when" (case-insensitively) and the first
 *       character of the second word is capitalized according to {@code isCapitalized};</li>
 *   <li>whether the raw text contains {@code " body"} or {@code " part "}.</li>
 * </ol>
 *
 * <p>Texts with fewer than two POS constituents, or with an empty second surface form, no
 * longer raise an index exception: features that need the missing word are {@code false}.
 *
 * @param s the text annotation; it must have a POS view
 * @return a three-element array with the features described above
 */
public static boolean[] getStringPatternsBoolean(TextAnnotation s) {
    List<Constituent> list = s.getView(ViewNames.POS).getConstituents();

    // Lower-case the first word once instead of once per keyword test.
    String firstWord = list.isEmpty() ? "" : list.get(0).getSurfaceForm().toLowerCase();
    boolean what = firstWord.contains("what");
    boolean where = firstWord.contains("where");
    boolean when = firstWord.contains("when");

    boolean hasSecondWord = list.size() > 1;
    boolean secondIsNoun = hasSecondWord && list.get(1).getLabel().contains("NN");

    boolean whereOrWhenAndCapitalized = false;
    if (hasSecondWord && (where || when)) {
        String secondWord = list.get(1).getSurfaceForm();
        // substring(0, 1) needs at least one character.
        whereOrWhenAndCapitalized =
                !secondWord.isEmpty() && isCapitalized(secondWord.substring(0, 1));
    }

    boolean containsBodyOrPart = s.text.contains(" body") || s.text.contains(" part ");

    return new boolean[] {secondIsNoun && what, whereOrWhenAndCapitalized, containsBodyOrPart};
}
/**
 * Produces a single-token constituent at the first token position of {@code input}.
 *
 * @param input the constituent whose start span is used
 * @return an immutable one-element list holding a constituent with empty label and view name
 *     that spans {@code [start, start + 1)}
 */
@Override
public List<Constituent> transform(Constituent input) {
    final int first = input.getStartSpan();
    final Constituent firstToken =
            new Constituent("", "", input.getTextAnnotation(), first, first + 1);
    return Collections.singletonList(firstToken);
}
/**
 * Converts a coreference mention into a constituent over the tokens it overlaps.
 *
 * <p>The mention's character interval is obtained from
 * {@code getCharIntervalFromCorefMention}; the constituent spans from the smallest start span
 * to the largest end span among the TOKENS-view constituents overlapping that interval. Its
 * label is the chain id, and the mention's animacy, number, gender and mention type are stored
 * as the attributes {@code "animacy"}, {@code "number"}, {@code "gender"} and
 * {@code "mentionType"}.
 *
 * @param document the annotated document the mention belongs to
 * @param chain the coreference chain containing the mention
 * @param m the mention to convert
 * @param ta the text annotation the constituent is created on
 * @return the constituent for the mention
 * @throws java.util.NoSuchElementException if no token overlaps the mention's interval
 */
private static Constituent createConstituentGivenMention(Annotation document, CorefChain chain, CorefChain.CorefMention m, TextAnnotation ta) {
    Pair<Integer, Integer> charSpan =
            getCharIntervalFromCorefMention(document, m.sentNum, m.startIndex, m.endIndex);
    List<Constituent> tokens = ta.getView(ViewNames.TOKENS)
            .getConstituentsOverlappingCharSpan(charSpan.getFirst(), charSpan.getSecond());

    int firstToken = tokens.stream().mapToInt(Constituent::getStartSpan).min().getAsInt();
    int lastTokenEnd = tokens.stream().mapToInt(Constituent::getEndSpan).max().getAsInt();

    Constituent mention =
            new Constituent(String.valueOf(chain.getChainID()), viewName, ta, firstToken, lastTokenEnd);
    mention.addAttribute("animacy", m.animacy.toString());
    mention.addAttribute("number", m.number.toString());
    mention.addAttribute("gender", m.gender.toString());
    mention.addAttribute("mentionType", m.mentionType.toString());
    return mention;
}
}
/**
 * Extracts label n-gram features from the constituents of the configured view that
 * {@code SpanLabelsHelper.getConstituentsInBetween} returns for the span of {@code c}.
 *
 * <p>The constituents are sorted with
 * {@code TextAnnotationUtilities.constituentStartComparator} before being passed, together
 * with {@code ngramLength}, to {@code FeatureNGramUtility.getLabelNgramsUnordered}.
 *
 * @param c the constituent whose span selects the constituents
 * @return the features produced by {@code getLabelNgramsUnordered}
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    SpanLabelView chunks = (SpanLabelView) c.getTextAnnotation().getView(viewName);

    final int spanStart = c.getStartSpan();
    final int spanEnd = c.getEndSpan();
    List<Constituent> between = SpanLabelsHelper.getConstituentsInBetween(chunks, spanStart, spanEnd);

    Collections.sort(between, TextAnnotationUtilities.constituentStartComparator);
    return FeatureNGramUtility.getLabelNgramsUnordered(between, ngramLength);
}
/**
 * Creates the single-token predicate constituent for this record.
 *
 * <p>The token position is the first component of the span stored on the tree leaf at index
 * {@code predicateTerminal} of {@code yield}. The constituent is labeled {@code "Predicate"}
 * and receives the lemma and sense as the {@code PropbankReader.LemmaIdentifier} and
 * {@code PropbankReader.SenseIdentifier} attributes.
 *
 * @param ta the text annotation the predicate belongs to
 * @param viewName the view name given to the predicate constituent
 * @param yield the tree leaves, indexed by terminal position
 * @return the predicate constituent spanning {@code [start, start + 1)}
 */
@Override
public Constituent createPredicate(TextAnnotation ta, String viewName, List<Tree<Pair<String, IntPair>>> yield) {
    Pair<String, IntPair> terminalLabel = yield.get(predicateTerminal).getLabel();
    int tokenIndex = terminalLabel.getSecond().getFirst();

    Constituent result = new Constituent("Predicate", viewName, ta, tokenIndex, tokenIndex + 1);
    result.addAttribute(PropbankReader.LemmaIdentifier, lemma);
    result.addAttribute(PropbankReader.SenseIdentifier, sense);
    return result;
}
/**
 * Produces a single-token constituent at the last token position of {@code input}.
 *
 * @param input the constituent whose end span is used
 * @return an immutable one-element list holding a constituent with empty label and view name
 *     that spans {@code [end - 1, end)}
 */
@Override
public List<Constituent> transform(Constituent input) {
    final int end = input.getEndSpan();
    final Constituent lastToken =
            new Constituent("", "", input.getTextAnnotation(), end - 1, end);
    return Collections.singletonList(lastToken);
}
/**
 * Produces the single-token constituent that immediately follows {@code input}.
 *
 * <p>The candidate position is the end span of {@code input}. When that position is smaller
 * than the size of the sentence containing {@code input}, a constituent with empty label and
 * view name spanning {@code [end, end + 1)} is created, passed through
 * {@code addPointerToSource}, and returned in an immutable one-element list. Otherwise a new
 * empty list is returned.
 *
 * @param input the constituent whose successor token is requested
 * @return a one-element list with the following token, or an empty list
 */
@Override
public List<Constituent> transform(Constituent input) {
    final int nextToken = input.getEndSpan();
    final TextAnnotation text = input.getTextAnnotation();
    final Sentence sentence = text.getSentence(input.getSentenceId());

    // NOTE(review): the end span is compared against sentence.size(); confirm both use the
    // same index space (document-level token index vs. sentence length).
    if (nextToken >= sentence.size()) {
        return new ArrayList<>();
    }

    Constituent following = new Constituent("", "", text, nextToken, nextToken + 1);
    return Collections.singletonList(addPointerToSource(input, following));
}
/**
 * Writes one constituent into a JSON object.
 *
 * <p>Always written: {@code "label"}, {@code "start"}, {@code "end"} and the attributes (via
 * {@code writeAttributes}). The {@code "score"} entry is written only when the constituent
 * score is non-zero, and the labels-to-scores map only when it is non-null.
 *
 * @param c the constituent to serialize
 * @param cJ the JSON object that receives the entries
 */
private static void writeConstituent(Constituent c, JsonObject cJ) {
    writeString("label", c.getLabel(), cJ);

    // A score of exactly zero is left out of the output.
    if (c.getConstituentScore() != 0) {
        writeDouble("score", c.getConstituentScore(), cJ);
    }

    writeInt("start", c.getStartSpan(), cJ);
    writeInt("end", c.getEndSpan(), cJ);
    writeAttributes(c, cJ);

    Map<String, Double> scoresByLabel = c.getLabelsToScores();
    if (scoresByLabel != null) {
        writeLabelsToScores(scoresByLabel, cJ);
    }
}
/**
 * Decides whether a candidate constituent is kept.
 *
 * <p>Candidates whose label is not {@code CLOTHES} are always kept. A {@code CLOTHES}
 * candidate is kept when its text annotation has no POS view, or when
 * {@code POSUtils.isPOSNoun} accepts the POS of its last token.
 *
 * @param input pair whose first element is the candidate constituent
 * @return {@code true} when the candidate is kept
 */
@Override
public Boolean transform(Pair<Constituent, SpanLabelView> input) {
    Constituent candidate = input.getFirst();
    if (!candidate.getLabel().equals(CLOTHES)) {
        return true;
    }

    TextAnnotation ta = candidate.getTextAnnotation();
    if (!ta.hasView(ViewNames.POS)) {
        // Without POS information the noun test cannot be applied, so the candidate passes.
        return true;
    }

    int lastToken = candidate.getEndSpan() - 1;
    return POSUtils.isPOSNoun(WordHelpers.getPOS(ta, lastToken));
}
});
/**
 * Looks up the lemma for a constituent.
 *
 * <p>The {@code PredicateArgumentView.LemmaIdentifier} attribute is preferred. If it is absent
 * and the text annotation has a LEMMA view, the lemma of the constituent's last token is
 * returned instead.
 *
 * @param c the constituent to inspect
 * @return the lemma, or {@code null} when neither source is available
 */
private String getLemma(Constituent c) {
    if (c.hasAttribute(PredicateArgumentView.LemmaIdentifier)) {
        return c.getAttribute(PredicateArgumentView.LemmaIdentifier);
    }
    if (!c.getTextAnnotation().hasView(ViewNames.LEMMA)) {
        return null;
    }
    int lastToken = c.getEndSpan() - 1;
    return WordHelpers.getLemma(c.getTextAnnotation(), lastToken);
}
/**
 * Builds the key used to decide whether two constituents count as the same one, so that an
 * existing constituent can be reused. {@code equals} is not used for this because it also
 * checks incoming and outgoing relations, which are changing as we construct the view.
 *
 * <p>The key has the form {@code label:start:end}. The end of the span is part of the key, so
 * constituents that share a label and a start but differ in length get different keys.
 * Attributes are not part of the key.
 *
 * @param c the constituent.
 * @return the disambiguation key for constituents.
 */
private String constituentDisambiguationKey(Constituent c) {
    return c.getLabel() + ":" + c.getStartSpan() + ":" + c.getEndSpan();
}
/**
 * Builds a compact notation for a constituent.
 *
 * <p>The notation starts with the part of the label before the first {@code '-'}. When
 * {@code NERlexicalise} is set, {@code "-"} and the result of {@code getNamedEntityTag(c)} are
 * appended. When {@code POSlexicalise} is set, {@code "-"} is appended, followed by a space and
 * the POS tag for every token in the constituent's span.
 *
 * @param c the constituent, may be {@code null}
 * @return the notation, or {@code "NULL"} when {@code c} is {@code null}
 */
public String getStrippedNotation(Constituent c) {
    if (c == null) {
        return "NULL";
    }

    StringBuilder notation = new StringBuilder(c.getLabel().split("-")[0]);

    if (NERlexicalise) {
        notation.append("-").append(getNamedEntityTag(c));
    }

    if (POSlexicalise) {
        notation.append("-");
        IntPair span = c.getSpan();
        TextAnnotation ta = c.getTextAnnotation();
        for (int tokenId = span.getFirst(); tokenId < span.getSecond(); tokenId++) {
            notation.append(" ").append(POSUtils.getPOS(ta, tokenId));
        }
    }
    return notation.toString();
}