/**
 * Builds one string per token by joining the token's LEMMA-view constituent and its POS-view
 * constituent with {@code "-"}.
 *
 * <p>Both pieces are rendered through plain string concatenation, i.e. via each constituent's
 * {@code toString()}.
 *
 * @param s the text annotation to read; its {@code ViewNames.LEMMA} and {@code ViewNames.POS}
 *          views are looked up
 * @return one {@code lemma-pos} entry per LEMMA constituent, in the order the constituents are
 *         returned by the LEMMA view
 */
public static List<String> getPosConjLemma(TextAnnotation s) {
    List<Constituent> lemma = s.getView(ViewNames.LEMMA).getConstituents();
    // Read the POS view here: reading LEMMA a second time paired every lemma with itself.
    List<Constituent> pos = s.getView(ViewNames.POS).getConstituents();
    List<String> output = new ArrayList<>(lemma.size());
    for (int i = 0; i < lemma.size(); i++) {
        output.add(lemma.get(i) + "-" + pos.get(i));
    }
    return output;
}
/**
 * Shorthand for the two-argument {@code addConstituent} overload with {@code false} passed as
 * its second argument.
 *
 * @param constituent the constituent to add
 */
public void addConstituent(Constituent constituent) {
    addConstituent(constituent, false);
}
/**
 * Gathers the constituents that cover any of the given tokens.
 *
 * @param tokenIds the token indices to look up
 * @return the distinct covering constituents, sorted with
 *         {@code TextAnnotationUtilities.constituentStartComparator}
 */
public List<Constituent> getConstituentsCoveringTokens(Collection<Integer> tokenIds) {
    // A set drops constituents that are reported for more than one requested token.
    Set<Constituent> unique = new HashSet<>();
    for (int tokenId : tokenIds) {
        unique.addAll(getConstituentsCoveringToken(tokenId));
    }

    List<Constituent> sorted = new ArrayList<>(unique.size());
    sorted.addAll(unique);
    Collections.sort(sorted, TextAnnotationUtilities.constituentStartComparator);
    return sorted;
}
/**
 * Creates a restricted view seeded with the constituents that cover the given tokens.
 *
 * <p>Every seed constituent is added to the new view and queued; the queue is then handed to
 * {@code addRelatedConstituents} together with the new view.
 *
 * @param tokens           the token indices whose covering constituents seed the view
 * @param scoreTransformer passed to {@code createRestrictedView}
 * @return the restricted view, or {@code null} if {@code createRestrictedView} returned
 *         {@code null}
 */
public View getViewCoveringTokens(Collection<Integer> tokens, ITransformer<View, Double> scoreTransformer) {
    View restricted = createRestrictedView(scoreTransformer);
    if (restricted != null) {
        Queue<Constituent> pending = new LinkedList<>();
        for (Constituent seed : getConstituentsCoveringTokens(tokens)) {
            restricted.addConstituent(seed);
            pending.add(seed);
        }
        addRelatedConstituents(restricted, pending);
    }
    return restricted;
}
/**
 * Creates a restricted view seeded with the constituents selected by a predicate.
 *
 * <p>The seeds come from {@code getFilteredConstituents(constituentPredicate)}. Each one is
 * added to the new view and queued, and the queue is then passed to
 * {@code addRelatedConstituents} along with the new view.
 *
 * @param constituentPredicate selects the seed constituents
 * @param scoreTransformer     passed to {@code createRestrictedView}
 * @return the restricted view, or {@code null} if {@code createRestrictedView} returned
 *         {@code null}
 */
public View getRestrictedView(Predicate<Constituent> constituentPredicate, ITransformer<View, Double> scoreTransformer) {
    final View result = createRestrictedView(scoreTransformer);
    if (result == null) {
        return null;
    }

    final Queue<Constituent> workQueue = new LinkedList<>();
    for (Constituent selected : getFilteredConstituents(constituentPredicate)) {
        result.addConstituent(selected);
        workQueue.add(selected);
    }

    addRelatedConstituents(result, workQueue);
    return result;
}
public List<Constituent> getwordskfrom(View TOKENS, int startspan, int endspan, int k) { // This assumes that span is only representing a token if (k == 0) { return TOKENS.getConstituentsCoveringSpan(startspan, endspan); } else if (k < 0) { int kprevindex = startspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (kprevindex < 0) { kprevindex = 0; } return TOKENS.getConstituentsCoveringSpan(kprevindex, startspan); } else { int knextindex = endspan + k; // Checking the token index specified by kprevindex is // valid (i.e. non-negative) if (knextindex > TOKENS.getEndSpan()) { knextindex = TOKENS.getEndSpan(); } return TOKENS.getConstituentsCoveringSpan(endspan, knextindex); } }
newVu = new TreeView(vu.viewName, vu.viewGenerator, newTA, vu.score); } else { newVu = new View(vu.viewName, vu.viewGenerator, newTA, vu.score); constituentsToCopy = vu.getConstituents(); else constituentsToCopy = vu.getConstituentsCoveringSpan(sourceStartTokenIndex, sourceEndTokenIndex + 1); newVu.addConstituent(newC); for (Relation r : vu.getRelations()) { newVu.addRelation(newR);
/**
 * Adds a view named {@code viewName} holding one constituent per sentence of {@code ta}.
 *
 * <p>For every constituent of the SENTENCE view, the surface form is passed to
 * {@code ClausieSplitter.split}; each returned string is stored as attribute
 * {@code "clauseIe:" + index} on a new constituent that has the same span as the sentence.
 *
 * @param ta the text annotation to extend; the presence of its SENTENCE view is only checked
 *           by an {@code assert}
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    assert ta.hasView(ViewNames.SENTENCE): "Sentences view didn't find . . . ";
    List<Constituent> sentences = ta.getView(ViewNames.SENTENCE).getConstituents();
    View vu = new View(viewName, "ClausIEAnnotator", ta, 1.0);
    assert sentences.size() == ta.getNumberOfSentences();

    for (Constituent sentence : sentences) {
        String[] propositions = ClausieSplitter.split(sentence.getSurfaceForm());
        Constituent annotated = new Constituent(
                "sent-" + sentence.getSentenceId(), viewName, ta,
                sentence.getStartSpan(), sentence.getEndSpan());
        // The attribute key carries the position of the string in the splitter output.
        for (int propId = 0; propId < propositions.length; propId++) {
            annotated.addAttribute("clauseIe:" + propId, propositions[propId]);
        }
        vu.addConstituent(annotated);
    }

    ta.addView(viewName, vu);
}
}
/**
 * Consolidate {@link TextAnnotation}s that have the same text but separate gold views. This is
 * required because of the nature of the Semeval annotations (one annotation per example).
 *
 * <p>Annotations are grouped by their text. Within each group the first annotation is kept,
 * and the constituents of the {@code viewName} view of every other annotation in the group are
 * added to the kept annotation's {@code viewName} view.
 *
 * @param tas The list of {@link TextAnnotation}s with the Semeval annotations
 * @return The consolidated list of {@link TextAnnotation}s, one per distinct text; the order
 *         follows the iteration order of a {@link HashMap} and is therefore unspecified
 */
private List<TextAnnotation> consolidate(List<TextAnnotation> tas) {
    // Key on the text itself, not on its hashCode(): two different texts may share a hash
    // code, and keying on the hash would merge their annotations into one.
    Map<String, List<TextAnnotation>> taMap = new HashMap<>();
    for (TextAnnotation ta : tas) {
        taMap.computeIfAbsent(ta.getText(), text -> new ArrayList<>()).add(ta);
    }

    List<TextAnnotation> consolidatedTAs = new ArrayList<>(taMap.size());
    for (List<TextAnnotation> annotations : taMap.values()) {
        TextAnnotation ta1 = annotations.get(0);
        View view1 = ta1.getView(viewName);
        for (int i = 1; i < annotations.size(); i++) {
            View viewI = annotations.get(i).getView(viewName);
            for (Constituent c : viewI.getConstituents()) {
                view1.addConstituent(c);
            }
        }
        consolidatedTAs.add(ta1);
    }
    return consolidatedTAs;
}
public static void save(String origfolder, String romanfolder, String outfolder) throws IOException { if((new File(outfolder)).exists()) { CoNLLNerReader cnl = new CoNLLNerReader(romanfolder); while (cnl.hasNext()) { TextAnnotation ta = cnl.next(); View ner = ta.getView(ViewNames.NER_CONLL); TextAnnotation taorig = new CoNLLNerReader(origfolder + "/" + ta.getId()).next(); // this should overwrite the (empty) NER_CONLL view. taorig.addView(ViewNames.NER_CONLL, ner); View nerorig = taorig.getView(ViewNames.NER_CONLL); View sents = taorig.getView(ViewNames.SENTENCE); for (Constituent sent : sents.getConstituents()) { String sentid = getSentId(sent); if(nerorig.getConstituentsCovering(sent).size() == 0) continue; // how to rewrite this sentence as a textannotation. SentToConll(sent, outfolder); } } }else{ System.out.println(outfolder + " does not exist. Not doing anything."); } }
View ner = new View(ViewNames.NER_CONLL, "SentenceController",ta,1.0); ta.addView(ViewNames.NER_CONLL, ner); for(Constituent sent : sentview.getConstituents()){ String id = SentenceController.getSentId(sent); if(!this.containsKey(id)) {
/**
 * Extracts part-of-speech features around a constituent.
 * The size of the Constituent should be 1.
 *
 * <p>For each offset from -1 to 2 (inclusive) relative to the constituent's start span, the
 * label of the first POS-view constituent covering that token is recorded. Offsets that fall
 * before token 0 or at/after the POS view's end span are skipped.
 *
 * @param c the constituent whose neighbourhood is inspected
 * @return pairs of (offset, POS label), in increasing offset order
 */
public static List<Pair<Integer, String>> getPOSFeatures (Constituent c) {
    List<Pair<Integer, String>> features = new ArrayList<>();
    View posView = c.getTextAnnotation().getView(ViewNames.POS);

    for (int offset = -1; offset <= 2; offset++) {
        int tokenId = c.getStartSpan() + offset;
        boolean insideView = tokenId >= 0 && tokenId < posView.getEndSpan();
        if (insideView) {
            String tag = posView.getConstituentsCoveringToken(tokenId).get(0).getLabel();
            features.add(new Pair<>(offset, tag));
        }
    }
    return features;
}
/**
 * Renders a view as text: a {@code View '<name>':} header line, then the output of
 * {@code printConstituent} for every constituent, then {@code OUTPUT_SEPARATOR}.
 *
 * @param view the view to render
 * @return the rendered text
 * @throws IOException declared by the method signature
 */
public static String printView(View view) throws IOException {
    StringBuilder text = new StringBuilder();
    text.append("View '").append(view.getViewName()).append("':\n");

    for (Constituent constituent : view.getConstituents()) {
        text.append(printConstituent(constituent));
    }

    return text.append(OUTPUT_SEPARATOR).toString();
}
/**
 * Removes every relation attached to the constituents of this view.
 *
 * <p>For each constituent, all of its incoming and then all of its outgoing relations are
 * passed to {@code removeRelation}; afterwards the constituent's own incoming and outgoing
 * relation collections are cleared.
 */
public void removeAllRelations() {
    for (Constituent mention : this.getConstituents()) {
        for (Relation incoming : mention.getIncomingRelations()) {
            this.removeRelation(incoming);
        }
        for (Relation outgoing : mention.getOutgoingRelations()) {
            this.removeRelation(outgoing);
        }
        mention.removeAllIncomingRelatons();
        mention.removeAllOutgoingRelaton();
    }
}
public View getViewCoveringSpan(int start, int end, ITransformer<View, Double> scoreTransformer) { View restriction = createRestrictedView(scoreTransformer); if (restriction == null) return null; Queue<Constituent> constituentsToConsider = new LinkedList<>(); for (Constituent c : getConstituentsCoveringSpan(start, end)) { restriction.addConstituent(c); constituentsToConsider.add(c); } addRelatedConstituents(restriction, constituentsToConsider); // logger.info(restriction); return restriction; }
/**
 * Filters the given entities through {@code removeOverlappingEntities}, puts the result into a
 * new view and adds that view to {@code ta}.
 *
 * @param ta             the text annotation that receives the new view
 * @param neConstituents the candidate entity constituents
 * @param viewName       the name under which the new view is created and registered
 */
private static void processEntities(TextAnnotation ta, List<Constituent> neConstituents, String viewName ) {
    List<Constituent> kept = removeOverlappingEntities(neConstituents);

    View entityView = new View(viewName, NAME, ta, 1.0);
    for (Constituent entity : kept) {
        entityView.addConstituent(entity);
    }

    ta.addView(viewName, entityView);
}
/**
 * Attaches feature attributes to {@code extent}.
 *
 * <p>Attributes written:
 * <ul>
 *   <li>{@code GAZ}: result of {@code FlatGazetteers.annotateConstituent(extent, false)}</li>
 *   <li>{@code BC}: Brown-cluster prefixes of {@code extent.toString()}</li>
 *   <li>{@code BCm1}: Brown-cluster prefixes of the token at {@code startSpan - 1}, or
 *       {@code ","} when the bounds check fails</li>
 *   <li>{@code BCp1}: Brown-cluster prefixes of the token at {@code startSpan + 1}, or
 *       {@code ","} when the bounds check fails</li>
 *   <li>{@code WORDNETTAG}, {@code WORDNETHYM}: results of
 *       {@code BIOFeatureExtractor.getWordNetTags} / {@code getWordNetHyms}</li>
 * </ul>
 *
 * @param extent        the constituent that receives the attributes
 * @param gazetteers    cast to {@code FlatGazetteers}; any other implementation fails the cast
 * @param brownClusters source of the Brown-cluster prefix strings
 * @param wordnet       passed to the {@code BIOFeatureExtractor} WordNet helpers
 */
public static void addExtentAttributes(Constituent extent, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet){
    View tokenView = extent.getTextAnnotation().getView(ViewNames.TOKENS);
    extent.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotateConstituent(extent, false));
    extent.addAttribute("BC", brownClusters.getPrefixesCombined(extent.toString()));

    // NOTE(review): the strict '>' means the token at tokenView.getStartSpan() is never used
    // as the previous token -- confirm whether '>=' was intended before changing it.
    if (extent.getStartSpan() - 1 > tokenView.getStartSpan()) {
        extent.addAttribute("BCm1", brownClusters.getPrefixesCombined(tokenView.getConstituentsCoveringToken(extent.getStartSpan() - 1).get(0).toString()));
    } else {
        extent.addAttribute("BCm1", ",");
    }

    if (extent.getStartSpan() + 1 < tokenView.getEndSpan()) {
        extent.addAttribute("BCp1", brownClusters.getPrefixesCombined(tokenView.getConstituentsCoveringToken(extent.getStartSpan() + 1).get(0).toString()));
    } else {
        // The placeholder belongs to the next-token feature. Writing it under "BCm1" would
        // clobber the previous-token value set above and leave "BCp1" missing.
        extent.addAttribute("BCp1", ",");
    }

    extent.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, extent));
    extent.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, extent));
}
/**
 * Expands a restricted view along incoming relations and then copies over the relations whose
 * endpoints both made it into the restriction.
 *
 * <p>Phase 1 drains the queue: for every dequeued constituent, the source of each incoming
 * relation is added to {@code restriction} (and queued in turn) if its start span is negative
 * and it is not already contained in {@code restriction}.
 *
 * <p>Phase 2 walks {@code this.relations} and adds every relation whose source and target are
 * both contained in {@code restriction}.
 *
 * @param restriction            the view being built
 * @param constituentsToConsider work queue of constituents to expand; empty on return
 */
private void addRelatedConstituents(View restriction, Queue<Constituent> constituentsToConsider) {
    while (!constituentsToConsider.isEmpty()) {
        Constituent current = constituentsToConsider.remove();
        for (Relation incoming : current.getIncomingRelations()) {
            Constituent source = incoming.getSource();
            // Only sources with a negative start span that are not yet present are pulled in.
            if (source.getStartSpan() >= 0 || restriction.containsConstituent(source)) {
                continue;
            }
            restriction.addConstituent(source);
            constituentsToConsider.add(source);
        }
    }

    for (Relation relation : this.relations) {
        boolean sourceKept = restriction.containsConstituent(relation.getSource());
        if (sourceKept && restriction.containsConstituent(relation.getTarget())) {
            restriction.addRelation(relation);
        }
    }
}
/**
 * Returns the constituents of this view that cover the span of {@code c}.
 *
 * <p>Only the start and end span of {@code c} are read; the lookup itself is delegated to
 * {@code getConstituentsCoveringSpan}.
 *
 * @param c A constituent, not necessarily of this text annotation.
 * @return the result of {@code getConstituentsCoveringSpan} for the span of {@code c}
 */
public List<Constituent> getConstituentsCovering(Constituent c) {
    int spanStart = c.getStartSpan();
    int spanEnd = c.getEndSpan();
    return getConstituentsCoveringSpan(spanStart, spanEnd);
}
/**
 * Given a mention, returns the canonical mentions of the coref chains that overlap with it.
 *
 * <p>The overlapping constituents are taken from the mention's own view
 * ({@code c.getView().getConstituentsCovering(c)}); for each of them the result of
 * {@code getCanonicalEntitySetViaRelation} is collected.
 *
 * @param c the input constituent
 * @return canonical constituents of the chains overlapping with the input constituent
 */
public HashSet<Constituent> getOverlappingChainsCanonicalMentions(Constituent c) {
    HashSet<Constituent> canonicalMentions = new HashSet<>();
    for (Constituent overlapping : c.getView().getConstituentsCovering(c)) {
        canonicalMentions.addAll(getCanonicalEntitySetViaRelation(overlapping));
    }
    return canonicalMentions;
}