Refine search
/**
 * Prints one line per token in the form {@code word<TAB>goldAnswer<TAB>answer},
 * then a single empty line after the last token.
 *
 * @param doc the tokens to print
 * @param out the writer that receives the output
 */
@Override
public void printAnswers(List<CoreLabel> doc, PrintWriter out) {
  for (CoreLabel token : doc) {
    String predicted = token.get(CoreAnnotations.AnswerAnnotation.class);
    String gold = token.get(CoreAnnotations.GoldAnswerAnnotation.class);
    String row = token.word() + "\t" + gold + "\t" + predicted;
    out.println(row);
  }
  // Empty line marks the end of the token list.
  out.println();
}
/**
 * Collects the lower-cased words of all tokens in
 * {@code [m.startIndex, m.endIndex)} of the mention's sentence whose
 * part-of-speech tag is exactly one of NN, NNS, NNP or NNPS.
 *
 * @param m the mention whose span is scanned
 * @return the matching words, lower-cased, in sentence order
 */
private static List<String> getContentWords(Mention m) {
  List<String> nouns = new ArrayList<>();
  for (int idx = m.startIndex; idx < m.endIndex; idx++) {
    CoreLabel token = m.sentenceWords.get(idx);
    String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    switch (tag) {
      case "NN":
      case "NNS":
      case "NNP":
      case "NNPS":
        nouns.add(token.word().toLowerCase());
        break;
      default:
        // not a noun tag: skip this token
        break;
    }
  }
  return nouns;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Computes a location for {@code token} as the
 * {@code TokenBeginAnnotation} of the sentence it belongs to plus the
 * token's {@code IndexAnnotation}, minus one.
 *
 * @param token a token carrying a sentence index and an index annotation
 * @return sentence token-begin + token index - 1
 */
public int tokenToLocation(CoreLabel token) {
  int sentenceIndex = token.get(CoreAnnotations.SentenceIndexAnnotation.class);
  CoreMap sentence = doc.get(CoreAnnotations.SentencesAnnotation.class).get(sentenceIndex);
  int sentenceStart = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
  int indexInSentence = token.get(CoreAnnotations.IndexAnnotation.class);
  return sentenceStart + indexInSentence - 1;
}
/**
 * Walks every token of every sentence in {@code sents} and decides whether to
 * sample it. A token is chosen when, in this order:
 * <ol>
 *   <li>its {@code answerClass} value equals {@code answerLabel}; or</li>
 *   <li>its {@code answerClass} value is not {@code "O"} or its lower-cased word is in
 *       {@code negativeWords}, and {@code getRandomBoolean(r, perSelectNeg)} is true; or</li>
 *   <li>{@code getRandomBoolean(r, perSelectRand)} is true (this also increments
 *       {@code numrand}).</li>
 * </ol>
 * For each chosen token the pair (sentence key, token index) is appended to
 * {@code chosen} and the datum from {@code getDatum} is added to {@code dataset}.
 *
 * NOTE(review): {@code rneg} is never read here; the negative-sampling branch
 * draws from {@code r}. Confirm whether that is intended.
 *
 * @param sents sentences keyed by id
 * @param r random source used for both random decisions
 * @param rneg unused in this method
 * @param perSelectNeg value handed to {@code getRandomBoolean} for the negative branch
 * @param perSelectRand value handed to {@code getRandomBoolean} for the random branch
 * @param numrand running count of randomly chosen tokens
 * @param chosen output list receiving (sentence key, token index) pairs
 * @param dataset output dataset receiving one datum per chosen token
 * @return {@code numrand} plus the number of tokens chosen by the random branch
 */
private int sample(Map<String, DataInstance> sents, Random r, Random rneg, double perSelectNeg, double perSelectRand, int numrand, List<Pair<String, Integer>> chosen, RVFDataset<String, String> dataset) {
  for (Entry<String, DataInstance> en : sents.entrySet()) {
    CoreLabel[] sent = en.getValue().getTokens().toArray(new CoreLabel[0]);

    for (int i = 0; i < sent.length; i++) {
      CoreLabel l = sent[i];

      // The branch order matters: each getRandomBoolean call is only reached
      // when the preceding conditions failed.
      boolean chooseThis;
      if (l.get(answerClass).equals(answerLabel)) {
        chooseThis = true;
      } else if ((!l.get(answerClass).equals("O") || negativeWords.contains(l.word().toLowerCase()))
          && getRandomBoolean(r, perSelectNeg)) {
        chooseThis = true;
      } else if (getRandomBoolean(r, perSelectRand)) {
        numrand++;
        chooseThis = true;
      } else {
        chooseThis = false;
      }

      if (chooseThis) {
        // Parameterized Pair instead of the raw type, so no unchecked conversion.
        chosen.add(new Pair<>(en.getKey(), i));
        RVFDatum<String, String> d = getDatum(sent, i);
        dataset.add(d, en.getKey(), Integer.toString(i));
      }
    }
  }
  return numrand;
}
protected static void extractPremarkedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) { List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class); SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class); int beginIndex = -1; for(CoreLabel w : sent) { MultiTokenTag t = w.get(CoreAnnotations.MentionTokenAnnotation.class); if (t != null) { beginIndex = w.get(CoreAnnotations.IndexAnnotation.class) - 1; int endIndex = w.get(CoreAnnotations.IndexAnnotation.class); if (beginIndex >= 0) { IntPair mSpan = new IntPair(beginIndex, endIndex); int dummyMentionId = -1; Mention m = new Mention(dummyMentionId, beginIndex, endIndex, dependency, new ArrayList<>(sent.subList(beginIndex, endIndex))); mentions.add(m); mentionSpanSet.add(mSpan); beginIndex = -1; } else { SieveCoreferenceSystem.logger.warning("Start of marked mention not found in sentence: " + t + " at tokenIndex=" + (w.get(CoreAnnotations.IndexAnnotation.class)-1)+ " for " + s.get(CoreAnnotations.TextAnnotation.class));
/**
 * Gathers the lower-cased words of all tokens in
 * {@code [m.startIndex, m.endIndex)} of the mention's sentence whose
 * part-of-speech tag is contained in {@code PROPERS}.
 *
 * @param m the mention whose span is scanned
 * @return the set of matching lower-cased words
 */
private static Set<String> getPropers(Mention m) {
  Set<String> result = new HashSet<>();
  for (int idx = m.startIndex; idx < m.endIndex; idx++) {
    CoreLabel token = m.sentenceWords.get(idx);
    String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    // Lower-cased for every token, before the tag test, as in the original.
    String lowered = token.word().toLowerCase();
    if (!PROPERS.contains(tag)) {
      continue;
    }
    result.add(lowered);
  }
  return result;
}
private static boolean findSpeaker(Document doc, int utterNum, int sentNum, List<CoreMap> sentences, int startIndex, int endIndex, Dictionaries dict) { List<CoreLabel> sent = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class); for(int i = startIndex ; i < endIndex ; i++) { CoreLabel cl = sent.get(i); if(cl.get(CoreAnnotations.UtteranceAnnotation.class)!=0) continue; String lemma = cl.lemma(); String word = cl.word(); if(dict.reportVerb.contains(lemma) && cl.tag().startsWith("V")) { // find subject SemanticGraph dependency = sentences.get(sentNum).get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class); if (dependency == null) { dependency = sentences.get(sentNum).get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class); } IndexedWord w = dependency.getNodeByWordPattern(word); if (w != null) { if(findSubject(doc, dependency, w, sentNum, utterNum)) return true; for(IndexedWord p : dependency.getPathToRoot(w)) { if(!p.tag().startsWith("V") && !p.tag().startsWith("MD")) break; if(findSubject(doc, dependency, p, sentNum, utterNum)) return true; // handling something like "was talking", "can tell" } } else { Redwood.log("debug-preprocessor", "Cannot find node in dependency for word " + word); } } } return false; }
/**
 * Reports whether any token in any sentence of {@code annotation} carries a
 * non-null {@code SpeakerAnnotation}.
 *
 * @param annotation the annotation whose sentences and tokens are inspected
 * @return true if at least one token has a speaker annotation, false otherwise
 */
private static boolean hasSpeakerAnnotations(Annotation annotation) {
  for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    if (sentenceHasSpeaker(sent)) {
      return true;
    }
  }
  return false;
}

/** Returns true if some token of {@code sentence} has a non-null speaker annotation. */
private static boolean sentenceHasSpeaker(CoreMap sentence) {
  for (CoreLabel tok : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
    String speaker = tok.get(CoreAnnotations.SpeakerAnnotation.class);
    if (speaker != null) {
      return true;
    }
  }
  return false;
}
/**
 * Outputs a partial CoNLL-U file with token information (form, lemma, POS)
 * but without any dependency information.
 *
 * <p>Each token becomes one tab-separated row with ten columns: token index,
 * word, lemma, coarse tag, part-of-speech tag, feature string, three
 * {@code "_"} placeholders, and the misc field. Missing lemma, tags and misc
 * values are written as {@code "_"}. The rows are followed by one empty line.
 *
 * <p>Every line is terminated with a plain LF. The CoNLL-U format allows only
 * LF as a line break, so the platform-dependent {@code %n} is not used.
 *
 * @param sentence the sentence whose tokens are printed
 * @return the CoNLL-U text for the sentence, ending with an empty line
 */
public String printPOSAnnotations(CoreMap sentence) {
  StringBuilder sb = new StringBuilder();

  for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
    String upos = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
    String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
    String pos = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
    String featuresString = CoNLLUUtils.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
    String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");

    // "\n" rather than "%n": keeps row endings identical to the blank-line
    // terminator below on every platform.
    sb.append(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
        token.index(), token.word(), lemma, upos, pos, featuresString, "_", "_", "_", misc));
  }
  sb.append("\n");

  return sb.toString();
}
PrintStream os = null; for (CoreMap sentence : sentences) { String myDocid = sentence.get(CoreAnnotations.DocIDAnnotation.class); if(docid == null || ! myDocid.equals(docid)){ if(os != null){ String w = word.word().replaceAll("[ \t\n]+", "_"); String t = word.get(CoreAnnotations.PartOfSpeechAnnotation.class); String l = word.get(CoreAnnotations.AnswerAnnotation.class); String nl = l; if(! alreadyBIO && ! l.equals("O")){
/** Write a standard CoNLL format output file. * * @param doc The document: A List of CoreLabel * @param out Where to send the answers to */ @Override @SuppressWarnings({"StringEquality", "StringContatenationInLoop"}) public void printAnswers(List<CoreLabel> doc, PrintWriter out) { // boolean tagsMerged = flags.mergeTags; // boolean useHead = flags.splitOnHead; if ( ! "iob1".equalsIgnoreCase(flags.entitySubclassification)) { deEndify(doc); } for (CoreLabel fl : doc) { String word = fl.word(); if (word == BOUNDARY) { // Using == is okay, because it is set to constant out.println(); } else { String gold = fl.getString(CoreAnnotations.GoldAnswerAnnotation.class); String guess = fl.get(CoreAnnotations.AnswerAnnotation.class); // log.info(word + "\t" + gold + "\t" + guess)); String pos = fl.getString(CoreAnnotations.PartOfSpeechAnnotation.class); String chunk = fl.getString(CoreAnnotations.ChunkAnnotation.class); out.println(fl.word() + '\t' + pos + '\t' + chunk + '\t' + gold + '\t' + guess); } } }
private Gender getGender(Dictionaries dict, List<String> mStr) { int len = mStr.size(); char firstLetter = headWord.get(CoreAnnotations.TextAnnotation.class).charAt(0); if(len > 1 && Character.isUpperCase(firstLetter) && nerString.startsWith("PER")) { int firstNameIdx = len-2; String secondToLast = mStr.get(firstNameIdx); if(firstNameIdx > 1 && (secondToLast.length()==1 || (secondToLast.length()==2 && secondToLast.endsWith(".")))) { firstNameIdx--; } for(int i = 0 ; i <= firstNameIdx ; i++){ if(dict.genderNumber.containsKey(mStr.subList(i, len))) return dict.genderNumber.get(mStr.subList(i, len)); } // find converted string with ! (e.g., "dr. martin luther king jr. boulevard" -> "dr. !") List<String> convertedStr = new ArrayList<>(2); convertedStr.add(mStr.get(firstNameIdx)); convertedStr.add("!"); if (dict.genderNumber.containsKey(convertedStr)) return dict.genderNumber.get(convertedStr); if (dict.genderNumber.containsKey(mStr.subList(firstNameIdx, firstNameIdx+1))) return dict.genderNumber.get(mStr.subList(firstNameIdx, firstNameIdx+1)); } if (mStr.size() > 0 && dict.genderNumber.containsKey(mStr.subList(len-1, len))) return dict.genderNumber.get(mStr.subList(len-1, len)); return null; }
/**
 * Starting at {@code start}, advances over consecutive tokens whose
 * {@code NamedEntityTagAnnotation} equals that of the token at {@code start}.
 *
 * @param tokens the token list to scan
 * @param start index of the first token of the run
 * @return the index just past the last token of the run (possibly
 *         {@code tokens.size()})
 */
private static int findEndOfNERAnnotation(List<CoreLabel> tokens, int start) {
  String type = tokens.get(start).get(CoreAnnotations.NamedEntityTagAnnotation.class);
  int end = start;
  while (end < tokens.size()) {
    String current = tokens.get(end).get(CoreAnnotations.NamedEntityTagAnnotation.class);
    if (!type.equals(current)) {
      break;
    }
    end++;
  }
  return end;
}
/**
 * Returns the {@code TextAnnotation} of every token whose text is non-empty
 * and either has at least four characters or starts with an upper-case
 * character.
 *
 * @param tokens the tokens to filter
 * @return the retained token texts, in input order
 */
private static List<String> getMainTokenStrs(List<CoreLabel> tokens) {
  List<String> kept = new ArrayList<>(tokens.size());
  for (CoreLabel token : tokens) {
    String text = token.get(CoreAnnotations.TextAnnotation.class);
    if (text.isEmpty()) {
      continue;
    }
    boolean longEnough = text.length() >= 4;
    if (longEnough || Character.isUpperCase(text.charAt(0))) {
      kept.add(text);
    }
  }
  return kept;
}
/**
 * Builds the space-separated text of the run of tokens that ends at
 * {@code headIndex} and extends leftwards, no further than
 * {@code startIndex}, for as long as each token's part-of-speech tag starts
 * with "NNP".
 *
 * @return the joined token texts in sentence order, or the empty string when
 *         the token at {@code headIndex} is not tagged NNP*
 */
public String longestNNPEndsWithHead() {
  StringBuilder phrase = new StringBuilder();
  for (int idx = headIndex; idx >= startIndex; idx--) {
    String tag = sentenceWords.get(idx).get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (!tag.startsWith("NNP")) {
      break;
    }
    // Walking right-to-left, so each new token is prepended.
    if (phrase.length() > 0) {
      phrase.insert(0, " ");
    }
    phrase.insert(0, sentenceWords.get(idx).get(CoreAnnotations.TextAnnotation.class));
  }
  return phrase.toString();
}