/**
 * Sets the lemma by forwarding the given value, unchanged, to {@code label}.
 *
 * @param lemma the lemma to hand to {@code label.setLemma}
 */
@Override
public void setLemma(String lemma) {
  label.setLemma(lemma);
}
// Fill cl from the local word/lemma/tag (word doubles as the value), then append it to nextList.
// setWord is called before setLemma; keep that order.
cl.setWord(word); cl.setValue(word); cl.setLemma(lemma); cl.setTag(tag); nextList.add(cl);
// Overwrite the last element of verb in place: word, value and lemma all become "to", tag becomes "IN".
// setWord is called before setLemma; keep that order.
verb.get(verb.size() - 1).setWord("to"); verb.get(verb.size() - 1).setValue("to"); verb.get(verb.size() - 1).setLemma("to"); verb.get(verb.size() - 1).setTag("IN");
/**
 * Gives a lemma to every leaf in the yield of {@code tree} that does not have one yet.
 * Each leaf is cast to {@code CoreLabel}; a leaf whose lemma is already non-null is left
 * untouched, otherwise its lemma is set to {@code MORPH.lemma(word, tag)}.
 *
 * @param tree the tree whose leaves are updated in place
 */
private static void addLemmata(Tree tree) {
  tree.yield().forEach(leaf -> {
    CoreLabel token = (CoreLabel) leaf;
    if (token.lemma() != null) {
      // Already lemmatized: nothing to do for this leaf.
      return;
    }
    token.setLemma(MORPH.lemma(token.word(), token.tag()));
  });
}
// Overwrite the last element of sentence in place: word, value and lemma all become "of", tag becomes "IN".
// setWord is called before setLemma; keep that order.
sentence.get(sentence.size() - 1).setWord("of"); sentence.get(sentence.size() - 1).setValue("of"); sentence.get(sentence.size() - 1).setLemma("of"); sentence.get(sentence.size() - 1).setTag("IN");
// The token's end offset is beginChar plus the length of the i-th word; beginChar then advances by
// that length plus one (presumably one separator character between words -- confirm with the caller).
// Lemma, POS tag and NER label are taken from the lemmas/pos/ner lists at the same index i.
token.setEndPosition(beginChar + words.get(i).length()); beginChar += words.get(i).length() + 1; token.setLemma(lemmas.get(i)); token.setTag(pos.get(i)); token.setNER(ner.get(i));
// Set the lemma of the i-th element of tokens.get() to the i-th entry of values.
tokens.get().get(i).setLemma(values.get(i));
/**
 * Converts a list of word-bearing labels into a new list of {@code CoreLabel} objects.
 * For every input element a fresh {@code CoreLabel} is created and populated, in this order:
 * the value (only if the element is a {@code Label}), the word, the tag (only if the element
 * is a {@code HasTag}) and the lemma (only if the element is a {@code HasLemma}).
 *
 * @param words The words to make it from
 * @return The Sentence, as a new list with one {@code CoreLabel} per input element
 */
public static List<CoreLabel> toCoreLabelList(List<? extends HasWord> words) {
  List<CoreLabel> result = new ArrayList<>(words.size());
  for (HasWord source : words) {
    CoreLabel target = new CoreLabel();
    if (source instanceof Label) {
      Label asLabel = (Label) source;
      target.setValue(asLabel.value());
    }
    // The word is set before the lemma, exactly as in the original ordering.
    target.setWord(source.word());
    if (source instanceof HasTag) {
      HasTag tagged = (HasTag) source;
      target.setTag(tagged.tag());
    }
    if (source instanceof HasLemma) {
      HasLemma lemmatized = (HasLemma) source;
      target.setLemma(lemmatized.lemma());
    }
    result.add(target);
  }
  return result;
}
// Word and value are both set from escapedLemma, while the lemma itself is set from lemma.
// setWord is called before setLemma; keep that order.
coreLabel.setWord(escapedLemma); coreLabel.setValue(escapedLemma); coreLabel.setLemma(lemma);
// Lemma, value and text all receive toksplit; the text is written directly through
// set(CoreAnnotations.TextAnnotation.class, ...) rather than through setWord.
c.setLemma(toksplit); c.setValue(toksplit); c.set(CoreAnnotations.TextAnnotation.class, toksplit);
// Two occurrences of the same pattern: set the word on the leaf's label, then -- only when that label
// is a CoreLabel and lemmas is non-null -- set its lemma from lemmas (index i in the first
// occurrence, index 0 in the second). The bodies of both if-statements continue past this line.
((HasWord) leafNode.label()).setWord(s); if (leafNode.label() instanceof CoreLabel && lemmas != null) { ((CoreLabel) leafNode.label()).setLemma(lemmas.get(i)); ((HasWord) leafNode.label()).setWord(leafStr); if (leafNode.label() instanceof CoreLabel && lemmas != null) { ((CoreLabel) leafNode.label()).setLemma(lemmas.get(0));
/**
 * Builds a stand-in node that can be added to the dependency tree even though it is not part
 * of the original sentence. The node starts as a copy of {@code toCopy}; its word, lemma and
 * value are then all set to {@code word}, its NER tag to {@code "O"}, its POS tag to
 * {@code POS}, and its index to {@code sentenceLength + 5}.
 *
 * @param toCopy The CoreLabel to copy from initially.
 * @param word The new word to add.
 * @param POS The new part of speech to add.
 *
 * @return A CoreLabel copying most fields from toCopy, but with a new word and POS tag (as well as a new index).
 */
@SuppressWarnings("UnusedDeclaration")
private CoreLabel mockNode(CoreLabel toCopy, String word, String POS) {
  CoreLabel node = new CoreLabel(toCopy);
  // The word is set first and the lemma afterwards, as in the original ordering.
  node.setWord(word);
  node.setLemma(word);
  node.setValue(word);
  node.setNER("O");
  node.setTag(POS);
  int mockIndex = sentenceLength + 5;
  node.setIndex(mockIndex);
  return node;
}
/** * A funky little helper method to interpret each token of the sentence as an HTML string, and translate it back to text. * Note that this is <b>in place</b>. */ public void unescapeHTML() { // Change in the protobuf for (int i = 0; i < sentence.length(); ++i) { CoreNLPProtos.Token.Builder token = sentence.rawToken(i); token.setWord(StringUtils.unescapeHtml3(token.getWord())); token.setLemma(StringUtils.unescapeHtml3(token.getLemma())); } // Change in the annotation CoreMap cm = sentence.document.asAnnotation().get(CoreAnnotations.SentencesAnnotation.class).get(sentence.sentenceIndex()); for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) { token.setWord(StringUtils.unescapeHtml3(token.word())); token.setLemma(StringUtils.unescapeHtml3(token.lemma())); } } }
// Copy each optional field from proto onto word only when proto reports it as present:
// coarse NER tag, fine-grained NER tag, lemma, begin character offset and end character offset.
if (proto.hasCoarseNER()) { word.set(CoreAnnotations.CoarseNamedEntityTagAnnotation.class, proto.getCoarseNER()); } if (proto.hasFineGrainedNER()) { word.set(CoreAnnotations.FineGrainedNamedEntityTagAnnotation.class, proto.getFineGrainedNER()); } if (proto.hasLemma()) { word.setLemma(proto.getLemma()); } if (proto.hasBeginChar()) { word.setBeginPosition(proto.getBeginChar()); } if (proto.hasEndChar()) { word.setEndPosition(proto.getEndChar()); }
/**
 * Sets the lemma by forwarding the given value, unchanged, to {@code label}.
 *
 * @param lemma the lemma to hand to {@code label.setLemma}
 */
@Override
public void setLemma(String lemma) {
  label.setLemma(lemma);
}
/**
 * Sets the lemma by forwarding the given value, unchanged, to {@code label}.
 *
 * @param lemma the lemma to hand to {@code label.setLemma}
 */
@Override
public void setLemma(String lemma) {
  label.setLemma(lemma);
}
/**
 * Gives a lemma to every leaf in the yield of {@code tree} that does not have one yet.
 * Each leaf is cast to {@code CoreLabel}; a leaf whose lemma is already non-null is left
 * untouched, otherwise its lemma is set to {@code MORPH.lemma(word, tag)}.
 *
 * @param tree the tree whose leaves are updated in place
 */
private static void addLemmata(Tree tree) {
  tree.yield().forEach(leaf -> {
    CoreLabel token = (CoreLabel) leaf;
    if (token.lemma() != null) {
      // Already lemmatized: nothing to do for this leaf.
      return;
    }
    token.setLemma(MORPH.lemma(token.word(), token.tag()));
  });
}
/**
 * Gives a lemma to every leaf in the yield of {@code tree} that does not have one yet.
 * Each leaf is cast to {@code CoreLabel}; a leaf whose lemma is already non-null is left
 * untouched, otherwise its lemma is set to {@code MORPH.lemma(word, tag)}.
 *
 * @param tree the tree whose leaves are updated in place
 */
private static void addLemmata(Tree tree) {
  tree.yield().forEach(leaf -> {
    CoreLabel token = (CoreLabel) leaf;
    if (token.lemma() != null) {
      // Already lemmatized: nothing to do for this leaf.
      return;
    }
    token.setLemma(MORPH.lemma(token.word(), token.tag()));
  });
}
/**
 * Builds a new {@code CoreLabel} from the given token.
 * The label receives the token's covered text as original text, the token's text as word,
 * and the token's begin/end offsets. Its lemma is the value of the token's lemma when the
 * token has one, and the token's text otherwise. The tag is set only when the token has a
 * POS; otherwise it is left unset.
 *
 * @param aToken the token to convert
 * @return the newly created label
 */
public static CoreLabel tokenToWord(Token aToken) {
  CoreLabel label = new CoreLabel();
  label.setOriginalText(aToken.getCoveredText());
  label.setWord(aToken.getText());
  label.setBeginPosition(aToken.getBegin());
  label.setEndPosition(aToken.getEnd());

  // Fall back to the token text when no lemma is attached to the token.
  label.setLemma(aToken.getLemma() != null ? aToken.getLemma().getValue() : aToken.getText());

  if (aToken.getPos() == null) {
    return label;
  }
  label.setTag(aToken.getPos().getPosValue());
  return label;
}
/** * A funky little helper method to interpret each token of the sentence as an HTML string, and translate it back to text. * Note that this is <b>in place</b>. */ public void unescapeHTML() { // Change in the protobuf for (int i = 0; i < sentence.length(); ++i) { CoreNLPProtos.Token.Builder token = sentence.rawToken(i); token.setWord(StringUtils.unescapeHtml3(token.getWord())); token.setLemma(StringUtils.unescapeHtml3(token.getLemma())); } // Change in the annotation CoreMap cm = sentence.document.asAnnotation().get(CoreAnnotations.SentencesAnnotation.class).get(sentence.sentenceIndex()); for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) { token.setWord(StringUtils.unescapeHtml3(token.word())); token.setLemma(StringUtils.unescapeHtml3(token.lemma())); } } }