/**
 * Sets the tag by forwarding it unchanged to the wrapped {@code label}.
 *
 * @param tag the tag value handed to {@code label.setTag}
 */
@Override public void setTag(String tag) { label.setTag(tag); }
/**
 * Builds a list of {@code CoreLabel} objects from two parallel arrays.
 * The label at position {@code i} receives {@code words[i]} as its word and
 * {@code tags[i]} as its tag.
 *
 * @param words the word of each label, in order
 * @param tags the tag of each label; asserted to have the same length as {@code words}
 * @return a new list with one freshly created label per word
 */
public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags) {
  assert tags.length == words.length;
  List<CoreLabel> labels = new ArrayList<>(words.length);
  int position = 0;
  for (String word : words) {
    CoreLabel label = new CoreLabel();
    label.setWord(word);
    label.setTag(tags[position]);
    labels.add(label);
    position++;
  }
  return labels;
}
/**
 * Tags a sentence of CoreLabels in place: the sentence is run through
 * {@code tagSentence} and each resulting tag is written back onto the
 * CoreLabel at the same position. If reuseTags is set to true, any tags
 * supplied with the CoreLabels are taken as correct.
 *
 * @param sentence the tokens to tag; modified by this call
 * @param reuseTags passed through to {@code tagSentence}
 * @throws AssertionError if the tagger returns a different number of
 *         words than the sentence contains
 */
public void tagCoreLabels(List<CoreLabel> sentence, boolean reuseTags) {
  List<TaggedWord> tagged = tagSentence(sentence, reuseTags);
  if (tagged.size() != sentence.size()) {
    throw new AssertionError("Tagged word list not the same length " +
                             "as the original sentence");
  }

  final int length = sentence.size();
  int position = 0;
  while (position < length) {
    CoreLabel token = sentence.get(position);
    token.setTag(tagged.get(position).tag());
    position++;
  }
}
/**
 * Builds a list of {@code CoreLabel} objects from three parallel arrays.
 * The label at position {@code i} receives {@code words[i]} as its word,
 * {@code tags[i]} as its tag and {@code answers[i]} under
 * {@code CoreAnnotations.AnswerAnnotation}.
 *
 * @param words the word of each label, in order
 * @param tags the tag of each label; asserted to match {@code words} in length
 * @param answers the answer annotation of each label; asserted to match {@code words} in length
 * @return a new list with one freshly created label per word
 */
public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags, String[] answers) {
  assert tags.length == words.length;
  assert answers.length == words.length;
  List<CoreLabel> labels = new ArrayList<>(words.length);
  int position = 0;
  for (String word : words) {
    CoreLabel label = new CoreLabel();
    label.setWord(word);
    label.setTag(tags[position]);
    label.set(CoreAnnotations.AnswerAnnotation.class, answers[position]);
    labels.add(label);
    position++;
  }
  return labels;
}
public static List<CoreLabel> toCoreLabelListWithCharacterOffsets(String[] words, String[] tags) { assert tags.length == words.length; List<CoreLabel> tokens = new ArrayList<>(words.length); int offset = 0; for (int i = 0, sz = words.length; i < sz; i++) { CoreLabel cl = new CoreLabel(); cl.setWord(words[i]); cl.setTag(tags[i]); cl.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset); offset += words[i].length(); cl.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset); offset++; // assume one space between words :-) tokens.add(cl); } return tokens; }
/**
 * Handles a failed sentence. A tree is obtained from
 * {@code ParserUtils.xTree} over the sentence tokens, every token that has
 * no tag is given the tag {@code "XX"}, and the tree is handed to
 * {@code finishSentence} as a single-element list.
 *
 * @param annotation the enclosing annotation (not read by this method)
 * @param sentence the sentence whose tokens are read and updated
 */
@Override
public void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
  final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);

  // Build the tree before filling in tags, matching the original ordering.
  Tree fallbackTree = ParserUtils.xTree(tokens);

  for (CoreLabel token : tokens) {
    if (token.tag() != null) {
      continue;
    }
    token.setTag("XX");
  }

  List<Tree> result = Generics.newArrayList(1);
  result.add(fallbackTree);
  finishSentence(sentence, result);
}
/**
 * Fills in tags for sentence tokens that do not have one yet. For each
 * such token, the tag at the same position in the tree's tagged yield is
 * copied onto the token and, when the leaf label at that position
 * implements {@code HasTag}, onto the leaf as well.
 *
 * @param sentence the sentence whose tokens are inspected and updated
 * @param tree the tree supplying the tags and the leaf labels
 */
private static void setMissingTags(CoreMap sentence, Tree tree) {
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  // Both yields are computed lazily, only once a token without a tag is found.
  List<TaggedWord> treeTags = null;
  List<Label> treeLeaves = null;

  final int count = tokens.size();
  for (int position = 0; position < count; position++) {
    CoreLabel token = tokens.get(position);
    if (token.tag() != null) {
      continue;
    }

    if (treeTags == null) {
      treeTags = tree.taggedYield();
    }
    if (treeLeaves == null) {
      treeLeaves = tree.yield();
    }

    String tag = treeTags.get(position).tag();
    token.setTag(tag);

    Label leaf = treeLeaves.get(position);
    if (leaf instanceof HasTag) {
      ((HasTag) leaf).setTag(tag);
    }
  }
}
private static void replacePOSTags(Tree tree) { List<Label> yield = tree.yield(); List<Label> preYield = tree.preTerminalYield(); assert yield.size() == preYield.size(); MorphoFeatureSpecification spec = new FrenchMorphoFeatureSpecification(); for(int i = 0; i < yield.size(); i++) { // Morphological Analysis String morphStr = ((CoreLabel) yield.get(i)).originalText(); if (morphStr == null || morphStr.equals("")) { morphStr = preYield.get(i).value(); // POS subcategory String subCat = ((CoreLabel) yield.get(i)).category(); if (subCat != null && subCat != "") { morphStr += "-" + subCat + "--"; } else { morphStr += "---"; } } MorphoFeatures feats = spec.strToFeatures(morphStr); if(feats.getAltTag() != null && !feats.getAltTag().equals("")) { CoreLabel cl = (CoreLabel) preYield.get(i); cl.setValue(feats.getAltTag()); cl.setTag(feats.getAltTag()); } } }
private <TOKEN extends CoreLabel> void annotateTokens(List<TOKEN> tokens) { // Make a copy of the tokens before annotating because QuantifiableEntityNormalizer may change the POS too List<CoreLabel> words = new ArrayList<>(); for (CoreLabel token : tokens) { CoreLabel word = new CoreLabel(); word.setWord(token.word()); word.setNER(token.ner()); word.setTag(token.tag()); // copy fields potentially set by SUTime NumberSequenceClassifier.transferAnnotations(token, word); words.add(word); } doOneSentence(words); // TODO: If collapsed is set, tokens for entities are collapsed into one node then // (words.size() != tokens.size() and the logic below just don't work!!! for (int i = 0; i < words.size(); i++) { String ner = words.get(i).ner(); tokens.get(i).setNER(ner); tokens.get(i).set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, words.get(i).get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class)); } }
/**
 * Converts a list of word-bearing objects into a list of new
 * {@code CoreLabel} objects. Each output label always gets the source's
 * word; its value, tag and lemma are copied only when the source
 * implements {@code Label}, {@code HasTag} and {@code HasLemma}
 * respectively.
 *
 * @param words The words to make it from
 * @return The sentence, as a new list of freshly created labels
 */
public static List<CoreLabel> toCoreLabelList(List<? extends HasWord> words) {
  List<CoreLabel> labels = new ArrayList<>(words.size());
  for (HasWord source : words) {
    CoreLabel label = new CoreLabel();

    if (source instanceof Label) {
      Label asLabel = (Label) source;
      label.setValue(asLabel.value());
    }

    label.setWord(source.word());

    if (source instanceof HasTag) {
      HasTag tagged = (HasTag) source;
      label.setTag(tagged.tag());
    }

    if (source instanceof HasLemma) {
      HasLemma lemmatized = (HasLemma) source;
      label.setLemma(lemmatized.lemma());
    }

    labels.add(label);
  }
  return labels;
}
"with part-of-speech tag annotations"); label.setTag(((HasTag) wd).tag());
if(feats.getAltTag() != null && !feats.getAltTag().equals("")) { label.setValue(feats.getAltTag()); label.setTag(feats.getAltTag());
/**
 * Builds a stand-in node that can be added to the dependency tree even
 * though it is not part of the original sentence. The node starts as a
 * copy of {@code toCopy}; its word, lemma and value are then all set to
 * {@code word}, its NER to {@code "O"}, its tag to {@code POS}, and its
 * index to {@code sentenceLength + 5}.
 *
 * @param toCopy The CoreLabel to copy from initially.
 * @param word The new word to add.
 * @param POS The new part of speech to add.
 *
 * @return A CoreLabel copying most fields from toCopy, but with a new word and POS tag (as well as a new index).
 */
@SuppressWarnings("UnusedDeclaration")
private CoreLabel mockNode(CoreLabel toCopy, String word, String POS) {
  final CoreLabel node = new CoreLabel(toCopy);

  // Setter order is kept exactly as in the original.
  node.setWord(word);
  node.setLemma(word);
  node.setValue(word);

  node.setNER("O");
  node.setTag(POS);

  final int mockIndex = sentenceLength + 5;
  node.setIndex(mockIndex);
  return node;
}
/**
 * Produces one {@code CoreLabel} per tagged word, each passed through
 * {@code Morphology.stem}. Tags come from the loaded tagger when
 * {@code testOptions.preTag} is set, and otherwise from the tagged yield
 * of the parse of {@code tokens}.
 *
 * Only works on English, as it is hard coded for using the
 * Morphology class, which is English-only.
 *
 * @param tokens the words to tag and lemmatize
 * @return a new list of labels carrying the word and tag, after stemming
 */
public List<CoreLabel> lemmatize(List<? extends HasWord> tokens) {
  final List<TaggedWord> taggedWords;
  if (getOp().testOptions.preTag) {
    taggedWords = loadTagger().apply(tokens);
  } else {
    taggedWords = parse(tokens).taggedYield();
  }

  Morphology morphology = new Morphology();
  List<CoreLabel> result = Generics.newArrayList();
  for (TaggedWord taggedWord : taggedWords) {
    CoreLabel lemmaLabel = new CoreLabel();
    lemmaLabel.setWord(taggedWord.word());
    lemmaLabel.setTag(taggedWord.tag());
    morphology.stem(lemmaLabel);
    result.add(lemmaLabel);
  }
  return result;
}
/**
 * Create a new {@code TreeGraphNode} mirroring the structure and label
 * values of an existing tree (but no shared storage). The constructor
 * recurses over the children of {@code t}, so a whole subtree is built.
 * When {@code t} is a preterminal, the value of its label is also set as
 * the tag on each child's label.
 *
 * @param t the tree to copy
 * @param parent the parent node
 */
protected TreeGraphNode(Tree t, TreeGraphNode parent) {
  this.parent = parent;

  Tree[] sourceKids = t.children();
  children = new TreeGraphNode[sourceKids.length];
  int slot = 0;
  for (Tree sourceKid : sourceKids) {
    TreeGraphNode copy = new TreeGraphNode(sourceKid, this);
    children[slot] = copy;
    if (t.isPreTerminal()) {
      // add the tags to the leaves
      copy.label.setTag(t.label().value());
    }
    slot++;
  }

  // This node's own label is created last, after all children exist.
  this.label = (CoreLabel) mlf.newLabel(t.label());
}
private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) { if (isPreTerminal()) { // usually this will fill in all the usual keys for a token CoreLabel taggedWord = new CoreLabel(firstChild().label()); // but in case this just came from reading a tree that just has a value for words if (taggedWord.word() == null) { taggedWord.setWord(firstChild().value()); } final String tag = (value() == null) ? "" : value(); // set value and tag to the tag taggedWord.setValue(tag); taggedWord.setTag(tag); taggedWord.setIndex(termIdx); ty.add(taggedWord); return termIdx + 1; } else { for (Tree kid : getChildrenAsList()) termIdx = kid.taggedLabeledYield(ty, termIdx); } return termIdx; }
label.setTag(pos); } else if (aggressiveNormalization && isMultiWordCandidate(t)) {
/**
 * Returns the terminal label for the given position.
 *
 * If {@code originalCoreLabels} holds a label at that position, that same
 * object is returned, after its value has been filled in from its word
 * when the value is null and the word is not. Otherwise a new label is
 * built from the word string looked up in {@code wordIndex}, with begin
 * and end positions taken from the offset arrays and, when available,
 * the tag from {@code originalTags}.
 *
 * @param labelIndex position of the terminal
 * @return the existing label for that position, or a newly built one
 */
private CoreLabel getCoreLabel(int labelIndex) {
  CoreLabel existing = originalCoreLabels[labelIndex];
  if (existing != null) {
    boolean needsValue = existing.value() == null && existing.word() != null;
    if (needsValue) {
      existing.setValue(existing.word());
    }
    return existing;
  }

  String text = wordIndex.get(words[labelIndex]);
  CoreLabel built = new CoreLabel();
  built.setValue(text);
  built.setWord(text);
  built.setBeginPosition(beginOffsets[labelIndex]);
  built.setEndPosition(endOffsets[labelIndex]);
  if (originalTags[labelIndex] != null) {
    built.setTag(originalTags[labelIndex].tag());
  }
  return built;
}
/**
 * Copies only the fields required for numeric entity extraction into a new CoreLabel:
 * the word, the tag, the original text (when present), the character
 * offsets, and whatever {@code transferAnnotations} carries over.
 *
 * @param src Source CoreLabel to copy.
 * @param startOffset begin offset to store; when null, the source's begin offset is used
 * @param endOffset end offset to store; when null, the source's end offset is used
 * @return the newly created copy
 */
private static CoreLabel copyCoreLabel(CoreLabel src, Integer startOffset, Integer endOffset) {
  CoreLabel copy = new CoreLabel();
  copy.setWord(src.word());
  copy.setTag(src.tag());

  if (src.containsKey(CoreAnnotations.OriginalTextAnnotation.class)) {
    String originalText = src.get(CoreAnnotations.OriginalTextAnnotation.class);
    copy.set(CoreAnnotations.OriginalTextAnnotation.class, originalText);
  }

  // An explicit offset wins; otherwise fall back to the source's own offset.
  Integer begin = startOffset;
  if (begin == null) {
    begin = src.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  }
  copy.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);

  Integer end = endOffset;
  if (end == null) {
    end = src.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
  }
  copy.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);

  transferAnnotations(src, copy);
  return copy;
}
@Override public Label newLabel(Label oldLabel) { if (oldLabel instanceof CoreLabel) { return new CoreLabel((CoreLabel)oldLabel); } else { //Map the old interfaces to the correct key/value pairs //Don't need to worry about HasIndex, which doesn't appear in any legacy code CoreLabel label = new CoreLabel(); if (oldLabel instanceof HasWord) label.setWord(((HasWord) oldLabel).word()); if (oldLabel instanceof HasTag) label.setTag(((HasTag) oldLabel).tag()); if (oldLabel instanceof HasOffset) { label.setBeginPosition(((HasOffset) oldLabel).beginPosition()); label.setEndPosition(((HasOffset) oldLabel).endPosition()); } if (oldLabel instanceof HasCategory) label.setCategory(((HasCategory) oldLabel).category()); if (oldLabel instanceof HasIndex) label.setIndex(((HasIndex) oldLabel).index()); label.setValue(oldLabel.value()); return label; } }