/**
 * Sets the word by delegating to {@code label.setWord(word)}.
 *
 * @param word The word string to pass on to {@code label}
 */
@Override public void setWord(String word) { label.setWord(word); }
/**
 * Builds a list of {@code CoreLabel} tokens from an array (or varargs) of
 * strings. Only the word of each token is set.
 *
 * @param words The word strings, in sentence order
 * @return A new list holding one new {@code CoreLabel} per input string,
 *         in the same order
 */
public static List<CoreLabel> toCoreLabelList(String... words) {
  final int count = words.length;
  List<CoreLabel> result = new ArrayList<>(count);
  for (int i = 0; i < count; i++) {
    CoreLabel token = new CoreLabel();
    token.setWord(words[i]);
    result.add(token);
  }
  return result;
}
/**
 * Builds a list of {@code CoreLabel} tokens from a list of strings.
 * Only the word of each token is set.
 *
 * @param words The word strings, in sentence order
 * @return A new list holding one new {@code CoreLabel} per input string,
 *         in iteration order
 */
public static List<CoreLabel> toCoreLabelList(List<String> words) {
  List<CoreLabel> result = new ArrayList<>(words.size());
  for (String text : words) {
    CoreLabel token = new CoreLabel();
    token.setWord(text);
    result.add(token);
  }
  return result;
}
/**
 * Builds a list of {@code CoreLabel} tokens from parallel arrays of words
 * and tags. Token {@code i} gets {@code words[i]} as its word and
 * {@code tags[i]} as its tag.
 *
 * @param words The word strings, in sentence order
 * @param tags The tags; asserted to have the same length as {@code words}
 * @return A new list holding one new {@code CoreLabel} per word
 */
public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags) {
  assert tags.length == words.length;
  List<CoreLabel> result = new ArrayList<>(words.length);
  int pos = 0;
  for (String word : words) {
    CoreLabel token = new CoreLabel();
    token.setWord(word);
    token.setTag(tags[pos]);
    result.add(token);
    pos++;
  }
  return result;
}
/**
 * Create a sentence as a List of {@code HasWord} objects from an array
 * (or varargs) of String objects. Each element is a new {@code CoreLabel}
 * whose value and word are both set to the corresponding string.
 *
 * @param words The words to make it from
 * @return The sentence, one element per input word, in the same order
 */
public static List<HasWord> toWordList(String... words) {
  // Exactly one element is added per input word, so size the list up front.
  List<HasWord> sent = new ArrayList<>(words.length);
  for (String word : words) {
    CoreLabel cl = new CoreLabel();
    cl.setValue(word);
    cl.setWord(word);
    sent.add(cl);
  }
  return sent;
}
/**
 * Turns an array (or varargs) of strings into a sentence represented as a
 * list of {@code CoreLabel} objects. Every label has both its value and
 * its word set to the corresponding string.
 *
 * @param words The strings to build the sentence from
 * @return The sentence, one {@code CoreLabel} per input string, in order
 */
public static List<CoreLabel> toCoreLabelList(String... words) {
  final int count = words.length;
  List<CoreLabel> result = new ArrayList<>(count);
  for (int i = 0; i < count; i++) {
    String text = words[i];
    CoreLabel token = new CoreLabel();
    token.setValue(text);
    token.setWord(text);
    result.add(token);
  }
  return result;
}
/**
 * Creates a dependency from two strings. Each string is wrapped in a new
 * {@code CoreLabel} (value and word both set to the string), and the raw
 * strings are also kept in {@code regentText} and {@code dependentText}.
 *
 * @param regent The text of the head of the dependency; must not be null
 * @param dependent The text of the dependent; must not be null
 * @throws IllegalArgumentException if either argument is null
 */
public UnnamedDependency(String regent, String dependent) {
  boolean missingArgument = regent == null || dependent == null;
  if (missingArgument) {
    throw new IllegalArgumentException("governor or dependent cannot be null");
  }

  // Keep the plain strings alongside the label objects.
  regentText = regent;
  dependentText = dependent;

  CoreLabel governorLabel = new CoreLabel();
  governorLabel.setValue(regent);
  governorLabel.setWord(regent);
  this.regent = governorLabel;

  CoreLabel dependentLabel = new CoreLabel();
  dependentLabel.setValue(dependent);
  dependentLabel.setWord(dependent);
  this.dependent = dependentLabel;
}
/**
 * Builds a list of {@code CoreLabel} tokens from parallel arrays of words,
 * tags and answers. Token {@code i} gets {@code words[i]} as its word,
 * {@code tags[i]} as its tag and {@code answers[i]} stored under
 * {@code CoreAnnotations.AnswerAnnotation}.
 *
 * @param words The word strings, in sentence order
 * @param tags The tags; asserted to have the same length as {@code words}
 * @param answers The answers; asserted to have the same length as {@code words}
 * @return A new list holding one new {@code CoreLabel} per word
 */
public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags, String[] answers) {
  assert tags.length == words.length;
  assert answers.length == words.length;
  List<CoreLabel> result = new ArrayList<>(words.length);
  int pos = 0;
  for (String word : words) {
    CoreLabel token = new CoreLabel();
    token.setWord(word);
    token.setTag(tags[pos]);
    token.set(CoreAnnotations.AnswerAnnotation.class, answers[pos]);
    result.add(token);
    pos++;
  }
  return result;
}
/**
 * Splits a compound marked by the lexer.
 *
 * The {@code ParentAnnotation} is removed from {@code cl}, every hyphen in
 * its word is padded with spaces, and the result is split on whitespace.
 * For each part a copy of {@code cl} is made with the part as its word,
 * value and original text, and appended to {@code compoundBuffer}.
 *
 * @param cl The compound token to split (its ParentAnnotation is removed)
 * @return The element removed from index 0 of {@code compoundBuffer}
 */
private CoreLabel processCompound(CoreLabel cl) {
  cl.remove(ParentAnnotation.class);

  String spaced = cl.word().replaceAll("-", " - ");
  String[] pieces = spaced.split("\\s+");

  for (int i = 0; i < pieces.length; i++) {
    String piece = pieces[i];
    CoreLabel pieceLabel = new CoreLabel(cl);
    pieceLabel.setWord(piece);
    pieceLabel.setValue(piece);
    pieceLabel.set(OriginalTextAnnotation.class, piece);
    compoundBuffer.add(pieceLabel);
  }

  return compoundBuffer.remove(0);
}
public static List<CoreLabel> toCoreLabelListWithCharacterOffsets(String[] words, String[] tags) { assert tags.length == words.length; List<CoreLabel> tokens = new ArrayList<>(words.length); int offset = 0; for (int i = 0, sz = words.length; i < sz; i++) { CoreLabel cl = new CoreLabel(); cl.setWord(words[i]); cl.setTag(tags[i]); cl.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset); offset += words[i].length(); cl.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset); offset++; // assume one space between words :-) tokens.add(cl); } return tokens; }
/**
 * Convenience factory that makes a {@code CoreLabel} for a single word
 * given as a String; handy in fixup or test code. The Text, OriginalText
 * and Value annotations are all set to the given string.
 *
 * @param word The word string to wrap
 * @return A new {@code CoreLabel} for this word string
 */
public static CoreLabel wordFromString(String word) {
  CoreLabel label = new CoreLabel();
  label.setWord(word);
  label.setOriginalText(word);
  label.setValue(word);
  return label;
}
/**
 * Splits a compound marked by the lexer.
 *
 * The {@code ParentAnnotation} is removed from {@code cl}; matches of
 * {@code pDash} in its word are replaced by " - " and the result is split
 * with {@code pSpace}. For each part a copy of {@code cl} is made with the
 * part as its word, value and original text. Begin/end positions are laid
 * out consecutively starting at {@code cl.beginPosition()}, advancing by
 * each part's length. The copies are appended to {@code compoundBuffer}.
 *
 * @param cl The compound token to split (its ParentAnnotation is removed)
 * @return The element removed from index 0 of {@code compoundBuffer}
 */
private CoreLabel processCompound(CoreLabel cl) {
  cl.remove(ParentAnnotation.class);

  String spaced = pDash.matcher(cl.word()).replaceAll(" - ");
  String[] pieces = pSpace.split(spaced);

  int consumed = 0;  // characters of the compound already assigned to parts
  for (String piece : pieces) {
    int pieceBegin = cl.beginPosition() + consumed;
    int pieceEnd = pieceBegin + piece.length();

    CoreLabel pieceLabel = new CoreLabel(cl);
    pieceLabel.setWord(piece);
    pieceLabel.setValue(piece);
    pieceLabel.setBeginPosition(pieceBegin);
    pieceLabel.setEndPosition(pieceEnd);
    pieceLabel.set(OriginalTextAnnotation.class, piece);
    compoundBuffer.add(pieceLabel);

    consumed += piece.length();
  }

  return compoundBuffer.remove(0);
}
/**
 * Creates an IndexedWord backed by the given label.
 *
 * If {@code w} is already a {@link CoreLabel}, that same object is stored
 * as the backing label (it is not copied). Otherwise a new
 * {@link CoreLabel} is constructed from {@code w}, and if that label has
 * no word, its word is set to its value.
 *
 * @param w A Label to initialize this IndexedWord from
 */
public IndexedWord(Label w) {
  if (!(w instanceof CoreLabel)) {
    CoreLabel converted = new CoreLabel(w);
    if (converted.word() == null) {
      converted.setWord(converted.value());
    }
    this.label = converted;
  } else {
    this.label = (CoreLabel) w;
  }
}
/**
 * Creates a {@code CoreLabel} whose word (Text annotation) and value
 * (Value annotation) are both the given token string.
 *
 * @param token The token string
 * @return A new {@code CoreLabel} for the token
 */
private static CoreLabel initCoreLabel(String token) {
  CoreLabel label = new CoreLabel();
  // setWord writes the Text annotation and setValue writes the Value
  // annotation, so no additional explicit set(...) calls are needed.
  label.setWord(token);
  label.setValue(token);
  return label;
}
private CoreLabel makeXmlToken(String tokenText, boolean doNormalization, int charOffsetBegin, int charOffsetEnd) { CoreLabel token = new CoreLabel(); token.setOriginalText(tokenText); if (separatorPattern.matcher(tokenText).matches()) { // Map to CoreNLP newline token tokenText = AbstractTokenizer.NEWLINE_TOKEN; } else if (doNormalization && normalizeSpace) { tokenText = tokenText.replace(' ', '\u00A0'); // change space to non-breaking space } token.setWord(tokenText); token.setValue(tokenText); token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charOffsetBegin); token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charOffsetEnd); if (VERBOSE) { log.info("Adding token " + token.toShorterString()); } return token; }
/**
 * Makes a copy of {@code cl} that stands for the new word {@code part}.
 * The copy's word, value and original text are set to {@code part}, and
 * its begin/end positions to the given offsets.
 *
 * @param cl The label to copy
 * @param part The new word text
 * @param beginPosition Begin position for the copy
 * @param endPosition End position for the copy
 * @return The modified copy; {@code cl} itself is not modified here
 */
private static CoreLabel copyCoreLabel(CoreLabel cl, String part, int beginPosition, int endPosition) {
  CoreLabel copy = new CoreLabel(cl);
  copy.setWord(part);
  copy.setValue(part);
  copy.setBeginPosition(beginPosition);
  copy.setEndPosition(endPosition);
  copy.set(OriginalTextAnnotation.class, part);
  return copy;
}
/**
 * Clones {@code cl} through the {@code CoreLabel} copy constructor and
 * re-labels the clone with {@code part}: word, value and original text
 * all become {@code part}, and the begin/end positions are overwritten
 * with the supplied offsets.
 *
 * @param cl The label to copy
 * @param part The new word text
 * @param beginPosition Begin position for the copy
 * @param endPosition End position for the copy
 * @return The re-labelled copy
 */
private static CoreLabel copyCoreLabel(CoreLabel cl, String part, int beginPosition, int endPosition) {
  CoreLabel partLabel = new CoreLabel(cl);
  partLabel.setWord(part);
  partLabel.setValue(part);
  partLabel.setBeginPosition(beginPosition);
  partLabel.setEndPosition(endPosition);
  partLabel.set(OriginalTextAnnotation.class, part);
  return partLabel;
}
/**
 * Constructs a CoreLabel for a token whose original text differs from its
 * token text. No substring is taken: both strings are used as given.
 *
 * The value and word are set to {@code tokenText} and the original text to
 * {@code originalText}. When {@code addIndices} is on, the label is created
 * with {@code new CoreLabel(5)} and the character offsets
 * {@code begin} and {@code begin + length} are stored as well.
 *
 * @param tokenText The text used as the token's value and word
 * @param originalText The original text of the token
 * @param begin The begin character offset
 * @param length The length used to compute the end character offset
 * @return The new token
 */
public CoreLabel makeToken(String tokenText, String originalText, int begin, int length) {
  final CoreLabel token;
  if (addIndices) {
    token = new CoreLabel(5);
  } else {
    token = new CoreLabel();
  }

  token.setValue(tokenText);
  token.setWord(tokenText);
  token.setOriginalText(originalText);

  if (!addIndices) {
    return token;
  }

  token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
  token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, begin + length);
  return token;
}
/**
 * Segments a line and returns the result as a list of tokens.
 *
 * The line is first labeled with {@code segmentStringToIOB}; then one
 * {@code CoreLabel} is produced for each span returned by
 * {@code IOBUtils.TokenSpansForIOB}. Each token gets:
 * the string from {@code IOBUtils.IOBToString} as its word and value,
 * {@code ArabicSegAnnotation} set to "1", the matching substring of
 * {@code line} as its original text, and begin/end character offsets.
 *
 * @param line The text to segment
 * @return The tokens, in span order
 */
public List<CoreLabel> segmentStringToTokenList(String line) {
  List<CoreLabel> tokenList = CollectionUtils.makeList();
  List<CoreLabel> labeledSequence = segmentStringToIOB(line);
  for (IntPair span : IOBUtils.TokenSpansForIOB(labeledSequence)) {
    CoreLabel token = new CoreLabel();
    String text = IOBUtils.IOBToString(labeledSequence, prefixMarker, suffixMarker,
        span.getSource(), span.getTarget());
    // setWord already writes the Text annotation, so it is not set a
    // second time explicitly.
    token.setWord(text);
    token.setValue(text);
    token.set(CoreAnnotations.ArabicSegAnnotation.class, "1");

    // Offsets come from the first and last labeled element of the span;
    // the span target is used as an exclusive end index (target - 1 is read).
    int start = labeledSequence.get(span.getSource()).beginPosition();
    int end = labeledSequence.get(span.getTarget() - 1).endPosition();
    token.setOriginalText(line.substring(start, end));
    token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, start);
    token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
    tokenList.add(token);
  }
  return tokenList;
}
/**
 * Returns the terminal label for the given position.
 *
 * If {@code originalCoreLabels} holds a label at that position, it is
 * returned; when that label has a word but no value, its value is first
 * set to its word. Otherwise a new {@code CoreLabel} is built from the
 * string that {@code wordIndex} gives for {@code words[labelIndex]}
 * (used as value and word), with begin/end positions from
 * {@code beginOffsets}/{@code endOffsets}, and with the tag copied from
 * {@code originalTags[labelIndex]} when that entry is not null.
 *
 * @param labelIndex Position to get the label for
 * @return The existing or newly built label
 */
private CoreLabel getCoreLabel(int labelIndex) {
  CoreLabel existing = originalCoreLabels[labelIndex];
  if (existing != null) {
    // Fill in a missing value from the word before handing the label back.
    if (existing.value() == null && existing.word() != null) {
      existing.setValue(existing.word());
    }
    return existing;
  }

  String surface = wordIndex.get(words[labelIndex]);
  CoreLabel fresh = new CoreLabel();
  fresh.setValue(surface);
  fresh.setWord(surface);
  fresh.setBeginPosition(beginOffsets[labelIndex]);
  fresh.setEndPosition(endOffsets[labelIndex]);
  if (originalTags[labelIndex] != null) {
    fresh.setTag(originalTags[labelIndex].tag());
  }
  return fresh;
}