edu.stanford.nlp.ling.CoreLabel.setOriginalText java code examples

/**
 * Sets the original text by forwarding the call to {@code label}.
 *
 * @param originalText the value passed unchanged to {@code label.setOriginalText}
 */
@Override
public void setOriginalText(String originalText) {
 label.setOriginalText(originalText);
}

/**
 * Convenience factory that builds a CoreLabel for a single word given as a String.
 * Handy in fixup or test code. The Text, OriginalText, and Value annotations
 * are all set to the supplied string.
 *
 * @param word The word string to wrap in a CoreLabel
 * @return A newly created CoreLabel whose word, original text, and value are all {@code word}
 */
public static CoreLabel wordFromString(String word) {
 final CoreLabel wordLabel = new CoreLabel();
 wordLabel.setWord(word);
 wordLabel.setOriginalText(word);
 wordLabel.setValue(word);
 return wordLabel;
}

/**
 * Builds a CoreLabel for one XML token.
 * The original text always holds the unmodified {@code tokenText}. The word and value hold
 * the newline token when the text matches {@code separatorPattern}; otherwise the text with
 * spaces replaced by non-breaking spaces when both {@code doNormalization} and
 * {@code normalizeSpace} are true; otherwise the unmodified text.
 *
 * @param tokenText The raw text of the token
 * @param doNormalization Whether space normalization may be applied to this token
 * @param charOffsetBegin Value stored as the CharacterOffsetBeginAnnotation
 * @param charOffsetEnd Value stored as the CharacterOffsetEndAnnotation
 * @return The newly constructed token
 */
private CoreLabel makeXmlToken(String tokenText, boolean doNormalization, int charOffsetBegin, int charOffsetEnd) {
 CoreLabel xmlToken = new CoreLabel();
 // Record the raw text before any mapping or normalization is applied.
 xmlToken.setOriginalText(tokenText);

 final String wordText;
 if (separatorPattern.matcher(tokenText).matches()) {
  // Separators are represented by the CoreNLP newline token.
  wordText = AbstractTokenizer.NEWLINE_TOKEN;
 } else if (doNormalization && normalizeSpace) {
  // Swap each plain space for a non-breaking space.
  wordText = tokenText.replace(' ', '\u00A0');
 } else {
  wordText = tokenText;
 }

 xmlToken.setWord(wordText);
 xmlToken.setValue(wordText);
 xmlToken.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charOffsetBegin);
 xmlToken.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charOffsetEnd);
 if (VERBOSE) {
  log.info("Adding token " + xmlToken.toShorterString());
 }
 return xmlToken;
}

// View the new chunk as a CoreLabel and make its value and original text mirror its word.
CoreLabel cl = (CoreLabel) newChunk;
cl.setValue(cl.word());
cl.setOriginalText(cl.word());

 ((CoreLabel) t.label()).setValue(toks[0].trim().intern());
 ((CoreLabel) t.label()).setWord(toks[0].trim().intern());
 ((CoreLabel) t.label()).setOriginalText(toks[1].trim().intern());
} else {
 System.err.printf("%s: Cannot store morph analysis in non-CoreLabel: %s%n",this.getClass().getName(),t.label().getClass().getName());

/**
 * Handles verbs with attached suffixes, marked by the lexer:
 *
 * Escribamosela =&gt; Escribamo + se + la =&gt; escribamos + se + la
 * Sentaos =&gt; senta + os =&gt; sentad + os
 * Damelo =&gt; da + me + lo
 *
 * The ParentAnnotation is always removed from {@code cl}. If the verb stripper returns
 * {@code null} for the word, {@code cl} itself is returned. Otherwise one label per pronoun
 * is appended to {@code compoundBuffer} and a label for the stem is returned.
 *
 * @param cl The verb label to process
 * @return {@code cl} when nothing was stripped, otherwise the label built for the stem
 */
private CoreLabel processVerb(CoreLabel cl) {
 cl.remove(ParentAnnotation.class);

 SpanishVerbStripper.StrippedVerb parts = verbStripper.separatePronouns(cl.word());
 if (parts == null) {
  return cl;
 }

 // The stem occupies the first getOriginalStem().length() characters of the token;
 // each pronoun begins where the previous piece ended.
 int stemEnd = cl.beginPosition() + parts.getOriginalStem().length();
 int pronounBegin = stemEnd;
 for (String pronoun : parts.getPronouns()) {
  compoundBuffer.add(copyCoreLabel(cl, pronoun, pronounBegin));
  pronounBegin += pronoun.length();
 }

 CoreLabel stemLabel = copyCoreLabel(cl, parts.getStem(), cl.beginPosition(), stemEnd);
 // The stem text may differ from the stem as written, so keep the written form as original text.
 stemLabel.setOriginalText(parts.getOriginalStem());
 return stemLabel;
}

/**
 * Builds a CoreLabel whose value and word are {@code tokenText} and whose original text is
 * {@code originalText}, for the case where the OriginalTextAnnotation differs from the
 * TextAnnotation. No substring is taken of either string.
 * Character offsets are stored only when {@code addIndices} is set.
 *
 * @param tokenText The text used for both the value and the word
 * @param originalText The text stored as the original text
 * @param begin The begin character offset
 * @param length The length added to {@code begin} to obtain the end character offset
 * @return The newly constructed CoreLabel
 */
public CoreLabel makeToken(String tokenText, String originalText, int begin, int length) {
 final CoreLabel token;
 if (addIndices) {
  token = new CoreLabel(5);
 } else {
  token = new CoreLabel();
 }

 token.setValue(tokenText);
 token.setWord(tokenText);
 token.setOriginalText(originalText);

 if (!addIndices) {
  return token;
 }
 token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
 token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, begin + length);
 return token;
}

/**
 * Segments {@code line} and returns one CoreLabel per token span.
 * The line is first labeled via {@code segmentStringToIOB}; for every span reported by
 * {@code IOBUtils.TokenSpansForIOB} a new token is created whose word, value, and text are
 * the string produced by {@code IOBUtils.IOBToString}, whose original text is the matching
 * substring of {@code line}, and whose character offsets are the begin position of the
 * span's first label and the end position of its last label.
 *
 * @param line The input string to segment
 * @return The list of tokens, in the order the spans are reported
 */
public List<CoreLabel> segmentStringToTokenList(String line) {
 List<CoreLabel> segments = CollectionUtils.makeList();
 List<CoreLabel> iobLabels = segmentStringToIOB(line);
 for (IntPair span : IOBUtils.TokenSpansForIOB(iobLabels)) {
  final int first = span.getSource();
  final int limit = span.getTarget(); // exclusive: the last label of the span is at limit - 1

  CoreLabel segment = new CoreLabel();
  String surface = IOBUtils.IOBToString(iobLabels, prefixMarker, suffixMarker, first, limit);
  segment.setWord(surface);
  segment.setValue(surface);
  segment.set(CoreAnnotations.TextAnnotation.class, surface);
  segment.set(CoreAnnotations.ArabicSegAnnotation.class, "1");

  int charBegin = iobLabels.get(first).beginPosition();
  int charEnd = iobLabels.get(limit - 1).endPosition();
  segment.setOriginalText(line.substring(charBegin, charEnd));
  segment.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin);
  segment.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd);

  segments.add(segment);
 }
 return segments;
}

String morphAnalysis = lemmaMorph.second();
if (lemma.equals(toks[0])) {
 cl.setOriginalText(toks[1].trim().intern());
} else {
 cl.setOriginalText(newMorphAnalysis.intern());

// Copy each optional field from the proto onto the word, but only when the proto reports it as present.
if (proto.hasBefore()) { word.setBefore(proto.getBefore()); }
if (proto.hasAfter()) { word.setAfter(proto.getAfter()); }
if (proto.hasOriginalText()) { word.setOriginalText(proto.getOriginalText()); }
if (proto.hasNer()) { word.setNER(proto.getNer()); }
// The coarse NER tag has no dedicated setter call here; it is stored through its annotation key.
if (proto.hasCoarseNER()) { word.set(CoreAnnotations.CoarseNamedEntityTagAnnotation.class, proto.getCoarseNER()); }

/**
 * Sets the original text by forwarding the call to {@code label}.
 *
 * @param originalText the value passed unchanged to {@code label.setOriginalText}
 */
@Override
public void setOriginalText(String originalText) {
 label.setOriginalText(originalText);
}

/**
 * Sets the original text by forwarding the call to {@code label}.
 *
 * @param originalText the value passed unchanged to {@code label.setOriginalText}
 */
@Override
public void setOriginalText(String originalText) {
 label.setOriginalText(originalText);
}

/**
 * Convenience factory that builds a CoreLabel for a single word given as a String.
 * Handy in fixup or test code. The Text, OriginalText, and Value annotations
 * are all set to the supplied string.
 *
 * @param word The word string to wrap in a CoreLabel
 * @return A newly created CoreLabel whose word, original text, and value are all {@code word}
 */
public static CoreLabel wordFromString(String word) {
 final CoreLabel wordLabel = new CoreLabel();
 wordLabel.setWord(word);
 wordLabel.setOriginalText(word);
 wordLabel.setValue(word);
 return wordLabel;
}

/**
 * Convenience factory that builds a CoreLabel for a single word given as a String.
 * Handy in fixup or test code. The Text, OriginalText, and Value annotations
 * are all set to the supplied string.
 *
 * @param word The word string to wrap in a CoreLabel
 * @return A newly created CoreLabel whose word, original text, and value are all {@code word}
 */
public static CoreLabel wordFromString(String word) {
 final CoreLabel wordLabel = new CoreLabel();
 wordLabel.setWord(word);
 wordLabel.setOriginalText(word);
 wordLabel.setValue(word);
 return wordLabel;
}

/**
 * Tokenizes {@code str} twice with the PTB tokenizer and merges the results into fresh CoreLabels.
 * The default tokenization supplies each label's word, value, and begin/end positions; a second
 * tokenization run with {@code ptb3Escaping=false} supplies the original text of the token at the
 * same index.
 *
 * @param str The text to tokenize
 * @return One new CoreLabel per token of the default tokenization, in order
 */
public static List<CoreLabel> stanfordTokenize(String str) {
  TokenizerFactory<? extends HasWord> factory = PTBTokenizer.coreLabelFactory();
  // With ptb3Escaping=false, '(' is not converted to '-LRB-'. Per the original author's note,
  // the unescaped form must not be used as the token word because it causes dependency
  // resolution errors, so it is only used for the original text.
  Tokenizer<? extends HasWord> unescapedTokenizer = factory.getTokenizer(new StringReader(str), "ptb3Escaping=false");
  Tokenizer<? extends HasWord> escapedTokenizer = factory.getTokenizer(new StringReader(str));
  List<? extends HasWord> unescapedTokens = unescapedTokenizer.tokenize();
  List<? extends HasWord> escapedTokens = escapedTokenizer.tokenize();

  // The tokenizers are typed as HasWord, so the labels are rebuilt by hand.
  final int tokenCount = escapedTokens.size();
  List<CoreLabel> result = new ArrayList<>(tokenCount);
  for (int idx = 0; idx < tokenCount; idx++) {
    HasWord escaped = escapedTokens.get(idx);
    String word = escaped.word();
    String original = unescapedTokens.get(idx).word();
    // Positions are only available on CoreLabel, hence the cast.
    CoreLabel positioned = (CoreLabel) escaped;

    CoreLabel merged = new CoreLabel();
    merged.setWord(word);
    merged.setOriginalText(original);
    merged.setValue(word);
    merged.setBeginPosition(positioned.beginPosition());
    merged.setEndPosition(positioned.endPosition());
    result.add(merged);
  }
  return result;
}

/**
 * Builds a CoreLabel for one XML token.
 * The original text always holds the unmodified {@code tokenText}. The word and value hold
 * the newline token when the text matches {@code separatorPattern}; otherwise the text with
 * spaces replaced by non-breaking spaces when both {@code doNormalization} and
 * {@code normalizeSpace} are true; otherwise the unmodified text.
 *
 * @param tokenText The raw text of the token
 * @param doNormalization Whether space normalization may be applied to this token
 * @param charOffsetBegin Value stored as the CharacterOffsetBeginAnnotation
 * @param charOffsetEnd Value stored as the CharacterOffsetEndAnnotation
 * @return The newly constructed token
 */
private CoreLabel makeXmlToken(String tokenText, boolean doNormalization, int charOffsetBegin, int charOffsetEnd) {
 CoreLabel xmlToken = new CoreLabel();
 // Record the raw text before any mapping or normalization is applied.
 xmlToken.setOriginalText(tokenText);

 final String wordText;
 if (separatorPattern.matcher(tokenText).matches()) {
  // Separators are represented by the CoreNLP newline token.
  wordText = AbstractTokenizer.NEWLINE_TOKEN;
 } else if (doNormalization && normalizeSpace) {
  // Swap each plain space for a non-breaking space.
  wordText = tokenText.replace(' ', '\u00A0');
 } else {
  wordText = tokenText;
 }

 xmlToken.setWord(wordText);
 xmlToken.setValue(wordText);
 xmlToken.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charOffsetBegin);
 xmlToken.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charOffsetEnd);
 if (VERBOSE) {
  log.info("Adding token " + xmlToken.toShorterString());
 }
 return xmlToken;
}

/**
 * Builds a CoreLabel whose word and original text are both {@code str}.
 * No substring is taken of {@code str}. Character offsets are stored only when
 * {@code addIndices} is set.
 *
 * @param str The text used for both the word and the original text
 * @param begin The begin character offset
 * @param length The length added to {@code begin} to obtain the end character offset
 * @return The newly constructed CoreLabel
 */
public CoreLabel makeToken(String str, int begin, int length) {
 // When offsets are stored, construct with 8 to save a reallocation: at least 5 keys are expected.
 CoreLabel token = addIndices ? new CoreLabel(8) : new CoreLabel();
 token.setWord(str);
 token.setOriginalText(str);
 if (addIndices) {
  final int end = begin + length;
  token.set(CharacterOffsetBeginAnnotation.class, begin);
  token.set(CharacterOffsetEndAnnotation.class, end);
 }
 return token;
}

/**
 * Builds a CoreLabel whose value and word are {@code tokenText} and whose original text is
 * {@code originalText}, for the case where the OriginalTextAnnotation differs from the
 * TextAnnotation. No substring is taken of either string.
 * Character offsets are stored only when {@code addIndices} is set.
 *
 * @param tokenText The text used for both the value and the word
 * @param originalText The text stored as the original text
 * @param begin The begin character offset
 * @param length The length added to {@code begin} to obtain the end character offset
 * @return The newly constructed CoreLabel
 */
public CoreLabel makeToken(String tokenText, String originalText, int begin, int length) {
 final CoreLabel token;
 if (addIndices) {
  token = new CoreLabel(5);
 } else {
  token = new CoreLabel();
 }

 token.setValue(tokenText);
 token.setWord(tokenText);
 token.setOriginalText(originalText);

 if (!addIndices) {
  return token;
 }
 token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
 token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, begin + length);
 return token;
}

/**
 * Builds a CoreLabel whose value and word are {@code tokenText} and whose original text is
 * {@code originalText}, for the case where the OriginalTextAnnotation differs from the
 * TextAnnotation. No substring is taken of either string.
 * Character offsets are stored only when {@code addIndices} is set.
 *
 * @param tokenText The text used for both the value and the word
 * @param originalText The text stored as the original text
 * @param begin The begin character offset
 * @param length The length added to {@code begin} to obtain the end character offset
 * @return The newly constructed CoreLabel
 */
public CoreLabel makeToken(String tokenText, String originalText, int begin, int length) {
 final CoreLabel token;
 if (addIndices) {
  token = new CoreLabel(5);
 } else {
  token = new CoreLabel();
 }

 token.setValue(tokenText);
 token.setWord(tokenText);
 token.setOriginalText(originalText);

 if (!addIndices) {
  return token;
 }
 token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
 token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, begin + length);
 return token;
}

/**
 * Builds a CoreLabel whose value and word are {@code tokenText} and whose original text is
 * {@code originalText}, for the case where the OriginalTextAnnotation differs from the
 * TextAnnotation. No substring is taken of either string.
 * Character offsets are stored only when {@code addIndices} is set.
 *
 * @param tokenText The text used for both the value and the word
 * @param originalText The text stored as the original text
 * @param begin The begin character offset
 * @param length The length added to {@code begin} to obtain the end character offset
 * @return The newly constructed CoreLabel
 */
public CoreLabel makeToken(String tokenText, String originalText, int begin, int length) {
 final CoreLabel token;
 if (addIndices) {
  token = new CoreLabel(5);
 } else {
  token = new CoreLabel();
 }

 token.setValue(tokenText);
 token.setWord(tokenText);
 token.setOriginalText(originalText);

 if (!addIndices) {
  return token;
 }
 token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
 token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, begin + length);
 return token;
}

Popular methods of CoreLabel

get
set
word
beginPosition
endPosition
originalText
<init>
This constructor attempts to parse the String keys into Class keys. It's mainly useful for reading f
index
lemma
setWord
Set the word value for the label. Also, clears the lemma, since that may have changed if the word ch
getString
setIndex

Popular in Java

Running tasks concurrently on multiple threads
findViewById (Activity)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
notifyDataSetChanged (ArrayAdapter)
SortedMap (java.util)
A map that has its keys ordered. The sorting is according to either the natural ordering of its keys
ConcurrentHashMap (java.util.concurrent)
A plug-in replacement for JDK1.5 java.util.concurrent.ConcurrentHashMap. This version is based on or
Cipher (javax.crypto)
This class provides access to implementations of cryptographic ciphers for encryption and decryption
Menu (java.awt)
Modifier (javassist)
The Modifier class provides static methods and constants to decode class and member access modifiers
JComboBox (javax.swing)
Top 12 Jupyter Notebook extensions

How to use the setOriginalText method in edu.stanford.nlp.ling.CoreLabel

Best Java code snippets using edu.stanford.nlp.ling.CoreLabel.setOriginalText (Showing top 20 results out of 315)

How to use
setOriginalText
method
in
edu.stanford.nlp.ling.CoreLabel