edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder.createTextAnnotationFromTokens java code examples

/**
 * Builds a {@link TextAnnotation} from text that is already split into sentences and tokens.
 * This is a convenience overload: it forwards to the three-argument
 * {@code createTextAnnotationFromTokens} with empty strings as the first two arguments.
 *
 * @param tokenizedSentences A list of sentences, each one being an array of tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromTokens(List<String[]> tokenizedSentences) {
  // Both leading arguments of the full overload are left empty by this shortcut.
  final String unspecified = "";
  return createTextAnnotationFromTokens(unspecified, unspecified, tokenizedSentences);
}

/**
 * Builds a {@link TextAnnotation} from text that is already split into sentences and tokens.
 * This is a convenience overload: it forwards to the three-argument
 * {@code createTextAnnotationFromTokens} with empty strings as the first two arguments.
 *
 * @param tokenizedSentences A list of sentences, each one being an array of tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromTokens(List<String[]> tokenizedSentences) {
  // Both leading arguments of the full overload are left empty by this shortcut.
  final String unspecified = "";
  return createTextAnnotationFromTokens(unspecified, unspecified, tokenizedSentences);
}

/**
 * Creates a {@link TextAnnotation} from a string whose tokens are separated by spaces. The
 * whole string is treated as a single sentence.
 *
 * <p>The split is on exactly one space character, so consecutive spaces in {@code text}
 * produce empty tokens.
 *
 * @param text the pre-tokenized text, with tokens delimited by a single space
 * @return the annotation built by
 *         {@code BasicTextAnnotationBuilder.createTextAnnotationFromTokens} for that sentence
 */
public static TextAnnotation createFromTokenizedString(String text) {
  String[] tokens = text.split(" ");
  return BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
      Collections.singletonList(tokens));
}

/**
 * Creates a {@link TextAnnotation} from a string whose tokens are separated by spaces. The
 * whole string is treated as a single sentence.
 *
 * <p>The split is on exactly one space character, so consecutive spaces in {@code text}
 * produce empty tokens.
 *
 * @param text the pre-tokenized text, with tokens delimited by a single space
 * @return the annotation built by
 *         {@code BasicTextAnnotationBuilder.createTextAnnotationFromTokens} for that sentence
 */
public static TextAnnotation createFromTokenizedString(String text) {
  String[] tokens = text.split(" ");
  return BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
      Collections.singletonList(tokens));
}

/**
 * Builds a {@link TextAnnotation} from pre-tokenized sentences and then hands it to
 * {@code addViewsFromAnnotatorService}.
 *
 * @param text the sentences, each given as an array of tokens
 * @return the annotation after {@code addViewsFromAnnotatorService} has been applied to it
 * @throws AnnotatorException declared by this method; presumably raised while views are being
 *         added (confirm against {@code addViewsFromAnnotatorService})
 */
public TextAnnotation preProcess(List<String[]> text) throws AnnotatorException {
  final TextAnnotation annotation =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens(text);
  addViewsFromAnnotatorService(annotation);
  return annotation;
}

/**
 * A way to create a {@link TextAnnotation} from pre-tokenized text from Python.
 *
 * <p>The parameter is typed as {@code List<List<Object>>} so that the method can be called from
 * cogcomp-nlpy (using pyjnius). Each inner list is copied into a {@code String[]}, casting every
 * element to {@link String}, and the result is passed to the default builder with empty strings
 * as its first two arguments.
 *
 * @param tokenizedSentences A list of sentences, each one being a list of tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromListofListofTokens(List<List<Object>> tokenizedSentences) {
  List<String[]> sentencesAsArrays = new ArrayList<String[]>();
  for (List<Object> rawSentence : tokenizedSentences) {
    // Copy the inner list into a String array, one slot per token.
    String[] tokens = new String[rawSentence.size()];
    int next = 0;
    for (Object rawToken : rawSentence) {
      tokens[next++] = (String) rawToken;
    }
    sentencesAsArrays.add(tokens);
  }
  // Delegate to the default TextAnnotation builder function.
  return createTextAnnotationFromTokens("", "", sentencesAsArrays);
}

/**
 * Builds a {@link TextAnnotation} from pre-tokenized sentences and then hands it to
 * {@code addViewsFromAnnotatorService}.
 *
 * @param text the sentences, each given as an array of tokens
 * @return the annotation after {@code addViewsFromAnnotatorService} has been applied to it
 * @throws AnnotatorException declared by this method; presumably raised while views are being
 *         added (confirm against {@code addViewsFromAnnotatorService})
 */
public TextAnnotation preProcess(List<String[]> text) throws AnnotatorException {
  final TextAnnotation annotation =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens(text);
  addViewsFromAnnotatorService(annotation);
  return annotation;
}

/**
 * A way to create a {@link TextAnnotation} from pre-tokenized text from Python.
 *
 * <p>The parameter is typed as {@code List<List<Object>>} so that the method can be called from
 * cogcomp-nlpy (using pyjnius). Each inner list is copied into a {@code String[]}, casting every
 * element to {@link String}, and the result is passed to the default builder with empty strings
 * as its first two arguments.
 *
 * @param tokenizedSentences A list of sentences, each one being a list of tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromListofListofTokens(List<List<Object>> tokenizedSentences) {
  List<String[]> sentencesAsArrays = new ArrayList<String[]>();
  for (List<Object> rawSentence : tokenizedSentences) {
    // Copy the inner list into a String array, one slot per token.
    String[] tokens = new String[rawSentence.size()];
    int next = 0;
    for (Object rawToken : rawSentence) {
      tokens[next++] = (String) rawToken;
    }
    sentencesAsArrays.add(tokens);
  }
  // Delegate to the default TextAnnotation builder function.
  return createTextAnnotationFromTokens("", "", sentencesAsArrays);
}

/**
 * Builds a single-sentence {@link TextAnnotation} from {@code tokens}, leaving out the first
 * element, and then passes {@code pos}, {@code lemma} and {@code chunk} to {@code addView} in
 * that order.
 *
 * @param corpusId forwarded as the first argument of the builder
 * @param sentId forwarded as the second argument of the builder
 * @param tokens the sentence tokens; the element at index 0 is the root token and is dropped
 * @return the annotation after the three {@code addView} calls
 * @throws AnnotatorException declared by this method; presumably raised by {@code addView}
 *         (confirm)
 */
TextAnnotation annotate(String corpusId, String sentId, String[] tokens)
    throws AnnotatorException {
  // Ignore the root token: keep everything from index 1 onwards.
  String[] withoutRoot = Arrays.copyOfRange(tokens, 1, tokens.length);
  List<String[]> singleSentence = Collections.singletonList(withoutRoot);
  TextAnnotation annotation =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId, sentId, singleSentence);
  annotation.addView(pos);
  annotation.addView(lemma);
  annotation.addView(chunk);
  return annotation;
}

/**
 * Builds a single-sentence {@link TextAnnotation} from {@code tokens}, leaving out the first
 * element, and then passes {@code pos}, {@code lemma} and {@code chunk} to {@code addView} in
 * that order.
 *
 * @param corpusId forwarded as the first argument of the builder
 * @param sentId forwarded as the second argument of the builder
 * @param tokens the sentence tokens; the element at index 0 is the root token and is dropped
 * @return the annotation after the three {@code addView} calls
 * @throws AnnotatorException declared by this method; presumably raised by {@code addView}
 *         (confirm)
 */
TextAnnotation annotate(String corpusId, String sentId, String[] tokens)
    throws AnnotatorException {
  // Ignore the root token: keep everything from index 1 onwards.
  String[] withoutRoot = Arrays.copyOfRange(tokens, 1, tokens.length);
  List<String[]> singleSentence = Collections.singletonList(withoutRoot);
  TextAnnotation annotation =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId, sentId, singleSentence);
  annotation.addView(pos);
  annotation.addView(lemma);
  annotation.addView(chunk);
  return annotation;
}

/**
 * Small demo: tells {@code gazetteersInstance} to ignore two gazetteers, builds a
 * {@link TextAnnotation} from two hard-coded sentences, lets {@code gazetteersInstance} add its
 * view, and prints both the annotation and that view to standard output.
 *
 * @param args unused
 * @throws EdisonException declared by this method
 * @throws AnnotatorException declared by this method
 */
public static void main(String[] args) throws EdisonException, AnnotatorException {
  gazetteersInstance.ignoreGazetteer("Weapons.gz");
  gazetteersInstance.ignoreGazetteer("Weapons.Missile.gz");

  // Two whitespace-tokenized example sentences.
  String[] firstSentence = "I live in Chicago , Illinois .".split("\\s+");
  String[] secondSentence = "I met George Bush .".split("\\s+");
  List<String[]> sentences = Arrays.asList(firstSentence, secondSentence);

  TextAnnotation annotation =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens(sentences);
  gazetteersInstance.addView(annotation);

  System.out.println(annotation.toString());
  String gazetteerViewName = gazetteersInstance.getViewName();
  System.out.println(annotation.getView(gazetteerViewName).toString());
}

/**
 * Create a new {@link TextAnnotation} from a single line of bracketed text.
 *
 * <p>The first and last characters of {@code line} are stripped, the remainder is split into
 * pairs with {@code splitWordsPattern}, and each pair is split with {@code whitespacePattern}:
 * element 0 of a pair is used as the POS label and element 1 as the word.
 *
 * @param line The bracketed string to be processed
 * @param lineId The ID of the {@link TextAnnotation}
 * @return A {@link TextAnnotation} with a populated {@link ViewNames#POS} view
 */
public TextAnnotation createTextAnnotation(String line, String lineId) {
  String unbracketed = line.substring(1, line.length() - 1);
  String[] pairs = splitWordsPattern.split(unbracketed);

  // Parallel arrays: tokens[i] is the word and tags[i] its POS label.
  String[] tokens = new String[pairs.length];
  String[] tags = new String[pairs.length];
  for (int i = 0; i < pairs.length; i++) {
    String[] tagAndWord = whitespacePattern.split(pairs[i]);
    tokens[i] = tagAndWord[1];
    tags[i] = tagAndWord[0];
  }

  List<String[]> tokenizedSentences = Collections.singletonList(tokens);
  TextAnnotation annotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
      corpusName, lineId, tokenizedSentences);

  TokenLabelView posView = new TokenLabelView(ViewNames.POS, annotation);
  for (int i = 0; i < tags.length; i++) {
    posView.addTokenLabel(i, tags[i], 1.0);
  }
  annotation.addView(ViewNames.POS, posView);
  return annotation;
}

/**
 * Create a new {@link TextAnnotation} from a single line of bracketed text.
 *
 * <p>The first and last characters of {@code line} are stripped, the remainder is split into
 * pairs with {@code splitWordsPattern}, and each pair is split with {@code whitespacePattern}:
 * element 0 of a pair is used as the POS label and element 1 as the word.
 *
 * @param line The bracketed string to be processed
 * @param lineId The ID of the {@link TextAnnotation}
 * @return A {@link TextAnnotation} with a populated {@link ViewNames#POS} view
 */
public TextAnnotation createTextAnnotation(String line, String lineId) {
  String unbracketed = line.substring(1, line.length() - 1);
  String[] pairs = splitWordsPattern.split(unbracketed);

  // Parallel arrays: tokens[i] is the word and tags[i] its POS label.
  String[] tokens = new String[pairs.length];
  String[] tags = new String[pairs.length];
  for (int i = 0; i < pairs.length; i++) {
    String[] tagAndWord = whitespacePattern.split(pairs[i]);
    tokens[i] = tagAndWord[1];
    tags[i] = tagAndWord[0];
  }

  List<String[]> tokenizedSentences = Collections.singletonList(tokens);
  TextAnnotation annotation = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
      corpusName, lineId, tokenizedSentences);

  TokenLabelView posView = new TokenLabelView(ViewNames.POS, annotation);
  for (int i = 0; i < tags.length; i++) {
    posView.addTokenLabel(i, tags[i], 1.0);
  }
  annotation.addView(ViewNames.POS, posView);
  return annotation;
}

/**
 * Small demo: tells {@code gazetteersInstance} to ignore two gazetteers, builds a
 * {@link TextAnnotation} from two hard-coded sentences, lets {@code gazetteersInstance} add its
 * view, and prints both the annotation and that view to standard output.
 *
 * @param args unused
 * @throws EdisonException declared by this method
 * @throws AnnotatorException declared by this method
 */
public static void main(String[] args) throws EdisonException, AnnotatorException {
  gazetteersInstance.ignoreGazetteer("Weapons.gz");
  gazetteersInstance.ignoreGazetteer("Weapons.Missile.gz");

  // Two whitespace-tokenized example sentences.
  String[] firstSentence = "I live in Chicago , Illinois .".split("\\s+");
  String[] secondSentence = "I met George Bush .".split("\\s+");
  List<String[]> sentences = Arrays.asList(firstSentence, secondSentence);

  TextAnnotation annotation =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens(sentences);
  gazetteersInstance.addView(annotation);

  System.out.println(annotation.toString());
  String gazetteerViewName = gazetteersInstance.getViewName();
  System.out.println(annotation.getView(gazetteerViewName).toString());
}

    Collections.singletonList(tokens.toArray(new String[tokens.size()]));
TextAnnotation ta =
    BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId,
        String.valueOf(taId), tokenizedSentence);
addGoldView(ta, labels);

    Collections.singletonList(tokens.toArray(new String[tokens.size()]));
TextAnnotation ta =
    BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId,
        String.valueOf(taId), tokenizedSentence);
addGoldView(ta, labels);

BasicTextAnnotationBuilder.createTextAnnotationFromTokens(PENN_TREEBANK_WSJ, id,
    Collections.singletonList(text));

BasicTextAnnotationBuilder.createTextAnnotationFromTokens(PENN_TREEBANK_WSJ, id,
    Collections.singletonList(text));

/**
 * Builds a small fixture {@link TextAnnotation} for the three tokens "I", "do", "." and
 * attaches four views to it, in this order: POS, NER (added without any spans),
 * SHALLOW_PARSE and LEMMA.
 *
 * @return the annotation with the four views added
 */
protected TextAnnotation initializeDummySentenceVerb() {
  List<String[]> sentences = new ArrayList<>();
  sentences.add(new String[] {"I", "do", "."});
  TextAnnotation annotation =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens("", "", sentences);

  // POS view: one tag per token.
  String[] posTags = {"PRP", "VBP", "."};
  TokenLabelView posView = new TokenLabelView(ViewNames.POS, "Test", annotation, 1.0);
  for (int i = 0; i < posTags.length; i++) {
    posView.addTokenLabel(i, posTags[i], 1d);
  }
  annotation.addView(ViewNames.POS, posView);

  // NER view: created and added with no labels.
  annotation.addView(ViewNames.NER, new SpanLabelView(ViewNames.NER, "test", annotation, 1d));

  // Shallow-parse view: "I" is an NP, "do" is a VP; the final "." gets no chunk.
  SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "test", annotation, 1d);
  chunkView.addSpanLabel(0, 1, "NP", 1d);
  chunkView.addSpanLabel(1, 2, "VP", 1d);
  annotation.addView(ViewNames.SHALLOW_PARSE, chunkView);

  // Lemma view: one lemma per token.
  String[] lemmas = {"i", "do", "."};
  TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, "test", annotation, 1d);
  for (int i = 0; i < lemmas.length; i++) {
    lemmaView.addTokenLabel(i, lemmas[i], 1d);
  }
  annotation.addView(ViewNames.LEMMA, lemmaView);
  return annotation;
}

/**
 * Builds a small fixture {@link TextAnnotation} for the three tokens "I", "do", "." and
 * attaches four views to it, in this order: POS, NER (added without any spans),
 * SHALLOW_PARSE and LEMMA.
 *
 * @return the annotation with the four views added
 */
protected TextAnnotation initializeDummySentenceVerb() {
  List<String[]> sentences = new ArrayList<>();
  sentences.add(new String[] {"I", "do", "."});
  TextAnnotation annotation =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens("", "", sentences);

  // POS view: one tag per token.
  String[] posTags = {"PRP", "VBP", "."};
  TokenLabelView posView = new TokenLabelView(ViewNames.POS, "Test", annotation, 1.0);
  for (int i = 0; i < posTags.length; i++) {
    posView.addTokenLabel(i, posTags[i], 1d);
  }
  annotation.addView(ViewNames.POS, posView);

  // NER view: created and added with no labels.
  annotation.addView(ViewNames.NER, new SpanLabelView(ViewNames.NER, "test", annotation, 1d));

  // Shallow-parse view: "I" is an NP, "do" is a VP; the final "." gets no chunk.
  SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "test", annotation, 1d);
  chunkView.addSpanLabel(0, 1, "NP", 1d);
  chunkView.addSpanLabel(1, 2, "VP", 1d);
  annotation.addView(ViewNames.SHALLOW_PARSE, chunkView);

  // Lemma view: one lemma per token.
  String[] lemmas = {"i", "do", "."};
  TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, "test", annotation, 1d);
  for (int i = 0; i < lemmas.length; i++) {
    lemmaView.addTokenLabel(i, lemmas[i], 1d);
  }
  annotation.addView(ViewNames.LEMMA, lemmaView);
  return annotation;
}

Javadoc

The default way to create a TextAnnotation from pre-tokenized text.

Popular methods of BasicTextAnnotationBuilder

Popular in Java

Creating JSON documents from java classes using gson
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
findViewById (Activity)
onRequestPermissionsResult (Fragment)
BitSet (java.util)
The BitSet class implements a bit array [http://en.wikipedia.org/wiki/Bit_array]. Each element is eit
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
SortedMap (java.util)
A map that has its keys ordered. The sorting is according to either the natural ordering of its keys
Executor (java.util.concurrent)
An object that executes submitted Runnable tasks. This interface provides a way of decoupling task s
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
Project (org.apache.tools.ant)
Central representation of an Ant project. This class defines an Ant project with all of its targets,
Top Vim plugins

How to use the createTextAnnotationFromTokens method in edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder

Best Java code snippets using edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder.createTextAnnotationFromTokens (Showing top 20 results out of 315)

How to use
createTextAnnotationFromTokens
method
in
edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder