// TODO: not adapted for focus annotations
@Override
public List<Feature> extract(JCas jcas)
{
    List<Feature> featList = new ArrayList<Feature>();
    List<String> tokens = JCasUtil.toText(JCasUtil.select(jcas, Token.class));
    int nrOfTokens = tokens.size();

    // Matches tokens that consist only of letters and digits and contain at
    // least one digit, e.g. "mp3" or "2nd"
    Pattern p = Pattern.compile("^[a-zA-Z0-9]*[0-9]+[a-zA-Z0-9]*$");
    int pmatches = 0;
    for (String t : tokens) {
        Matcher m = p.matcher(t);
        if (m.find()) {
            pmatches++;
            System.out.println(t + " matches Words With Numbers");
        }
    }
    featList.add(new Feature(FEATURE_NAME, (double) pmatches / nrOfTokens));
    return featList;
}
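// A quick self-contained check of what the pattern above accepts: only tokens
// made entirely of letters and digits that contain at least one digit. The
// sample strings are illustrative, not taken from the original code.
Pattern p = Pattern.compile("^[a-zA-Z0-9]*[0-9]+[a-zA-Z0-9]*$");
System.out.println(p.matcher("mp3").find());    // true
System.out.println(p.matcher("2nd").find());    // true
System.out.println(p.matcher("hello").find());  // false: no digit
System.out.println(p.matcher("3.5").find());    // false: "." is not in the character class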
// TODO: not adapted for focus annotations
@Override
public List<Feature> extract(JCas jcas)
    throws TextClassificationException
{
    if (topicFilePath == null || topicFilePath.isEmpty()) {
        // Fail fast; continuing with a null path would only cause an NPE below
        throw new TextClassificationException("Path to word list must be set!");
    }

    List<Feature> featList = new ArrayList<Feature>();
    List<String> tokens = JCasUtil.toText(JCasUtil.select(jcas, Token.class));
    try {
        List<String> topics = FileUtils.readLines(new File(topicFilePath));
        for (String t : topics) {
            featList.addAll(countWordHits(t, tokens));
        }
    }
    catch (IOException e) {
        // Surface the problem instead of printing the stack trace and
        // silently returning an incomplete feature list
        throw new TextClassificationException(e);
    }
    return featList;
}
public static void assertToken(String[] aExpected, Collection<Token> aActual)
{
    if (aExpected == null) {
        return;
    }

    List<String> expected = asList(aExpected);
    List<String> actual = toText(aActual);

    System.out.printf("%-20s - Expected: %s%n", "Tokens", asCopyableString(expected));
    System.out.printf("%-20s - Actual  : %s%n", "Tokens", asCopyableString(actual));

    assertEquals(asCopyableString(expected, true), asCopyableString(actual, true));
}
public static void assertCoreference(String[][] aExpected, Collection<CoreferenceChain> aActual)
{
    List<CoreferenceChain> actual = new ArrayList<CoreferenceChain>(aActual);

    for (String[] i : aExpected) {
        System.out.printf("%-20s - Expected: %s%n", "Coreference", asCopyableString(asList(i)));
    }
    for (CoreferenceChain i : actual) {
        System.out.printf("%-20s - Actual  : %s%n", "Coreference",
                asCopyableString(toText(i.links())));
    }

    if (aExpected.length == aActual.size()) {
        for (int i = 0; i < actual.size(); i++) {
            assertEquals(asCopyableString(asList(aExpected[i]), true),
                    asCopyableString(toText(actual.get(i).links()), true));
        }
    }
    else {
        fail("Expected [" + aExpected.length + "] chains but found [" + aActual.size() + "]");
    }
}
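// Hypothetical usage of assertCoreference: each inner array is one expected
// chain, in the same order as the chains occur in the CAS. The mention
// strings here are invented for illustration.
String[][] expectedChains = new String[][] {
        new String[] { "John", "he", "his" },
        new String[] { "the car", "it" } };
assertCoreference(expectedChains, JCasUtil.select(jcas, CoreferenceChain.class));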
public static void assertSentence(String[] aExpected, Collection<Sentence> aActual)
{
    if (aExpected == null) {
        return;
    }

    List<String> expected = asList(aExpected);
    List<String> actual = toText(aActual);

    System.out.printf("%-20s - Expected: %s%n", "Sentences", asCopyableString(expected));
    System.out.printf("%-20s - Actual  : %s%n", "Sentences", asCopyableString(actual));

    assertEquals(asCopyableString(expected, true), asCopyableString(actual, true));
}
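// Hypothetical test usage of the assert helpers above, assuming `jcas` holds
// a document that has already been run through a segmenter:
String[] expectedSentences = new String[] { "This is a test ." };
String[] expectedTokens = new String[] { "This", "is", "a", "test", "." };
assertSentence(expectedSentences, JCasUtil.select(jcas, Sentence.class));
assertToken(expectedTokens, JCasUtil.select(jcas, Token.class));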
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget)
    throws TextClassificationException
{
    if (topicFilePath == null || topicFilePath.isEmpty()) {
        throw new TextClassificationException("Path to word list must be set!");
    }

    Set<Feature> features = new HashSet<Feature>();
    List<String> tokens = JCasUtil.toText(JCasUtil.selectCovered(jcas, Token.class, aTarget));
    try {
        List<String> topics = FileUtils.readLines(new File(topicFilePath), "utf-8");
        for (String t : topics) {
            features.addAll(countWordHits(t, tokens));
        }
    }
    catch (IOException e) {
        throw new TextClassificationException(e);
    }
    return features;
}
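// countWordHits is referenced above but not shown. A minimal sketch of what it
// might look like, assuming it emits one relative-frequency feature per word;
// the real implementation may well differ.
private Set<Feature> countWordHits(String word, List<String> tokens)
{
    int hits = 0;
    for (String t : tokens) {
        if (t.equalsIgnoreCase(word)) {
            hits++;
        }
    }
    double ratio = tokens.isEmpty() ? 0.0 : (double) hits / tokens.size();
    return Collections.singleton(new Feature("wordHits_" + word, ratio));
}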
public static FrequencyDistribution<String> getDocumentSkipNgrams(JCas jcas,
        boolean lowerCaseNGrams, boolean filterPartialMatches, int minN, int maxN, int skipN,
        Set<String> stopwords)
{
    FrequencyDistribution<String> documentNgrams = new FrequencyDistribution<String>();
    for (Sentence s : select(jcas, Sentence.class)) {
        for (List<String> ngram : new SkipNgramStringListIterable(
                toText(selectCovered(Token.class, s)), minN, maxN, skipN)) {
            if (passesNgramFilter(ngram, stopwords, filterPartialMatches)) {
                String ngramString = StringUtils.join(ngram, NGRAM_GLUE);
                if (lowerCaseNGrams) {
                    ngramString = ngramString.toLowerCase();
                }
                documentNgrams.inc(ngramString);
            }
        }
    }
    return documentNgrams;
}
public static FrequencyDistribution<String> getDocumentNgrams(JCas jcas,
        boolean lowerCaseNGrams, boolean filterPartialMatches, int minN, int maxN,
        Set<String> stopwords)
{
    FrequencyDistribution<String> documentNgrams = new FrequencyDistribution<String>();
    // TODO parameterize type
    for (Sentence s : select(jcas, Sentence.class)) {
        for (List<String> ngram : new NGramStringListIterable(
                toText(selectCovered(Token.class, s)), minN, maxN)) {
            if (passesNgramFilter(ngram, stopwords, filterPartialMatches)) {
                String ngramString = StringUtils.join(ngram, NGRAM_GLUE);
                if (lowerCaseNGrams) {
                    ngramString = ngramString.toLowerCase();
                }
                documentNgrams.inc(ngramString);
            }
        }
    }
    return documentNgrams;
}
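// Hypothetical call site for getDocumentNgrams above: count all lower-cased
// uni-, bi- and trigrams in the document with an empty stopword list.
FrequencyDistribution<String> fd = getDocumentNgrams(jcas, true, false, 1, 3,
        new HashSet<String>());
for (String ngram : fd.getKeys()) {
    System.out.println(ngram + "\t" + fd.getCount(ngram));
}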
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException
{
    for (Annotation window : JCasUtil.select(jCas, this.windowClass)) {
        List<TOKEN_TYPE> tokens = this.tokenOps.selectTokens(jCas, window);
        if (tokens.isEmpty()) {
            // Skip empty windows instead of aborting the whole document
            continue;
        }
        List<String> tokenStrings = JCasUtil.toText(tokens);

        // As of version 1.3.0, ClearNLP does all processing through its own
        // dependency tree structure
        DEPTree clearNlpDepTree = new DEPTree(tokenStrings);
        this.tagger.process(clearNlpDepTree);

        // ClearNLP uses index 0 for the artificial root node, so the POS tag
        // indices are shifted by one relative to the token indices
        for (int i = 0; i < tokens.size(); i++) {
            TOKEN_TYPE token = tokens.get(i);
            DEPNode node = clearNlpDepTree.get(i + 1);
            this.tokenOps.setPos(jCas, token, node.getPOSTag());
        }
    }
}
public static FrequencyDistribution<String> getDocumentSkipNgrams(JCas jcas,
        boolean lowerCaseNGrams, boolean filterPartialMatches, int minN, int maxN, int skipN,
        Set<String> stopwords)
{
    FrequencyDistribution<String> documentNgrams = new FrequencyDistribution<String>();
    for (Sentence s : select(jcas, Sentence.class)) {
        for (List<String> ngram : new SkipNgramStringListIterable(
                toText(selectCovered(Token.class, s)), minN, maxN, skipN)) {
            if (lowerCaseNGrams) {
                ngram = lower(ngram);
            }
            if (passesNgramFilter(ngram, stopwords, filterPartialMatches)) {
                String ngramString = StringUtils.join(ngram, NGRAM_GLUE);
                documentNgrams.inc(ngramString);
            }
        }
    }
    return documentNgrams;
}
forms.addAll(JCasUtil.toText(tokens));
public static FrequencyDistribution<String> getDocumentSkipNgrams(JCas jcas, Annotation anno,
        boolean lowerCaseNGrams, boolean filterPartialMatches, int minN, int maxN, int skipN,
        Set<String> stopwords)
{
    FrequencyDistribution<String> documentNgrams = new FrequencyDistribution<String>();
    for (Sentence s : selectCovered(jcas, Sentence.class, anno)) {
        for (List<String> ngram : new SkipNgramStringListIterable(
                toText(selectCovered(Token.class, s)), minN, maxN, skipN)) {
            if (lowerCaseNGrams) {
                ngram = lower(ngram);
            }
            if (passesNgramFilter(ngram, stopwords, filterPartialMatches)) {
                String ngramString = StringUtils.join(ngram, NGRAM_GLUE);
                documentNgrams.inc(ngramString);
            }
        }
    }
    return documentNgrams;
}
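// Hypothetical call site for the covered variant above. With minN = maxN = 2
// and skipN = 1, the token sequence "a b c" yields the plain bigrams "a b" and
// "b c" plus the 1-skip bigram "a c" (the exact enumeration is up to
// SkipNgramStringListIterable). `coveringAnnotation` is an assumed variable.
FrequencyDistribution<String> skipNgrams = getDocumentSkipNgrams(jcas, coveringAnnotation,
        true, false, 2, 2, 1, new HashSet<String>());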
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException
{
    for (Annotation window : JCasUtil.select(jCas, this.windowClass)) {
        List<TOKEN_TYPE> tokens = this.tokenOps.selectTokens(jCas, window);
        List<String> tokenStrings = JCasUtil.toText(tokens);

        // All processing in ClearNLP goes through the DEPTree structure,
        // so populate it with token and POS tag info
        DEPTree depTree = new DEPTree(tokenStrings);
        for (int i = 1; i < depTree.size(); i++) {
            TOKEN_TYPE token = tokens.get(i - 1);
            DEPNode node = depTree.get(i);
            node.setPOSTag(this.tokenOps.getPos(jCas, token));
        }

        // Run the morphological analyzer
        this.mpAnalyzer.process(depTree);

        // Pull the lemmas out of the tree and write them back to the CAS tokens
        for (int i = 1; i < depTree.size(); i++) {
            TOKEN_TYPE token = tokens.get(i - 1);
            DEPNode node = depTree.get(i);
            this.tokenOps.setLemma(jCas, token, node.getLemma());
        }
    }
}
List<String> tokenStrings = JCasUtil.toText(tokens);
for (SpellingAnomaly anomaly : JCasUtil.selectCovered(jcas, SpellingAnomaly.class, sentence)) {