org.apache.uima.fit.util.CasUtil.indexCovered java code examples

/**
 * Collects POS samples from the given CASes, visiting every sentence together with the tokens
 * it covers.
 *
 * <p>For each CAS, the tokens are indexed by their covering sentence via {@code indexCovered}
 * and each sentence/token group is handed to {@code createPosSample}. Only groups for which a
 * sample is actually produced contribute to the result.
 *
 * @param aCasses
 *          the CASes to extract samples from
 * @return the collected samples; empty if no sample could be created
 */
private List<POSSample> extractPosSamples(List<CAS> aCasses)
{
  List<POSSample> posSamples = new ArrayList<>();
  for (CAS cas : aCasses) {
    Type sentenceType = getType(cas, Sentence.class);
    Type tokenType = getType(cas, Token.class);
    Map<AnnotationFS, Collection<AnnotationFS>> sentences =
      indexCovered(cas, sentenceType, tokenType);
    for (Map.Entry<AnnotationFS, Collection<AnnotationFS>> e : sentences.entrySet()) {
      AnnotationFS sentence = e.getKey();
      Collection<AnnotationFS> tokens = e.getValue();

      // NOTE(review): assumes createPosSample returns an Optional - confirm. Adding to the
      // list is a pure side effect, so ifPresent is used instead of map (whose mapped result
      // was being discarded).
      createPosSample(cas, sentence, tokens).ifPresent(posSamples::add);
    }
  }

  LOG.debug("Extracted {} POS samples", posSamples.size());

  return posSamples;
}

/**
 * Builds one name sample per sentence of the given CASes and keeps those that contain at least
 * one name.
 *
 * @param aCasses
 *          the CASes to extract samples from
 * @return the samples whose name array is non-empty
 */
private List<NameSample> extractNameSamples(List<CAS> aCasses)
{
  List<NameSample> collected = new ArrayList<>();
  for (CAS cas : aCasses) {
    // Group the tokens of this CAS by the sentence that covers them.
    Map<AnnotationFS, Collection<AnnotationFS>> tokensBySentence = indexCovered(cas,
      getType(cas, Sentence.class), getType(cas, Token.class));

    for (Entry<AnnotationFS, Collection<AnnotationFS>> group : tokensBySentence.entrySet()) {
      NameSample candidate = createNameSample(cas, group.getKey(), group.getValue());
      // Sentences without any name are not added to the result.
      if (candidate.getNames().length == 0) {
        continue;
      }
      collected.add(candidate);
    }
  }
  return collected;
}

/**
 * Collects, per CAS, the annotations of the recommender's layer that are covered by a sentence
 * and whose recommender feature has no value yet.
 *
 * <p>The layer type and feature are resolved once from the first CAS in the list and reused
 * for all CASes. Each CAS contributes one pair consisting of its document URI and the
 * annotations found in it.
 *
 * @param aCasList
 *          the CASes to inspect
 * @return one (document URI, annotations) pair per CAS; empty if {@code aCasList} is empty
 */
private Collection<ImmutablePair<String, Collection<AnnotationFS>>> extractNamedEntities(
  List<CAS> aCasList)
{
  Collection<ImmutablePair<String, Collection<AnnotationFS>>> nameSamples = new HashSet<>();
  // Without any CAS there is no type system to resolve the layer from; previously this case
  // failed with an IndexOutOfBoundsException on get(0).
  if (aCasList.isEmpty()) {
    return nameSamples;
  }

  // This is the type of the recommender's layer (not the Token type).
  Type layerType = org.apache.uima.fit.util.CasUtil
    .getType(aCasList.get(0), recommender.getLayer().getName());
  Feature feature = layerType.getFeatureByBaseName(recommender.getFeature().getName());
  for (CAS cas : aCasList) {
    Collection<AnnotationFS> namesPerDocument = new ArrayList<>();
    Type sentenceType = getType(cas, Sentence.class);
    Map<AnnotationFS, Collection<AnnotationFS>> sentences = indexCovered(cas, sentenceType,
      layerType);
    for (Map.Entry<AnnotationFS, Collection<AnnotationFS>> e : sentences.entrySet()) {
      Collection<AnnotationFS> unlabeled = e.getValue().stream()
        // If the identifier has not been set
        .filter(a -> a.getStringValue(feature) == null)
        .collect(Collectors.toSet());
      namesPerDocument.addAll(unlabeled);
    }
    // TODO #176 use the document Id once it is available in the CAS
    nameSamples.add(
      new ImmutablePair<>(DocumentMetaData.get(cas).getDocumentUri(), namesPerDocument));
  }
  return nameSamples;
}

/**
 * Collects, per CAS, the annotations of the recommender's layer that are covered by a sentence
 * and whose recommender feature has no value yet.
 *
 * <p>The layer type and feature are resolved once from the first CAS in the list and reused
 * for all CASes. Each CAS contributes one pair consisting of its document URI and the
 * annotations found in it.
 *
 * @param aCasList
 *          the CASes to inspect
 * @return one (document URI, annotations) pair per CAS; empty if {@code aCasList} is empty
 */
private Collection<ImmutablePair<String, Collection<AnnotationFS>>> extractNamedEntities(
  List<CAS> aCasList)
{
  Collection<ImmutablePair<String, Collection<AnnotationFS>>> nameSamples = new HashSet<>();
  // Without any CAS there is no type system to resolve the layer from; previously this case
  // failed with an IndexOutOfBoundsException on get(0).
  if (aCasList.isEmpty()) {
    return nameSamples;
  }

  // This is the type of the recommender's layer (not the Token type).
  Type layerType = org.apache.uima.fit.util.CasUtil
    .getType(aCasList.get(0), recommender.getLayer().getName());
  Feature feature = layerType.getFeatureByBaseName(recommender.getFeature().getName());
  for (CAS cas : aCasList) {
    Collection<AnnotationFS> namesPerDocument = new ArrayList<>();
    Type sentenceType = getType(cas, Sentence.class);
    Map<AnnotationFS, Collection<AnnotationFS>> sentences = indexCovered(cas, sentenceType,
      layerType);
    for (Map.Entry<AnnotationFS, Collection<AnnotationFS>> e : sentences.entrySet()) {
      Collection<AnnotationFS> unlabeled = e.getValue().stream()
        // If the identifier has not been set
        .filter(a -> a.getStringValue(feature) == null)
        .collect(Collectors.toSet());
      namesPerDocument.addAll(unlabeled);
    }
    // TODO #176 use the document Id once it is available in the CAS
    nameSamples.add(
      new ImmutablePair<>(DocumentMetaData.get(cas).getDocumentUri(), namesPerDocument));
  }
  return nameSamples;
}

Map<AnnotationFS, Collection<AnnotationFS>> idxSentences = indexCovered(
Map<AnnotationFS, Collection<AnnotationFS>> idxProperties = indexCovered(
Map<AnnotationFS, Collection<AnnotationFS>> idxNeurons = indexCovered(

/**
 * Creates document samples from the given CASes: one sample for every annotation of the
 * configured layer that is covered by a sentence.
 *
 * <p>The sample text is the covered text of the sentence's tokens. The sample category is the
 * annotation's feature value, or {@code NO_CATEGORY} when that value is {@code null}.
 *
 * @param aCasses
 *          the CASes to extract samples from
 * @return the collected samples
 */
private List<DocumentSample> extractSamples(List<CAS> aCasses)
{
  List<DocumentSample> collected = new ArrayList<>();
  for (CAS cas : aCasses) {
    // Group the tokens of this CAS by the sentence that covers them.
    Map<AnnotationFS, Collection<AnnotationFS>> tokensBySentence = indexCovered(cas,
      getType(cas, Sentence.class), getType(cas, Token.class));

    for (Entry<AnnotationFS, Collection<AnnotationFS>> group : tokensBySentence.entrySet()) {
      Collection<AnnotationFS> sentenceTokens = group.getValue();

      // Covered text of each token, in the iteration order of the token collection.
      String[] words = new String[sentenceTokens.size()];
      int next = 0;
      for (AnnotationFS token : sentenceTokens) {
        words[next] = token.getCoveredText();
        next++;
      }

      Type layerType = getType(cas, layerName);
      Feature labelFeature = layerType.getFeatureByBaseName(featureName);

      for (AnnotationFS annotation : selectCovered(layerType, group.getKey())) {
        String category = annotation.getFeatureValueAsString(labelFeature);
        if (category == null) {
          category = NO_CATEGORY;
        }
        DocumentSample sample = new DocumentSample(category, words);
        // Samples that end up without a category are dropped.
        if (sample.getCategory() == null) {
          continue;
        }
        collected.add(sample);
      }
    }
  }
  return collected;
}

Map<AnnotationFS, Collection<AnnotationFS>> idxSentences = indexCovered(
Map<AnnotationFS, Collection<AnnotationFS>> idxProperties = indexCovered(
Map<AnnotationFS, Collection<AnnotationFS>> idxNeurons = indexCovered(

   JCas jCas, Class<? extends T> type, Class<? extends S> coveredType) {
return cast(CasUtil
    .indexCovered(jCas.getCas(), getType(jCas, type), getType(jCas, coveredType)));

/**
 * Pre-computes, for every token covered by an annotation of the given type, whether the token
 * is recorded as a "begin" or an "inside" token, keyed by the token's begin offset.
 *
 * <p>A token is recorded in the begin map when it starts at the same offset as its chunk and
 * either IOB1 mode is off or the previous chunk carried the same value. Every other covered
 * token is recorded in the inside map.
 *
 * @param aCas
 *          the CAS to read chunks and tokens from
 * @param aType
 *          the chunk annotation type
 * @param aValueFeature
 *          the string feature holding the chunk value
 * @param aIob1
 *          whether to encode in IOB1 mode
 */
public IobEncoder(CAS aCas, Type aType, Feature aValueFeature, boolean aIob1)
{
  iob1 = aIob1;
  iobBeginMap = new Int2ObjectOpenHashMap<String>();
  iobInsideMap = new Int2ObjectOpenHashMap<String>();

  // Index the tokens covered by each chunk once, so that encoding does not have to rescan the
  // CAS for every chunk.
  Map<AnnotationFS, Collection<AnnotationFS>> tokensByChunk = CasUtil.indexCovered(aCas, aType,
      CasUtil.getType(aCas, Token.class));

  String previousValue = null;
  for (AnnotationFS chunk : CasUtil.select(aCas, aType)) {
    String value = chunk.getStringValue(aValueFeature);
    int chunkBegin = chunk.getBegin();

    // In IOB1 mode a begin marker is only recorded if the previous chunk had the same value.
    boolean repeatsPrevious = previousValue != null && previousValue.equals(value);
    boolean beginPermitted = !iob1 || repeatsPrevious;

    for (AnnotationFS token : tokensByChunk.get(chunk)) {
      int tokenBegin = token.getBegin();
      if (beginPermitted && tokenBegin == chunkBegin) {
        iobBeginMap.put(tokenBegin, value);
      }
      else {
        iobInsideMap.put(tokenBegin, value);
      }
    }

    previousValue = value;
  }
}

/**
 * Pre-computes, for every token covered by an annotation of the given type, whether the token
 * is recorded as a "begin" or an "inside" token, keyed by the token's begin offset.
 *
 * <p>A token is recorded in the begin map when it starts at the same offset as its chunk and
 * either IOB1 mode is off or the previous chunk carried the same value. Every other covered
 * token is recorded in the inside map.
 *
 * @param aCas
 *          the CAS to read chunks and tokens from
 * @param aType
 *          the chunk annotation type
 * @param aValueFeature
 *          the string feature holding the chunk value
 * @param aIob1
 *          whether to encode in IOB1 mode
 */
public IobEncoder(CAS aCas, Type aType, Feature aValueFeature, boolean aIob1)
{
  iob1 = aIob1;
  iobBeginMap = new Int2ObjectOpenHashMap<String>();
  iobInsideMap = new Int2ObjectOpenHashMap<String>();

  // Index the tokens covered by each chunk once, so that encoding does not have to rescan the
  // CAS for every chunk.
  Map<AnnotationFS, Collection<AnnotationFS>> tokensByChunk = CasUtil.indexCovered(aCas, aType,
      CasUtil.getType(aCas, Token.class));

  String previousValue = null;
  for (AnnotationFS chunk : CasUtil.select(aCas, aType)) {
    String value = chunk.getStringValue(aValueFeature);
    int chunkBegin = chunk.getBegin();

    // In IOB1 mode a begin marker is only recorded if the previous chunk had the same value.
    boolean repeatsPrevious = previousValue != null && previousValue.equals(value);
    boolean beginPermitted = !iob1 || repeatsPrevious;

    for (AnnotationFS token : tokensByChunk.get(chunk)) {
      int tokenBegin = token.getBegin();
      if (beginPermitted && tokenBegin == chunkBegin) {
        iobBeginMap.put(tokenBegin, value);
      }
      else {
        iobInsideMap.put(tokenBegin, value);
      }
    }

    previousValue = value;
  }
}

Javadoc

Create an index for quickly looking up the annotations covered by a particular annotation. This is preferable to using #selectCovered(CAS,Type,int,int) because the overhead of scanning the CAS occurs only when the index is built. Subsequent lookups to the index are fast. The order of entries in the map is not defined. However, lists of covered annotations in the map are guaranteed to be in the same order as in the UIMA default annotation index.

The method only returns properly covered annotations, that is annotations where the begin/end offsets are equal to the 'covering' annotation or where both the begin/end are included in the span of the 'covering' annotation. Partially overlapping annotations are not returned.

When querying for the annotations covered by a given annotation, the given annotation itself is never returned, even if it is of the queried type.

Popular methods of CasUtil

select
Convenience method to iterate over all annotations of a given type.
getType
Get the CAS type for the given name.
selectCovered
Get a list of annotations of the given annotation type constraint by a certain annotation. Iterates
getAnnotationType
Get the CAS type for the given name making sure it is or inherits from Annotation.
selectFS
Convenience method to iterate over all feature structures of a given type.
selectAt
Get all annotations of the given type at the specified offsets.
toText
Fetch the text covered by the specified annotations and return it as a list of strings.
indexCovering
Create an index for quickly looking up the annotations covering a particular annotation. This is pref
iterator
Get an iterator over the given annotation type.
selectSingleRelative
Return an annotation preceding or following of a given reference annotation.
getView
Convenience method to get the specified view or create a new view if the requested view does not exi
selectAll
Convenience method to iterate over all annotations.

Popular in Java

Making http requests using okhttp
runOnUiThread (Activity)
setContentView (Activity)
getExternalFilesDir (Context)
FileInputStream (java.io)
An input stream that reads bytes from a file. File file = ...finally if (in != null) in.clos
Arrays (java.util)
This class contains various methods for manipulating arrays (such as sorting and searching). This cl
Collections (java.util)
This class consists exclusively of static methods that operate on or return collections. It contains
Font (java.awt)
The Font class represents fonts, which are used to render text in a visible way. A font provides the
JOptionPane (javax.swing)
Reflections (org.reflections)
Reflections one-stop-shop object. Reflections scans your classpath, indexes the metadata, allows you t
Github Copilot alternatives

How to use the indexCovered method in org.apache.uima.fit.util.CasUtil

Best Java code snippets using org.apache.uima.fit.util.CasUtil.indexCovered (Showing top 10 results out of 315)

How to use
indexCovered
method
in
org.apache.uima.fit.util.CasUtil