// Appends one entry for the current token to sentenceWithPos, in this order:
// the token's covered text, a single backslash character, the string value of
// this.posFeature read from the token, and a trailing space.
sentenceWithPos.append(token.getCoveredText()); sentenceWithPos.append('\\'); sentenceWithPos.append(token.getStringValue(this.posFeature)); sentenceWithPos.append(' ');
/**
 * Returns the text to use for a token.
 *
 * @param token the token annotation to read
 * @return the token's covered text when no {@code tokenTextFeature} is set,
 *         otherwise the string value of that feature on the token
 */
private String getTokenText(AnnotationFS token) {
    return (tokenTextFeature == null)
            ? token.getCoveredText()
            : token.getStringValue(tokenTextFeature);
}
} // closes the enclosing scope whose opening is not visible in this excerpt
/** * @param token * tokenClass to look up * @return true if in includedTokenClasses or if both includedTokenClasses and * excludedTokenClasses are unset, of if excludedTokenClasses does not contain an entry * for tokenClass parameter */ public boolean checkTokenClass(AnnotationFS token) { boolean returnValue = true; if (tokenClassFeature != null) { String tokenClass = token.getStringValue(tokenClassFeature); if (tokenClass != null) { returnValue = isOK_TokenClass(tokenClass); } } // System.err.println ("checkTokenClass, token = " + token.getCoveredText() + ", returnValue: " // + returnValue); return returnValue; }
private Collection<ImmutablePair<String, Collection<AnnotationFS>>> extractNamedEntities( List<CAS> aCasList) { Type tokenType = org.apache.uima.fit.util.CasUtil .getType(aCasList.get(0), recommender.getLayer().getName()); Feature feature = tokenType.getFeatureByBaseName(recommender.getFeature().getName()); Collection<ImmutablePair<String, Collection<AnnotationFS>>> nameSamples = new HashSet<>(); for (CAS cas : aCasList) { Collection<AnnotationFS> namesPerDocument = new ArrayList<>(); Type sentenceType = getType(cas, Sentence.class); Map<AnnotationFS, Collection<AnnotationFS>> sentences = indexCovered(cas, sentenceType, tokenType); for (Map.Entry<AnnotationFS, Collection<AnnotationFS>> e : sentences.entrySet()) { Collection<AnnotationFS> tokens = e.getValue().stream() // If the identifier has not been set .filter(a -> a.getStringValue(feature) == null) .collect(Collectors.toSet()); namesPerDocument.addAll(tokens); } // TODO #176 use the document Id once it is available in the CAS nameSamples.add( new ImmutablePair<>(DocumentMetaData.get(cas).getDocumentUri(), namesPerDocument)); } return nameSamples; }
/**
 * Handles an end tag by closing the most recently opened annotation with the same name.
 *
 * <p>The annotation stack is searched from its last element towards the first. The
 * first annotation whose "name" feature equals the tag name gets its "end" feature set
 * to the tag's end offset, is removed from the stack and is added to
 * {@code annotations}. If nothing matches, nothing is changed.
 *
 * @param tag the end tag being visited
 */
@Override
public void visitEndTag(Tag tag) {
    String name = getName(tag);
    for (int index = annotationStack.size() - 1; index >= 0; index--) {
        AnnotationFS candidate = annotationStack.get(index);
        Feature nameFeature = candidate.getType().getFeatureByBaseName("name");
        if (!name.equals(candidate.getStringValue(nameFeature))) {
            continue;
        }
        int endOffset = getEndOffset(tag);
        Feature endFeature = candidate.getType().getFeatureByBaseName("end");
        candidate.setIntValue(endFeature, endOffset);
        // Move the closed annotation from the open stack to the finished list.
        annotationStack.remove(candidate);
        annotations.add(candidate);
        return;
    }
}
private Collection<ImmutablePair<String, Collection<AnnotationFS>>> extractNamedEntities( List<CAS> aCasList) { Type tokenType = org.apache.uima.fit.util.CasUtil .getType(aCasList.get(0), recommender.getLayer().getName()); Feature feature = tokenType.getFeatureByBaseName(recommender.getFeature().getName()); Collection<ImmutablePair<String, Collection<AnnotationFS>>> nameSamples = new HashSet<>(); for (CAS cas : aCasList) { Collection<AnnotationFS> namesPerDocument = new ArrayList<>(); Type sentenceType = getType(cas, Sentence.class); Map<AnnotationFS, Collection<AnnotationFS>> sentences = indexCovered(cas, sentenceType, tokenType); for (Map.Entry<AnnotationFS, Collection<AnnotationFS>> e : sentences.entrySet()) { Collection<AnnotationFS> tokens = e.getValue().stream() // If the identifier has not been set .filter(a -> a.getStringValue(feature) == null) .collect(Collectors.toSet()); namesPerDocument.addAll(tokens); } // TODO #176 use the document Id once it is available in the CAS nameSamples.add( new ImmutablePair<>(DocumentMetaData.get(cas).getDocumentUri(), namesPerDocument)); } return nameSamples; }
/**
 * Determines the file the modified HTML output should be written to.
 *
 * <p>The file name is derived from the "uri" feature of the first source document
 * information annotation in the CAS: the last path segment is taken and
 * ".modified.html" is appended unless the name already ends with it. When the type,
 * the annotation, the "uri" feature or its value is missing or blank, the default name
 * "output.modified.html" is used instead.
 *
 * @param cas the CAS whose source document information is inspected
 * @return the output file inside {@code outputLocation}, or {@code null} when
 *         {@code outputLocation} is blank
 */
private File getOutputFile(CAS cas) {
    if (StringUtils.isBlank(outputLocation)) {
        return null;
    }
    File defaultFile = new File(outputLocation, "output.modified.html");

    Type sdiType = cas.getTypeSystem().getType(RutaEngine.SOURCE_DOCUMENT_INFORMATION);
    if (sdiType == null) {
        return defaultFile;
    }
    FSIterator<AnnotationFS> sdiit = cas.getAnnotationIndex(sdiType).iterator();
    if (!sdiit.isValid()) {
        return defaultFile;
    }

    AnnotationFS annotationFS = sdiit.get();
    Feature uriFeature = sdiType.getFeatureByBaseName("uri");
    // A missing feature or unset value would otherwise end in new File((String) null),
    // which throws NullPointerException; fall back to the default name instead.
    String uri = (uriFeature == null) ? null : annotationFS.getStringValue(uriFeature);
    if (StringUtils.isBlank(uri)) {
        return defaultFile;
    }

    String name = new File(uri).getName();
    if (!name.endsWith(".modified.html")) {
        name = name + ".modified.html";
    }
    return new File(outputLocation, name);
}
/**
 * Determines the file the modified HTML output should be written to.
 *
 * <p>By default this is "output.modified.html" inside {@code outputLocation}. If the
 * CAS type system knows the source document information type and the annotation index
 * iterator for it is valid, the name is instead taken from the last path segment of
 * that annotation's "uri" feature, with ".modified.html" appended unless already
 * present.
 *
 * @param cas the CAS whose source document information is inspected
 * @return the output file, or {@code null} when {@code outputLocation} is blank
 */
private File getOutputFile(CAS cas) {
    if (StringUtils.isBlank(outputLocation)) {
        return null;
    }

    Type sdiType = cas.getTypeSystem().getType(TextMarkerEngine.SOURCE_DOCUMENT_INFORMATION);
    File fallback = new File(outputLocation, "output.modified.html");
    if (sdiType == null) {
        return fallback;
    }

    FSIterator<AnnotationFS> sdiit = cas.getAnnotationIndex(sdiType).iterator();
    if (!sdiit.isValid()) {
        return fallback;
    }

    AnnotationFS sourceInfo = sdiit.get();
    Feature uriFeature = sdiType.getFeatureByBaseName("uri");
    String baseName = new File(sourceInfo.getStringValue(uriFeature)).getName();
    String outputName = baseName.endsWith(".modified.html")
            ? baseName
            : baseName + ".modified.html";
    return new File(outputLocation, outputName);
}
public void visitEndTag(Tag tag) { String name = getName(tag); AnnotationFS found = null; for (int i = annotationStack.size() - 1; i >= 0; i--) { AnnotationFS each = (AnnotationFS) annotationStack.get(i); // // Java 6: // Iterator<AnnotationFS> descendingIterator = annotationStack.descendingIterator(); // while (descendingIterator.hasNext()) { // AnnotationFS each = (AnnotationFS) descendingIterator.next(); Feature nameFeature = each.getType().getFeatureByBaseName("name"); String eachName = each.getStringValue(nameFeature); if (name.equals(eachName)) { int endOffset = getEndOffset(tag); Feature endFeature = each.getType().getFeatureByBaseName("end"); each.setIntValue(endFeature, endOffset); found = each; break; } } if (found != null) { annotationStack.remove(found); annotations.add(found); } }
// Takes the annotation returned by sdiit.get(), reads the string value of the
// feature with base name "uri" (looked up on sdiType), and keeps only the last
// path segment of that value (File.getName()) in 'name'.
// NOTE(review): a null uri value would make new File(stringValue) throw — confirm
// the feature is always set.
AnnotationFS annotationFS = sdiit.get(); Feature uriFeature = sdiType.getFeatureByBaseName("uri"); String stringValue = annotationFS.getStringValue(uriFeature); File f = new File(stringValue); String name = f.getName();
// Takes the annotation returned by sdiit.get(), reads the string value of the
// feature with base name "uri" (looked up on sdiType), and keeps only the last
// path segment of that value (File.getName()) in 'name'.
// NOTE(review): a null uri value would make new File(stringValue) throw — confirm
// the feature is always set.
AnnotationFS annotationFS = sdiit.get(); Feature uriFeature = sdiType.getFeatureByBaseName("uri"); String stringValue = annotationFS.getStringValue(uriFeature); File f = new File(stringValue); String name = f.getName();
public void processCas(CAS cas) throws ResourceProcessException { FSIndex categoryIndex = cas.getAnnotationIndex(mCategoryType); if (categoryIndex.size() > 0) { AnnotationFS categoryAnnotation = (AnnotationFS) categoryIndex.iterator().next(); // add to event collection DocumentSample sample = new DocumentSample( categoryAnnotation.getStringValue(mCategoryFeature), cas.getDocumentText()); documentSamples.add(sample); } }
// Takes the annotation returned by sdiit.get(), reads the string value of the
// feature with base name "uri" (looked up on sdiType), and keeps only the last
// path segment of that value (File.getName()) in 'name'.
// NOTE(review): a null uri value would make new File(stringValue) throw — confirm
// the feature is always set.
AnnotationFS annotationFS = sdiit.get(); Feature uriFeature = sdiType.getFeatureByBaseName("uri"); String stringValue = annotationFS.getStringValue(uriFeature); File f = new File(stringValue); String name = f.getName();
/**
 * Reads the configured feature from an annotation using the accessor that matches
 * {@code valueType}.
 *
 * @param annotation the annotation to read from
 * @return the feature value; primitive results are boxed
 * @throws IllegalStateException if {@code valueType} is none of the handled types
 */
private Object getValue(AnnotationFS annotation) {
    final Object value;
    switch (valueType) {
        case TYPE_BOOLEAN:
            value = annotation.getBooleanValue(getFeature(annotation));
            break;
        case TYPE_STRING:
            value = annotation.getStringValue(getFeature(annotation));
            break;
        case TYPE_INT:
            value = annotation.getIntValue(getFeature(annotation));
            break;
        case TYPE_FLOAT:
            value = annotation.getFloatValue(getFeature(annotation));
            break;
        default:
            throw new IllegalStateException("Unknown value type: " + this.valueType);
    }
    return value;
}
// Takes the annotation returned by sdiit.get(), reads the string value of the
// feature with base name "uri" (looked up on sdiType), and keeps only the last
// path segment of that value (File.getName()) in 'name'.
// NOTE(review): a null uri value would make new File(stringValue) throw — confirm
// the feature is always set.
AnnotationFS annotationFS = sdiit.get(); Feature uriFeature = sdiType.getFeatureByBaseName("uri"); String stringValue = annotationFS.getStringValue(uriFeature); File f = new File(stringValue); String name = f.getName();
// Takes the annotation returned by sdiit.get(), reads the string value of the
// feature with base name "uri" (looked up on sdiType), and keeps only the last
// path segment of that value (File.getName()) in 'name'.
// NOTE(review): a null uri value would make new File(stringValue) throw — confirm
// the feature is always set.
AnnotationFS annotationFS = sdiit.get(); Feature uriFeature = sdiType.getFeatureByBaseName("uri"); String stringValue = annotationFS.getStringValue(uriFeature); File f = new File(stringValue); String name = f.getName();
public IobEncoder(CAS aCas, Type aType, Feature aValueFeature, boolean aIob1) { iob1 = aIob1; // fill map for whole JCas in order to efficiently encode IOB iobBeginMap = new Int2ObjectOpenHashMap<String>(); iobInsideMap = new Int2ObjectOpenHashMap<String>(); Map<AnnotationFS, Collection<AnnotationFS>> idx = CasUtil.indexCovered(aCas, aType, CasUtil.getType(aCas, Token.class)); String lastValue = null; for (AnnotationFS chunk : CasUtil.select(aCas, aType)) { String value = chunk.getStringValue(aValueFeature); for (AnnotationFS token : idx.get(chunk)) { if ( token.getBegin() == chunk.getBegin() && (!iob1 || (lastValue != null && lastValue.equals(value))) ) { iobBeginMap.put(token.getBegin(), value); } else { iobInsideMap.put(token.getBegin(), value); } } lastValue = value; } }
/**
 * Searches the given CAS for a {@link DocumentMetadata} annotation and returns the
 * value of its 'sourceUri' feature.
 *
 * @param cas
 *            the CAS to search
 * @return the sourceUri value, or {@code null} if the metadata type is not in the type
 *         system or the CAS contains no DocumentMetadata annotation
 * @throws IllegalStateException
 *             if the 'sourceUri' feature lookup fails with an
 *             AnalysisEngineProcessException
 */
public static String getDocumentUri(CAS cas) {
    Type docMetaType = cas.getTypeSystem().getType(getMetadataTypeName());
    if (docMetaType == null) {
        return null;
    }

    Feature sourceUriFeat;
    try {
        sourceUriFeat = featureExist(docMetaType, "sourceUri");
    } catch (AnalysisEngineProcessException e) {
        throw new IllegalStateException(e);
    }

    FSIterator<AnnotationFS> dmIter = cas.getAnnotationIndex(docMetaType).iterator();
    // Only the first metadata annotation, if any, is consulted.
    return dmIter.hasNext() ? dmIter.next().getStringValue(sourceUriFeat) : null;
}
public IobEncoder(CAS aCas, Type aType, Feature aValueFeature, boolean aIob1) { iob1 = aIob1; // fill map for whole JCas in order to efficiently encode IOB iobBeginMap = new Int2ObjectOpenHashMap<String>(); iobInsideMap = new Int2ObjectOpenHashMap<String>(); Map<AnnotationFS, Collection<AnnotationFS>> idx = CasUtil.indexCovered(aCas, aType, CasUtil.getType(aCas, Token.class)); String lastValue = null; for (AnnotationFS chunk : CasUtil.select(aCas, aType)) { String value = chunk.getStringValue(aValueFeature); for (AnnotationFS token : idx.get(chunk)) { if ( token.getBegin() == chunk.getBegin() && (!iob1 || (lastValue != null && lastValue.equals(value))) ) { iobBeginMap.put(token.getBegin(), value); } else { iobInsideMap.put(token.getBegin(), value); } } lastValue = value; } }
public DictionaryToken(AnnotationFS annotation, Feature tokenTypeFeature, Feature tokenClassFeature) { super(); if (tokenTypeFeature == null) { this.setTokenTypeFeatureDefined(false); } else { this.setTokenTypeFeatureDefined(true); this.setType(annotation.getIntValue(tokenTypeFeature)); } if (tokenClassFeature == null) { this.setTokenClassFeatureDefined(false); } else { this.setTokenClassFeatureDefined(true); this.setTokenClass(annotation.getStringValue(tokenClassFeature)); } this.setText(annotation.getCoveredText()); }