/**
 * Keeps only those sentences in which every token carries a label.
 *
 * <p>A sentence is dropped as soon as one of its tokens returns {@code null} from
 * {@code getLabel()}. Sentences that survive are added to the result in their original order and
 * are not copied.
 *
 * <p>NOTE(review): earlier documentation described this filtering as applying only when complete
 * annotation is required for training. This method itself filters unconditionally, so any such
 * condition has to be evaluated by the caller - confirm.
 *
 * @param data
 *            The sentences to filter; presumably each contains at least one non-null label, but
 *            this is not checked here.
 * @return A new list holding the sentences whose tokens all have a non-null label.
 */
private List<List<AnnotationObject>> trimData(List<List<AnnotationObject>> data) {
    List<List<AnnotationObject>> completeSentences = new LinkedList<>();
    nextSentence:
    for (List<AnnotationObject> candidate : data) {
        for (AnnotationObject token : candidate) {
            if (token.getLabel() == null) {
                // One unlabeled token is enough to reject the whole sentence.
                continue nextSentence;
            }
        }
        completeSentences.add(candidate);
    }
    return completeSentences;
}
/**
 * Builds a mapping from the token texts of each named entity in a sentence to that entity's
 * label.
 *
 * <p>The entities are obtained from {@code getNamedEntities(sentence)}. For every entity the
 * covered texts of its tokens form the (unmodifiable) key, and the label of the entity's last
 * token is the value. Entities whose last label is {@code null} are skipped. If two entities
 * consist of equal token texts, the later one overwrites the earlier one.
 *
 * @param sentence
 *            The tokens of one sentence.
 * @return A map from entity token texts to the entity label.
 */
public static Map<List<String>, String> getAnnotationMappingForSentence(
        List<AnnotationObject> sentence) {
    Map<List<String>, String> mapping = new HashMap<>();
    for (List<AnnotationObject> entity : getNamedEntities(sentence)) {
        String label = null;
        List<String> coveredTexts = new LinkedList<>();
        for (AnnotationObject token : entity) {
            // The label of the last token wins.
            label = token.getLabel();
            coveredTexts.add(token.getCoveredText());
        }
        if (label == null) {
            continue;
        }
        mapping.put(Collections.unmodifiableList(coveredTexts), label);
    }
    return mapping;
}
}
public static List<List<AnnotationObject>> getNamedEntities(List<AnnotationObject> sentence) { List<List<AnnotationObject>> result = new LinkedList<>(); String currentAnnotation = ""; List<AnnotationObject> tokens = new LinkedList<>(); for (AnnotationObject ao : sentence) { String annotation = ao.getLabel(); if (currentAnnotation.isEmpty()) { if (annotation != null && !annotation.isEmpty()) { currentAnnotation = annotation; tokens.add(ao); } } else if (currentAnnotation.equals(annotation)) { tokens.add(ao); } else if (annotation != null && !annotation.isEmpty()) { result.add(Collections.unmodifiableList(tokens)); tokens = new LinkedList<>(); tokens.add(ao); currentAnnotation = annotation; } } // add the last annotation as well if not empty. if (!currentAnnotation.isEmpty()) { result.add(Collections.unmodifiableList(tokens)); } return result; }
/**
 * Appends a one-line representation of an annotation object to the given builder.
 *
 * <p>The fields are written in this order, separated by {@code delimiter}: character range
 * ({@code begin..end}), token range ({@code begin..end}), label, covered text. Nothing is
 * appended when {@code ao} is {@code null}.
 *
 * @param sb
 *            The builder to append to.
 * @param ao
 *            The annotation object to print; may be {@code null}.
 * @param delimiter
 *            The separator written between the fields.
 */
private static void printAnnotationObject(StringBuilder sb, AnnotationObject ao,
        String delimiter) {
    if (ao != null) {
        Offset span = ao.getOffset();
        sb.append(span.getBeginCharacter()).append("..").append(span.getEndCharacter())
                .append(delimiter)
                .append(span.getBeginToken()).append("..").append(span.getEndToken())
                .append(delimiter)
                .append(ao.getLabel())
                .append(delimiter)
                .append(ao.getCoveredText());
    }
}
/**
 * Appends a one-line representation of an annotation object to the given builder, with the
 * fields in the reverse order of {@code printAnnotationObject}.
 *
 * <p>The fields are written in this order, separated by {@code delimiter}: covered text, label,
 * token range ({@code begin..end}), character range ({@code begin..end}). Nothing is appended
 * when {@code ao} is {@code null}.
 *
 * @param sb
 *            The builder to append to.
 * @param ao
 *            The annotation object to print; may be {@code null}.
 * @param delimiter
 *            The separator written between the fields.
 */
private static void printAnnotationObjectInverted(StringBuilder sb, AnnotationObject ao,
        String delimiter) {
    if (ao != null) {
        // The offset is fetched before any field is appended.
        Offset span = ao.getOffset();
        sb.append(ao.getCoveredText())
                .append(delimiter)
                .append(ao.getLabel())
                .append(delimiter)
                .append(span.getBeginToken()).append("..").append(span.getEndToken())
                .append(delimiter)
                .append(span.getBeginCharacter()).append("..").append(span.getEndCharacter());
    }
}
}