/**
 * Creates a copy of this annotation study. The copy is set up with the
 * same number of raters, receives a newly created unit for every unit of
 * every item of this study, and finally has all categories of this study
 * added to it.
 */
@Override
public CodingAnnotationStudy clone() {
    final CodingAnnotationStudy copy = new CodingAnnotationStudy(getRaterCount());
    for (ICodingAnnotationItem sourceItem : getItems()) {
        final CodingAnnotationItem copiedItem = new CodingAnnotationItem(raters.size());
        for (IAnnotationUnit sourceUnit : sourceItem.getUnits()) {
            // The copied item is appended only after its units were created,
            // so the current size of the copy's item list is passed as index.
            copiedItem.addUnit(copy.createUnit(copy.items.size(),
                    sourceUnit.getRaterIdx(), sourceUnit.getCategory()));
        }
        copy.items.add(copiedItem);
    }
    for (Object category : getCategories()) {
        copy.addCategory(category);
    }
    return copy;
}
/**
 * Creates a new {@link CodingAnnotationItem} which has been coded with
 * the given annotation categories. The order of the categories must
 * correspond to the raters' indexes; use null to represent a missing
 * annotation. For example, <code>addItem("A", "B", null, "A")</code>
 * describes an item coded as category "A" by raters 0 and 3 and as
 * category "B" by rater 1, while rater 2 did not assign any category.
 * This method is a varargs shorthand that simply forwards to
 * {@link #addItemAsArray(Object[])}.
 *
 * @param annotations one category per rater index (null if missing).
 * @return the item returned by {@link #addItemAsArray(Object[])}.
 */
public ICodingAnnotationItem addItem(final Object... annotations) {
    final ICodingAnnotationItem createdItem = addItemAsArray(annotations);
    return createdItem;
}
/** Initializes and empty annotation study for a coding task with the given * number of raters. The basic setup of a coding study is assigning * categories to units with fixed boundaries. */ public CodingAnnotationStudy(int raterCount) { this(); for (int raterIdx = 0; raterIdx < raterCount; raterIdx++) addRater(Integer.toString(raterIdx)); }
/**
 * Returns a copy of the current annotation study in which every category
 * that does not equal the given keepCategory is replaced by the given
 * nullCategory. Units whose category equals keepCategory are recreated
 * with the keepCategory object itself.
 *
 * @param keepCategory the only category to retain; compared by calling
 *     its <code>equals</code> method.
 * @param nullCategory the replacement for all other categories.
 * @return a new study with the same rater count and one item per item
 *     of this study.
 * @throws NullPointerException if keepCategory is null and this study
 *     contains at least one unit.
 */
public CodingAnnotationStudy stripCategories(final Object keepCategory, final Object nullCategory) {
    final CodingAnnotationStudy stripped = new CodingAnnotationStudy(getRaterCount());
    for (ICodingAnnotationItem sourceItem : getItems()) {
        final CodingAnnotationItem strippedItem = new CodingAnnotationItem(raters.size());
        for (IAnnotationUnit sourceUnit : sourceItem.getUnits()) {
            final boolean isKept = keepCategory.equals(sourceUnit.getCategory());
            final Object replacement = isKept ? keepCategory : nullCategory;
            strippedItem.addUnit(stripped.createUnit(stripped.items.size(),
                    sourceUnit.getRaterIdx(), replacement));
        }
        stripped.items.add(strippedItem);
    }
    return stripped;
}
/**
 * Returns a copy of the current annotation study which contains only the
 * annotation units of the raters with the given indexes; all other units
 * are left out. The selected raters are renumbered in the order in which
 * their indexes are passed, i.e., the first given rater becomes rater 0
 * of the returned study. This is useful for converting a study with
 * multiple raters into a (pairwise) study with two raters.
 *
 * @param raters the indexes of the raters (in this study) to extract.
 * @return a new study with <code>raters.length</code> raters.
 */
public CodingAnnotationStudy extractRaters(final int... raters) {
    final int selectedCount = raters.length;
    final CodingAnnotationStudy extracted = new CodingAnnotationStudy(selectedCount);
    for (ICodingAnnotationItem sourceItem : getItems()) {
        final CodingAnnotationItem extractedItem = new CodingAnnotationItem(selectedCount);
        int targetRaterIdx = 0;
        for (int sourceRaterIdx : raters) {
            final IAnnotationUnit sourceUnit = sourceItem.getUnit(sourceRaterIdx);
            extractedItem.addUnit(extracted.createUnit(extracted.items.size(),
                    targetRaterIdx, sourceUnit.getCategory()));
            targetRaterIdx++;
        }
        extracted.items.add(extractedItem);
    }
    return extracted;
}
// Two initially empty result lists, followed by a coding study that has as many raters as
// `users` has entries; `values` is added to it as a single annotation item.
// NOTE(review): presumably `values` holds one category per user, in user order - confirm against the caller.
List<ConfigurationSet> pluralitySets = new ArrayList<>(); List<ConfigurationSet> irrelevantSets = new ArrayList<>(); CodingAnnotationStudy study = new CodingAnnotationStudy(users.size()); study.addItemAsArray(values);
/**
 * Calculates the agreement value of a single annotation item: the sum of
 * <code>count * (count - 1)</code> over the per-category annotation
 * counts of the item, divided by the item's rater count minus one.
 *
 * @param item the annotation item to evaluate.
 * @return the described quotient, or 0.0 if the item has at most one
 *     rater.
 */
protected double doCalculateItemAgreement(final ICodingAnnotationItem item) {
    final Map<Object, Integer> countsByCategory =
            CodingAnnotationStudy.countTotalAnnotationsPerCategory(item);
    double pairSum = 0.0;
    for (Integer categoryCount : countsByCategory.values()) {
        pairSum += categoryCount * (categoryCount - 1);
    }
    final int raterCount = item.getRaterCount();
    // With fewer than two raters the divisor would be zero or negative.
    return (raterCount <= 1) ? 0.0 : pairSum / (raterCount - 1.0);
}
/**
 * Prints the coincidence matrix for the given annotation item.
 *
 * @param out the stream handed on to <code>doPrint</code>.
 * @param study the study handed on to <code>doPrint</code>.
 * @param item the item whose category coincidences are counted.
 */
public void print(final PrintStream out, final ICodingAnnotationStudy study, final ICodingAnnotationItem item) {
    doPrint(out, study, CodingAnnotationStudy.countCategoryCoincidence(item));
}
/**
 * Calculates the maximum observed agreement: for every category of the
 * study, the smallest per-rater annotation count is determined; the
 * positive minima are summed up and the sum is divided by the number of
 * items of the study.
 *
 * @return the quotient as a double value.
 * @throws ArithmeticException if the study does not contain any item.
 */
protected double calculateMaximumObservedAgreement() {
    final Map<Object, int[]> countsByCategory =
            CodingAnnotationStudy.countAnnotationsPerCategory(study);
    BigDecimal sumOfMinima = BigDecimal.ZERO;
    for (Object category : study.getCategories()) {
        final int[] perRater = countsByCategory.get(category);
        // -1 marks "no count seen yet".
        int smallest = -1;
        for (int rater = 0; rater < study.getRaterCount(); rater++) {
            if (smallest < 0 || perRater[rater] < smallest) {
                smallest = perRater[rater];
            }
        }
        if (smallest > 0) {
            sumOfMinima = sumOfMinima.add(new BigDecimal(smallest));
        }
    }
    final BigDecimal itemCount = new BigDecimal(study.getItemCount());
    return sumOfMinima.divide(itemCount, MathContext.DECIMAL128).doubleValue();
}
/**
 * Creates a new {@link CodingAnnotationItem} which has been coded with
 * the given annotation categories and appends it to this study. The
 * order of the categories must correspond to the raters' indexes; use
 * null to represent a missing annotation. For example,
 * <code>addItemAsArray(new Object[]{"A", "B", null, "A"})</code>
 * describes an item coded as category "A" by raters 0 and 3 and as
 * category "B" by rater 1, while rater 2 did not assign any category.
 *
 * @param annotations one category per rater index (null if missing).
 * @return the newly created and appended item.
 */
public ICodingAnnotationItem addItemAsArray(final Object[] annotations) {
    final int newItemIdx = items.size();
    final CodingAnnotationItem newItem = new CodingAnnotationItem(raters.size());
    int raterIdx = 0;
    for (Object category : annotations) {
        newItem.addUnit(createUnit(newItemIdx, raterIdx, category));
        raterIdx++;
    }
    items.add(newItem);
    return newItem;
}
// nk: annotation counts keyed by category for `study` (cast to the coding-study interface to select the
// study-level overload). NOTE(review): presumably the totals over all items and raters - confirm.
// v is only declared here; it is assigned by code outside this excerpt.
Map<Object, Integer> nk = CodingAnnotationStudy.countTotalAnnotationsPerCategory((ICodingAnnotationStudy) study); Integer v;
/**
 * Prints the coincidence matrix for the given coding study.
 *
 * @param out the stream handed on to <code>doPrint</code>.
 * @param study the study whose category coincidences are counted and
 *     which is handed on to <code>doPrint</code>.
 */
public void print(final PrintStream out, final ICodingAnnotationStudy study) {
    doPrint(out, study, CodingAnnotationStudy.countCategoryCoincidence(study));
}
/**
 * Calculates the expected inter-rater agreement that assumes a
 * different probability distribution for all raters. For every category
 * the per-rater annotation counts are multiplied with each other; the
 * sum of these products is divided by the squared number of items.
 *
 * @return the expected agreement as a double value.
 * @throws NullPointerException if the annotation study is null.
 * @throws ArithmeticException if there are no items in the
 *     annotation study.
 */
@Override
public double calculateExpectedAgreement() {
    final Map<Object, int[]> countsByCategory =
            CodingAnnotationStudy.countAnnotationsPerCategory(study);
    BigDecimal sumOfProducts = BigDecimal.ZERO;
    for (Object category : study.getCategories()) {
        final int[] perRater = countsByCategory.get(category);
        BigDecimal product = BigDecimal.ONE;
        for (int rater = 0; rater < study.getRaterCount(); rater++) {
            product = product.multiply(new BigDecimal(perRater[rater]));
        }
        sumOfProducts = sumOfProducts.add(product);
    }
    final BigDecimal squaredItemCount = new BigDecimal(study.getItemCount()).pow(2);
    return sumOfProducts.divide(squaredItemCount, MathContext.DECIMAL128).doubleValue();
}
/**
 * Shorthand for invoking {@link #addItem(Object...)} with the same
 * parameters multiple times. This method is useful for modeling
 * annotation data based on a contingency table.
 *
 * @param times how many items to add; nothing is added if the value is
 *     zero or negative.
 * @param values the categories of each added item, one per rater index.
 */
public void addMultipleItems(int times, final Object... values) {
    for (int remaining = times; remaining > 0; remaining--) {
        addItemAsArray(values);
    }
}
/**
 * Calculates the expected inter-rater agreement that assumes the same
 * distribution for all raters and annotations. The total annotation
 * counts of all categories are squared and summed up; the sum is divided
 * by four times the squared number of items.
 * NOTE(review): the constant 4 presumably is the squared rater count of
 * a two-rater study - confirm.
 *
 * @return the expected agreement as a double value.
 * @throws NullPointerException if the annotation study is null.
 * @throws ArithmeticException if there are no items in the
 *     annotation study.
 */
@Override
public double calculateExpectedAgreement() {
    final Map<Object, Integer> totalsByCategory =
            CodingAnnotationStudy.countTotalAnnotationsPerCategory(study);
    BigDecimal sumOfSquares = BigDecimal.ZERO;
    for (Object category : study.getCategories()) {
        final BigDecimal categoryTotal = new BigDecimal(totalsByCategory.get(category));
        sumOfSquares = sumOfSquares.add(categoryTotal.pow(2));
    }
    final BigDecimal squaredItemCount = new BigDecimal(study.getItemCount()).pow(2);
    final BigDecimal denominator = new BigDecimal(4).multiply(squaredItemCount);
    return sumOfSquares.divide(denominator, MathContext.DECIMAL128).doubleValue();
}
/**
 * Calculates the observed disagreement from the category coincidence
 * matrix of the study, which is counted on first use and then kept in
 * <code>coincidenceMatrix</code>. Every matrix cell is weighted with the
 * distance between its two categories as measured by the current
 * distance function; the weighted sum is divided by the sum of all
 * cells.
 *
 * @return the weighted mean distance; not a finite number if the sum of
 *     all matrix cells is zero.
 */
@Override
public double calculateObservedDisagreement() {
    ensureDistanceFunction();
    if (coincidenceMatrix == null) {
        coincidenceMatrix = CodingAnnotationStudy.countCategoryCoincidence(study);
    }

    double weightedSum = 0.0;
    double cellSum = 0.0;
    for (Entry<Object, Map<Object, Double>> row : coincidenceMatrix.entrySet()) {
        final Object rowCategory = row.getKey();
        for (Entry<Object, Double> cell : row.getValue().entrySet()) {
            weightedSum += cell.getValue()
                    * distanceFunction.measureDistance(study, rowCategory, cell.getKey());
            cellSum += cell.getValue();
        }
    }
    return weightedSum / cellSum;
}
/**
 * Calculates the expected inter-rater agreement from the per-rater
 * annotation counts. For every category and every unordered pair of
 * distinct raters, the two raters' counts are multiplied; twice the sum
 * of these products is divided by
 * <code>raterCount * (raterCount - 1) * itemCount^2</code>.
 *
 * @return the expected agreement as a double value.
 * @throws NullPointerException if the annotation study is null.
 * @throws ArithmeticException if there are no items in the
 *     annotation study.
 */
@Override
public double calculateExpectedAgreement() {
    final Map<Object, int[]> countsByCategory =
            CodingAnnotationStudy.countAnnotationsPerCategory(study);
    BigDecimal pairSum = BigDecimal.ZERO;
    for (Object category : study.getCategories()) {
        final int[] perRater = countsByCategory.get(category);
        for (int first = 0; first < study.getRaterCount(); first++) {
            for (int second = first + 1; second < study.getRaterCount(); second++) {
                final BigDecimal firstCount = new BigDecimal(perRater[first]);
                final BigDecimal secondCount = new BigDecimal(perRater[second]);
                pairSum = pairSum.add(firstCount.multiply(secondCount));
            }
        }
    }
    // Each unordered pair was visited once, hence the factor of two.
    final BigDecimal numerator = pairSum.multiply(new BigDecimal(2));
    final BigDecimal denominator = new BigDecimal(study.getRaterCount())
            .multiply(new BigDecimal(study.getRaterCount() - 1))
            .multiply(new BigDecimal(study.getItemCount()).pow(2));
    return numerator.divide(denominator, MathContext.DECIMAL128).doubleValue();
}
/**
 * Calculates the expected inter-rater agreement that assumes the same
 * distribution for all raters and annotations. For every item, each
 * category's share of the item's raters is determined; the shares are
 * summed up per category over all items. The squared per-category sums
 * are added and divided by the squared number of items.
 *
 * @return the expected agreement as a double value.
 * @throws NullPointerException if the annotation study is null.
 * @throws ArithmeticException if there are no items in the
 *     annotation study.
 */
@Override
public double calculateExpectedAgreement() {
    final Map<Object, BigDecimal> shareSumByCategory = new HashMap<Object, BigDecimal>();
    for (ICodingAnnotationItem item : study.getItems()) {
        final Map<Object, Integer> countsByCategory =
                CodingAnnotationStudy.countTotalAnnotationsPerCategory(item);
        for (Entry<Object, Integer> categoryCount : countsByCategory.entrySet()) {
            final BigDecimal share = new BigDecimal(categoryCount.getValue()).divide(
                    new BigDecimal(item.getRaterCount()), MathContext.DECIMAL128);
            final BigDecimal accumulated = shareSumByCategory.get(categoryCount.getKey());
            shareSumByCategory.put(categoryCount.getKey(),
                    (accumulated == null) ? share : share.add(accumulated));
        }
    }

    BigDecimal sumOfSquares = BigDecimal.ZERO;
    for (BigDecimal shareSum : shareSumByCategory.values()) {
        sumOfSquares = sumOfSquares.add(shareSum.pow(2));
    }
    final BigDecimal squaredItemCount = new BigDecimal(study.getItemCount()).pow(2);
    return sumOfSquares.divide(squaredItemCount, MathContext.DECIMAL128).doubleValue();
}
/** Returns a two dimensional map of category pairs and their co-occurrence * frequencies for the given annotation study. */ // Category x Category -> # public static Map<Object, Map<Object, Double>> countCategoryCoincidence(final ICodingAnnotationStudy study) { Map<Object, Map<Object, Double>> result = new HashMap<Object, Map<Object, Double>>(); for (ICodingAnnotationItem item : study.getItems()) { Map<Object, Map<Object, Double>> itemMatrix = countCategoryCoincidence(item); for (Entry<Object, Map<Object, Double>> itemCat : itemMatrix.entrySet()) { Map<Object, Double> resultCat = result.get(itemCat.getKey()); if (resultCat == null) { resultCat = new HashMap<Object, Double>(); result.put(itemCat.getKey(), resultCat); } for (Entry<Object, Double> itemEntry : itemCat.getValue().entrySet()) { Double resultEntry = resultCat.get(itemEntry.getKey()); if (resultEntry == null) resultEntry = 0.0; resultCat.put(itemEntry.getKey(), resultEntry + itemEntry.getValue()); } } } return result; }
/**
 * Calculates the observed disagreement for the annotation study that was
 * passed to the class constructor and the currently assigned distance
 * function. For every item, the annotation counts of each ordered pair
 * of categories are multiplied and weighted with the distance between
 * the two categories; the sum over all items is divided by
 * <code>itemCount * raterCount * (raterCount - 1)</code>.
 * <p>
 * The divisor and the count products are computed in floating-point
 * arithmetic so that large studies cannot silently overflow the int
 * range. As a consequence of floating-point division, no exception is
 * thrown for an empty study or fewer than two raters; the result is
 * then NaN or infinite.
 *
 * @return the observed disagreement; NaN or infinite if the study does
 *     not contain any item or the number of raters is smaller than 2.
 * @throws NullPointerException if the study is null.
 */
public double calculateObservedDisagreement() {
    ensureDistanceFunction();

    double result = 0.0;
    for (ICodingAnnotationItem item : study.getItems()) {
        Map<Object, Integer> annotationsPerCategory =
                CodingAnnotationStudy.countTotalAnnotationsPerCategory(item);

        for (Entry<Object, Integer> category1 : annotationsPerCategory.entrySet()) {
            // Categories without a count do not contribute to the sum.
            if (category1.getValue() == null) {
                continue;
            }
            for (Entry<Object, Integer> category2 : annotationsPerCategory.entrySet()) {
                if (category2.getValue() == null) {
                    continue;
                }
                // Widen to double before multiplying to rule out int overflow.
                result += (double) category1.getValue() * category2.getValue()
                        * distanceFunction.measureDistance(study,
                                category1.getKey(), category2.getKey());
            }
        }
    }

    // Multiplying three ints first could exceed Integer.MAX_VALUE for
    // large studies and yield a wrong (possibly negative) divisor.
    final double itemCount = study.getItemCount();
    final double raterCount = study.getRaterCount();
    return result / (itemCount * raterCount * (raterCount - 1.0));
}