/**
 * Reports whether the wrapped vector {@code v} has an entry at the given index.
 *
 * @param idx the index to look up
 * @return whatever {@code v.containsKey(idx)} returns
 */
@Override
public boolean containsKey(int idx) {
    final boolean present = v.containsKey(idx);
    return present;
}
/**
 * Computes the Jaccard index of the key sets of two vectors, ignoring stop words:
 * {@code |keys(vec1) ∩ keys(vec2)| / |keys(vec1) ∪ keys(vec2)|}.
 *
 * Only the presence of a key matters; the values stored in the vectors are not used.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param stopWordList keys contained in this list are skipped in both vectors
 * @return the Jaccard index, or {@code 0.0} when no non-stop-word key exists in either vector
 */
public static double jaccardIndex(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    int intersect = 0;
    int union = 0;
    // Every non-stop-word key of vec1 belongs to the union; those also in vec2 form the intersection.
    for (Map.Entry<Integer, Integer> e : vec1.entrySet()) {
        final Integer key = e.getKey();
        if (stopWordList.contains(key)) {
            continue;
        }
        if (vec2.containsKey(key)) {
            intersect++;
        }
        union++;
    }
    // Add the keys that occur only in vec2. Membership must be tested with the entry's
    // key (the index); testing with the entry's value looked up the stored count as if
    // it were an index and produced a wrong union size.
    for (Map.Entry<Integer, Integer> e : vec2.entrySet()) {
        final Integer key = e.getKey();
        if (stopWordList.contains(key)) {
            continue;
        }
        if (!vec1.containsKey(key)) {
            union++;
        }
    }
    return union == 0 ? 0.0 : (double) intersect / union;
}
/**
 * Computes a Dice-style coefficient between two vectors:
 * {@code 2 * shared / (vec1.size() + vec2.size())}, where {@code shared} is the number of
 * non-stop-word keys of {@code vec1} that are also keys of {@code vec2}.
 *
 * NOTE(review): the denominator uses the raw {@code size()} of each vector, so stop words
 * are excluded from the overlap count but not from the sizes — confirm this is intended.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param stopWordList keys contained in this list are not counted as shared
 * @return the coefficient, or {@code 0} when either vector reports a size of zero
 */
public static double diceCoefficient(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    final int firstSize = vec1.size();
    final int secondSize = vec2.size();
    if (firstSize == 0 || secondSize == 0) {
        return 0;
    }

    int shared = 0;
    for (Integer key : vec1.keySet()) {
        if (!stopWordList.contains(key) && vec2.containsKey(key)) {
            shared++;
        }
    }

    final double twiceShared = 2.0 * (double) shared;
    return twiceShared / (firstSize + secondSize);
}
/**
 * Computes the Rogers-Tanimoto similarity of two vectors treated as binary
 * (key present / key absent): {@code (N - diff) / (N + diff)}, where {@code N} is
 * {@code vec1.length()} and {@code diff} is the number of non-stop-word keys present
 * in exactly one of the two vectors.
 *
 * Both vectors are expected to report the same {@code length()}; this is only checked
 * by an assertion. When {@code N} and {@code diff} are both zero the result is NaN.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param stopWordList keys contained in this list are never counted as differences
 * @return the similarity value
 */
public static double rogersTanimoto(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    final int dimension = vec1.length();
    assert vec2.length() == vec1.length();

    int mismatches = 0;
    // Keys found in vec1 but missing from vec2.
    for (Integer key : vec1.keySet()) {
        if (!stopWordList.contains(key) && !vec2.containsKey(key)) {
            mismatches++;
        }
    }
    // Keys found in vec2 but missing from vec1.
    for (Integer key : vec2.keySet()) {
        if (!stopWordList.contains(key) && !vec1.containsKey(key)) {
            mismatches++;
        }
    }

    final double agreeing = dimension - mismatches;
    final double weightedTotal = dimension + mismatches;
    return agreeing / weightedTotal;
}
/**
 * Computes a weighted Jaccard coefficient in which each key {@code i} contributes
 * {@code 1.0 - df.doubleValue(i)} instead of 1. The numerator sums the weights of keys
 * present in both vectors; the denominator sums the weights of keys present in either.
 *
 * Keys in the stop word list and keys {@code i >= df.size()} are ignored.
 * NOTE(review): {@code df} presumably holds document frequencies in [0, 1] — confirm.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param df per-key values used to derive the weights
 * @param stopWordList keys contained in this list are skipped
 * @return the weighted coefficient, or {@code 0.0} when the denominator is zero
 */
public static double dfJaccardCoefficient(Vector<Integer> vec1, Vector<Integer> vec2, Vector<Double> df, final StopWordList stopWordList) {
    double sharedWeight = 0.0;
    double unionWeight = 0.0;

    for (Integer key : vec1.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        final double weight = 1.0 - df.doubleValue(key);
        if (vec2.containsKey(key)) {
            // Shared key: its share of the union is added by the vec2 pass below.
            sharedWeight += weight;
        } else {
            // Key only in vec1: must be added to the union here.
            unionWeight += weight;
        }
    }

    for (int key : vec2.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        unionWeight += (1.0 - df.doubleValue(key));
    }

    if (unionWeight == 0.0) {
        return 0.0;
    }
    return sharedWeight / unionWeight;
}
if (!vec1.containsKey(i)) { ab -= v1sum * mu.doubleValue(i) * v2i;
/**
 * Computes a weighted Dice coefficient in which each key {@code i} contributes
 * {@code 1.0 - df.doubleValue(i)} instead of 1. The result is
 * {@code 2 * (weight of keys in both vectors) / (weight of vec1 keys + weight of vec2 keys)}.
 *
 * Keys in the stop word list and keys {@code i >= df.size()} are ignored.
 * NOTE(review): {@code df} presumably holds document frequencies in [0, 1] — confirm.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param df per-key values used to derive the weights
 * @param stopWordList keys contained in this list are skipped
 * @return the weighted coefficient, or {@code 0.0} when the denominator is zero
 */
public static double dfDiceCoefficient(Vector<Integer> vec1, Vector<Integer> vec2, Vector<Double> df, final StopWordList stopWordList) {
    double sharedWeight = 0.0;
    double totalWeight = 0.0;

    for (Integer key : vec1.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        if (vec2.containsKey(key)) {
            sharedWeight += (1.0 - df.doubleValue(key));
        }
        totalWeight += (1.0 - df.doubleValue(key));
    }

    for (int key : vec2.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        totalWeight += (1.0 - df.doubleValue(key));
    }

    if (totalWeight == 0.0) {
        return 0.0;
    }
    return (2.0 * sharedWeight / totalWeight);
}