// Ask the comparator to score v1 against v2; the result is kept in sim
// (presumably a similarity score -- confirm against the comparator's contract).
double sim = comparator.compare(v1, v2);
public double compare(String s1, String s2) { if (s1.equals(s2)) return 1.0; // tokenize String[] t1 = StringUtils.split(s1); String[] t2 = StringUtils.split(s2); // ensure that t1 is shorter than or same length as t2 if (t1.length > t2.length) { String[] tmp = t2; t2 = t1; t1 = tmp; } // find best matches for each token in t1 double sum = 0; for (int ix1 = 0; ix1 < t1.length; ix1++) { double highest = 0; for (int ix2 = 0; ix2 < t2.length; ix2++) highest = Math.max(highest, subcomp.compare(t1[ix1], t2[ix2])); sum += highest; } return (sum * 2) / (t1.length + t2.length); } }
// Track the best score that any element of t2 achieves against t1[ix1].
double highest = 0;
for (String candidate : t2) {
  highest = Math.max(highest, subcomp.compare(t1[ix1], candidate));
}
double d = comp.compare(v1, v2); double p = prop.compare(v1, v2); System.out.println("'" + v1 + "' ~ '" + v2 + "': " + d +
/** * Returns the probability that the records v1 and v2 came from represent * the same entity, based on high and low probability settings etc. */ public double compare(String v1, String v2) { // FIXME: it should be possible here to say that, actually, we // didn't learn anything from comparing these two values, so that // probability is set to 0.5. if (comparator == null) return 0.5; // we ignore properties with no comparator double sim = comparator.compare(v1, v2); if (sim >= 0.5) return ((high - 0.5) * (sim * sim)) + 0.5; else return low; }
/** * Returns the probability that the records v1 and v2 came from represent * the same entity, based on high and low probability settings etc. * * @param v1 1st String * @param v2 2nd String * @param high max probability * @param low min probability * @param comparator the comparator to use * @return the computed probability */ private static double compare( final String v1, final String v2, final double high, final double low, final Comparator comparator) { if (comparator == null) { return AVERAGE_SCORE; // we ignore properties with no comparator } double sim = comparator.compare(v1, v2); if (sim < AVERAGE_SCORE) { return low; } else { return ((high - AVERAGE_SCORE) * (sim * sim)) + AVERAGE_SCORE; } }
public double compare(String s1, String s2) { if (s1.equals(s2)) return 1.0; // tokenize String[] t1 = StringUtils.split(s1); String[] t2 = StringUtils.split(s2); // ensure that t1 is shorter than or same length as t2 if (t1.length > t2.length) { String[] tmp = t2; t2 = t1; t1 = tmp; } // find best matches for each token in t1 double sum = 0; for (int ix1 = 0; ix1 < t1.length; ix1++) { double highest = 0; for (int ix2 = 0; ix2 < t2.length; ix2++) highest = Math.max(highest, subcomp.compare(t1[ix1], t2[ix2])); sum += highest; } return (sum * 2) / (t1.length + t2.length); } }
// Track the best score that any element of t2 achieves against t1[ix1].
double highest = 0;
for (String candidate : t2) {
  highest = Math.max(highest, subcomp.compare(t1[ix1], candidate));
}
double d = comp.compare(v1, v2); double p = prop.compare(v1, v2); System.out.println("'" + v1 + "' ~ '" + v2 + "': " + d +