/**
 * Computes the Levenshtein distance between this object's {@code text} and the string
 * form of {@code other}.
 *
 * @param env       the current environment (not read by this method)
 * @param other     value whose {@code toString()} is compared against {@code text}
 * @param threshold optional limit; when it is a null memory value the distance object is
 *                  created with a {@code null} threshold
 * @return the value produced by {@code LevenshteinDistance.apply}
 */
@Signature
public Integer levenshteinDistance(Environment env, Memory other, @Optional("null") Memory threshold) {
    Integer limit = null;
    if (!threshold.isNull()) {
        limit = threshold.toInteger();
    }
    return new LevenshteinDistance(limit).apply(text, other.toString());
}
/**
 * Tells whether {@code term} lies within an allowed Levenshtein distance of {@code value}.
 * The allowed distance depends on the length of the trimmed {@code term}:
 * <ul>
 *   <li>0 for fewer than three characters</li>
 *   <li>1 for three, four or five characters</li>
 *   <li>2 for more than five characters</li>
 * </ul>
 *
 * @param term  the candidate; leading and trailing whitespace is trimmed before comparing
 * @param value the reference string, compared as given (it is not trimmed)
 * @return true if {@code term} is similar to {@code value}
 */
private static boolean isFuzzy(String term, String value) {
    final String candidate = term.trim();
    final int length = candidate.length();
    final int allowed = length < 3 ? 0 : (length < 6 ? 1 : 2);
    final int actual = LevenshteinDistance.getDefaultInstance().apply(value, candidate);
    return actual <= allowed;
}
/** * Compares two Strings with respect to the base String, by Levenshtein distance. * <p/> * The input that is the closest match to the base String will sort before the other. * * @param a an input to compare relative to the base. * @param b an input to compare relative to the base. * * @return -1 if {@code a} is closer to the base than {@code b}; 1 if {@code b} is * closer to the base than {@code a}; 0 if both {@code a} and {@code b} are * equally close to the base. */ @Override public int compare(String a, String b) { // shortcuts if (a.equals(b)) { return 0; // comparing the same value; don't bother } else if (a.equals(base)) { return -1; // a is equal to the base, so it's always first } else if (b.equals(base)) { return 1; // b is equal to the base, so it's always first } // determine which of the two is closer to the base and order it first return Integer.compare(LEVENSHTEIN_DISTANCE.apply(a, base), LEVENSHTEIN_DISTANCE.apply(b, base)); }
/**
 * Initializes the field {@code d} with a new Apache Commons Text
 * {@code LevenshteinDistance} created through its no-argument constructor.
 * The class is referenced by its fully qualified name.
 * NOTE(review): presumably called once before {@code eval()} - confirm against the framework.
 */
@Override public void setup() { d = new org.apache.commons.text.similarity.LevenshteinDistance(); }
/** A distance built with the no-argument constructor must report a {@code null} threshold. */
@Test
public void testGetThresholdDirectlyAfterObjectInstantiation() {
    final LevenshteinDistance freshInstance = new LevenshteinDistance();
    assertThat(freshInstance.getThreshold()).isNull();
}
/**
 * Decodes both raw inputs into Strings with {@code StringFunctionHelpers.toStringFromUTF8}
 * (using each input's {@code start}, {@code end} and {@code buffer}) and stores the result of
 * {@code d.apply(input1, input2)} in {@code out.value}.
 * The trailing closing brace on this line ends an enclosing scope that is opened outside this view.
 */
@Override public void eval() { String input1 = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(rawInput1.start, rawInput1.end, rawInput1.buffer); String input2 = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(rawInput2.start, rawInput2.end, rawInput2.buffer); out.value = d.apply(input1, input2); } }
/**
 * Supplies the parameterized-test rows: a similarity score, its two inputs and the
 * expected result. Three rows use {@code LevenshteinDistance}; the last row uses an
 * ad-hoc equality-based score that yields a {@code Boolean}.
 *
 * @return the stream of argument rows
 */
public static Stream<Arguments> parameters() {
    // Custom score: true when both inputs are the same reference or left.equals(right).
    final SimilarityScore<Boolean> equalityScore = new SimilarityScore<Boolean>() {
        @Override
        public Boolean apply(final CharSequence left, final CharSequence right) {
            if (left == right) {
                return true;
            }
            return left != null && left.equals(right);
        }
    };

    return Stream.of(
        Arguments.of(new LevenshteinDistance(), "elephant", "hippo", 7),
        Arguments.of(new LevenshteinDistance(), "hippo", "elephant", 7),
        Arguments.of(new LevenshteinDistance(), "hippo", "zzzzzzzz", 8),
        Arguments.of(equalityScore, "Bob's your uncle.", "Every good boy does fine.", false));
}
/**
 * Checks that a {@code LevenshteinDistance} built with {@code threshold} returns
 * {@code distance} for the pair ({@code left}, {@code right}).
 *
 * @param threshold the threshold passed to the constructor
 * @param left      the first input
 * @param right     the second input
 * @param distance  the expected result of {@code apply}
 */
@ParameterizedTest
@MethodSource("parameters")
public void test(final Integer threshold, final CharSequence left, final CharSequence right, final Integer distance) {
    final Integer actual = new LevenshteinDistance(threshold).apply(left, right);
    assertThat(actual).isEqualTo(distance);
}
/**
 * Checks {@code UNLIMITED_DISTANCE} against a table of string pairs and their expected
 * distances, including empty inputs and both argument orders.
 */
@Test
public void testGetLevenshteinDistance_StringString() {
    // Row i of inputs pairs with expected[i].
    final String[][] inputs = {
        {"", ""},
        {"", "a"},
        {"aaapppp", ""},
        {"frog", "fog"},
        {"fly", "ant"},
        {"elephant", "hippo"},
        {"hippo", "elephant"},
        {"hippo", "zzzzzzzz"},
        {"zzzzzzzz", "hippo"},
        {"hello", "hallo"},
    };
    final int[] expected = {0, 1, 7, 1, 3, 7, 7, 8, 8, 1};

    for (int i = 0; i < inputs.length; i++) {
        assertThat(UNLIMITED_DISTANCE.apply(inputs[i][0], inputs[i][1])).isEqualTo(expected[i]);
    }
}
/**
 * Finds, among three candidates, the one with the smallest Levenshtein distance to
 * "Apache" and checks both the winner and its distance. On a tie the earlier candidate
 * is kept, because only a strictly smaller distance replaces the current best.
 */
@Test
public void testJavadocExample() {
    final String target = "Apache";
    final EditDistance<Integer> metric = new LevenshteinDistance();
    final EditDistanceFrom<Integer> metricFrom = new EditDistanceFrom<>(metric, target);
    final String[] candidates = {"Appaloosa", "a patchy", "apple"};

    String closest = null;
    Integer smallest = null;
    for (final String candidate : candidates) {
        final Integer current = metricFrom.apply(candidate);
        if (smallest != null && current >= smallest) {
            continue; // not strictly better than what we already have
        }
        smallest = current;
        closest = candidate;
    }

    assertThat(closest).isEqualTo("a patchy");
    assertThat(smallest).isEqualTo(4);
}
// Threshold-limited distances. In the expectations below, apply(...) yields the actual
// distance when it does not exceed the threshold given to the constructor, and -1 when it
// does (for example threshold 7 vs. 6 for "aaapppp" against ""). Both argument orders and
// inputs of unequal length are covered.
assertThat(new LevenshteinDistance(0).apply("", "")).isEqualTo(0); assertThat(new LevenshteinDistance(8).apply("aaapppp", "")).isEqualTo(7); assertThat(new LevenshteinDistance(7).apply("aaapppp", "")).isEqualTo(7); assertThat(new LevenshteinDistance(6).apply("aaapppp", "")).isEqualTo(-1); assertThat(new LevenshteinDistance(0).apply("b", "a")).isEqualTo(-1); assertThat(new LevenshteinDistance(0).apply("a", "b")).isEqualTo(-1); assertThat(new LevenshteinDistance(0).apply("aa", "aa")).isEqualTo(0); assertThat(new LevenshteinDistance(2).apply("aa", "aa")).isEqualTo(0); assertThat(new LevenshteinDistance(2).apply("aaa", "bbb")).isEqualTo(-1); assertThat(new LevenshteinDistance(3).apply("aaa", "bbb")).isEqualTo(3); assertThat(new LevenshteinDistance(10).apply("aaaaaa", "b")).isEqualTo(6); assertThat(new LevenshteinDistance(8).apply("aaapppp", "b")).isEqualTo(7); assertThat(new LevenshteinDistance(4).apply("a", "bbb")).isEqualTo(3); assertThat(new LevenshteinDistance(7).apply("aaapppp", "b")).isEqualTo(7); assertThat(new LevenshteinDistance(3).apply("a", "bbb")).isEqualTo(3); assertThat(new LevenshteinDistance(2).apply("a", "bbb")).isEqualTo(-1); assertThat(new LevenshteinDistance(2).apply("bbb", "a")).isEqualTo(-1); assertThat(new LevenshteinDistance(6).apply("aaapppp", "b")).isEqualTo(-1); assertThat(new LevenshteinDistance(1).apply("a", "bbb")).isEqualTo(-1); assertThat(new LevenshteinDistance(1).apply("bbb", "a")).isEqualTo(-1);
/**
 * Returns the Levenshtein distance between two strings as a {@code Long}.
 *
 * @param text1 the first string
 * @param text2 the second string
 * @return the distance widened to {@code long}, or {@code null} if either argument is {@code null}
 */
@UserFunction
@Description("apoc.text.levenshteinDistance(text1, text2) - compare the given strings with the Levenshtein distance algorithm.")
public Long levenshteinDistance(final @Name("text1") String text1, @Name("text2")final String text2) {
    if (text1 != null && text2 != null) {
        return (long) levenshteinDistance.apply(text1, text2);
    }
    return null;
}
/**
 * Checks that an {@code EditDistanceFrom} bound to a left-hand string gives the expected
 * distance and agrees with calling the wrapped metric directly on both strings.
 */
@Test
public void testEquivalence() {
    final String left = "Apache";
    final String right = "a patchy";
    final Integer expected = 4;

    final EditDistance<Integer> metric = new LevenshteinDistance();
    final EditDistanceFrom<Integer> metricFrom = new EditDistanceFrom<>(metric, left);

    // First against the known value, then against the two-argument form.
    assertThat(metricFrom.apply(right)).isEqualTo(expected);
    assertThat(metricFrom.apply(right)).isEqualTo(metric.apply(left, right));
}
/**
 * A distance created with threshold 0 must throw {@link IllegalArgumentException} when
 * {@code apply} is called with two {@code null} arguments.
 */
@Test
public void testApplyThrowsIllegalArgumentExceptionAndCreatesLevenshteinDistanceTakingInteger() {
    assertThatIllegalArgumentException()
        .isThrownBy(() -> {
            new LevenshteinDistance(0).apply(null, null);
        });
}
/**
 * {@code apply} must throw {@link IllegalArgumentException} when the first argument is a
 * String and the second is {@code null}.
 */
@Test
public void testGetLevenshteinDistance_StringNullInt() {
    assertThatIllegalArgumentException()
        .isThrownBy(() -> {
            UNLIMITED_DISTANCE.apply("a", null);
        });
}
/**
 * Supplies the parameterized-test rows: an edit distance, its two inputs and the expected
 * result. Rows cover {@code HammingDistance}, {@code LevenshteinDistance} without and with
 * a threshold, and an ad-hoc equality-based metric that yields a {@code Boolean}.
 *
 * @return the stream of argument rows
 */
public static Stream<Arguments> parameters() {
    // Custom metric: true when both inputs are the same reference or left.equals(right).
    final EditDistance<Boolean> equalityMetric = new EditDistance<Boolean>() {
        @Override
        public Boolean apply(final CharSequence left, final CharSequence right) {
            if (left == right) {
                return true;
            }
            return left != null && left.equals(right);
        }
    };

    return Stream.of(
        Arguments.of(new HammingDistance(), "Sam I am.", "Ham I am.", 1),
        Arguments.of(new HammingDistance(), "Japtheth, Ham, Shem", "Japtheth, HAM, Shem", 2),
        Arguments.of(new HammingDistance(), "Hamming", "Hamming", 0),
        Arguments.of(new LevenshteinDistance(), "Apache", "a patchy", 4),
        Arguments.of(new LevenshteinDistance(), "go", "no go", 3),
        Arguments.of(new LevenshteinDistance(), "go", "go", 0),
        Arguments.of(new LevenshteinDistance(4), "Apache", "a patchy", 4),
        Arguments.of(new LevenshteinDistance(4), "go", "no go", 3),
        Arguments.of(new LevenshteinDistance(0), "go", "go", 0),
        Arguments.of(equalityMetric, "Bob's your uncle.", "Every good boy does fine.", false));
}
/**
 * Computes the Levenshtein distance between {@code s} and {@code t} with a freshly created
 * Apache Commons Text {@code LevenshteinDistance} and returns it as a {@code double}.
 *
 * @param s the first sequence
 * @param t the second sequence
 * @return the distance, widened to {@code double}
 */
@Override
public double calculate(CharSequence s, CharSequence t) {
    return new org.apache.commons.text.similarity.LevenshteinDistance().apply(s, t);
}
/**
 * Verifies that {@code apply} throws {@link IllegalArgumentException} when the first
 * argument is {@code null} and the second is a non-null String.
 */
@Test
public void testGetLevenshteinDistance_NullString() {
    // The null goes first, as the "NullString" name says; the call used to be
    // ("a", null), which left the null-left case untested.
    assertThatIllegalArgumentException().isThrownBy(() -> UNLIMITED_DISTANCE.apply(null, "a"));
}
/**
 * Constructing a {@code LevenshteinDistance} with threshold -1 must throw
 * {@link IllegalArgumentException}.
 */
@Test
public void testConstructorWithNegativeThreshold() {
    assertThatIllegalArgumentException()
        .isThrownBy(() -> {
            new LevenshteinDistance(-1);
        });
}
// Computes three Levenshtein distances against altLabel with one shared instance (no
// threshold argument) and stores each on l: the mention itself, the mention context
// (finalMentionContext turned into a String by tokensToString), and the typed string.
LevenshteinDistance lev = new LevenshteinDistance(); l.setLevMatchLabel(lev.apply(mention, altLabel)); l.setLevContext(lev.apply(tokensToString(finalMentionContext), altLabel)); l.setLevTypedString(lev.apply(aTypedString, altLabel));