/**
 * Tests whether every search term is covered by at least one keyword.
 * A keyword covers a term when it contains the term as a substring, or when
 * {@code DISTANCE.apply(keyword, term)} is greater than 0.9.
 *
 * @param searchTerms the terms that must each be covered; an empty array yields {@code true}
 * @return {@code true} if every search term is covered by some keyword, {@code false} otherwise
 */
boolean matchesSearchTerms(String[] searchTerms) {
    for (String term : searchTerms) {
        boolean covered = keywords.stream()
                .anyMatch(keyword -> keyword.contains(term) || DISTANCE.apply(keyword, term) > 0.9);
        if (!covered) {
            // One uncovered term is enough to reject the whole query.
            return false;
        }
    }
    return true;
}
}
/**
 * Applies a freshly created {@code JaroWinklerDistance} to this object's {@code text}
 * and the string form of {@code other}.
 *
 * @param env   not used by this method
 * @param other value whose {@code toString()} result is compared against {@code text}
 * @return the value returned by {@code JaroWinklerDistance.apply}
 */
@Signature
public Double jaroWinklerDistance(Environment env, Memory other) {
    return new JaroWinklerDistance().apply(text, other.toString());
}
/**
 * Initialises {@code d} with a new Jaro-Winkler distance calculator.
 * NOTE(review): the class name is fully qualified, presumably because the body is consumed
 * outside this file's import scope; confirm before shortening it.
 */
@Override public void setup() { d = new org.apache.commons.text.similarity.JaroWinklerDistance(); }
/**
 * Creates the {@code JaroWinklerDistance} instance stored in the static {@code distance} field.
 */
@BeforeAll
public static void setUp() {
    distance = new JaroWinklerDistance();
}
/**
 * Decodes both raw inputs from their buffer ranges into strings and stores the result of
 * {@code d.apply} on the two strings in {@code out.value}.
 */
@Override
public void eval() {
    String left = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
            rawInput1.start, rawInput1.end, rawInput1.buffer);
    String right = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
            rawInput2.start, rawInput2.end, rawInput2.buffer);

    out.value = d.apply(left, right);
}
}
/**
 * Checks {@code distance.apply} against expected scores for a fixed set of string pairs.
 */
@Test
public void testGetJaroWinklerDistance_StringString() {
    // Tolerance shared by every case except the "fly"/"ant" pair, which uses its own delta.
    final double tolerance = 0.00001d;

    assertEquals(0.92499d, distance.apply("frog", "fog"), tolerance);
    assertEquals(0.0d, distance.apply("fly", "ant"), 0.00000000000000000001d);
    assertEquals(0.44166d, distance.apply("elephant", "hippo"), tolerance);
    assertEquals(0.90666d, distance.apply("ABC Corporation", "ABC Corp"), tolerance);
    assertEquals(0.95251d, distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."), tolerance);
    assertEquals(0.942d, distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"), tolerance);
    assertEquals(0.898018d, distance.apply("PENNSYLVANIA", "PENNCISYLVNIA"), tolerance);
    assertEquals(0.971428d, distance.apply("/opt/software1", "/opt/software2"), tolerance);
    assertEquals(0.941666d, distance.apply("aaabcd", "aaacdb"), tolerance);
    assertEquals(0.911111d, distance.apply("John Horn", "John Hopkins"), tolerance);
}
/**
 * Compares two strings using the {@code jaroWinklerDistance} field.
 *
 * @param text1 first string; may be {@code null}
 * @param text2 second string; may be {@code null}
 * @return the value of {@code jaroWinklerDistance.apply(text1, text2)}, or {@code null}
 *         when either argument is {@code null}
 */
@UserFunction
@Description("apoc.text.jaroWinklerDistance(text1, text2) - compare the given strings with the Jaro-Winkler distance algorithm.")
public Double jaroWinklerDistance(@Name("text1") final String text1, @Name("text2") final String text2) {
    if (text1 != null && text2 != null) {
        return jaroWinklerDistance.apply(text1, text2);
    }
    // A missing operand yields a null result rather than an exception.
    return null;
}
/**
 * Expects {@code distance.apply} to throw {@code IllegalArgumentException} when the first argument is null.
 */
@Test
public void testGetJaroWinklerDistance_NullString() {
    assertThatIllegalArgumentException()
            .isThrownBy(() -> distance.apply(null, "clear"));
}
/**
 * Expects {@code distance.apply} to throw {@code IllegalArgumentException} when the second argument is null.
 */
@Test
public void testGetJaroWinklerDistance_StringNull() {
    assertThatIllegalArgumentException()
            .isThrownBy(() -> distance.apply(" ", null));
}
/**
 * Expects {@code distance.apply} to throw {@code IllegalArgumentException} when both arguments are null.
 */
@Test
public void testGetJaroWinklerDistance_NullNull() {
    assertThatIllegalArgumentException()
            .isThrownBy(() -> distance.apply(null, null));
}
/**
 * Decides whether the booking satisfies this rule, based on the rule's similarity match type.
 * <ul>
 *   <li>{@code IBAN}: case-insensitive equality of the rule expression and the booking's bank connection.</li>
 *   <li>{@code REFERENCE_NAME}: {@code JARO_WINKLER} score of the rule expression against the
 *       lower-cased reference name must be at least {@code MIN_DISTANCE}; a null reference name never matches.</li>
 *   <li>{@code PURPOSE}: {@code JARO_WINKLER} score of the rule expression against the normalized
 *       purpose must be at least {@code MIN_DISTANCE}.</li>
 * </ul>
 *
 * @param wrappedBooking the booking to test
 * @return {@code true} if the booking matches the rule
 * @throws IllegalArgumentException if the match type is none of the handled values
 */
@Override
public boolean match(WrappedBooking wrappedBooking) {
    switch (rule.getSimilarityMatchType()) {
        case IBAN:
            return StringUtils.equalsIgnoreCase(rule.getExpression(), wrappedBooking.getBankConnection());

        case REFERENCE_NAME: {
            if (wrappedBooking.getReferenceName() == null) {
                return false;
            }
            Double score = JARO_WINKLER.apply(rule.getExpression(),
                    wrappedBooking.getReferenceName().toLowerCase());
            boolean similar = score >= MIN_DISTANCE;
            if (similar) {
                log.debug("similarity expression {} compared with {} resulted in score {}",
                        rule.getExpression(), wrappedBooking.getReferenceName().toLowerCase(), score);
            }
            return similar;
        }

        case PURPOSE: {
            Double score = JARO_WINKLER.apply(rule.getExpression(), normalize(wrappedBooking.getPurpose()));
            boolean similar = score >= MIN_DISTANCE;
            if (similar) {
                log.debug("similarity expression {} compared with {} resulted in score {}",
                        rule.getExpression(), normalize(wrappedBooking.getPurpose()), score);
            }
            return similar;
        }

        default:
            // Any match type not handled above is rejected.
            throw new IllegalArgumentException("missing match type");
    }
}