@Experimental protected List<String> reorderSuggestions(List<String> suggestions, String word) { // WORK IN PROGRESS if (languageModel == null) { return suggestions; } BaseLanguageModel lm = (BaseLanguageModel) languageModel; List<Integer> levenshteinDistances = suggestions.stream().map(suggestion -> StringUtils.getLevenshteinDistance(word, suggestion)).collect(Collectors.toList()); List<Long> frequencies = suggestions.stream().map(lm::getCount).collect(Collectors.toList()); Long frequenciesSum = frequencies.stream().reduce((a, b) -> a + b).orElse(1L); List<Float> normalizedFrequencies = frequencies.stream().map(f -> (float) f / frequenciesSum).collect(Collectors.toList()); System.out.println("frequencies: " + frequencies + " / normalized: " + normalizedFrequencies); List<Pair<String, Float>> scoredSuggestions = new ArrayList<>(suggestions.size()); for (int i = 0; i < suggestions.size(); i++) { float score = (1f / normalizedFrequencies.get(i)) * levenshteinDistances.get(i); scoredSuggestions.add(Pair.of(suggestions.get(i), score)); } scoredSuggestions.sort(Comparator.comparing(Pair::getRight)); System.out.println("Before reordering: " + suggestions.subList(0, 5) + " / After: " + scoredSuggestions.subList(0, 5)); return scoredSuggestions.stream().map(Pair::getLeft).collect(Collectors.toList()); } }
/**
 * Expects an {@link IllegalArgumentException} when the second string is {@code null}.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetLevenshteinDistance_NullString() throws Exception {
    // NOTE(review): despite the "NullString" name, the null is the SECOND argument here.
    final String first = "a";
    final String second = null;
    StringUtils.getLevenshteinDistance(first, second);
}
/**
 * Expects an {@link IllegalArgumentException} from the thresholded variant when the
 * first string is {@code null}.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetLevenshteinDistance_NullStringInt() throws Exception {
    final String first = null;
    final String second = "a";
    final int threshold = 0;
    StringUtils.getLevenshteinDistance(first, second, threshold);
}
/**
 * Expects an {@link IllegalArgumentException} from the thresholded variant when the
 * threshold is negative, even though both strings are valid.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetLevenshteinDistance_StringStringNegativeInt() throws Exception {
    final String sameText = "a";
    final int negativeThreshold = -1;
    StringUtils.getLevenshteinDistance(sameText, "a", negativeThreshold);
}
/**
 * Expects an {@link IllegalArgumentException} when the first string is {@code null}.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetLevenshteinDistance_StringNull() throws Exception {
    // NOTE(review): despite the "StringNull" name, the null is the FIRST argument here.
    final String first = null;
    final String second = "a";
    StringUtils.getLevenshteinDistance(first, second);
}
/**
 * Expects an {@link IllegalArgumentException} from the thresholded variant when the
 * second string is {@code null}.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetLevenshteinDistance_StringNullInt() throws Exception {
    final String first = "a";
    final String second = null;
    final int threshold = 0;
    StringUtils.getLevenshteinDistance(first, second, threshold);
}
/**
 * Checks the unbounded Levenshtein distance for a table of string pairs, covering
 * empty inputs, single edits, and both argument orders.
 */
@Test
public void testGetLevenshteinDistance_StringString() {
    // Each row is {first, second}; expectedDistances[i] belongs to inputPairs[i].
    final String[][] inputPairs = {
        {"", ""},
        {"", "a"},
        {"aaapppp", ""},
        {"frog", "fog"},
        {"fly", "ant"},
        {"elephant", "hippo"},
        {"hippo", "elephant"},
        {"hippo", "zzzzzzzz"},
        {"zzzzzzzz", "hippo"},
        {"hello", "hallo"},
    };
    final int[] expectedDistances = {0, 1, 7, 1, 3, 7, 7, 8, 8, 1};

    for (int row = 0; row < expectedDistances.length; row++) {
        assertEquals(expectedDistances[row],
                StringUtils.getLevenshteinDistance(inputPairs[row][0], inputPairs[row][1]));
    }
}
/**
 * Computes the Levenshtein distance between the first two arguments.
 *
 * @param arguments the deferred arguments; positions 0 and 1 are read as strings
 * @return the shared {@code output} holder set to the distance, or {@code null} when
 *         either string value is {@code null}
 * @throws HiveException declared by the overridden method
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    final String left = getStringValue(arguments, 0, converters);
    final String right = getStringValue(arguments, 1, converters);

    if (left != null && right != null) {
        output.set(StringUtils.getLevenshteinDistance(left, right));
        return output;
    }
    return null;
}
/**
 * Evaluates the Levenshtein distance of the string values at argument positions 0 and 1.
 *
 * @param arguments the deferred arguments to read the two strings from
 * @return {@code null} if either string is {@code null}; otherwise the {@code output}
 *         holder after it has been set to the distance
 * @throws HiveException declared by the overridden method
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // Both values are fetched before the null check, as in the original flow.
    final String first = getStringValue(arguments, 0, converters);
    final String second = getStringValue(arguments, 1, converters);

    final boolean missingInput = first == null || second == null;
    if (missingInput) {
        return null;
    }

    final int editDistance = StringUtils.getLevenshteinDistance(first, second);
    output.set(editDistance);
    return output;
}
// Assertions for the three-argument (thresholded) getLevenshteinDistance.
// Per the expectations below, the result is -1 whenever the distance exceeds the
// threshold passed as the third argument; otherwise it is the distance itself.

// One or both inputs empty.
assertEquals(0, StringUtils.getLevenshteinDistance("", "", 0));
assertEquals(7, StringUtils.getLevenshteinDistance("aaapppp", "", 8));
assertEquals(7, StringUtils.getLevenshteinDistance("aaapppp", "", 7));
assertEquals(-1, StringUtils.getLevenshteinDistance("aaapppp", "", 6));

// Equal-length inputs.
assertEquals(-1, StringUtils.getLevenshteinDistance("b", "a", 0));
assertEquals(-1, StringUtils.getLevenshteinDistance("a", "b", 0));
assertEquals(0, StringUtils.getLevenshteinDistance("aa", "aa", 0));
assertEquals(0, StringUtils.getLevenshteinDistance("aa", "aa", 2));
assertEquals(-1, StringUtils.getLevenshteinDistance("aaa", "bbb", 2));
assertEquals(3, StringUtils.getLevenshteinDistance("aaa", "bbb", 3));

// Different-length inputs with a threshold above the distance.
assertEquals(6, StringUtils.getLevenshteinDistance("aaaaaa", "b", 10));
assertEquals(7, StringUtils.getLevenshteinDistance("aaapppp", "b", 8));
assertEquals(3, StringUtils.getLevenshteinDistance("a", "bbb", 4));

// Different-length inputs with a threshold exactly equal to the distance.
assertEquals(7, StringUtils.getLevenshteinDistance("aaapppp", "b", 7));
assertEquals(3, StringUtils.getLevenshteinDistance("a", "bbb", 3));

// Different-length inputs with a threshold below the distance, in both argument orders.
assertEquals(-1, StringUtils.getLevenshteinDistance("a", "bbb", 2));
assertEquals(-1, StringUtils.getLevenshteinDistance("bbb", "a", 2));
assertEquals(-1, StringUtils.getLevenshteinDistance("aaapppp", "b", 6));
assertEquals(-1, StringUtils.getLevenshteinDistance("a", "bbb", 1));
assertEquals(-1, StringUtils.getLevenshteinDistance("bbb", "a", 1));
/**
 * Tells whether two titles are within a maximum Levenshtein distance of each other.
 *
 * @param title1 the first title
 * @param title2 the second title
 * @param distance the largest distance (inclusive) still treated as a match
 * @return {@code true} if the Levenshtein distance between the titles does not exceed
 *         {@code distance}
 */
private static boolean compareDistance(final String title1, final String title2, int distance) {
    final int actualDistance = StringUtils.getLevenshteinDistance(title1, title2);
    return actualDistance <= distance;
}
/**
 * Orders two strings by their Levenshtein distance to {@code targetClass}; the string
 * closer to {@code targetClass} sorts first.
 *
 * @param arg0 the first candidate
 * @param arg1 the second candidate
 * @return a negative, zero, or positive value as {@code arg0} is closer to, equally far
 *         from, or farther from {@code targetClass} than {@code arg1}
 */
@Override
public int compare(String arg0, String arg1) {
    // The distances are widened to double by Double.compare's parameter types.
    return Double.compare(
            StringUtils.getLevenshteinDistance(targetClass, arg0),
            StringUtils.getLevenshteinDistance(targetClass, arg1));
}
}
/**
 * Decides whether two names match, either exactly (ignoring case) or approximately.
 *
 * @param nameA the first name; must not be {@code null}
 * @param nameB the second name
 * @return {@code true} if the names are equal ignoring case, or if the thresholded
 *         Levenshtein distance call does not return -1
 */
private boolean nameFuzzyMatch(final String nameA, final String nameB) {
    if (nameA.equalsIgnoreCase(nameB)) {
        return true;
    }
    // The thresholded call returns -1 when the names are not within the threshold.
    final int boundedDistance =
            StringUtils.getLevenshteinDistance(nameA, nameB, lavenshteinDistanceThreshold);
    return boundedDistance != -1;
}
}
/**
 * Checks whether two names are the same ignoring case, or close enough under
 * {@code LEVENSHTEIN_DISTANCE_THRESHOLD}.
 *
 * @param nameA the first name; must not be {@code null}
 * @param nameB the second name
 * @return {@code true} on a case-insensitive match, or when the thresholded Levenshtein
 *         distance call yields something other than -1
 */
private boolean nameFuzzyMatch(final String nameA, final String nameB) {
    if (!nameA.equalsIgnoreCase(nameB)) {
        // Only fall back to the edit-distance check when the cheap comparison fails.
        final int thresholdedDistance =
                StringUtils.getLevenshteinDistance(nameA, nameB, LEVENSHTEIN_DISTANCE_THRESHOLD);
        return thresholdedDistance != -1;
    }
    return true;
}
/**
 * Looks for a configured key that the given key is probably a misspelling of.
 *
 * @param given the key to check
 * @param configKeys the known keys
 * @return the first key in {@code configKeys} whose Levenshtein distance to
 *         {@code given} is 1 or 2; {@code null} if {@code given} is itself a known key
 *         or no such key exists
 */
public static String hasPossibleRequestKey(String given, List<String> configKeys) {
    final boolean alreadyKnown = configKeys.contains(given);
    if (alreadyKnown) {
        return null;
    }

    for (String candidate : configKeys) {
        final int editDistance = StringUtils.getLevenshteinDistance(given, candidate);
        if (editDistance == 0 || editDistance >= 3) {
            continue;
        }
        return candidate;
    }
    return null;
}
}
/**
 * Computes the Levenshtein distance between two strings, scaled by the length of the
 * longer string.
 *
 * @param one the first string, may be {@code null}
 * @param two the second string, may be {@code null}
 * @return {@code 1.0} if either string is {@code null}; {@code 0.0} if both strings are
 *         empty; otherwise the distance divided by the longer string's length
 */
private double normalisedLevenshteinDistance(String one, String two) {
    if (one == null || two == null) {
        return 1.0;
    }
    int longestLength = Math.max(one.length(), two.length());
    if (longestLength == 0) {
        // Both strings are empty, hence identical; dividing 0.0 by 0.0 would yield NaN.
        return 0.0;
    }
    double actualDistance = getLevenshteinDistance(one, two);
    return actualDistance / longestLength;
}
/**
 * Finds the supported driver whose string form has the smallest Levenshtein distance
 * to the given value.
 *
 * @param value the text to match against each driver's {@code toString()}
 * @return the closest driver (the earliest one on ties), or {@code null} if
 *         {@code values()} is empty
 */
public static SupportedWebDriver getClosestDriverValueTo(final String value) {
    final SupportedWebDriver[] candidates = values();
    SupportedWebDriver best = null;
    int bestDistance = Integer.MAX_VALUE;

    for (int i = 0; i < candidates.length; i++) {
        final int distance = StringUtils.getLevenshteinDistance(candidates[i].toString(), value);
        // Strict comparison keeps the first driver when distances tie.
        if (distance < bestDistance) {
            best = candidates[i];
            bestDistance = distance;
        }
    }
    return best;
}
/**
 * Returns the driver whose {@code toString()} is nearest to {@code value} by
 * Levenshtein distance.
 *
 * @param value the text to compare each driver name with
 * @return the nearest driver, preferring the earlier one when distances are equal;
 *         {@code null} if there are no drivers
 */
public static SupportedWebDriver getClosestDriverValueTo(final String value) {
    SupportedWebDriver nearest = null;
    int smallestDistance = Integer.MAX_VALUE;

    for (SupportedWebDriver candidate : values()) {
        final int candidateDistance =
                StringUtils.getLevenshteinDistance(candidate.toString(), value);
        if (candidateDistance >= smallestDistance) {
            // Not strictly better than what we already have.
            continue;
        }
        smallestDistance = candidateDistance;
        nearest = candidate;
    }
    return nearest;
}
/**
 * Picks the supported driver that is the closest textual match for {@code value}.
 *
 * <p>Closeness is the Levenshtein distance between the driver's {@code toString()} and
 * {@code value}; the first driver with the minimum distance wins.
 *
 * @param value the text to look up
 * @return the best-matching driver, or {@code null} when no drivers are defined
 */
public static SupportedWebDriver getClosestDriverValueTo(final String value) {
    final SupportedWebDriver[] drivers = values();
    SupportedWebDriver winner = null;
    int winningDistance = Integer.MAX_VALUE;

    int position = 0;
    while (position < drivers.length) {
        final SupportedWebDriver current = drivers[position];
        final int currentDistance = StringUtils.getLevenshteinDistance(current.toString(), value);
        if (currentDistance < winningDistance) {
            winningDistance = currentDistance;
            winner = current;
        }
        position++;
    }
    return winner;
}
/**
 * Builds a column holding, for every row, the Levenshtein distance between this
 * column's string value and the string value of {@code column2} at the same row.
 *
 * <p>The result column is named by concatenating this column's name, the other
 * column's name, and {@code "[distance]"}.
 *
 * @param column2 the column to compare against, row by row
 * @return a new float column of per-row distances
 */
default Column distance(Column column2) {
    final String resultName = name() + column2.name() + "[distance]";
    final FloatColumn distances = FloatColumn.create(resultName);

    int row = 0;
    while (row < size()) {
        final String ours = getString(row);
        final String theirs = column2.getString(row);
        distances.set(row, StringUtils.getLevenshteinDistance(ours, theirs));
        row++;
    }
    return distances;
}