/** * Compares two strings based not on the strings themselves, but on an encoding of the two strings using the * StringEncoder this Comparator was created with. * * If an {@link EncoderException} is encountered, return <code>0</code>. * * @param o1 * the object to compare * @param o2 * the object to compare to * @return the Comparable.compareTo() return code or 0 if an encoding error was caught. * @see Comparable */ @Override public int compare(final Object o1, final Object o2) { int compareCode = 0; try { @SuppressWarnings("unchecked") // May fail with CCE if encode returns something that is not Comparable // However this was always the case. final Comparable<Comparable<?>> s1 = (Comparable<Comparable<?>>) this.stringEncoder.encode(o1); final Comparable<?> s2 = (Comparable<?>) this.stringEncoder.encode(o2); compareCode = s1.compareTo(s2); } catch (final EncoderException ee) { compareCode = 0; } return compareCode; }
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(final StringEncoder encoder, final String s1, final String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    final String firstEncoded = encoder.encode(s1);
    final String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    String firstEncoded = encoder.encode(s1);
    String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
@Test public void testEncodeNull() throws Exception { final StringEncoder encoder = this.getStringEncoder(); try { encoder.encode(null); } catch (final EncoderException ee) { // An exception should be thrown } }
/**
 * Asserts that encoding <code>source</code> with the encoder returned by
 * <code>getStringEncoder()</code> produces <code>expected</code>.
 *
 * @param expected
 *            the encoding the source is expected to produce
 * @param source
 *            the String to encode; also included in the failure message
 * @throws EncoderException
 *             if the encoder fails to encode the source
 */
public void checkEncoding(final String expected, final String source) throws EncoderException {
    final String failureMessage = "Source: " + source;
    final String actual = this.getStringEncoder().encode(source);
    Assert.assertEquals(failureMessage, expected, actual);
}
/**
 * Checks that the encoder produces the same result for each test String
 * regardless of the JVM default locale. The encoding obtained under the first
 * locale is the reference; every later locale must reproduce it. The original
 * default locale is restored afterwards.
 */
@Test
public void testLocaleIndependence() throws Exception {
    final StringEncoder encoder = this.getStringEncoder();

    final String[] data = {"I", "i",};

    final Locale orig = Locale.getDefault();
    final Locale[] locales = {Locale.ENGLISH, new Locale("tr"), Locale.getDefault()};

    try {
        for (final String element : data) {
            // The first locale supplies the reference encoding.
            Locale.setDefault(locales[0]);
            final String ref = encoder.encode(element);

            // Every remaining locale must yield the same encoding.
            for (int j = 1; j < locales.length; j++) {
                Locale.setDefault(locales[j]);
                final String localeLabel = Locale.getDefault().toString() + ": ";
                String cur = null;
                try {
                    cur = encoder.encode(element);
                } catch (final Exception e) {
                    Assert.fail(localeLabel + e.getMessage());
                }
                Assert.assertEquals(localeLabel, ref, cur);
            }
        }
    } finally {
        Locale.setDefault(orig);
    }
}
/**
 * Verifies that passing a non-String object (a Float) to the encoder's
 * <code>encode</code> method results in an exception of some kind.
 */
@Test
public void testEncodeWithInvalidObject() throws Exception {
    boolean exceptionThrown = false;
    try {
        final StringEncoder encoder = this.getStringEncoder();
        // Float.valueOf replaces the deprecated-for-removal Float(double) constructor;
        // the boxed value is the same as (float) 3.4.
        encoder.encode(Float.valueOf(3.4f));
    } catch (final Exception e) {
        exceptionThrown = true;
    }
    Assert.assertTrue("An exception was not thrown when we tried to encode " + "a Float object", exceptionThrown);
}
/**
 * Encodes the given string with this component's encoder.
 *
 * @param string
 *            the string to encode
 * @return the encoded string as returned by the encoder
 * @throws AnalysisEngineProcessException
 *             wrapping any {@link EncoderException} raised by the encoder
 */
protected String encode(String string) throws AnalysisEngineProcessException {
    try {
        return encoder.encode(string);
    } catch (EncoderException encodingFailure) {
        throw new AnalysisEngineProcessException(encodingFailure);
    }
}
}
/** * Compares two strings based not on the strings themselves, but on an encoding of the two strings using the * StringEncoder this Comparator was created with. * * If an {@link EncoderException} is encountered, return <code>0</code>. * * @param o1 * the object to compare * @param o2 * the object to compare to * @return the Comparable.compareTo() return code or 0 if an encoding error was caught. * @see Comparable */ @Override public int compare(final Object o1, final Object o2) { int compareCode = 0; try { @SuppressWarnings("unchecked") // May fail with CCE if encode returns something that is not Comparable // However this was always the case. final Comparable<Comparable<?>> s1 = (Comparable<Comparable<?>>) this.stringEncoder.encode(o1); final Comparable<?> s2 = (Comparable<?>) this.stringEncoder.encode(o2); compareCode = s1.compareTo(s2); } catch (final EncoderException ee) { compareCode = 0; } return compareCode; }
/** * Compares two strings based not on the strings themselves, but on an encoding of the two strings using the * StringEncoder this Comparator was created with. * * If an {@link EncoderException} is encountered, return <code>0</code>. * * @param o1 * the object to compare * @param o2 * the object to compare to * @return the Comparable.compareTo() return code or 0 if an encoding error was caught. * @see Comparable */ @Override public int compare(final Object o1, final Object o2) { int compareCode = 0; try { @SuppressWarnings("unchecked") // May fail with CCE if encode returns something that is not Comparable // However this was always the case. final Comparable<Comparable<?>> s1 = (Comparable<Comparable<?>>) this.stringEncoder.encode(o1); final Comparable<?> s2 = (Comparable<?>) this.stringEncoder.encode(o2); compareCode = s1.compareTo(s2); } catch (final EncoderException ee) { compareCode = 0; } return compareCode; }
/** * Compares two strings based not on the strings themselves, but on an encoding of the two strings using the * StringEncoder this Comparator was created with. * * If an {@link EncoderException} is encountered, return <code>0</code>. * * @param o1 * the object to compare * @param o2 * the object to compare to * @return the Comparable.compareTo() return code or 0 if an encoding error was caught. * @see Comparable */ @Override public int compare(final Object o1, final Object o2) { int compareCode = 0; try { @SuppressWarnings("unchecked") // May fail with CCE if encode returns something that is not Comparable // However this was always the case. final Comparable<Comparable<?>> s1 = (Comparable<Comparable<?>>) this.stringEncoder.encode(o1); final Comparable<?> s2 = (Comparable<?>) this.stringEncoder.encode(o2); compareCode = s1.compareTo(s2); } catch (final EncoderException ee) { compareCode = 0; } return compareCode; }
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(final StringEncoder encoder, final String s1, final String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    final String firstEncoded = encoder.encode(s1);
    final String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    String firstEncoded = encoder.encode(s1);
    String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(final StringEncoder encoder, final String s1, final String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    final String firstEncoded = encoder.encode(s1);
    final String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(final StringEncoder encoder, final String s1, final String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    final String firstEncoded = encoder.encode(s1);
    final String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    String firstEncoded = encoder.encode(s1);
    String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    String firstEncoded = encoder.encode(s1);
    String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
/**
 * Encodes both Strings with the given encoder and returns the number of
 * characters in the two encoded Strings that are the same, as computed by
 * {@link #differenceEncoded(String,String)}.
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 *
 * @param encoder
 *            The encoder used to encode both Strings.
 * @param s1
 *            The first String to encode and compare.
 * @param s2
 *            The second String to encode and compare.
 * @return The number of characters in the two encoded Strings that are the
 *         same.
 *
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *      MS T-SQL DIFFERENCE</a>
 *
 * @throws EncoderException
 *             if an error occurs encoding one of the strings
 */
static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException {
    // s1 is encoded before s2, matching the original evaluation order.
    String firstEncoded = encoder.encode(s1);
    String secondEncoded = encoder.encode(s2);
    return differenceEncoded(firstEncoded, secondEncoded);
}
/**
 * Computes a similarity score for two strings from their encoded forms: the
 * value returned by <code>SoundUtils.differenceEncoded</code> for the two
 * encodings, divided by the length of the shorter encoding.
 *
 * @param string1
 *            the first string to encode and compare
 * @param string2
 *            the second string to encode and compare
 * @return the difference value divided by the shorter encoded length, or
 *         <code>0.0</code> when either encoding is null or empty (previously
 *         this produced NaN or a NullPointerException)
 * @throws SimilarityException
 *             if the encoder fails; the originating {@link EncoderException}
 *             is attached as the cause
 */
@Override
public double getSimilarity(String string1, String string2) throws SimilarityException {
    String encodedString1;
    String encodedString2;
    try {
        encodedString1 = encoder.encode(string1);
        encodedString2 = encoder.encode(string2);
    } catch (EncoderException e) {
        // Keep the original failure reachable through getCause() instead of discarding it.
        // NOTE(review): assumes the no-arg SimilarityException constructor leaves the cause unset.
        SimilarityException failure = new SimilarityException();
        failure.initCause(e);
        throw failure;
    }

    // NOTE(review): presumably the encoder can return null for null input; confirm against the encoder in use.
    if (encodedString1 == null || encodedString2 == null) {
        return 0.0;
    }

    int minLength = Math.min(encodedString1.length(), encodedString2.length());
    if (minLength == 0) {
        // Nothing to compare; avoid the 0/0 division that would yield NaN.
        return 0.0;
    }

    int value = SoundUtils.differenceEncoded(encodedString1, encodedString2);
    return (double) value / minLength;
}
}
/**
 * Converts the passed in object to a string by casting it to
 * <code>String</code> and running it through the encoder.
 *
 * @param pObject
 *            the object to convert; cast to <code>String</code> before encoding
 * @return the encoded String returned by the encoder
 * @throws TranslationException
 *             if the encoder fails; carries the encoder's message and has the
 *             originating {@link EncoderException} attached as its cause
 */
public String toString(Object pObject){
    try {
        return encoder.encode((String) pObject);
    } catch (EncoderException ex) {
        // Attach the cause rather than dumping the stack trace to stderr and discarding it.
        // NOTE(review): assumes TranslationException(String) leaves the cause unset.
        TranslationException failure = new TranslationException(ex.getMessage());
        failure.initCause(ex);
        throw failure;
    }
}