/**
 * Determines the single language a word belongs to, if there is exactly one candidate.
 *
 * @param text
 *            the word to inspect
 * @return the one matching language, or {@link Languages#ANY} when the candidate set is not a singleton
 */
public String guessLanguage(final String text) {
    final Languages.LanguageSet candidates = guessLanguages(text);
    if (candidates.isSingleton()) {
        return candidates.getAny();
    }
    // Zero or several candidates: no unique answer.
    return Languages.ANY;
}
/**
 * Creates a builder seeded with exactly one phoneme whose text is the empty string and whose
 * languages are the supplied set. Further text can then be appended to it. This is intended to be
 * the only way of starting a phoneme builder from scratch.
 *
 * @param languages the set of languages the seed phoneme is restricted to
 * @return a new phoneme builder holding the single empty phoneme
 */
public static PhonemeBuilder empty(final Languages.LanguageSet languages) {
    final Rule.Phoneme seed = new Rule.Phoneme("", languages);
    return new PhonemeBuilder(seed);
}
/**
 * Produces the phonetic encoding of a string, first guessing which languages it may come from
 * and then delegating to the two-argument {@code encode} with that language set.
 *
 * @param input
 *            the String to encode
 * @return the encoding of the input
 */
public String encode(final String input) {
    // Language detection runs first; its result restricts the encoding step.
    final Languages.LanguageSet detected = this.lang.guessLanguages(input);
    return encode(input, detected);
}
/**
 * Changes the rule type. The engine is replaced by a new one that carries over the current name
 * type, concat flag and maximum phoneme count and uses the supplied rule type.
 *
 * @param ruleType
 *            {@link RuleType#APPROX} or {@link RuleType#EXACT} for approximate or exact phonetic matches
 */
public void setRuleType(final RuleType ruleType) {
    final PhoneticEngine current = this.engine;
    this.engine = new PhoneticEngine(
            current.getNameType(),
            ruleType,
            current.isConcat(),
            current.getMaxPhonemes());
}
/**
 * Changes the type of name being encoded. Use {@link NameType#GENERIC} unless you specifically
 * want phonetic encodings optimized for Ashkenazi or Sephardic Jewish family names. The engine is
 * replaced by a new one that keeps the current rule type, concat flag and maximum phoneme count.
 *
 * @param nameType
 *            the NameType to use from now on
 */
public void setNameType(final NameType nameType) {
    final PhoneticEngine current = this.engine;
    this.engine = new PhoneticEngine(
            nameType,
            current.getRuleType(),
            current.isConcat(),
            current.getMaxPhonemes());
}
/**
 * Sets the maximum number of phonemes the engine shall consider. The engine is replaced by a new
 * one that keeps the current name type, rule type and concat flag.
 *
 * @param maxPhonemes
 *            the maximum number of phonemes returned by the engine
 * @since 1.7
 */
public void setMaxPhonemes(final int maxPhonemes) {
    final PhoneticEngine current = this.engine;
    this.engine = new PhoneticEngine(
            current.getNameType(),
            current.getRuleType(),
            current.isConcat(),
            maxPhonemes);
}
/**
 * Chooses how multiple possible phonetic encodings are combined. The engine is replaced by a new
 * one that keeps the current name type, rule type and maximum phoneme count.
 *
 * @param concat
 *            true if multiple encodings are to be combined with a '|', false if just the first one is
 *            to be considered
 */
public void setConcat(final boolean concat) {
    final PhoneticEngine current = this.engine;
    this.engine = new PhoneticEngine(
            current.getNameType(),
            current.getRuleType(),
            concat,
            current.getMaxPhonemes());
}
/**
 * Matching a rule at a negative input index is expected to raise
 * {@link IndexOutOfBoundsException}.
 */
@Test(expected = IndexOutOfBoundsException.class)
public void testNegativeIndexForRuleMatchIndexOutOfBoundsException() {
    final Rule.Phoneme emptyPhoneme = new Rule.Phoneme("", Languages.ANY_LANGUAGE);
    final Rule rule = new Rule("a", "", "", emptyPhoneme);
    // -1 is deliberately out of range for the input "bob".
    rule.patternAndContextMatches("bob", -1);
}
/**
 * Builds a new Phoneme that has the same text as this one and whose language set is this
 * phoneme's language set merged with the given one.
 *
 * @param lang the language set to merge in
 * @return a new Phoneme
 */
public Phoneme mergeWithLanguage(final LanguageSet lang) {
    // Snapshot the text as a String before constructing the new phoneme.
    final String text = this.phonemeText.toString();
    return new Phoneme(text, this.languages.merge(lang));
}
/**
 * Encodes "gna" with a generic/approximate encoder; the test passes when no exception is thrown.
 * See
 * https://issues.apache.org/jira/browse/CODEC-125?focusedCommentId=13071566&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-13071566
 *
 * @throws EncoderException
 *             if the encoder fails on the input
 */
@Test
public void testEncodeGna() throws EncoderException {
    createGenericApproxEncoder().encode("gna");
}
/**
 * Encodes the given string with the current engine.
 *
 * @param source
 *            the string to encode, may be {@code null}
 * @return the engine's encoding of {@code source}, or {@code null} when {@code source} is {@code null}
 * @throws EncoderException
 *             declared by the overridden method
 */
@Override
public String encode(final String source) throws EncoderException {
    // A null input is passed through untouched rather than handed to the engine.
    return source == null ? null : this.engine.encode(source);
}
/**
 * Reports the name type the current engine is configured with.
 *
 * @return the NameType currently being used
 */
public NameType getNameType() {
    final NameType activeNameType = this.engine.getNameType();
    return activeNameType;
}
/**
 * Reports the rule type the current engine is configured with.
 *
 * @return the RuleType currently being used
 */
public RuleType getRuleType() {
    final RuleType activeRuleType = this.engine.getRuleType();
    return activeRuleType;
}
/**
 * Appends <code>str</code> to every phoneme currently held by this builder. The phonemes are
 * extended in place; nothing is returned.
 *
 * @param str the characters to append to the phonemes
 */
public void append(final CharSequence str) {
    for (final Rule.Phoneme phoneme : this.phonemes) {
        phoneme.append(str);
    }
}
/**
 * Reports whether the current engine concatenates multiple possible encodings.
 *
 * @return true if multiple encodings are concatenated, false if just the first one is returned
 */
public boolean isConcat() {
    final boolean concatenating = this.engine.isConcat();
    return concatenating;
}
/**
 * Builds a fresh encoder configured with {@link NameType#GENERIC} and {@link RuleType#APPROX}.
 *
 * @return the configured encoder
 */
private BeiderMorseEncoder createGenericApproxEncoder() {
    final BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
    bmpm.setNameType(NameType.GENERIC);
    bmpm.setRuleType(RuleType.APPROX);
    return bmpm;
}
/**
 * Loading a Lang from a resource name that does not exist is expected to raise
 * {@link IllegalStateException}.
 */
@Test(expected = IllegalStateException.class)
public void testInvalidLangIllegalStateException() {
    final String missingResource = "thisIsAMadeUpResourceName";
    Lang.loadFromResource(missingResource, Languages.getInstance(NameType.GENERIC));
}
/**
 * Supplies the encoder under test: a newly constructed {@link BeiderMorseEncoder}.
 *
 * @return a new BeiderMorseEncoder
 */
@Override
protected StringEncoder createStringEncoder() {
    final BeiderMorseEncoder encoder = new BeiderMorseEncoder();
    return encoder;
}
/**
 * Encodes a very long surname with a generic/approximate encoder; the test fails if encoding
 * takes longer than the 10000 ms timeout.
 *
 * @throws EncoderException
 *             if the encoder fails on the input
 */
@Test(timeout = 10000L)
public void testLongestEnglishSurname() throws EncoderException {
    final String surname = "MacGhilleseatheanaich";
    createGenericApproxEncoder().encode(surname);
}