/**
 * Creates a morphological analyzer that delegates to an {@link EnglishMorphAnalyzer}.
 *
 * @param language the requested language; this constructor does not consult it and
 *                 always creates an English analyzer.
 */
public MorphologicalAnalyzer(Language language) {
    this.analyzer = new EnglishMorphAnalyzer();
}
/**
 * Returns the lemma of the given word form.
 * <p>
 * The word form is lowercased, then resolved through the abbreviation lookup and, only if
 * that yields nothing, through the inflection lookup. When neither lookup produces a result
 * the lowercased form itself is used. A result recognized as a cardinal or an ordinal is
 * replaced by {@link MetaConst#CARDINAL} or {@link MetaConst#ORDINAL} respectively.
 *
 * @param simplifiedWordForm the word form to lemmatize.
 * @param pos the part-of-speech tag handed to the abbreviation and inflection lookups.
 * @return the resolved lemma, or the cardinal/ordinal placeholder constant.
 */
@Override
public String lemmatize(String simplifiedWordForm, String pos) {
    final String lowered = StringUtils.toLowerCase(simplifiedWordForm);

    // Abbreviations take precedence; the inflection lookup runs only when no abbreviation matched.
    String resolved = getAbbreviation(lowered, pos);
    if (resolved == null) {
        resolved = getBaseFormFromInflection(lowered, pos);
    }

    final String lemma = (resolved != null) ? resolved : lowered;

    if (isCardinal(lemma)) {
        return MetaConst.CARDINAL;
    }
    if (isOrdinal(lemma)) {
        return MetaConst.ORDINAL;
    }
    return lemma;
}
/**
 * Constructs an English morphological analyzer from the dictionaries in resource.
 * <p>
 * Reads the inflection-suffix and noun-to-verb derivation-suffix documents, then initializes
 * the verb, noun, adjective and adverb inflection rules, the noun-to-verb derivation rules,
 * the cardinal and ordinal base sets, and the abbreviation rules.
 *
 * @throws IllegalStateException if any of the dictionaries cannot be read; the underlying
 *         {@link IOException} is attached as the cause.
 */
public EnglishMorphAnalyzer() {
    Element inflection = XMLUtils.getDocumentElement(IOUtils.getInputStreamsFromResource(INFLECTION_SUFFIX));
    Element derivationN2V = XMLUtils.getDocumentElement(IOUtils.getInputStreamsFromResource(DERIVATION_SUFFIX_N2V));

    try {
        inf_verb      = getInflectionRules(inflection, VERB     , VERB_POS);
        inf_noun      = getInflectionRules(inflection, NOUN     , NOUN_POS);
        inf_adjective = getInflectionRules(inflection, ADJECTIVE, ADJECTIVE_POS);
        inf_adverb    = getInflectionRules(inflection, ADVERB   , ADVERB_POS);

        der_n2v = getDerivationalRules(derivationN2V, NOUN);

        base_cardinal     = DSUtils.createStringHashSet(IOUtils.getInputStreamsFromResource(CARDINAL_BASE));
        base_ordinal      = DSUtils.createStringHashSet(IOUtils.getInputStreamsFromResource(ORDINAL_BASE));
        rule_abbreviation = getAbbreviationMap(IOUtils.getInputStreamsFromResource(ABBREVIATOIN_RULE));
    } catch (IOException e) {
        // Fail fast: a failed load skips every assignment after it, so continuing would
        // hand callers an analyzer with unassigned rule fields.
        throw new IllegalStateException("Failed to load the English morphological dictionaries", e);
    }
}
/**
 * Builds the inflection rules for one word category.
 * Called by {@link #EnglishMorphAnalyzer()}.
 *
 * @param eInflection the element whose first child element named {@code type} holds the affixes.
 * @param type the category name; it selects both the affix element and the
 *             {@code ROOT + type + extension} resources for the base and exception data.
 * @param basePOS the part-of-speech tag passed through to the inflection being built.
 * @return the inflection built from the base stream, exception stream and affix element.
 * @throws IOException if thrown while building the inflection.
 */
private EnglishInflection getInflectionRules(Element eInflection, String type, String basePOS) throws IOException {
    final String resourcePrefix = ROOT + type;

    final Element affixes = XMLUtils.getFirstElementByTagName(eInflection, type);
    final InputStream bases = IOUtils.getInputStreamsFromResource(resourcePrefix + EXT_BASE);
    final InputStream exceptions = IOUtils.getInputStreamsFromResource(resourcePrefix + EXT_EXCEPTION);

    return getInflection(bases, exceptions, affixes, basePOS);
}
/**
 * Constructs an English morphological analyzer from the dictionaries in resource.
 * <p>
 * Reads the inflection-suffix and noun-to-verb derivation-suffix documents, then initializes
 * the verb, noun, adjective and adverb inflection rules, the noun-to-verb derivation rules,
 * the cardinal and ordinal base sets, and the abbreviation rules.
 *
 * @throws IllegalStateException if any of the dictionaries cannot be read; the underlying
 *         {@link IOException} is attached as the cause.
 */
public EnglishMorphAnalyzer() {
    Element inflection = XMLUtils.getDocumentElement(IOUtils.getInputStreamsFromResource(INFLECTION_SUFFIX));
    Element derivationN2V = XMLUtils.getDocumentElement(IOUtils.getInputStreamsFromResource(DERIVATION_SUFFIX_N2V));

    try {
        inf_verb      = getInflectionRules(inflection, VERB     , VERB_POS);
        inf_noun      = getInflectionRules(inflection, NOUN     , NOUN_POS);
        inf_adjective = getInflectionRules(inflection, ADJECTIVE, ADJECTIVE_POS);
        inf_adverb    = getInflectionRules(inflection, ADVERB   , ADVERB_POS);

        der_n2v = getDerivationalRules(derivationN2V, NOUN);

        base_cardinal     = DSUtils.createStringHashSet(IOUtils.getInputStreamsFromResource(CARDINAL_BASE));
        base_ordinal      = DSUtils.createStringHashSet(IOUtils.getInputStreamsFromResource(ORDINAL_BASE));
        rule_abbreviation = getAbbreviationMap(IOUtils.getInputStreamsFromResource(ABBREVIATOIN_RULE));
    } catch (IOException e) {
        // Fail fast: a failed load skips every assignment after it, so continuing would
        // hand callers an analyzer with unassigned rule fields.
        throw new IllegalStateException("Failed to load the English morphological dictionaries", e);
    }
}
/**
 * Builds the inflection rules for one word category.
 * Called by {@link #EnglishMorphAnalyzer()}.
 *
 * @param eInflection the element whose first child element named {@code type} holds the affixes.
 * @param type the category name; it selects both the affix element and the
 *             {@code ROOT + type + extension} resources for the base and exception data.
 * @param basePOS the part-of-speech tag passed through to the inflection being built.
 * @return the inflection built from the base stream, exception stream and affix element.
 * @throws IOException if thrown while building the inflection.
 */
private EnglishInflection getInflectionRules(Element eInflection, String type, String basePOS) throws IOException {
    final String resourcePrefix = ROOT + type;

    final Element affixes = XMLUtils.getFirstElementByTagName(eInflection, type);
    final InputStream bases = IOUtils.getInputStreamsFromResource(resourcePrefix + EXT_BASE);
    final InputStream exceptions = IOUtils.getInputStreamsFromResource(resourcePrefix + EXT_EXCEPTION);

    return getInflection(bases, exceptions, affixes, basePOS);
}
/**
 * Returns the lemma of the given word form.
 * <p>
 * The word form is lowercased, then resolved through the abbreviation lookup and, only if
 * that yields nothing, through the inflection lookup. When neither lookup produces a result
 * the lowercased form itself is used. A result recognized as a cardinal or an ordinal is
 * replaced by {@link MetaConst#CARDINAL} or {@link MetaConst#ORDINAL} respectively.
 *
 * @param simplifiedWordForm the word form to lemmatize.
 * @param pos the part-of-speech tag handed to the abbreviation and inflection lookups.
 * @return the resolved lemma, or the cardinal/ordinal placeholder constant.
 */
@Override
public String lemmatize(String simplifiedWordForm, String pos) {
    final String lowered = StringUtils.toLowerCase(simplifiedWordForm);

    // Abbreviations take precedence; the inflection lookup runs only when no abbreviation matched.
    String resolved = getAbbreviation(lowered, pos);
    if (resolved == null) {
        resolved = getBaseFormFromInflection(lowered, pos);
    }

    final String lemma = (resolved != null) ? resolved : lowered;

    if (isCardinal(lemma)) {
        return MetaConst.CARDINAL;
    }
    if (isOrdinal(lemma)) {
        return MetaConst.ORDINAL;
    }
    return lemma;
}
/**
 * Creates a morphological analyzer that delegates to an {@link EnglishMorphAnalyzer}.
 *
 * @param language the requested language; this constructor does not consult it and
 *                 always creates an English analyzer.
 */
public MorphologicalAnalyzer(Language language) {
    this.analyzer = new EnglishMorphAnalyzer();
}