/**
 * Collects the lemmas of the given lemma/part-of-speech pairs, dropping the part of speech.
 * <p>
 * Each non-empty lemma is added as-is, followed by every word that
 * {@code StringUtil.stringToWords} returns for that lemma. Pairs whose lemma is the
 * empty string contribute nothing.
 *
 * @param lemmasAndPoses the pairs whose lemmas are to be collected
 * @return the collected lemmas and their words, iterated in insertion order
 */
protected static Set<String> extractOnlyLemmas(Iterable<LemmaAndPos> lemmasAndPoses) {
    Set<String> lemmas = new LinkedHashSet<String>();
    for (LemmaAndPos pair : lemmasAndPoses) {
        String lemma = pair.getLemma();
        if (lemma.length() == 0) {
            // Nothing to collect from an empty lemma.
            continue;
        }
        // The whole lemma goes in first, then its individual words.
        lemmas.add(lemma);
        lemmas.addAll(StringUtil.stringToWords(lemma));
    }
    return lemmas;
}
/**
 * Builds a map from each node of the given tree to the words of that node's lemma.
 * <p>
 * Nodes are visited in the order supplied by {@code TreeIterator.iterableTree}. A node
 * is included only if {@code StringUtil.stringToWords} returns at least one word for
 * its lemma.
 *
 * @param tree the tree (with its parent map) whose nodes are to be mapped
 * @return a map, iterated in insertion order, from node to the words of its lemma
 */
private Map<ExtendedNode, List<String>> buildMapForTree(TreeAndParentMap<ExtendedInfo,ExtendedNode> tree) {
    Map<ExtendedNode, List<String>> wordsByNode = new LinkedHashMap<ExtendedNode, List<String>>();
    for (ExtendedNode current : TreeIterator.iterableTree(tree.getTree())) {
        List<String> words = StringUtil.stringToWords(InfoGetFields.getLemma(current.getInfo()));
        if (!words.isEmpty()) {
            wordsByNode.put(current, words);
        }
    }
    return wordsByNode;
}
/**
 * Collects the lemmas found in the given tree, together with their individual words.
 * <p>
 * For each node for which {@code InfoObservations.infoHasLemma} holds, the node's lemma
 * is added, followed by every word that {@code StringUtil.stringToWords} returns for it.
 *
 * @param tree the root of the tree to scan
 * @return the collected lemmas and words, iterated in insertion order
 */
protected static Set<String> wordsInTree(ExtendedNode tree) {
    Set<String> words = new LinkedHashSet<String>();
    Set<ExtendedNode> nodes = AbstractNodeUtils.treeToLinkedHashSet(tree);
    for (ExtendedNode current : nodes) {
        if (!InfoObservations.infoHasLemma(current.getInfo())) {
            continue;
        }
        String lemma = InfoGetFields.getLemma(current.getInfo());
        // The whole lemma goes in first, then its individual words.
        words.add(lemma);
        for (String word : StringUtil.stringToWords(lemma)) {
            words.add(word);
        }
    }
    return words;
}
/**
 * Tells whether an inserted child is a lexical modifier whose lemma is one of the words
 * of its parent's lemma.
 * <p>
 * The result is {@code true} only when all of the following hold:
 * <ul>
 * <li>the child's relation equals {@code MINIPAR_LEX_MOD_RELATION};</li>
 * <li>{@code infoHasLemma} holds for the child;</li>
 * <li>the child's lemma equals, ignoring case, at least one of the words that
 * {@code StringUtil.stringToWords} returns for the parent's lemma.</li>
 * </ul>
 *
 * @param insertedChildInfo the info of the child being inserted
 * @param parentInfo the info of the node under which the child is inserted
 * @return {@code true} if all the conditions above hold, {@code false} otherwise
 */
public static boolean insertOnlyLexModOfMultiWord(Info insertedChildInfo, Info parentInfo) {
    if (!InfoGetFields.getRelation(insertedChildInfo).equals(MINIPAR_LEX_MOD_RELATION)) {
        return false;
    }
    if (!infoHasLemma(insertedChildInfo)) {
        return false;
    }

    String lemmaOfParent = InfoGetFields.getLemma(parentInfo);
    List<String> wordsOfParent = StringUtil.stringToWords(lemmaOfParent);
    String lemmaOfChild = InfoGetFields.getLemma(insertedChildInfo);

    for (String word : wordsOfParent) {
        if (word.equalsIgnoreCase(lemmaOfChild)) {
            return true;
        }
    }
    return false;
}
/**
 * Collects lemma/part-of-speech pairs from the given tree.
 * <p>
 * For each node for which {@code InfoObservations.infoHasLemma} holds, a pair of the
 * node's lemma and its part of speech is added. It is followed by one pair per word
 * that {@code StringUtil.stringToWords} returns for that lemma, each carrying the same
 * part of speech.
 *
 * @param tree the root of the tree to scan
 * @return the collected pairs, iterated in insertion order
 * @throws TeEngineMlException propagated from the calls made here; which call raises it
 *         cannot be determined from this method alone
 */
public static Set<LemmaAndPos> lemmasAndPosesInTree(ExtendedNode tree) throws TeEngineMlException {
    Set<LemmaAndPos> collected = new LinkedHashSet<LemmaAndPos>();
    Set<ExtendedNode> nodes = AbstractNodeUtils.treeToLinkedHashSet(tree);
    for (ExtendedNode current : nodes) {
        if (!InfoObservations.infoHasLemma(current.getInfo())) {
            continue;
        }
        String fullLemma = InfoGetFields.getLemma(current.getInfo());
        PartOfSpeech pos = InfoGetFields.getPartOfSpeechObject(current.getInfo());

        // The pair for the whole lemma goes in first, then one pair per word.
        collected.add(new LemmaAndPos(fullLemma, pos));
        for (String part : StringUtil.stringToWords(fullLemma)) {
            collected.add(new LemmaAndPos(part, pos));
        }
    }
    return collected;
}
// Break the lemma to insert into its words, as returned by StringUtil.stringToWords.
// logEstimation starts at 0.0 and foundEstimation at false.
// NOTE(review): presumably an accumulator and a "found anything" flag updated by the code
// that follows this fragment; that code is not visible here, so confirm.
List<String> wordsInLemma = StringUtil.stringToWords(lemmaToInsert); double logEstimation = 0.0; boolean foundEstimation = false;