/**
 * Normalizes a phrase by returning its text unchanged.
 *
 * @param text the phrase to normalize
 * @return the string carried by {@code text}, with no transformation applied
 */
@Override
public String normalize(LocalString text) {
    final String raw = text.getString();
    return raw;
}
/**
 * Resolves each phrase on its own through the single phrase analyzer.
 *
 * <p>The returned list is parallel to {@code phrases}: slot {@code i} holds a
 * copy of the analyzer's candidate map for phrase {@code i}, or {@code null}
 * when the analyzer returned {@code null} for that phrase.
 *
 * @param phrases the phrases to resolve
 * @param context accepted for interface compatibility; not consulted here
 * @return one candidate map (or {@code null}) per input phrase, in input order
 * @throws DaoException if the analyzer fails while resolving a phrase
 */
@Override
public List<LinkedHashMap<LocalId, Float>> disambiguate(List<LocalString> phrases, Set<LocalString> context) throws DaoException {
    List<LinkedHashMap<LocalId, Float>> resolved = new ArrayList<LinkedHashMap<LocalId, Float>>();
    for (LocalString current : phrases) {
        // Ask the analyzer for up to 10 candidates for this phrase.
        LinkedHashMap<LocalId, Float> candidates =
                phraseAnalyzer.resolve(current.getLanguage(), current.getString(), 10);
        if (candidates != null) {
            // Hand back an independent copy that keeps the analyzer's iteration order.
            resolved.add(new LinkedHashMap<LocalId, Float>(candidates));
        } else {
            // Keep the slot so that results stay aligned with the input phrases.
            resolved.add(null);
        }
    }
    return resolved;
}
/**
 * Picks the page most analyzers agree on for a single phrase.
 *
 * <p>Each analyzer in {@code phraseAnalyzers} is asked for exactly one
 * candidate and casts one vote for it; analyzers that return {@code null} or an
 * empty map do not vote. The candidate with the most votes wins, and on a tie
 * the candidate that received its first vote earliest is kept.
 *
 * @param phrase  the phrase to resolve
 * @param context accepted for interface compatibility; not consulted here
 * @return the winning page id, or {@code null} when no analyzer produced a candidate
 * @throws DaoException if an analyzer fails while resolving the phrase
 */
public LocalId disambiguateTop(LocalString phrase, Set<LocalString> context) throws DaoException {
    // Insertion-ordered so that ties resolve in favour of the earliest-voted candidate.
    LinkedHashMap<LocalId, Integer> votes = new LinkedHashMap<LocalId, Integer>();
    for (PhraseAnalyzer analyzer : phraseAnalyzers) {
        LinkedHashMap<LocalId, Float> top = analyzer.resolve(phrase.getLanguage(), phrase.getString(), 1);
        if (top != null && !top.isEmpty()) {
            LocalId choice = top.keySet().iterator().next();
            Integer previous = votes.get(choice);
            votes.put(choice, previous == null ? 1 : previous + 1);
        }
    }

    LocalId winner = null;
    int winningVotes = 0;
    for (LocalId candidate : votes.keySet()) {
        int count = votes.get(candidate);
        // Strict comparison: a later candidate must beat, not merely match, the leader.
        if (count > winningVotes) {
            winningVotes = count;
            winner = candidate;
        }
    }
    return winner;
}
/**
 * Normalizes a phrase by delegating to the (language, text) overload.
 *
 * @param string the phrase to normalize
 * @return whatever {@code normalize(language, text)} returns for the phrase's
 *         language and text
 */
@Override
public String normalize(LocalString string) {
    return normalize(
            string.getLanguage(),
            string.getString());
}
/**
 * Resolves each phrase by summing the scores that all analyzers assign to
 * the same page id.
 *
 * <p>For every phrase, each analyzer in {@code phraseAnalyzers} is asked for up
 * to 20 candidates. Scores are accumulated per numeric page id, and the
 * resulting map is ordered by {@code WpCollectionUtils.sortMapKeys(pageSums, true)}
 * (presumably highest summed score first; confirm against that helper).
 *
 * <p>An analyzer that returns {@code null} for a phrase contributes nothing.
 * Result ids carry the language of the phrase they were resolved for.
 *
 * @param phrases the phrases to resolve; may be empty
 * @param context accepted for interface compatibility; not consulted here
 * @return one score map per input phrase, in input order; an empty list when
 *         {@code phrases} is empty
 * @throws DaoException if an analyzer fails while resolving a phrase
 */
@Override
public List<LinkedHashMap<LocalId, Float>> disambiguate(List<LocalString> phrases, Set<LocalString> context) throws DaoException {
    List<LinkedHashMap<LocalId, Float>> results = new ArrayList<LinkedHashMap<LocalId, Float>>();
    for (LocalString phrase : phrases) {
        // Ids are resolved in the phrase's own language, so label results with it.
        Language lang = phrase.getLanguage();

        Map<Integer, Double> pageSums = new HashMap<Integer, Double>();
        for (PhraseAnalyzer pa : phraseAnalyzers) {
            LinkedHashMap<LocalId, Float> probs = pa.resolve(lang, phrase.getString(), 20);
            if (probs == null) {
                // This analyzer has nothing for the phrase; skip it instead of failing.
                continue;
            }
            for (Map.Entry<LocalId, Float> entry : probs.entrySet()) {
                int id = entry.getKey().getId();
                Double sum = pageSums.get(id);
                if (sum == null) {
                    pageSums.put(id, (double) entry.getValue());
                } else {
                    pageSums.put(id, sum + entry.getValue());
                }
            }
        }

        LinkedHashMap<LocalId, Float> pageResult = new LinkedHashMap<LocalId, Float>();
        for (Integer key : WpCollectionUtils.sortMapKeys(pageSums, true)) {
            pageResult.put(new LocalId(lang, key), pageSums.get(key).floatValue());
        }
        results.add(pageResult);
    }
    return results;
}
candidates.put(s, phraseAnalyzer.resolve(s.getLanguage(), s.getString(), numCandidates));
private void debugSimilarityDisambiguator(List<LocalString> phrases) throws DaoException { String last = null; boolean same = true; StringBuffer b = new StringBuffer("results for " + phrases.get(0).getString() + ", " + phrases.get(1).getString() + "\n"); for (SimilarityDisambiguator.Criteria c : SimilarityDisambiguator.Criteria.values()) { if (c == SimilarityDisambiguator.Criteria.SIMILARITY) { continue; // weird, so skip for now. } List<LocalId> resolutions; synchronized (disambiguator) { ((SimilarityDisambiguator)disambiguator).setCriteria(c); resolutions = disambiguator.disambiguateTop(phrases, null); } String page1 = resolutions.get(0) == null ? "null" : localPageDao.getById(language, resolutions.get(0).getId()).toString(); String page2 = resolutions.get(1) == null ? "null" : localPageDao.getById(language, resolutions.get(1).getId()).toString(); b.append("\t" + c + ": " + page1 + ", " + page2 + "\n"); if (last == null) last = page1+page2; if (!last.equals(page1+page2)) { same = false; } } if (!same) { System.out.println(b.toString()); } }
throw new IllegalArgumentException("Disambiguator only supports language " + language); candidates.put(s, analyzer.resolve(s.getLanguage(), s.getString(), 100));