/**
 * Produces the normalized (canonical) form of a phrase.
 *
 * <p>Delegates to {@code stringNormalizer}, passing this object's
 * {@code language} together with the phrase.
 *
 * @param phrase the phrase to normalize
 * @return whatever {@code stringNormalizer.normalize(language, phrase)} yields
 */
public String normalize(String phrase) {
    String canonical = stringNormalizer.normalize(language, phrase);
    return canonical;
}
/**
 * Normalizes text with the configured string normalizer, then replaces
 * every run of adjacent whitespace with a single space.
 *
 * @param lang the language handed to the normalizer
 * @param text the text to normalize
 * @return the normalized text with each whitespace run collapsed to one space
 */
private String normalize(Language lang, String text) {
    String normalized = normalizer.normalize(lang, text);
    // Any run of one or more whitespace characters becomes exactly one space.
    return normalized.replaceAll("\\s+", " ");
}
/**
 * Splits a string into words with the tokenizer and re-joins those words
 * with single spaces, optionally normalizing the string first.
 *
 * @param s         the string to clean
 * @param normalize when {@code true}, the string is passed through
 *                  {@code normalizer.normalize(lang, s)} before tokenizing
 * @return the words reported by the tokenizer, joined by {@code " "}
 */
private String cleanString(String s, boolean normalize) {
    String text = s;
    if (normalize) {
        text = normalizer.normalize(lang, text);
    }
    // A fresh tokenizer is created per call, after any normalization has run.
    return StringUtils.join(new StringTokenizer().getWords(lang, text), " ");
}
/**
 * Stores the counts for a phrase, keyed by the phrase's normalized form.
 *
 * @param phrase the phrase whose counts are being saved; it is normalized
 *               with {@code normalizer.normalize(lang, phrase)} before use as a key
 * @param counts the counts to store
 * @throws DaoException if the underlying store reports an {@link IOException}
 */
public void savePhraseCounts(String phrase, PrunedCounts<Integer> counts) throws DaoException {
    String key = normalizer.normalize(lang, phrase);
    try {
        resolveDb.put(key, counts);
    } catch (IOException ioFailure) {
        throw new DaoException(ioFailure);
    }
}
/**
 * Looks up the counts stored for a phrase, limited to at most
 * {@code maxPages} entries.
 *
 * <p>The phrase is normalized with {@code normalizer.normalize(lang, phrase)}
 * before the lookup. If the stored counts already fit within
 * {@code maxPages}, the stored object is returned as-is. Otherwise a new
 * {@code PrunedCounts} is built with the same total, holding the first
 * {@code maxPages} entries in the iteration order of {@code keySet()}.
 *
 * @param phrase   the phrase to look up
 * @param maxPages the maximum number of entries in the returned counts
 * @return the (possibly truncated) counts, or {@code null} when the store
 *         returns {@code null} for the normalized phrase
 * @throws DaoException if the lookup fails with an {@link IOException} or a
 *                      {@link ClassNotFoundException}
 */
public PrunedCounts<Integer> getPhraseCounts(String phrase, int maxPages) throws DaoException {
    String key = normalizer.normalize(lang, phrase);
    try {
        PrunedCounts<Integer> stored = resolveDb.get(key);
        if (stored == null) {
            return null;
        }
        if (stored.size() <= maxPages) {
            return stored;
        }

        // Too many entries: copy the leading ones into a fresh container
        // that keeps the original total.
        PrunedCounts<Integer> truncated = new PrunedCounts<Integer>(stored.getTotal());
        for (int pageId : stored.keySet()) {
            if (truncated.size() < maxPages) {
                truncated.put(pageId, stored.get(pageId));
            } else {
                break;
            }
        }
        return truncated;
    } catch (IOException ioFailure) {
        throw new DaoException(ioFailure);
    } catch (ClassNotFoundException missingClass) {
        throw new DaoException(missingClass);
    }
}