public String toSingleLine() { StringBuilder sb = new StringBuilder(); Iterator<String> it = iterator(); String w; int f; if (it.hasNext()) { w = it.next(); f = getFrequency(w); sb.append(w); sb.append(":"); sb.append(f); } while (it.hasNext()) { w = it.next(); f = getFrequency(w); sb.append(" "); sb.append(w); sb.append(":"); sb.append(f); } // end while return sb.toString(); }
/**
 * Groups the words returned by {@code iterator()} by their frequency.
 *
 * <p>Each word is looked up with {@code getFrequency(word)} and appended to
 * the list stored under that frequency; a new list is created the first time
 * a frequency is seen. The map's keys are ordered by an
 * {@code IntegerComparator} instance.
 *
 * @return a sorted map from frequency to the words having that frequency,
 *         each list holding its words in iteration order
 */
SortedMap<Integer, List<String>> getSortedMap() {
    SortedMap<Integer, List<String>> byFrequency =
            new TreeMap<Integer, List<String>>(new IntegerComparator());

    Iterator<String> words = iterator();
    while (words.hasNext()) {
        String word = words.next();
        int frequency = getFrequency(word);

        List<String> bucket = byFrequency.get(frequency);
        if (bucket == null) {
            // First word seen with this frequency: start its bucket.
            bucket = new ArrayList<String>();
            byFrequency.put(frequency, bucket);
        }
        bucket.add(word);
    }
    return byFrequency;
} // end sort
// Append three tab-separated fields to sb: the word w, then getFrequency(w), then rawFrequency(w).
// NOTE(review): sb's declaration is not visible here; presumably a StringBuilder — confirm.
sb.append(w); sb.append("\t"); sb.append(getFrequency(w)); sb.append("\t"); sb.append(rawFrequency(w));
tf = bow.getFrequency(term); tfIdf = log2(tf); if (b) {
/** * Returns a document in the VSM. */ public Vector mapDocument(BOW bow, boolean b) { //logger.info("lsm.mapDocument " + b); SparseVector vector = new SparseVector(); Iterator<String> it = bow.termSet().iterator(); for (int i = 0; it.hasNext(); i++) { //logger.info(i + " " + t[i]); String term = it.next(); int index = termIndex.get(term); if (index != -1) { int tf = bow.getFrequency(term); float tfIdf = (float) (log2(tf)); if (b) { tfIdf *= Iidf[index]; } //logger.info(term + " ==> " + index + ", tf.idf = " + tf + "(" + (log2(tf)) + ") * " + Iidf[index] + " = " + tfIdf); vector.add(index, tfIdf); } } // end for return vector; } // end map
tf = bow.getFrequency(term); tfIdf = 1.0 + Math.log10(tf); if (b) {
tf = bow.getFrequency(term); tfIdf = (float) (1.0 + Math.log10(tf)); if (b) {