/**
 * Looks up the count stored for a (first, second) key pair.
 *
 * @param first  outer key used to select the inner counter from {@code maps}
 * @param second inner key looked up inside that counter
 * @return the count held by the inner counter for {@code second}, or 0.0 when
 *         {@code maps} has no counter for {@code first}
 */
public double getCount(F first, S second) {
    Counter<S> inner = maps.get(first);
    return inner == null ? 0.0 : inner.getCount(second);
}
/**
 * Finds the (first, second) pair that holds the largest count across all inner counters.
 *
 * <p>Each inner counter reports its own arg-max; the best of those is returned. The first
 * entry visited always seeds the result, so a non-empty map never yields {@code null}.
 *
 * @return the pair with the maximum count, or {@code null} when {@code maps} has no entries
 */
public Pair<F, S> argMax() {
    // Primitive accumulator: the boxed Double used previously forced a box/unbox per comparison.
    double maxCount = -Double.MAX_VALUE;
    Pair<F, S> maxKey = null;
    for (Map.Entry<F, Counter<S>> entry : maps.entrySet()) {
        Counter<S> counter = entry.getValue();
        S localMax = counter.argMax();
        // Look the count up once and reuse it for both the comparison and the update.
        double localCount = counter.getCount(localMax);
        if (localCount > maxCount || maxKey == null) {
            maxKey = new Pair<>(entry.getKey(), localMax);
            maxCount = localCount;
        }
    }
    return maxKey;
}
/**
 * Computes the share of the total count that belongs to the given element.
 *
 * @param element element whose relative frequency is requested
 * @return {@code getCount(element) / totalCount()}
 * @throws IllegalStateException if {@code totalCount()} is not strictly positive
 */
public double getProbability(T element) {
    boolean nothingCounted = totalCount() <= 0.0;
    if (nothingCounted) {
        throw new IllegalStateException("Can't calculate probability with empty counter");
    }
    double elementCount = getCount(element);
    return elementCount / totalCount();
}
/**
 * Computes the cosine similarity between the character-count vectors of the first two
 * strings supplied. Each string is converted to a vector of per-character occurrence
 * counts; strings beyond the second are ignored.
 *
 * @param strings the strings to compare; only {@code strings[0]} and {@code strings[1]} are read
 * @return the cosine similarity of the two character-count vectors, or 0 when fewer than two
 *         strings are supplied, when either of the first two is {@code null}, or when either
 *         has no characters (the cosine is undefined for a zero-length vector)
 */
public static double stringSimilarity(String... strings) {
    // Guard the array shape as well as the reference: the body indexes [0] and [1].
    if (strings == null || strings.length < 2 || strings[0] == null || strings[1] == null) {
        return 0;
    }

    Counter<String> counter = new Counter<>();
    Counter<String> counter2 = new Counter<>();
    for (int i = 0; i < strings[0].length(); i++) {
        counter.incrementCount(String.valueOf(strings[0].charAt(i)), 1.0f);
    }
    for (int i = 0; i < strings[1].length(); i++) {
        counter2.incrementCount(String.valueOf(strings[1].charAt(i)), 1.0f);
    }

    Set<String> v1 = counter.keySet();
    Set<String> v2 = counter2.keySet();
    Set<String> both = SetUtils.intersection(v1, v2);

    double scalar = 0;
    double norm1 = 0;
    double norm2 = 0;
    // Only characters present in both strings contribute to the dot product.
    for (String k : both) {
        scalar += counter.getCount(k) * counter2.getCount(k);
    }
    for (String k : v1) {
        norm1 += counter.getCount(k) * counter.getCount(k);
    }
    for (String k : v2) {
        norm2 += counter2.getCount(k) * counter2.getCount(k);
    }

    double denominator = Math.sqrt(norm1 * norm2);
    // An empty string produces a zero norm; return 0 instead of the NaN that 0/0 would give.
    if (denominator == 0) {
        return 0;
    }
    return scalar / denominator;
}
/**
 * Adds every count held by another counter to this counter, key by key.
 *
 * @param other counter whose keys and counts are added via {@code incrementCount}
 * @param <T2>  key type of the other counter; must be a subtype of this counter's key type
 */
public <T2 extends T> void incrementAll(Counter<T2> other) {
    for (T2 key : other.keySet()) {
        double amount = other.getCount(key);
        incrementCount(key, amount);
    }
}
/**
 * Divides every stored count by the current value of {@code totalCount} and then rebuilds
 * the totals from the rescaled counts.
 *
 * <p>The denominator is read once, before any count is rewritten, so every key is divided
 * by the same value regardless of how {@code setCount} maintains the running total.
 * The division is floating-point: if the captured total is zero the rescaled counts are
 * NaN or infinite rather than an exception being thrown.
 */
public void normalize() {
    // Snapshot the denominator up front; re-reading it inside the loop would make the result
    // depend on whether setCount adjusts totalCount as counts are overwritten.
    final double total = totalCount.get();
    for (T key : keySet()) {
        setCount(key, getCount(key) / total);
    }
    rebuildTotals();
}
/**
 * Recomputes {@code totalCount} from scratch: resets it to zero, adds the count of every
 * key in {@code keySet()}, and finally clears the {@code dirty} flag.
 */
protected void rebuildTotals() {
    totalCount.set(0);
    for (T element : keySet()) {
        double elementCount = getCount(element);
        totalCount.addAndGet(elementCount);
    }
    dirty.set(false);
}
/**
 * Calculates the binary Matthews correlation coefficient for the specified class.<br>
 * MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))<br>
 *
 * <p>The four per-class counts are truncated to {@code long} and handed to
 * {@code EvaluationUtils.matthewsCorrelation} in the order TP, FP, FN, TN.
 *
 * @param classIdx class index to calculate the Matthews correlation coefficient for
 * @return the value returned by {@code EvaluationUtils.matthewsCorrelation}
 */
public double matthewsCorrelation(int classIdx) {
    long tp = (long) truePositives.getCount(classIdx);
    long fp = (long) falsePositives.getCount(classIdx);
    long fn = (long) falseNegatives.getCount(classIdx);
    long tn = (long) trueNegatives.getCount(classIdx);
    return EvaluationUtils.matthewsCorrelation(tp, fp, fn, tn);
}
/**
 * Computes the false negative rate for one label from its false-negative and
 * true-positive counts.
 *
 * @param classLabel the label to evaluate
 * @param edgeCase   value passed through to {@code EvaluationUtils.falseNegativeRate}
 *                   for the 0/0 case
 * @return the false negative rate as a double
 */
public double falseNegativeRate(Integer classLabel, double edgeCase) {
    double falseNeg = falseNegatives.getCount(classLabel);
    double truePos = truePositives.getCount(classLabel);
    // Counts are truncated to whole numbers before the rate is computed.
    long fn = (long) falseNeg;
    long tp = (long) truePos;
    return EvaluationUtils.falseNegativeRate(fn, tp, edgeCase);
}
/**
 * Copies the counts for the integer keys {@code 0 .. maxCount - 1} into a new map,
 * truncating each count to an {@code int}.
 *
 * @param counter  source of the counts
 * @param maxCount exclusive upper bound of the keys to copy
 * @return a new {@link HashMap} with one entry per key in {@code [0, maxCount)}
 */
private Map<Integer, Integer> convertToMap(Counter<Integer> counter, int maxCount) {
    Map<Integer, Integer> result = new HashMap<>();
    for (int idx = 0; idx < maxCount; idx++) {
        int truncated = (int) counter.getCount(idx);
        result.put(idx, truncated);
    }
    return result;
}
/**
 * Computes the false positive rate for one label from its false-positive and
 * true-negative counts.
 *
 * @param classLabel the label to evaluate
 * @param edgeCase   value passed through to {@code EvaluationUtils.falsePositiveRate}
 *                   for the 0/0 case
 * @return the false positive rate as a double
 */
public double falsePositiveRate(int classLabel, double edgeCase) {
    double falsePos = falsePositives.getCount(classLabel);
    double trueNeg = trueNegatives.getCount(classLabel);
    // Counts are truncated to whole numbers before the rate is computed.
    long fp = (long) falsePos;
    long tn = (long) trueNeg;
    return EvaluationUtils.falsePositiveRate(fp, tn, edgeCase);
}
/**
 * Computes the precision for one label from its true-positive and false-positive counts.
 *
 * @param classLabel the label to evaluate
 * @param edgeCase   value passed through to {@code EvaluationUtils.precision} for the 0/0 case
 * @return the precision for the label
 */
public double precision(Integer classLabel, double edgeCase) {
    double truePos = truePositives.getCount(classLabel);
    double falsePos = falsePositives.getCount(classLabel);
    // Counts are truncated to whole numbers before the ratio is computed.
    long tp = (long) truePos;
    long fp = (long) falsePos;
    return EvaluationUtils.precision(tp, fp, edgeCase);
}
/**
 * Computes the recall for one label from its true-positive and false-negative counts.
 *
 * @param classLabel the label to evaluate
 * @param edgeCase   value passed through to {@code EvaluationUtils.recall} for the 0/0 case
 * @return the recall rate as a double
 */
public double recall(int classLabel, double edgeCase) {
    double truePos = truePositives.getCount(classLabel);
    double falseNeg = falseNegatives.getCount(classLabel);
    // Counts are truncated to whole numbers before the ratio is computed.
    long tp = (long) truePos;
    long fn = (long) falseNeg;
    return EvaluationUtils.recall(tp, fn, edgeCase);
}
/**
 * Sums {@code log(count(token) / totalCount)} over every token of {@code column}.
 *
 * <p>Tokens are produced by a {@link StringTokenizer} with its default delimiters.
 * A token whose count is zero contributes {@code log(0)}, i.e. negative infinity.
 *
 * @param counter    source of the per-token counts
 * @param column     text to split into tokens
 * @param totalCount denominator used to turn each count into a relative frequency
 * @return the sum of the log relative frequencies, or 0 when {@code column} has no tokens
 */
private double sumOverTokens(Counter<String> counter, String column, double totalCount) {
    StringTokenizer tokenizer = new StringTokenizer(column);
    double sum = 0;
    while (tokenizer.hasMoreTokens()) {
        // nextToken() is what advances the tokenizer; without it this loop never terminates.
        String token = tokenizer.nextToken();
        // Score the individual token, not the whole column string.
        sum += Math.log(counter.getCount(token) / totalCount);
    }
    return sum;
}
/**
 * Computes the share of the total count that belongs to the given element.
 *
 * @param element element whose relative frequency is requested
 * @return {@code getCount(element) / totalCount()}
 * @throws IllegalStateException if {@code totalCount()} is not strictly positive
 */
public double getProbability(T element) {
    boolean nothingCounted = totalCount() <= 0.0;
    if (nothingCounted) {
        throw new IllegalStateException("Can't calculate probability with empty counter");
    }
    double elementCount = getCount(element);
    return elementCount / totalCount();
}
/**
 * Adds every count held by another counter to this counter, key by key.
 *
 * @param other counter whose keys and counts are added via {@code incrementCount}
 * @param <T2>  key type of the other counter; must be a subtype of this counter's key type
 */
public <T2 extends T> void incrementAll(Counter<T2> other) {
    for (T2 key : other.keySet()) {
        double amount = other.getCount(key);
        incrementCount(key, amount);
    }
}
/**
 * Increases the stored frequency of a word and, once that frequency reaches
 * {@code minWordFrequency}, adds the word to {@code vocabWords} if it is not already there.
 *
 * @param word the word whose frequency is incremented
 * @param by   the amount added to the word's frequency
 */
@Override
public void incrementCount(String word, double by) {
    wordFrequencies.incrementCount(word, by);

    boolean frequentEnough = wordFrequencies.getCount(word) >= minWordFrequency;
    // The membership check only runs for words that have reached the frequency threshold.
    if (frequentEnough && vocabWords.indexOf(word) < 0) {
        vocabWords.add(word);
    }
}
/**
 * Divides every stored count by the current value of {@code totalCount} and then rebuilds
 * the totals from the rescaled counts.
 *
 * <p>The denominator is read once, before any count is rewritten, so every key is divided
 * by the same value regardless of how {@code setCount} maintains the running total.
 * The division is floating-point: if the captured total is zero the rescaled counts are
 * NaN or infinite rather than an exception being thrown.
 */
public void normalize() {
    // Snapshot the denominator up front; re-reading it inside the loop would make the result
    // depend on whether setCount adjusts totalCount as counts are overwritten.
    final double total = totalCount.get();
    for (T key : keySet()) {
        setCount(key, getCount(key) / total);
    }
    rebuildTotals();
}
/**
 * Recomputes {@code totalCount} from scratch: resets it to zero, adds the count of every
 * key in {@code keySet()}, and finally clears the {@code dirty} flag.
 */
protected void rebuildTotals() {
    totalCount.set(0);
    for (T element : keySet()) {
        double elementCount = getCount(element);
        totalCount.addAndGet(elementCount);
    }
    dirty.set(false);
}
/**
 * Builds a vector with one entry per vocabulary word, where entry {@code i} is
 * {@code cache.tfidf} applied to the word at index {@code i} and that word's count in the
 * supplied document-frequency counter.
 *
 * @param args argument array whose first element is cast to {@code Counter<String>} and
 *             read as the per-word frequencies; other elements are not read
 * @return the array created by {@code Nd4j.create} from the computed values
 */
@Override
public INDArray createVector(Object[] args) {
    // Unchecked by necessity: the interface passes arguments as Object[].
    @SuppressWarnings("unchecked")
    Counter<String> docFrequencies = (Counter<String>) args[0];

    // Read the vocabulary size once so the array length and the loop bound cannot diverge.
    int vocabSize = cache.vocabWords().size();
    double[] vector = new double[vocabSize];
    for (int i = 0; i < vocabSize; i++) {
        // Single lookup per index; the word is needed for both the count and the weight.
        String word = cache.wordAt(i);
        double freq = docFrequencies.getCount(word);
        vector[i] = cache.tfidf(word, freq);
    }
    return Nd4j.create(vector);
}