/**
 * L2-normalizes a counter in place.
 *
 * @param c The {@link Counter} to be L2-normalized. This counter is modified.
 * @return The passed-in counter, L2-normalized
 */
public static <E> Counter<E> L2NormalizeInPlace(Counter<E> c) {
  return multiplyInPlace(c, 1.0 / L2Norm(c));
}
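A minimal usage sketch, assuming edu.stanford.nlp.stats.ClassicCounter, Counter, and Counters are imported; the keys and values are invented for illustration:

Counter<String> c = new ClassicCounter<>();
c.setCount("a", 3.0);               // the value vector (3, 4) has L2 norm 5
c.setCount("b", 4.0);
Counters.L2NormalizeInPlace(c);
// c now holds a = 0.6 and b = 0.8, so its L2 norm is 1.0

Since the counter is modified in place and also returned, the call can be chained if desired.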
/**
 * Returns the per-class precision from the previous evaluation round:
 * the counts of correct guesses divided, key by key, by the counts of all guesses.
 */
public ClassicCounter<OUT> lastPrecision() {
  ClassicCounter<OUT> result = new ClassicCounter<>();
  Counters.addInPlace(result, previousGuessedCorrect);
  Counters.divideInPlace(result, previousGuessed);
  return result;
}
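The two Counters calls amount to a copy followed by a key-wise division. A hedged sketch with made-up class labels and tallies (the field names in the method above belong to its enclosing class):

ClassicCounter<String> guessedCorrect = new ClassicCounter<>();
guessedCorrect.setCount("PERSON", 8.0);
guessedCorrect.setCount("ORG", 3.0);
ClassicCounter<String> guessed = new ClassicCounter<>();
guessed.setCount("PERSON", 10.0);
guessed.setCount("ORG", 6.0);

ClassicCounter<String> precision = new ClassicCounter<>();
Counters.addInPlace(precision, guessedCorrect);   // copy the numerators into an empty counter
Counters.divideInPlace(precision, guessed);       // divide each key's count by the matching denominator
// precision: PERSON = 0.8, ORG = 0.5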
/** Returns a string representation of the counter, with keys ordered from largest to smallest value. */
public static <E> String toBiggestValuesFirstString(Counter<E> c) {
  return toPriorityQueue(c).toString();
}
@Override
public E argmax() {
  return Counters.argmax(Counters.linearCombination(this.counter, 1.0, prior.counter, priorMultiplier));
}
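A sketch of what this computes, with invented counts and assuming Counters.linearCombination returns the key-wise sum w1*c1 + w2*c2 (prior and priorMultiplier above are fields of the enclosing class):

Counter<String> observed = new ClassicCounter<>();
observed.setCount("red", 2.0);
observed.setCount("blue", 3.0);
Counter<String> prior = new ClassicCounter<>();
prior.setCount("red", 10.0);
prior.setCount("blue", 1.0);

double priorMultiplier = 0.5;
Counter<String> combined = Counters.linearCombination(observed, 1.0, prior, priorMultiplier);
// combined: red = 2 + 0.5 * 10 = 7, blue = 3 + 0.5 * 1 = 3.5
String best = Counters.argmax(combined);   // "red"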
Counters.removeKeys(patternsandWords4Label, removePats);
Counters.removeKeys(unLabeledPatternsandWords4Label, removePats);
Counters.removeKeys(negPatternsandWords4Label, removePats);

    // ... (the beginning of this call is not shown in this excerpt)
    unLabeledPatternsandWords4Label, props, p0Set, p0);
Counter<E> finalPat = scorePatterns.score();
Counters.removeKeys(finalPat, alreadyIdentifiedPatterns);
Counters.retainNonZeros(finalPat);
Counters.retainTop(finalPat, constVars.numPatterns);
if (Double.isNaN(Counters.max(finalPat)))
  throw new RuntimeException("how is the value NaN");
Redwood.log(ConstantsAndVariables.minimaldebug, "Selected Patterns: " + finalPat);

Counters.removeKeys(currentPatternWeights4Label, ignorePatterns);
Redwood.log(ConstantsAndVariables.extremedebug, "Removing patterns from ignorePatterns of size " + ignorePatterns.size()
    + ". New patterns size " + currentPatternWeights4Label.size());
Counters.removeKeys(currentPatternWeights4Label, alreadyIdentifiedPatterns);
Redwood.log(ConstantsAndVariables.extremedebug, "Removing already identified patterns of size " + alreadyIdentifiedPatterns.size()
    + ". New patterns size " + currentPatternWeights4Label.size());
PriorityQueue<E> q = Counters.toPriorityQueue(currentPatternWeights4Label);
int num = 0;

Redwood.log(ConstantsAndVariables.extremedebug,
    "Removing already chosen patterns in this iteration " + removeChosenPats + " in favor of " + pat);
Counters.removeKeys(chosenPat, removeChosenPats);

List<Pair<E, Double>> chosenPatSorted = Counters.toSortedListWithCounts(chosenPat);
public static <E> double pearsonsCorrelationCoefficient(Counter<E> x, Counter<E> y) {
  double stddevX = Counters.standardDeviation(x);
  double stddevY = Counters.standardDeviation(y);
  double meanX = Counters.mean(x);
  double meanY = Counters.mean(y);
  Counter<E> t1 = Counters.add(x, -meanX);
  Counter<E> t2 = Counters.add(y, -meanY);
  Counters.divideInPlace(t1, stddevX);
  Counters.divideInPlace(t2, stddevY);
  return Counters.dotProduct(t1, t2) / (x.size() - 1);
}
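A small sketch of calling it on two counters over the same keys (same imports as above; the keys and values are made up, and the result is simply printed rather than asserted):

Counter<String> x = new ClassicCounter<>();
Counter<String> y = new ClassicCounter<>();
String[] keys = {"a", "b", "c", "d"};
double[] xs = {1.0, 2.0, 3.0, 4.0};
for (int i = 0; i < keys.length; i++) {
  x.setCount(keys[i], xs[i]);
  y.setCount(keys[i], 2.0 * xs[i] + 1.0);   // y is an exact linear function of x
}
// correlation of the two value vectors, aligned by key
System.out.println(Counters.pearsonsCorrelationCoefficient(x, y));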
/**
 * Calculates the Jensen-Shannon divergence between the two counters. That is,
 * it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))].
 * This code assumes that the Counters have only non-negative values in them.
 *
 * @return The Jensen-Shannon divergence between the distributions
 */
public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) {
  // Normalize the counters before averaging them; otherwise the result is wrong
  // when the inputs are not already probability distributions.
  Counter<E> d1 = asNormalizedCounter(c1);
  Counter<E> d2 = asNormalizedCounter(c2);
  Counter<E> average = average(d1, d2);
  double kl1 = klDivergence(d1, average);
  double kl2 = klDivergence(d2, average);
  return (kl1 + kl2) / 2.0;
}
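A quick sketch with invented counts; because this variant normalizes its inputs, raw counts can be passed directly:

Counter<String> c1 = new ClassicCounter<>();
c1.setCount("heads", 5.0);
c1.setCount("tails", 5.0);
Counter<String> c2 = new ClassicCounter<>();
c2.setCount("heads", 9.0);
c2.setCount("tails", 1.0);
// 0.0 for identical distributions, growing as the two distributions diverge
double jsd = Counters.jensenShannonDivergence(c1, c2);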
/** {@inheritDoc} */
@Override
public void addAll(Counter<E> counter) {
  Counters.addInPlace(this, counter);
}
public String toString(NumberFormat nf) {
  return Counters.toString(counter, nf);
}
/**
 * Takes all the support vectors and their corresponding alphas, and computes a weight
 * vector that can be used in a vanilla LinearClassifier. This only works because
 * we are using a linear kernel. The Counter is over the feature indices (+1 because,
 * for some reason, svm_light is 1-indexed), not features.
 */
private static ClassicCounter<Integer> getWeights(List<Pair<Double, ClassicCounter<Integer>>> supportVectors) {
  ClassicCounter<Integer> weights = new ClassicCounter<>();
  for (Pair<Double, ClassicCounter<Integer>> sv : supportVectors) {
    ClassicCounter<Integer> c = new ClassicCounter<>(sv.second());
    Counters.multiplyInPlace(c, sv.first());
    Counters.addInPlace(weights, c);
  }
  return weights;
}
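The loop accumulates w = sum_i alpha_i * x_i. A self-contained sketch of the same accumulation with two invented support vectors, assuming java.util.ArrayList/List, edu.stanford.nlp.util.Pair, and the Counters classes are imported:

ClassicCounter<Integer> sv1 = new ClassicCounter<>();
sv1.setCount(1, 2.0);
sv1.setCount(2, 1.0);
ClassicCounter<Integer> sv2 = new ClassicCounter<>();
sv2.setCount(2, 4.0);

List<Pair<Double, ClassicCounter<Integer>>> supportVectors = new ArrayList<>();
supportVectors.add(new Pair<>(0.5, sv1));    // alpha = 0.5
supportVectors.add(new Pair<>(-0.25, sv2));  // alpha = -0.25

ClassicCounter<Integer> weights = new ClassicCounter<>();
for (Pair<Double, ClassicCounter<Integer>> sv : supportVectors) {
  ClassicCounter<Integer> scaled = new ClassicCounter<>(sv.second());  // copy the support vector
  Counters.multiplyInPlace(scaled, sv.first());                        // scale it by its alpha
  Counters.addInPlace(weights, scaled);                                // add it into the weight vector
}
// weights: feature 1 -> 0.5 * 2 = 1.0, feature 2 -> 0.5 * 1 - 0.25 * 4 = -0.5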
public static boolean updateBest(Counter<String> newWts, double obj, boolean force) {
  boolean nonZero = Counters.L2Norm(newWts) > 0.0;
  synchronized (MERT.class) {
    boolean better = false;
    if (bestObj > obj) {
      System.err.printf("\n<<<IMPROVED BEST: %f -> %f with {{{%s}}}.>>>\n", -bestObj, -obj, Counters.toString(newWts, 100));
      better = true;
    } else if (bestObj == obj && breakTiesWithLastBest) {
      System.err.printf("\n<<<SAME BEST: %f with {{{%s}}}.>>>\n", -bestObj, Counters.toString(newWts, 100));
      better = true;
    }
    if (force) {
      System.err.printf("\n<<<FORCED BEST UPDATE: %f -> %f>>>\n", -bestObj, -obj);
    }
    if ((better && nonZero) || force) {
      bestWts = newWts;
      bestObj = obj;
      return true;
    }
    return false;
  }
}
@Override
@SuppressWarnings("unchecked")
public boolean equals(Object o) {
  if (this == o) {
    return true;
  } else if (!(o instanceof Counter)) {
    return false;
  } else {
    return Counters.equals(this, (Counter<E>) o);
  }
}
/**
 * Finds and returns the key in the Counter with the largest count,
 * or null if the counter is empty.
 *
 * @param c The Counter
 * @return The key in the Counter with the largest count
 */
public static <E> E argmax(Counter<E> c) {
  return argmax(c, (x, y) -> 0, null);
}
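Usage is straightforward; a tiny sketch with invented tag counts:

Counter<String> votes = new ClassicCounter<>();
votes.setCount("NOUN", 7.0);
votes.setCount("VERB", 4.0);
votes.setCount("ADJ", 2.0);
String best = Counters.argmax(votes);                           // "NOUN"
String none = Counters.argmax(new ClassicCounter<String>());    // null: the counter is empty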
/**
 * Mostly just an alias, but make sure our featurizer is serializable!
 */
public interface Featurizer extends Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>, Serializable {
  boolean isSimpleSplit(Counter<String> feats);
}
/**
 * Calculates the Jensen-Shannon divergence between the two counters.
 * That is, it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))].
 * Unlike the variant above, this version does not normalize its arguments,
 * so the counters should already be probability distributions.
 *
 * @return The Jensen-Shannon divergence between the distributions
 */
public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) {
  Counter<E> average = average(c1, c2);
  double kl1 = klDivergence(c1, average);
  double kl2 = klDivergence(c2, average);
  return (kl1 + kl2) / 2.0;
}
tfidfScores.setCount(en.getKey(), score);

Redwood.log(ConstantsAndVariables.extremedebug,
    "BEFORE IDF " + Counters.toSortedString(tfidfScores, 100, "%1$s:%2$f", "\t"));
Counters.divideInPlace(tfidfScores, Data.processedDataFreq);

for (Entry<CandidatePhrase, Counter<ScorePhraseMeasures>> wEn : scores.entrySet()) {
  Double avgScore = Counters.mean(wEn.getValue());
  if (!avgScore.isInfinite() && !avgScore.isNaN())
    phraseScores.setCount(wEn.getKey(), avgScore);
throw new RuntimeException("Cannot understand patterns scoring"); currentPatternWeights4Label = Counters.divisionNonNaN(numeratorPatWt, denominatorPatWt); logpos_i.setCount(en.getKey(), Math.log(en.getValue().size())); Counters.multiplyInPlace(currentPatternWeights4Label, logpos_i); Counters.retainNonZeros(currentPatternWeights4Label); return currentPatternWeights4Label;
/**
 * Normalizes the target counter in place, so that the sum of the resulting values
 * equals 1.
 *
 * @param <E> Type of elements in the Counter
 */
public static <E> void normalize(Counter<E> target) {
  multiplyInPlace(target, 1.0 / target.totalCount());
}
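For example (again with made-up counts):

Counter<String> counts = new ClassicCounter<>();
counts.setCount("x", 2.0);
counts.setCount("y", 6.0);
Counters.normalize(counts);
// counts now sums to 1.0: x = 0.25, y = 0.75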
/**
 * Normalizes the target counter in place, so that the sum of the resulting values
 * equals 1.
 *
 * @param <E> Type of first-dimension elements in the TwoDimensionalCounter
 * @param <F> Type of second-dimension elements in the TwoDimensionalCounter
 */
public static <E, F> void normalize(TwoDimensionalCounter<E, F> target) {
  Counters.divideInPlace(target, target.totalCount());
}