/**
 * L2-normalizes a counter in place.
 *
 * @param c The {@link Counter} to be L2-normalized. This counter is modified.
 * @return The passed-in counter, L2-normalized
 */
public static <E> Counter<E> L2NormalizeInPlace(Counter<E> c) {
  return multiplyInPlace(c, 1.0 / L2Norm(c));
}
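A minimal usage sketch, assuming edu.stanford.nlp.stats.ClassicCounter, Counter, and Counters are imported; the keys and values are invented for illustration:

Counter<String> c = new ClassicCounter<>();
c.setCount("a", 3.0);               // the value vector (3, 4) has L2 norm 5
c.setCount("b", 4.0);
Counters.L2NormalizeInPlace(c);
// c now holds a = 0.6 and b = 0.8, so its L2 norm is 1.0

Since the counter is modified in place and also returned, the call can be chained if desired.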
/**
 * Returns the per-class precision from the previous evaluation round:
 * the counts of correct guesses divided, key by key, by the counts of all guesses.
 */
public ClassicCounter<OUT> lastPrecision() {
  ClassicCounter<OUT> result = new ClassicCounter<>();
  Counters.addInPlace(result, previousGuessedCorrect);
  Counters.divideInPlace(result, previousGuessed);
  return result;
}
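The two Counters calls amount to a copy followed by a key-wise division. A hedged sketch with made-up class labels and tallies (the field names in the method above belong to its enclosing class):

ClassicCounter<String> guessedCorrect = new ClassicCounter<>();
guessedCorrect.setCount("PERSON", 8.0);
guessedCorrect.setCount("ORG", 3.0);
ClassicCounter<String> guessed = new ClassicCounter<>();
guessed.setCount("PERSON", 10.0);
guessed.setCount("ORG", 6.0);

ClassicCounter<String> precision = new ClassicCounter<>();
Counters.addInPlace(precision, guessedCorrect);   // copy the numerators into an empty counter
Counters.divideInPlace(precision, guessed);       // divide each key's count by the matching denominator
// precision: PERSON = 0.8, ORG = 0.5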
/** Returns a string representation of the counter, with keys ordered from largest to smallest value. */
public static <E> String toBiggestValuesFirstString(Counter<E> c) {
  return toPriorityQueue(c).toString();
}
@Override
public E argmax() {
  return Counters.argmax(Counters.linearCombination(this.counter, 1.0, prior.counter, priorMultiplier));
}
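A sketch of what this computes, with invented counts and assuming Counters.linearCombination returns the key-wise sum w1*c1 + w2*c2 (prior and priorMultiplier above are fields of the enclosing class):

Counter<String> observed = new ClassicCounter<>();
observed.setCount("red", 2.0);
observed.setCount("blue", 3.0);
Counter<String> prior = new ClassicCounter<>();
prior.setCount("red", 10.0);
prior.setCount("blue", 1.0);

double priorMultiplier = 0.5;
Counter<String> combined = Counters.linearCombination(observed, 1.0, prior, priorMultiplier);
// combined: red = 2 + 0.5 * 10 = 7, blue = 3 + 0.5 * 1 = 3.5
String best = Counters.argmax(combined);   // "red"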
Counters.removeKeys(patternsandWords4Label, removePats);
Counters.removeKeys(unLabeledPatternsandWords4Label, removePats);
Counters.removeKeys(negPatternsandWords4Label, removePats);

    // ... (the beginning of this call is not shown in this excerpt)
    unLabeledPatternsandWords4Label, props, p0Set, p0);
Counter<E> finalPat = scorePatterns.score();
Counters.removeKeys(finalPat, alreadyIdentifiedPatterns);
Counters.retainNonZeros(finalPat);
Counters.retainTop(finalPat, constVars.numPatterns);
if (Double.isNaN(Counters.max(finalPat)))
  throw new RuntimeException("how is the value NaN");
Redwood.log(ConstantsAndVariables.minimaldebug, "Selected Patterns: " + finalPat);

Counters.removeKeys(currentPatternWeights4Label, ignorePatterns);
Redwood.log(ConstantsAndVariables.extremedebug, "Removing patterns from ignorePatterns of size " + ignorePatterns.size()
    + ". New patterns size " + currentPatternWeights4Label.size());
Counters.removeKeys(currentPatternWeights4Label, alreadyIdentifiedPatterns);
Redwood.log(ConstantsAndVariables.extremedebug, "Removing already identified patterns of size " + alreadyIdentifiedPatterns.size()
    + ". New patterns size " + currentPatternWeights4Label.size());
PriorityQueue<E> q = Counters.toPriorityQueue(currentPatternWeights4Label);
int num = 0;

Redwood.log(ConstantsAndVariables.extremedebug,
    "Removing already chosen patterns in this iteration " + removeChosenPats + " in favor of " + pat);
Counters.removeKeys(chosenPat, removeChosenPats);

List<Pair<E, Double>> chosenPatSorted = Counters.toSortedListWithCounts(chosenPat);
public static <E> double pearsonsCorrelationCoefficient(Counter<E> x, Counter<E> y) {
  double stddevX = Counters.standardDeviation(x);
  double stddevY = Counters.standardDeviation(y);
  double meanX = Counters.mean(x);
  double meanY = Counters.mean(y);
  Counter<E> t1 = Counters.add(x, -meanX);
  Counter<E> t2 = Counters.add(y, -meanY);
  Counters.divideInPlace(t1, stddevX);
  Counters.divideInPlace(t2, stddevY);
  return Counters.dotProduct(t1, t2) / (x.size() - 1);
}
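A small sketch of calling it on two counters over the same keys (same imports as above; the keys and values are made up, and the result is simply printed rather than asserted):

Counter<String> x = new ClassicCounter<>();
Counter<String> y = new ClassicCounter<>();
String[] keys = {"a", "b", "c", "d"};
double[] xs = {1.0, 2.0, 3.0, 4.0};
for (int i = 0; i < keys.length; i++) {
  x.setCount(keys[i], xs[i]);
  y.setCount(keys[i], 2.0 * xs[i] + 1.0);   // y is an exact linear function of x
}
// correlation of the two value vectors, aligned by key
System.out.println(Counters.pearsonsCorrelationCoefficient(x, y));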
/**
 * Calculates the Jensen-Shannon divergence between the two counters. That is,
 * it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))].
 * This code assumes that the Counters have only non-negative values in them.
 *
 * @return The Jensen-Shannon divergence between the distributions
 */
public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) {
  // Normalize the counters before averaging them; otherwise the result is wrong
  // when the inputs are not already probability distributions.
  Counter<E> d1 = asNormalizedCounter(c1);
  Counter<E> d2 = asNormalizedCounter(c2);
  Counter<E> average = average(d1, d2);
  double kl1 = klDivergence(d1, average);
  double kl2 = klDivergence(d2, average);
  return (kl1 + kl2) / 2.0;
}
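A quick sketch with invented counts; because this variant normalizes its inputs, raw counts can be passed directly:

Counter<String> c1 = new ClassicCounter<>();
c1.setCount("heads", 5.0);
c1.setCount("tails", 5.0);
Counter<String> c2 = new ClassicCounter<>();
c2.setCount("heads", 9.0);
c2.setCount("tails", 1.0);
// 0.0 for identical distributions, growing as the two distributions diverge
double jsd = Counters.jensenShannonDivergence(c1, c2);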
/** {@inheritDoc} */
@Override
public void addAll(Counter<E> counter) {
  Counters.addInPlace(this, counter);
}
public String toString(NumberFormat nf) {
  return Counters.toString(counter, nf);
}
/**
 * Takes all the support vectors and their corresponding alphas, and computes a weight
 * vector that can be used in a vanilla LinearClassifier. This only works because
 * we are using a linear kernel. The Counter is over the feature indices (+1 because,
 * for some reason, svm_light is 1-indexed), not features.
 */
private static ClassicCounter<Integer> getWeights(List<Pair<Double, ClassicCounter<Integer>>> supportVectors) {
  ClassicCounter<Integer> weights = new ClassicCounter<>();
  for (Pair<Double, ClassicCounter<Integer>> sv : supportVectors) {
    ClassicCounter<Integer> c = new ClassicCounter<>(sv.second());
    Counters.multiplyInPlace(c, sv.first());
    Counters.addInPlace(weights, c);
  }
  return weights;
}
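The loop accumulates w = sum_i alpha_i * x_i. A self-contained sketch of the same accumulation with two invented support vectors, assuming java.util.ArrayList/List, edu.stanford.nlp.util.Pair, and the Counters classes are imported:

ClassicCounter<Integer> sv1 = new ClassicCounter<>();
sv1.setCount(1, 2.0);
sv1.setCount(2, 1.0);
ClassicCounter<Integer> sv2 = new ClassicCounter<>();
sv2.setCount(2, 4.0);

List<Pair<Double, ClassicCounter<Integer>>> supportVectors = new ArrayList<>();
supportVectors.add(new Pair<>(0.5, sv1));    // alpha = 0.5
supportVectors.add(new Pair<>(-0.25, sv2));  // alpha = -0.25

ClassicCounter<Integer> weights = new ClassicCounter<>();
for (Pair<Double, ClassicCounter<Integer>> sv : supportVectors) {
  ClassicCounter<Integer> scaled = new ClassicCounter<>(sv.second());  // copy the support vector
  Counters.multiplyInPlace(scaled, sv.first());                        // scale it by its alpha
  Counters.addInPlace(weights, scaled);                                // add it into the weight vector
}
// weights: feature 1 -> 0.5 * 2 = 1.0, feature 2 -> 0.5 * 1 - 0.25 * 4 = -0.5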
public static boolean updateBest(Counter<String> newWts, double obj, boolean force) {
  boolean nonZero = Counters.L2Norm(newWts) > 0.0;
  synchronized (MERT.class) {
    boolean better = false;
    if (bestObj > obj) {
      System.err.printf("\n<<<IMPROVED BEST: %f -> %f with {{{%s}}}.>>>\n", -bestObj, -obj, Counters.toString(newWts, 100));
      better = true;
    } else if (bestObj == obj && breakTiesWithLastBest) {
      System.err.printf("\n<<<SAME BEST: %f with {{{%s}}}.>>>\n", -bestObj, Counters.toString(newWts, 100));
      better = true;
    }
    if (force) {
      System.err.printf("\n<<<FORCED BEST UPDATE: %f -> %f>>>\n", -bestObj, -obj);
    }
    if ((better && nonZero) || force) {
      bestWts = newWts;
      bestObj = obj;
      return true;
    }
    return false;
  }
}
@Override
@SuppressWarnings("unchecked")
public boolean equals(Object o) {
  if (this == o) {
    return true;
  } else if (!(o instanceof Counter)) {
    return false;
  } else {
    return Counters.equals(this, (Counter<E>) o);
  }
}
/**
 * Finds and returns the key in the Counter with the largest count,
 * or null if the counter is empty.
 *
 * @param c The Counter
 * @return The key in the Counter with the largest count
 */
public static <E> E argmax(Counter<E> c) {
  return argmax(c, (x, y) -> 0, null);
}
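Usage is straightforward; a tiny sketch with invented tag counts:

Counter<String> votes = new ClassicCounter<>();
votes.setCount("NOUN", 7.0);
votes.setCount("VERB", 4.0);
votes.setCount("ADJ", 2.0);
String best = Counters.argmax(votes);                           // "NOUN"
String none = Counters.argmax(new ClassicCounter<String>());    // null: the counter is empty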
/**
 * Mostly just an alias, but make sure our featurizer is serializable!
 */
public interface Featurizer extends Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>, Serializable {
  boolean isSimpleSplit(Counter<String> feats);
}
/**
 * Calculates the Jensen-Shannon divergence between the two counters.
 * That is, it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))].
 * Unlike the variant above, this version does not normalize its arguments,
 * so the counters should already be probability distributions.
 *
 * @return The Jensen-Shannon divergence between the distributions
 */
public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) {
  Counter<E> average = average(c1, c2);
  double kl1 = klDivergence(c1, average);
  double kl2 = klDivergence(c2, average);
  return (kl1 + kl2) / 2.0;
}
tfidfScores.setCount(en.getKey(), score);

Redwood.log(ConstantsAndVariables.extremedebug,
    "BEFORE IDF " + Counters.toSortedString(tfidfScores, 100, "%1$s:%2$f", "\t"));
Counters.divideInPlace(tfidfScores, Data.processedDataFreq);

for (Entry<CandidatePhrase, Counter<ScorePhraseMeasures>> wEn : scores.entrySet()) {
  Double avgScore = Counters.mean(wEn.getValue());
  if (!avgScore.isInfinite() && !avgScore.isNaN())
    phraseScores.setCount(wEn.getKey(), avgScore);
throw new RuntimeException("Cannot understand patterns scoring"); currentPatternWeights4Label = Counters.divisionNonNaN(numeratorPatWt, denominatorPatWt); logpos_i.setCount(en.getKey(), Math.log(en.getValue().size())); Counters.multiplyInPlace(currentPatternWeights4Label, logpos_i); Counters.retainNonZeros(currentPatternWeights4Label); return currentPatternWeights4Label;
/**
 * Normalizes the target counter in place, so that the sum of the resulting values
 * equals 1.
 *
 * @param <E> Type of elements in the Counter
 */
public static <E> void normalize(Counter<E> target) {
  multiplyInPlace(target, 1.0 / target.totalCount());
}
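For example (again with made-up counts):

Counter<String> counts = new ClassicCounter<>();
counts.setCount("x", 2.0);
counts.setCount("y", 6.0);
Counters.normalize(counts);
// counts now sums to 1.0: x = 0.25, y = 0.75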
/**
 * Normalizes the target counter in place, so that the sum of the resulting values
 * equals 1.
 *
 * @param <E> Type of first-dimension elements in the TwoDimensionalCounter
 * @param <F> Type of second-dimension elements in the TwoDimensionalCounter
 */
public static <E, F> void normalize(TwoDimensionalCounter<E, F> target) {
  Counters.divideInPlace(target, target.totalCount());
}