/** Returns the number of keys stored in the backing counter. */
@Override
public int size() {
  return this.counter.size();
}
/** Returns the number of entries in the underlying counter. */
public int size() {
  return this.counter.size();
}
/**
 * Returns the mean of all the counts (totalCount/size).
 *
 * @param c The Counter to find the mean of.
 * @return The average count per key; NaN if {@code c} is empty (0/0).
 */
public static <E> double mean(Counter<E> c) {
  double total = c.totalCount();
  return total / c.size();
}
/**
 * Converts a counter into a compressed (id, value) representation,
 * assigning a fresh integer id to any key not yet present in the index.
 *
 * @param c The feature counter to compress.
 * @return A CompressedFeatureVector over the (possibly newly assigned) ids.
 */
public CompressedFeatureVector compress(Counter<K> c) {
  int n = c.size();
  List<Integer> ids = new ArrayList<>(n);
  List<Double> vals = new ArrayList<>(n);
  for (Map.Entry<K, Double> entry : c.entrySet()) {
    K feature = entry.getKey();
    Integer id = index.get(feature);
    if (id == null) {
      // First time this key is seen: register it in both directions.
      id = index.size();
      inverse.put(id, feature);
      index.put(feature, id);
    }
    ids.add(id);
    vals.add(entry.getValue());
  }
  return new CompressedFeatureVector(ids, vals);
}
/**
 * Removes all entries from c except for the top {@code num},
 * where keys are ranked by their natural (Comparable) order.
 */
public static <E extends Comparable<E>> void retainTopKeyComparable(Counter<E> c, int num) {
  int excess = c.size() - num;
  if (excess <= 0) {
    return;
  }
  List<E> sorted = Counters.toSortedListKeyComparable(c);
  Collections.reverse(sorted);
  // After reversing, the keys to discard sit at the front of the list.
  for (E key : sorted.subList(0, excess)) {
    c.remove(key);
  }
}
private static int[][] countCounts2IntArrays(Counter<Integer> countCounts) { int size = countCounts.size(); int[][] arrays = new int[2][]; arrays[0] = new int[size]; // counts arrays[1] = new int[size]; // count counts PriorityQueue<Integer> q = new PriorityQueue<>(countCounts.keySet()); int i = 0; while (!q.isEmpty()) { Integer count = q.poll(); Integer countCount = (int) Math.round(countCounts.getCount(count)); arrays[0][i] = count; arrays[1][i] = countCount; i++; } return arrays; }
/**
 * Removes all entries from c except for the top {@code num} by count.
 */
public static <E> void retainTop(Counter<E> c, int num) {
  int excess = c.size() - num;
  if (excess <= 0) {
    return;
  }
  List<E> ordered = Counters.toSortedList(c, true);
  // The keys to discard sit at the front of the sorted list.
  for (E key : ordered.subList(0, excess)) {
    c.remove(key);
  }
}
/**
 * Macro-averaged precision: the unweighted mean of the per-relation
 * precision over every relation present in {@code totalCount}.
 */
public double precisionMacro() {
  double sum = 0.0;
  for (String relation : totalCount.keySet()) {
    sum += precision(relation);
  }
  // sum is a double, so this divide is already floating-point.
  return sum / totalCount.size();
}
/** * A List of the keys in c, sorted from highest count to lowest, paired with * counts * * @return A List of the keys in c, sorted from highest count to lowest. */ public static <E> List<Pair<E, Double>> toSortedListWithCounts(Counter<E> c) { List<Pair<E, Double>> l = new ArrayList<>(c.size()); for (E e : c.keySet()) { l.add(new Pair<>(e, c.getCount(e))); } // descending order Collections.sort(l, (a, b) -> Double.compare(b.second, a.second)); return l; }
/**
 * Lazily loads domain n-gram frequencies from {@code domainNGramsFile}
 * (tab-separated lines: ngram TAB frequency) into
 * {@code domainNGramRawFreq}. Once the counter is non-empty, subsequent
 * calls are no-ops.
 *
 * NOTE(review): the guard accepts {@code domainNGramRawFreq == null}, but
 * the loop then calls {@code domainNGramRawFreq.setCount(...)} without
 * initializing it, which would throw a NullPointerException. Presumably the
 * field is always initialized elsewhere before this runs — confirm, or
 * initialize it here when null.
 */
public static void loadDomainNGrams() { assert(domainNGramsFile != null); if (domainNGramRawFreq == null || domainNGramRawFreq.size() == 0) { for (String line : IOUtils.readLines(domainNGramsFile)) { String[] t = line.split("\t"); domainNGramRawFreq.setCount(t[0], Double.valueOf(t[1])); } Redwood.log(ConstantsAndVariables.minimaldebug, "Data", "loading freq from domain ngram file " + domainNGramsFile); } } }
/**
 * Macro-averaged recall: the unweighted mean of the per-relation recall
 * over every relation present in {@code totalCount}.
 */
public double recallMacro() {
  double sum = 0.0;
  for (String relation : totalCount.keySet()) {
    sum += recall(relation);
  }
  // sum is a double, so this divide is already floating-point.
  return sum / totalCount.size();
}
/** * A List of the keys in c, sorted by the given comparator, paired with * counts. * * @return A List of the keys in c, sorted from highest count to lowest. */ public static <E> List<Pair<E, Double>> toSortedListWithCounts(Counter<E> c, Comparator<Pair<E,Double>> comparator) { List<Pair<E, Double>> l = new ArrayList<>(c.size()); for (E e : c.keySet()) { l.add(new Pair<>(e, c.getCount(e))); } // descending order Collections.sort(l, comparator); return l; }
/**
 * Converts a counter to a primitive double array; element order follows the
 * counter's key-set iteration order and is therefore unspecified.
 */
public static <E> double[] asArray(Counter<E> counter) {
  double[] values = new double[counter.size()];
  int i = 0;
  for (E key : counter.keySet()) {
    values[i++] = counter.getCount(key);
  }
  return values;
}
/**
 * Removes all entries from c except for the bottom {@code num}.
 *
 * @param c   The counter to prune in place.
 * @param num How many entries to keep.
 * @return The keys that were removed, in removal order (always a mutable
 *         list; empty when nothing needed to be purged).
 */
public static <E> List<E> retainBottom(Counter<E> c, int num) {
  int numToPurge = c.size() - num;
  if (numToPurge <= 0) {
    // Consistency fix: the original returned Generics.newArrayList() here
    // but new ArrayList<>() below; use plain ArrayList on both paths, as
    // the neighboring toSortedListWithCounts/asArray helpers do.
    return new ArrayList<>();
  }
  List<E> removed = new ArrayList<>(numToPurge); // presized: exact count known
  for (E key : Counters.toSortedList(c).subList(0, numToPurge)) {
    removed.add(key);
    c.remove(key);
  }
  return removed;
}
/**
 * Mostly just an alias, but make sure our featurizer is serializable!
 *
 * A Featurizer maps a (fromState, action, toState) transition triple to a
 * feature counter; extending Serializable lets trained models holding a
 * featurizer be written to disk.
 */
public interface Featurizer extends Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>, Serializable {
  /**
   * Returns true if the given feature counter represents a simple split.
   * NOTE(review): the exact criterion is defined by implementations, which
   * are not visible here — consult them for the precise semantics.
   */
  boolean isSimpleSplit(Counter<String> feats);
}
public static <E> Distribution<E> getPerturbedDistribution(Counter<E> wordCounter, Random r) { Distribution<E> norm = new Distribution<>(); norm.counter = new ClassicCounter<>(); norm.numberOfKeys = wordCounter.size(); norm.reservedMass = 0; double totalCount = wordCounter.totalCount(); double stdev = 1.0 / norm.numberOfKeys / 1000.0; // tiny relative to average value for (E key : wordCounter.keySet()) { double prob = wordCounter.getCount(key) / totalCount; double perturbedProb = prob + (r.nextGaussian() * stdev); if (perturbedProb < 0.0) { perturbedProb = 0.0; } norm.counter.setCount(key, perturbedProb); } return norm; }
/** Construct a counter with keys the labels of the classifier and * values the score (unnormalized log probability) of each class * for an RVFDatum. */ private Counter<L> scoresOfRVFDatum(RVFDatum<L, F> example) { Counter<L> scores = new ClassicCounter<>(); // Index the features in the datum Counter<F> asCounter = example.asFeaturesCounter(); Counter<Integer> asIndexedCounter = new ClassicCounter<>(asCounter.size()); for (Map.Entry<F, Double> entry : asCounter.entrySet()) { asIndexedCounter.setCount(featureIndex.indexOf(entry.getKey()), entry.getValue()); } // Set the scores appropriately for (L l : labels()) { scores.setCount(l, scoreOfRVFDatum(asIndexedCounter, l)); } //System.out.println("Scores are: " + scores + " (gold: " + example.label() + ")"); return scores; }
public static <E> Distribution<E> getDistributionWithReservedMass(Counter<E> counter, double reservedMass) { Distribution<E> norm = new Distribution<>(); norm.counter = new ClassicCounter<>(); norm.numberOfKeys = counter.size(); norm.reservedMass = reservedMass; double total = counter.totalCount() * (1 + reservedMass); if (total == 0.0) { total = 1.0; } for (E key : counter.keySet()) { double count = counter.getCount(key) / total; // if (Double.isNaN(count) || count < 0.0 || count> 1.0 ) throw new RuntimeException("count=" + counter.getCount(key) + " total=" + total); norm.counter.setCount(key, count); } return norm; }
/**
 * Pearson's correlation coefficient between the values of x and y:
 * standardize both counters (subtract the mean, divide by the standard
 * deviation) and take their dot product, normalized by (n - 1).
 *
 * @return The sample correlation coefficient.
 */
public static <E> double pearsonsCorrelationCoefficient(Counter<E> x, Counter<E> y) {
  // Compute the moments up front, preserving the original call order.
  double stddevX = Counters.standardDeviation(x);
  double stddevY = Counters.standardDeviation(y);
  double meanX = Counters.mean(x);
  double meanY = Counters.mean(y);
  // Standardize each counter to zero mean / unit variance.
  Counter<E> zx = Counters.add(x, -meanX);
  Counter<E> zy = Counters.add(y, -meanY);
  Counters.divideInPlace(zx, stddevX);
  Counters.divideInPlace(zy, stddevY);
  return Counters.dotProduct(zx, zy) / (x.size() - 1);
}