validateCounter(counter); int numUnseen = numberOfKeys - counter.size(); if (numUnseen < 1) int[][] cc = countCounts2IntArrays(collectCountCounts(counter)); int[] r = cc[0]; // counts int[] n = cc[1]; // counts of counts Distribution<E> dist = new Distribution<>(); dist.counter = new ClassicCounter<>(); for (Map.Entry<E, Double> entry : counter.entrySet()) {
/**
 * Adds a key to the key set by forwarding the request to {@code prior}.
 *
 * @param o the key to add
 */
@Override
public void addToKeySet(E o) {
  // This object keeps no key set of its own here; the call is passed straight through.
  prior.addToKeySet(o);
}
/**
 * Reports whether {@code key} is present, as answered by {@code prior}.
 *
 * @param key the key to look up
 * @return whatever {@code prior.containsKey(key)} returns
 */
@Override
public boolean containsKey(E key) {
  final boolean present = prior.containsKey(key);
  return present;
}
/**
 * Builds a Distribution from a counter whose counts are meant to be read directly as
 * probabilities.
 * <p>
 * If the total count of {@code c} is at least 1, the result of {@code getDistribution(c)} is
 * returned (a normalized distribution with no remaining mass, per the original note on this
 * method). Otherwise the new Distribution uses {@code c} itself, not a copy, as its counter and
 * records the missing mass {@code 1.0 - total} as reserved mass.
 * In both cases the number of keys is set to {@code numKeys}.
 *
 * @param c       counter holding the (possibly partial) probability mass
 * @param numKeys value stored as the resulting distribution's number of keys
 * @return the distribution described above
 */
public static <E> Distribution<E> getDistributionFromPartiallySpecifiedCounter(Counter<E> c, int numKeys) {
  final double mass = c.totalCount();

  // Fully (or over-) specified counter: hand off to the regular factory.
  if (mass >= 1.0) {
    Distribution<E> normalized = getDistribution(c);
    normalized.numberOfKeys = numKeys;
    return normalized;
  }

  // Partially specified counter: keep the counts as given and reserve what is left over.
  Distribution<E> partial = new Distribution<>();
  partial.numberOfKeys = numKeys;
  partial.counter = c;
  partial.reservedMass = 1.0 - mass;
  return partial;
}
//--- end JM added
Distribution<String> n = getDistribution(c); Distribution<String> prior = getUniformDistribution(s); Distribution<String> dir1 = distributionWithDirichletPrior(c, prior, 4000); Distribution<String> dir2 = dynamicCounterWithDirichletPrior(c, prior, 4000); Distribution<String> add1; Distribution<String> gt; if (true) { add1 = laplaceSmoothedDistribution(c, 4000); gt = goodTuringSmoothedCounter(c, 4000); } else { c.setCount(UNK, 45); add1 = laplaceWithExplicitUnknown(c, 0.5, UNK); gt = goodTuringWithExplicitUnknown(c, UNK); Distribution<String> sgt = simpleGoodTuring(c, 4000); System.out.printf("%10d ", Math.round(p / i)); String in = String.valueOf(i); System.out.printf("%10.8f ", n.probabilityOf(String.valueOf(in))); System.out.printf("%10.8f ", add1.probabilityOf(in)); System.out.printf("%10.8f ", dir1.probabilityOf(in)); System.out.printf("%10.8f ", dir2.probabilityOf(in)); System.out.printf("%10.8f ", gt.probabilityOf(in)); System.out.printf("%10.8f ", sgt.probabilityOf(in)); System.out.println(); System.out.printf("%10d ", 1); String last = String.valueOf(1500); System.out.printf("%10.8f ", n.probabilityOf(last));
int[] countCounts = getCountCounts(counter); return laplaceSmoothedDistribution(counter, numberOfKeys, 0.5); Distribution<E> norm = new Distribution<>(); norm.counter = new ClassicCounter<>();
POSDistribution = Distribution.getDistribution(POSCounter); Timing.tick("Creating character prior distribution..."); Distribution<Symbol> prior = Distribution.goodTuringSmoothedCounter(charCounter, numberOfKeys); charDistributions.put(Collections.EMPTY_LIST, prior); ClassicCounter<Symbol> c = entry.getValue(); Distribution<Symbol> thisPrior = charDistributions.get(context.subList(0, context.size() - 1)); double priorWeight = thisPrior.getNumberOfKeys() / 200.0; Distribution<Symbol> newDist = Distribution.dynamicCounterWithDirichletPrior(c, thisPrior, priorWeight); charDistributions.put(context, newDist);
int[] countCounts = getCountCounts(counter); return laplaceWithExplicitUnknown(counter, 0.5, UNK); Distribution<E> norm = new Distribution<>(); norm.counter = new ClassicCounter<>();
/**
 * Creates a near-uniform distribution over the given keys. Each key receives
 * {@code 1 / s.size()} plus Gaussian noise drawn from {@code r} and scaled by one thousandth
 * of that uniform value. The stored values are not re-normalized after the noise is added,
 * and the reserved mass is set to 0.
 *
 * @param s a Collection of keys.
 * @param r source of the Gaussian noise; one {@code nextGaussian()} draw is made per key,
 *          in the iteration order of {@code s}
 * @return the perturbed distribution
 */
public static <E> Distribution<E> getPerturbedUniformDistribution(Collection<E> s, Random r) {
  final int keyCount = s.size();
  final double uniform = 1.0 / (double) keyCount;
  final double sigma = uniform / 1000.0;

  Distribution<E> perturbed = new Distribution<>();
  perturbed.counter = new ClassicCounter<>();
  perturbed.numberOfKeys = keyCount;
  perturbed.reservedMass = 0;

  for (E key : s) {
    double noise = r.nextGaussian() * sigma;
    perturbed.counter.setCount(key, uniform + noise);
  }
  return perturbed;
}
/**
 * Estimates a distribution over word lengths by calling {@code sampleFrom()} 10000 times and
 * counting the length of each returned string. A "." is printed to standard output after
 * every 1000 samples, followed by a line break at the end.
 *
 * @return the distribution obtained from the collected length counts
 */
private Distribution<Integer> getWordLengthDistribution() {
  final ClassicCounter<Integer> lengthCounts = new ClassicCounter<>();
  for (int drawn = 1; drawn <= 10000; drawn++) {
    final int length = sampleFrom().length();
    lengthCounts.incrementCount(Integer.valueOf(length));
    // Progress marker: one dot per thousand samples.
    if (drawn % 1000 == 0) {
      System.out.print(".");
    }
  }
  System.out.println();
  return Distribution.getDistribution(lengthCounts);
}
/**
 * Compares this object with {@code o}. The same reference is always equal; anything that is
 * not a {@code Distribution} is never equal; otherwise the comparison is handed to
 * {@code equals} with the argument cast to {@code Distribution}.
 *
 * @param o the object to compare against
 * @return {@code true} if the two are considered equal
 */
@Override
@SuppressWarnings("unchecked")
public boolean equals(Object o) {
  if (o == this) {
    return true;
  }
  if (!(o instanceof Distribution)) {
    return false;
  }
  // NOTE(review): the raw cast is kept on purpose; it presumably selects a Distribution-typed
  // equals overload declared elsewhere in the class - confirm before changing the cast type.
  return equals((Distribution) o);
}
singletonWordPOSes.incrementCount(taggedWord.tag()); Distribution<String> singletonWordPOSDist = Distribution.getDistribution(singletonWordPOSes); singletonCharRads.incrementCount(Character.valueOf(RadicalMap.getRadical(s.getCh()))); Distribution<Character> singletonCharRadDist = Distribution.getDistribution(singletonCharRads); Distribution<Integer> wordLengthDist = Distribution.getDistribution(wordLengthCounter); pw.println(); pw.println("Distribution over singleton word POS:"); pw.println(singletonWordPOSDist.toString()); pw.println(); pw.println("Distribution over singleton char radicals:"); pw.println(singletonCharRadDist.toString()); pw.println(); pw.println("Distribution over word length:");
Distribution<String> n = getDistribution(c); Distribution<String> prior = getUniformDistribution(s); Distribution<String> dir1 = distributionWithDirichletPrior(c, prior, 4000); Distribution<String> dir2 = dynamicCounterWithDirichletPrior(c, prior, 4000); Distribution<String> add1; Distribution<String> gt; if (true) { add1 = laplaceSmoothedDistribution(c, 4000); gt = goodTuringSmoothedCounter(c, 4000); } else { c.setCount(UNK, 45); add1 = laplaceWithExplicitUnknown(c, 0.5, UNK); gt = goodTuringWithExplicitUnknown(c, UNK); Distribution<String> sgt = simpleGoodTuring(c, 4000); System.out.printf("%10d ", Math.round(p / i)); String in = String.valueOf(i); System.out.printf("%10.8f ", n.probabilityOf(String.valueOf(in))); System.out.printf("%10.8f ", add1.probabilityOf(in)); System.out.printf("%10.8f ", dir1.probabilityOf(in)); System.out.printf("%10.8f ", dir2.probabilityOf(in)); System.out.printf("%10.8f ", gt.probabilityOf(in)); System.out.printf("%10.8f ", sgt.probabilityOf(in)); System.out.println(); System.out.printf("%10d ", 1); String last = String.valueOf(1500); System.out.printf("%10.8f ", n.probabilityOf(last));
POSDistribution = Distribution.getDistribution(POSCounter); Timing.tick("Creating character prior distribution..."); Distribution<Symbol> prior = Distribution.goodTuringSmoothedCounter(charCounter, numberOfKeys); charDistributions.put(Collections.EMPTY_LIST, prior); ClassicCounter<Symbol> c = entry.getValue(); Distribution<Symbol> thisPrior = charDistributions.get(context.subList(0, context.size() - 1)); double priorWeight = thisPrior.getNumberOfKeys() / 200.0; Distribution<Symbol> newDist = Distribution.dynamicCounterWithDirichletPrior(c, thisPrior, priorWeight); charDistributions.put(context, newDist);
int[] countCounts = getCountCounts(counter); return laplaceSmoothedDistribution(counter, numberOfKeys, 0.5); Distribution<E> norm = new Distribution<>(); norm.counter = new ClassicCounter<>();
int[] countCounts = getCountCounts(counter); return laplaceWithExplicitUnknown(counter, 0.5, UNK); Distribution<E> norm = new Distribution<E>(); norm.counter = new ClassicCounter<E>();
/**
 * Creates a distribution that assigns the same value, {@code 1 / s.size()}, to every key in
 * the given collection. The number of keys is the size of the collection and the reserved
 * mass is 0.
 *
 * @param s a Collection of keys.
 * @return the uniform distribution over {@code s}
 */
public static <E> Distribution<E> getUniformDistribution(Collection<E> s) {
  final int keyCount = s.size();
  final double share = 1.0 / (double) keyCount;

  Distribution<E> uniform = new Distribution<>();
  uniform.counter = new ClassicCounter<>();
  uniform.numberOfKeys = keyCount;
  uniform.reservedMass = 0;

  for (E key : s) {
    uniform.counter.setCount(key, share);
  }
  return uniform;
}
/**
 * Builds a Distribution from a counter whose counts are meant to be read directly as
 * probabilities.
 * <p>
 * If the total count of {@code c} is at least 1, the result of {@code getDistribution(c)} is
 * returned (a normalized distribution with no remaining mass, per the original note on this
 * method). Otherwise the new Distribution uses {@code c} itself, not a copy, as its counter and
 * records the missing mass {@code 1.0 - total} as reserved mass.
 * In both cases the number of keys is set to {@code numKeys}.
 *
 * @param c       counter holding the (possibly partial) probability mass
 * @param numKeys value stored as the resulting distribution's number of keys
 * @return the distribution described above
 */
public static <E> Distribution<E> getDistributionFromPartiallySpecifiedCounter(Counter<E> c, int numKeys) {
  final double mass = c.totalCount();

  // Fully (or over-) specified counter: hand off to the regular factory.
  if (mass >= 1.0) {
    Distribution<E> normalized = getDistribution(c);
    normalized.numberOfKeys = numKeys;
    return normalized;
  }

  // Partially specified counter: keep the counts as given and reserve what is left over.
  Distribution<E> partial = new Distribution<E>();
  partial.numberOfKeys = numKeys;
  partial.counter = c;
  partial.reservedMass = 1.0 - mass;
  return partial;
}
//--- end JM added
private Distribution<Integer> getSegmentedWordLengthDistribution(Treebank tb) { // CharacterLevelTagExtender ext = new CharacterLevelTagExtender(); ClassicCounter<Integer> c = new ClassicCounter<>(); for (Tree gold : tb) { StringBuilder goldChars = new StringBuilder(); ArrayList goldYield = gold.yield(); for (Object aGoldYield : goldYield) { Word word = (Word) aGoldYield; goldChars.append(word); } List<HasWord> ourWords = segment(goldChars.toString()); for (HasWord ourWord : ourWords) { c.incrementCount(Integer.valueOf(ourWord.word().length())); } } return Distribution.getDistribution(c); }
/**
 * Compares this object with {@code o}. The same reference is always equal; anything that is
 * not a {@code Distribution} is never equal; otherwise the comparison is handed to
 * {@code equals} with the argument cast to {@code Distribution}.
 *
 * @param o the object to compare against
 * @return {@code true} if the two are considered equal
 */
@Override
@SuppressWarnings("unchecked")
public boolean equals(Object o) {
  if (o == this) {
    return true;
  }
  if (!(o instanceof Distribution)) {
    return false;
  }
  // NOTE(review): the raw cast is kept on purpose; it presumably selects a Distribution-typed
  // equals overload declared elsewhere in the class - confirm before changing the cast type.
  return equals((Distribution) o);
}