/**
 * Creates a distribution pre-filled from a map of conditions to samples.
 * <p>
 * Delegates to the no-argument constructor and then, for every entry of
 * the map, registers the entry's samples under the entry's key via
 * {@code addSamples}.
 *
 * @param samples maps each condition to the samples observed under it
 */
public ConditionalFrequencyDistribution(Map<C,Iterable<V>> samples) {
    this();
    // Walk the entries directly: one pass over the map instead of a
    // keySet() traversal plus a get() lookup per key.
    for (Map.Entry<C,Iterable<V>> entry : samples.entrySet()) {
        addSamples(entry.getKey(), entry.getValue());
    }
}
/**
 * Looks up how often a phrase was counted.
 * <p>
 * The phrase length reported by {@code CoreUtils.getPhraseLength} is used
 * as the condition under which the phrase is looked up in {@code cfd}.
 *
 * @param phrase the phrase to look up
 * @return the count stored for the phrase under its length, or 0 when
 *         {@code cfd} has no condition for that length
 * @throws Exception declared by the overridden method
 */
@Override
public long getFrequency(String phrase) throws Exception {
    final int length = CoreUtils.getPhraseLength(phrase);

    // Nothing was counted for phrases of this length.
    if (!cfd.hasCondition(length)) {
        return 0;
    }
    return cfd.getCount(length, phrase);
}
/**
 * Fills a conditional frequency distribution with two whitespace-tokenized
 * sentences, one per condition, and checks the set of conditions, the
 * total sample count and several per-condition counts.
 */
@Test
public void cfdTest() {
    final String firstCondition = "text1";
    final String secondCondition = "text2";

    // 11 tokens under the first condition, 7 under the second.
    final List<String> firstTokens = Arrays.asList(
            "This is a first test that contains a first test example".split(" "));
    final List<String> secondTokens = Arrays.asList(
            "This second example contains other example tokens".split(" "));

    ConditionalFrequencyDistribution<String, String> distribution =
            new ConditionalFrequencyDistribution<String, String>();
    distribution.addSamples(firstCondition, firstTokens);
    distribution.addSamples(secondCondition, secondTokens);

    System.out.println(distribution);

    // Exactly the two conditions that were added are reported.
    assertEquals(2, distribution.getConditions().size());
    for (String seen : distribution.getConditions()) {
        boolean isFirst = seen.equals(firstCondition);
        boolean isSecond = seen.equals(secondCondition);
        assertTrue(isFirst || isSecond);
    }

    // Total number of samples across both conditions.
    assertEquals(18, distribution.getN());

    // Per-condition counts, including a sample that was never added.
    assertEquals(0, distribution.getCount(firstCondition, "humpelgrumpf"));
    assertEquals(1, distribution.getCount(firstCondition, "This"));
    assertEquals(1, distribution.getCount(secondCondition, "This"));
    assertEquals(2, distribution.getCount(firstCondition, "test"));
    assertEquals(2, distribution.getCount(secondCondition, "example"));
}
}
/**
 * Builds the provider by counting samples from the Brown corpus for every
 * size between {@code minN} and {@code maxN} (both inclusive).
 * <p>
 * For each size {@code n}, every corpus sentence contributes the elements
 * of {@code new NGramIterable(tokens, n, n)}, stored in {@code cfd} under
 * the condition {@code n}.
 *
 * @param minN smallest size to count
 * @param maxN largest size to count
 * @throws IllegalArgumentException if {@code minN > maxN}
 * @throws Exception declared by this constructor; presumably raised while
 *         accessing the corpus (TODO confirm against BrownCorpus)
 */
public BrownProvider(int minN, int maxN) throws Exception {
    // Reject invalid arguments before constructing the corpus, so that a
    // bad call fails fast and is not masked by a corpus-construction error.
    if (minN > maxN) {
        throw new IllegalArgumentException("minN > maxN");
    }

    BrownCorpus brownCorpus = new BrownCorpus();
    cfd = new ConditionalFrequencyDistribution<Integer,String>();

    for (int n = minN; n <= maxN; n++) {
        // getSentences() is deliberately re-requested for every n: it is
        // not visible here whether its result can be iterated twice.
        for (Sentence s : brownCorpus.getSentences()) {
            cfd.addSamples(n, new NGramIterable(s.getTokens(), n, n));
        }
    }
}
/**
 * Computes the relative frequency of a phrase: its count from
 * {@link #getFrequency(String)} divided by {@code cfd.getN()}.
 *
 * @param phrase the phrase to look up
 * @return count divided by {@code cfd.getN()}, or 0 when
 *         {@code cfd.getN()} is 0
 * @throws Exception propagated from {@link #getFrequency(String)}
 */
@Override
public double getProbability(String phrase) throws Exception {
    final long occurrences = getFrequency(phrase);
    // NOTE(review): the denominator is the sample count of the whole
    // distribution, not only of the phrase's own length — confirm that
    // this is the intended normalization.
    final long total = cfd.getN();

    // Guard against division by zero on an empty distribution.
    return (total == 0) ? 0 : (double) occurrences / total;
}
/**
 * Reports the number of samples stored under condition {@code 1}.
 *
 * @return {@code getN()} of the frequency distribution that {@code cfd}
 *         holds for condition 1
 * @throws Exception declared by the overridden method
 */
@Override
public long getNrOfTokens() throws Exception {
    // NOTE(review): relies on condition 1 being present in cfd; what
    // getFrequencyDistribution returns for an absent condition is not
    // visible here — verify.
    final long unigramSamples = cfd.getFrequencyDistribution(1).getN();
    return unigramSamples;
}
}
/**
 * Adds a single sample under the given condition.
 * <p>
 * The sample is wrapped in a fresh one-element list and handed to
 * {@code addSamples}.
 *
 * @param t      the condition the sample belongs to
 * @param sample the sample to add
 */
public void addSample(C t, V sample) {
    List<V> wrapped = new ArrayList<V>();
    wrapped.add(sample);

    addSamples(t, wrapped);
}