/** Accumulate statistics on how often each token value occurs */ public void train(StringWrapperIterator i) { Set seenTokens = new HashSet(); while (i.hasNext()) { BagOfTokens bag = asBagOfTokens(i.nextStringWrapper()); seenTokens.clear(); for (Iterator j=bag.tokenIterator(); j.hasNext(); ) { totalTokenCount++; Token tokj = (Token)j.next(); if (!seenTokens.contains(tokj)) { seenTokens.add(tokj); // increment documentFrequency counts Integer df = (Integer)documentFrequency.get(tokj); if (df==null) documentFrequency.put(tokj,ONE); else if (df==ONE) documentFrequency.put(tokj,TWO); else if (df==TWO) documentFrequency.put(tokj,THREE); else documentFrequency.put(tokj, new Integer(df.intValue()+1)); } } collectionSize++; } }
/** Accumulate statistics on how often each token value occurs */ public void train(StringWrapperIterator i) { Set<Token> seenTokens = new HashSet<Token>(); while (i.hasNext()) { BagOfTokens bag = asBagOfTokens(i.nextStringWrapper()); seenTokens.clear(); for (Iterator<Token> j=bag.tokenIterator(); j.hasNext(); ) { totalTokenCount++; Token tokj = j.next(); if (!seenTokens.contains(tokj)) { seenTokens.add(tokj); // increment documentFrequency counts Integer df = (Integer)documentFrequency.get(tokj); if (df==null) documentFrequency.put(tokj,ONE); else if (df==ONE) documentFrequency.put(tokj,TWO); else if (df==TWO) documentFrequency.put(tokj,THREE); else documentFrequency.put(tokj, new Integer(df.intValue()+1)); } } collectionSize++; } }
/** Accumulate statistics on how often each token value occurs */ public void train(StringWrapperIterator i) { Set seenTokens = new HashSet(); while (i.hasNext()) { BagOfTokens bag = asBagOfTokens(i.nextStringWrapper()); seenTokens.clear(); for (Iterator j=bag.tokenIterator(); j.hasNext(); ) { totalTokenCount++; Token tokj = (Token)j.next(); if (!seenTokens.contains(tokj)) { seenTokens.add(tokj); // increment documentFrequency counts Integer df = (Integer)documentFrequency.get(tokj); if (df==null) documentFrequency.put(tokj,ONE); else if (df==ONE) documentFrequency.put(tokj,TWO); else if (df==TWO) documentFrequency.put(tokj,THREE); else documentFrequency.put(tokj, new Integer(df.intValue()+1)); } } collectionSize++; } }