public void processTerm(String t) { // null means the term has been filtered out (eg stopwords) if (t != null) { //add term to thingy tree ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); numOfTokensInDocument++; if (++numOfTokensInBlock >= BLOCK_SIZE && blockId < MAX_BLOCKS) { numOfTokensInBlock = 0; blockId++; } } }
public void processTerm(String t) { // null means the term has been filtered out (eg stopwords) if (t != null) { //add term to thingy tree ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); numOfTokensInDocument++; if (++numOfTokensInBlock >= BLOCK_SIZE && blockId < MAX_BLOCKS) { numOfTokensInBlock = 0; blockId++; } } }
public void processTerm(String t) { if (t== null) return; // current term is a delimiter if (blockDelimiterTerms.contains(t)) { // delimiters should also be indexed if (indexDelimiters) { ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); if (countDelimiters) numOfTokensInDocument++; } numOfTokensInBlock = 0; blockId++; } else { // index non-delimiter term ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); numOfTokensInDocument++; } }
public void processTerm(String t) { if (t== null) return; // current term is a delimiter if (blockDelimiterTerms.contains(t)) { // delimiters should also be indexed if (indexDelimiters) { ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); if (countDelimiters) numOfTokensInDocument++; } numOfTokensInBlock = 0; blockId++; } else { // index non-delimiter term ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); numOfTokensInDocument++; } }
/**
 * Inserts one occurrence of a term into this document, recording the block
 * in which it occurred.
 *
 * <p>The term is first passed to the single-argument {@code insert(t)}, then
 * {@code blockId} is added to the set of block ids kept for the term in
 * {@code term_blocks} (the set is created on the term's first occurrence).
 * {@code blockCount} is incremented on every call, whether or not the block
 * id was already recorded for the term.
 *
 * @param t       the term that occurred
 * @param blockId id of the block in which the term occurred
 */
public void insert(String t, int blockId) {
    insert(t);

    TIntHashSet blocksForTerm = term_blocks.get(t);
    if (blocksForTerm == null) {
        // First occurrence of this term in the document.
        // TODO (carried over from the original code): constructor arguments not yet chosen.
        blocksForTerm = new TIntHashSet();
        term_blocks.put(t, blocksForTerm);
    }
    blocksForTerm.add(blockId);

    blockCount++;
}
/**
 * Replaces the contents of this posting list with the serialised form read
 * from {@code in}.
 *
 * <p>Stream layout as consumed here: a variable-length int holding the number
 * of terms, then for each term its string, a variable-length int frequency,
 * and a variable-length int block count followed by that many variable-length
 * ints describing the term's block ids. The first stored block value is
 * decremented by one; each following block id is computed as the stored value
 * minus the previously decoded block id.
 *
 * @param in the input to deserialise from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    clear();

    final int numTerms = WritableUtils.readVInt(in);
    for (int termIdx = 0; termIdx < numTerms; termIdx++) {
        final String term = Text.readString(in);
        final int frequency = WritableUtils.readVInt(in);
        final int numBlocks = WritableUtils.readVInt(in);
        insert(frequency, term);

        // Terms without any block ids get no entry in term_blocks.
        if (numBlocks != 0) {
            final int[] blockIds = new int[numBlocks];

            int previous = WritableUtils.readVInt(in) - 1;
            blockIds[0] = previous;
            // NOTE(review): each id is decoded as (stored value - previous id).
            // Conventional gap decoding would add the previous id instead;
            // confirm this matches the encoding used by the corresponding write().
            for (int k = 1; k < numBlocks; k++) {
                previous = WritableUtils.readVInt(in) - previous;
                blockIds[k] = previous;
            }

            term_blocks.put(term, new TIntHashSet(blockIds));
        }
    }
}