// Trove forEachEntry callback: for each (term, frequency) pair, record the term's
// id (from termCodes) and its frequency into the parallel termids/tfs arrays,
// advancing the shared cursor i. Returning true continues the iteration.
// NOTE(review): fragment of an anonymous TObjectIntProcedure — termids, tfs, i
// and termCodes are captured from an enclosing scope not visible here.
public boolean execute(final String a, final int b) { termids[i] = termCodes.getCode(a); tfs[i++] = b; return true; } });
/** Verifies that ids registered via {@code setTermCode()} are returned
 *  unchanged by {@code getCode()}, and remain stable across a very large
 *  number of repeated lookups. */
@Test public void testTermCodesPut() {
	TermCodes termCodes = new TermCodes();
	final TObjectIntHashMap<String> expected = new TObjectIntHashMap<String>();
	final int numTerms = terms.length;
	// register each term with an explicit code and mirror it in the check map
	for (int t = 0; t < numTerms; t++) {
		termCodes.setTermCode(terms[t], t);
		expected.put(terms[t], t);
	}
	// hammer the lookup path: the mapping must never drift
	for (int pass = 0; pass < 1000000; pass++) {
		for (int t = 0; t < numTerms; t++) {
			int code = termCodes.getCode(terms[t]);
			assertEquals(expected.get(terms[t]), code);
		}
	}
}
}
@Test public void testTermCodes() { TermCodes termCodes = new TermCodes(); TObjectIntHashMap<String> check = new TObjectIntHashMap<String>(); final int termsLength = terms.length; for (int i = 0; i < termsLength; i++) { int id = termCodes.getCode(terms[i]); check.put(terms[i], id); } int code; for (int j = 0; j < 1000000; j++) { for (int i = 0; i < termsLength; i++) { code = termCodes.getCode(terms[i]); assertEquals(check.get(terms[i]), code); } } //TODO: get a new term, then check that the new id is unique }
/** Adding the empty string as a term must be rejected: LexiconBuilder.addTerm()
 *  is expected to throw an IllegalArgumentException. */
@Test(expected=IllegalArgumentException.class)
public void testNullTermException() throws Exception {
	final IndexOnDisk index = Index.createNewIndex(
			ApplicationSetup.TERRIER_INDEX_PATH,
			ApplicationSetup.TERRIER_INDEX_PREFIX);
	final LexiconBuilder builder = new LexiconBuilder(index, "lexicon", new TermCodes());
	builder.addTerm("", 0);
}
// Drop all cached term->code state and hint a GC before the next phase.
// NOTE(review): fragment — the try block opened here is closed outside this view;
// confirm the matching catch/finally in the enclosing method.
termCodes.reset(); System.gc(); try {
/** Builds a fresh on-disk index whose lexicon contains each of the given
 *  tokens with a frequency of 1, finalises both build phases, and returns it.
 * @param tokens terms to insert into the new lexicon.
 * @return the newly created index.
 * @throws Exception if index creation or lexicon building fails. */
Index createLexiconIndex(String[] tokens) throws Exception {
	final IndexOnDisk index = Index.createNewIndex(
			ApplicationSetup.TERRIER_INDEX_PATH,
			ApplicationSetup.TERRIER_INDEX_PREFIX);
	final LexiconBuilder builder = new LexiconBuilder(index, "lexicon", new TermCodes());
	for (int i = 0; i < tokens.length; i++) {
		builder.addTerm(tokens[i], 1);
	}
	builder.finishedDirectIndexBuild();
	builder.finishedInvertedIndexBuild();
	return index;
}
// Discard the accumulated term->code mappings so ids can be reassigned later.
termCodes.reset();
// Trove forEachEntry callback: fills one posting column per term — row 0 gets
// the term id, row 1 the term frequency b, and rows 2..2+fieldCount-1 the
// per-field occurrence counts; i is the shared column cursor.
// NOTE(review): fragment of an anonymous TObjectIntProcedure — postings, i,
// fieldCount, field_occurrences and termCodes are captured from an enclosing
// scope not visible here.
public boolean execute(final String a, final int b) { postings[0][i] = termCodes.getCode(a); postings[1][i] = b; for(int fi=0;fi< fieldCount;fi++) postings[2+fi][i] = field_occurrences[fi].get(a); //fields[i++] = term_fields.get(a); i++; return true; } });
// Trove forEachEntry callback: caches the term id for each term seen; the
// int value b is deliberately unused here.
// NOTE(review): fragment of an anonymous TObjectIntProcedure — cache_termids
// and termCodes are captured from an enclosing scope not visible here.
public boolean execute(final String a, final int b) { cache_termids.put(a, termCodes.getCode(a)); return true; } });
/** Stores the lexicon tree to a lexicon stream as a sequence of entries,
 * written in lexicographical term order.
 * @param lexiconStream The lexicon output stream to store to.
 * @param termCodes supplies the term id recorded on each written entry.
 * @throws IOException if writing an entry to the stream fails.
 */
public void storeToStream(LexiconOutputStream<String> lexiconStream, TermCodes termCodes) throws IOException {
	final String[] sortedTerms = tfs.keys(new String[0]);
	Arrays.sort(sortedTerms);
	// one entry object is reused for every term written to the stream
	final BasicLexiconEntry entry = new BasicLexiconEntry();
	for (String term : sortedTerms) {
		entry.setTermId(termCodes.getCode(term));
		entry.setStatistics(nts.get(term), tfs.get(term));
		entry.setMaxFrequencyInDocuments(maxtfs.get(term));
		lexiconStream.writeNextEntry(term, entry);
	}
}
/** Stores the lexicon tree to a lexicon stream as a sequence of field-aware
 * entries, written in lexicographical term order.
 * @param lexiconStream The lexicon output stream to store to.
 * @param termCodes supplies the term id recorded on each written entry.
 * @throws IOException if writing an entry to the stream fails.
 */
@Override public void storeToStream(LexiconOutputStream<String> lexiconStream, TermCodes termCodes) throws IOException {
	final String[] sortedTerms = tfs.keys(new String[0]);
	Arrays.sort(sortedTerms);
	for (String term : sortedTerms) {
		// a fresh entry per term, sized by this term's per-field document frequencies
		final FieldLexiconEntry entry = new FieldLexiconEntry(getFieldFrequency(term));
		entry.setTermId(termCodes.getCode(term));
		entry.setStatistics(nts.get(term), tfs.get(term));
		entry.setMaxFrequencyInDocuments(maxtfs.get(term));
		// gather this term's frequency in each field
		final int[] fieldFrequencies = new int[fieldCount];
		for (int fi = 0; fi < fieldCount; fi++) {
			fieldFrequencies[fi] = field_tfs[fi].get(term);
		}
		entry.setFieldFrequencies(fieldFrequencies);
		lexiconStream.writeNextEntry(term, entry);
	}
}