/**
 * Creates the final term code to offset file, and the lexicon hash if enabled.
 * Delegates to {@link LexiconBuilder#optimise} on the destination index's
 * "lexicon" structure.
 */
protected void createLexidFile() {
	LexiconBuilder.optimise(destIndex, "lexicon");
}
/**
 * Constructor. Uses the default LexiconMap implementation and
 * BasicLexiconEntry as the entry class.
 * @param i the index under construction
 * @param _structureName name of the lexicon structure being built, e.g. "lexicon"
 * @param tc the TermCodes instance used to assign term ids
 */
public LexiconBuilder(IndexOnDisk i, String _structureName, TermCodes tc) {
	this(i, _structureName, instantiate(LexiconMap.class), "org.terrier.structures.BasicLexiconEntry", "", "", tc);
}
/**
/**
 * Builds a fresh on-disk index whose lexicon contains the given tokens,
 * each added with a document frequency of 1, and runs the direct/inverted
 * build hooks so the lexicon is finalised.
 * @param tokens the terms to add to the new lexicon
 * @return the newly created index
 * @throws Exception if index creation or lexicon building fails
 */
Index createLexiconIndex(String[] tokens) throws Exception {
	final IndexOnDisk newIndex = Index.createNewIndex(
		ApplicationSetup.TERRIER_INDEX_PATH,
		ApplicationSetup.TERRIER_INDEX_PREFIX);
	final LexiconBuilder builder = new LexiconBuilder(newIndex, "lexicon", new TermCodes());
	for (String term : tokens) {
		builder.addTerm(term, 1);
	}
	builder.finishedDirectIndexBuild();
	builder.finishedInvertedIndexBuild();
	return newIndex;
}
// NOTE(review): this span looks like a truncated excerpt of a larger lexicon
// optimisation routine -- the while(...) loop bodies appear to have been lost
// during extraction. Kept byte-identical; recover the full method before editing.
Iterator<Map.Entry<String,LexiconEntry>> le = getLexInputStream(index, structureName);
int i=0;
// suspicious: re-opening the "-old" input stream is the entire loop body --
// presumably the real iteration logic was elided here; confirm against source
while(le.hasNext())
	le = getLexInputStream(index, structureName + "-old");
LexiconOutputStream<String> leOut = getLexOutputStream(index, structureName);
// suspicious: optimise(...) as the entire loop body -- likewise looks elided
while(le.hasNext())
	optimise(index, structureName);
// NOTE(review): fragment of an indexer's setup/teardown sequence; it ends
// mid-try, so the catch/finally is outside this view.
currentIndex = Index.createNewIndex(path, prefix);
// choose a field-aware lexicon when fields are configured, else the basic one
lexiconBuilder = FieldScore.FIELDS_COUNT > 0
	? new LexiconBuilder(currentIndex, "lexicon", new FieldLexiconMap(FieldScore.FIELDS_COUNT), FieldLexiconEntry.class.getName(), "java.lang.String", "\""+ FieldScore.FIELDS_COUNT + "\"", termCodes)
	: new LexiconBuilder(currentIndex, "lexicon", new LexiconMap(), BasicLexiconEntry.class.getName(), termCodes);
docIndexBuilder.finishedCollections();
lexiconBuilder.finishedDirectIndexBuild();
try {
	metaBuilder.close();
// NOTE(review): this span is a corrupted extraction of the temporary-lexicon
// merging logic -- several distinct merge branches have been spliced together,
// `los` is declared three times in the same visible scope, and the stray
// "+ newMergedFile);" line below looks like a diff/patch artifact. Kept
// byte-identical; recover the original method(s) before editing.
Iterator<Map.Entry<String,LexiconEntry>> lis1 = getLexInputStream(fileToMerge1);
Iterator<Map.Entry<String,LexiconEntry>> lis2 = getLexInputStream(fileToMerge2);
LexiconOutputStream<String> los = getLexOutputStream(newMergedFile);
+ newMergedFile);
mergeTwoLexicons(lis1, lis2, los);
lis[i] = getLexInputStream(inputLexiconFileNames[i]);
// generate a unique temporary structure name for the intermediate merge output
newMergedFile = defaultStructureName + "-mergetmp"+ String.valueOf(progressiveNumber++);
final LexiconOutputStream<String> los = getLexOutputStream(newMergedFile);
mergeNLexicons(lis, los);
// suspicious: loop variable inputLexiconFileName unused; body indexes by i
for(String inputLexiconFileName : inputLexiconFileNames)
	lis[i] = getLexInputStream(inputLexiconFileNames[i]);
final LexiconOutputStream<String> los = getLexOutputStream(defaultStructureName);
mergeNLexicons(lis, los);
for(int i=0;i<StartFileCount;i++)
/** Verifies that adding an empty term is rejected with an IllegalArgumentException. */
@Test(expected=IllegalArgumentException.class)
public void testNullTermException() throws Exception {
	final IndexOnDisk emptyIndex = Index.createNewIndex(
		ApplicationSetup.TERRIER_INDEX_PATH,
		ApplicationSetup.TERRIER_INDEX_PREFIX);
	final LexiconBuilder builder = new LexiconBuilder(emptyIndex, "lexicon", new TermCodes());
	builder.addTerm("", 0);
}
/**
 * Adds one document to the index: its terms go to the lexicon, its postings
 * to the direct index, its statistics to the document index, and its
 * properties to the meta index.
 * @param docProperties Map&lt;String,String&gt; properties of the document
 * @param _termsInDocument DocumentPostingList the terms in the document
 */
protected void indexDocument(Map<String,String> docProperties, DocumentPostingList _termsInDocument) throws Exception {
	// record the document's terms in the in-memory lexicon
	lexiconBuilder.addDocumentTerms(_termsInDocument);
	// write the document's postings to the direct index, remembering where they landed
	final BitIndexPointer directPointer =
		directIndexBuilder.writePostings(_termsInDocument.getPostings2(termCodes));
	// store the document's statistics, pointed at its direct index entry
	final DocumentIndexEntry docEntry = _termsInDocument.getDocumentStatistics();
	docEntry.setBitIndexPointer(directPointer);
	docIndexBuilder.addEntryToBuffer(docEntry);
	// finally, persist the document's metadata
	metaBuilder.writeDocumentEntry(docProperties);
}
/** Writes the current contents of TempLex temporary lexicon binary tree down to * a temporary disk lexicon. */ protected void writeTemporaryLexicon() { try{ final String tmpLexName = this.defaultStructureName+"-tmp"+ TempLexCount; LexiconOutputStream<String> los = getLexOutputStream(tmpLexName); TempLex.storeToStream(los, termCodes); los.close(); /* An alternative but deprecated method to store the temporary lexicons is: * TempLex.storeToFile(tmpLexName); */ //tempLexFiles.addLast(TempLexDirCount+""+TempLexCount); tempLexFiles.addLast(tmpLexName); }catch(IOException ioe){ logger.error("Indexing failed to write a lexicon to disk : ", ioe); } }
// NOTE(review): excerpt begins mid-expression -- this is the '?' branch of a
// ternary choosing between a field-aware and a basic LexiconBuilder; the
// assignment target is outside this view.
	? new LexiconBuilder(currentIndex, "lexicon", new FieldLexiconMap(FieldScore.FIELDS_COUNT), FieldLexiconEntry.class.getName(), "java.lang.String", "\""+ FieldScore.FIELDS_COUNT + "\"", termCodes)
	: new LexiconBuilder(currentIndex, "lexicon", new LexiconMap(), BasicLexiconEntry.class.getName(), termCodes);
lexiconBuilder.finishedDirectIndexBuild();
// record collection statistics and the term pipeline used as index properties
currentIndex.setIndexProperty("num.Tokens", ""+numberOfTokens);
currentIndex.setIndexProperty("termpipelines", ApplicationSetup.getProperty("termpipelines", "Stopwords,PorterStemmer"));
/** * This adds a document to the direct and document indexes, as well * as it's terms to the lexicon. Handled internally by the methods * indexFieldDocument and indexNoFieldDocument. * @param docProperties Map<String,String> properties of the document * @param _termsInDocument DocumentPostingList the terms in the document. * */ protected void indexDocument(Map<String,String> docProperties, DocumentPostingList _termsInDocument) throws Exception { /* add words to lexicontree */ lexiconBuilder.addDocumentTerms(_termsInDocument); /* add doc postings to the direct index */ BitIndexPointer dirIndexPost = directIndexBuilder.writePostings(_termsInDocument.getPostings2(termCodes)); //.addDocument(termsInDocument.getPostings()); /* add doc to documentindex */ DocumentIndexEntry die = _termsInDocument.getDocumentStatistics(); die.setBitIndexPointer(dirIndexPost); docIndexBuilder.addEntryToBuffer(die); /** add doc metadata to index */ metaBuilder.writeDocumentEntry(docProperties); }
/**
 * Creates a lexicon hash for the specified index.
 * @param index IndexOnDisk to make the LexiconHash for
 * @throws IOException if an I/O problem occurs
 * @deprecated use {@link #optimise(IndexOnDisk, String)} instead
 */
@Deprecated
public static void createLexiconHash(final IndexOnDisk index) throws IOException {
	optimise(index, "lexicon");
}
/** optimise the lexicon */
/**
 * Constructor. Instantiates the given LexiconMap class and delegates with
 * empty value/parameter strings.
 * @param i the index under construction
 * @param _structureName name of the lexicon structure being built
 * @param _LexiconMapClass class of the in-memory lexicon map to instantiate
 * @param _lexiconEntryClass fully-qualified name of the LexiconEntry implementation
 * @param termCodes the TermCodes instance used to assign term ids
 */
public LexiconBuilder(IndexOnDisk i, String _structureName, Class <? extends LexiconMap> _LexiconMapClass, String _lexiconEntryClass, TermCodes termCodes) {
	this(i, _structureName, instantiate(_LexiconMapClass), _lexiconEntryClass, "", "", termCodes);
}
/** Optimises this builder's lexicon, i.e. the structure named {@code defaultStructureName} in {@code index}. */
public void optimiseLexicon() {
	optimise(index, defaultStructureName);
}
/**
 * Creates a lexicon index for the specified index.
 * @param index IndexOnDisk to make the lexicon index for
 * @throws IOException if an I/O problem occurs
 * @deprecated use {@link #optimise(IndexOnDisk, String)} instead
 */
@Deprecated
public static void createLexiconIndex(IndexOnDisk index) throws IOException {
	optimise(index, "lexicon");
}
/**
 * Hook method, called when the inverted index is finished -- i.e. the lexicon
 * is finished. Closes the inverted index builder (logging, not propagating,
 * any close failure) and then optimises the lexicon.
 */
protected void finishedInvertedIndexBuild() {
	if (invertedIndexBuilder != null) {
		try {
			invertedIndexBuilder.close();
		} catch (IOException ioe) {
			logger.warn("Problem closing inverted index builder", ioe);
		}
	}
	// the lexicon is always optimised, whether or not a builder was open
	LexiconBuilder.optimise(currentIndex, "lexicon");
}
}
/**
 * Hook invoked once the inverted index build completes, meaning the lexicon
 * is final. Attempts to close the inverted index builder, warning on failure,
 * then runs lexicon optimisation.
 */
protected void finishedInvertedIndexBuild() {
	if (invertedIndexBuilder != null) {
		try {
			invertedIndexBuilder.close();
		} catch (IOException ioe) {
			logger.warn("Problem closing inverted index builder", ioe);
		}
	}
	// optimise unconditionally -- this must happen even when no builder existed
	LexiconBuilder.optimise(currentIndex, "lexicon");
}
// finalise the new index's lexicon (term code to offset file, plus the
// lexicon hash if enabled -- see LexiconBuilder.optimise)
LexiconBuilder.optimise(newIndex, "lexicon");
// finalise the destination index's lexicon, then flush the index to disk
LexiconBuilder.optimise(destIndex, "lexicon");
destIndex.flush();
// NOTE(review): excerpt ends mid-catch; the handler body is outside this view
} catch(IOException ioe) {
// finalise the new index's lexicon (term code to offset file, plus the
// lexicon hash if enabled -- see LexiconBuilder.optimise)
LexiconBuilder.optimise(newIndex, "lexicon");