/**
 * Performs post-construction initialisation of the indexer. This method must be
 * called by anything which directly extends Indexer, rather than doing this work
 * in the constructor — subclass fields are not yet initialised when the
 * superclass constructor runs.
 * See: http://benpryor.com/blog/2008/01/02/dont-call-subclass-methods-from-a-superclass-constructor/
 *
 * Order matters here: fields must be initialised before the pipeline is built,
 * and properties must be loaded before the pipeline so it can be configured
 * from terrier.properties.
 */
protected void init()
{
	FieldScore.init(); //init fields before constructing pipeline
	this.load_field_ids();
	//construct pipeline using list specified in terrier.properties
	//this object should be the last item in the pipeline
	this.load_indexer_properties();
	this.load_pipeline();
	//load the docnos of any documents that should force builder boundaries
	this.load_builder_boundary_documents();
}
/**
 * Utility command-line entry point for merging indices.
 * Expects exactly three arguments: {@code --merge lowid highid}, and merges
 * the numbered indices between those suffixes at the configured index
 * path/prefix. Prints a usage message on any other invocation.
 *
 * @param args command-line arguments: {@code --merge}, low suffix, high suffix
 * @throws Exception if the merge fails, or if the id arguments are not integers
 */
public static void main(String args[]) throws Exception
{
	// Check args.length BEFORE dereferencing args[0]: the original order threw
	// ArrayIndexOutOfBoundsException when invoked with no arguments, instead of
	// printing the usage message.
	if (args.length == 3 && args[0].equals("--merge"))
	{
		merge(
			ApplicationSetup.TERRIER_INDEX_PATH,
			ApplicationSetup.TERRIER_INDEX_PREFIX,
			Integer.parseInt(args[1]),
			Integer.parseInt(args[2]));
		return;
	}
	logger.error("Usage: org.terrier.indexing.Indexer --merge [lowid] [highid]");
}
/**
 * Builds the index data structures for the configured collection by delegating
 * to {@code index(Collection[])} on the indexer obtained from
 * {@code loadIndexer(path, prefix)}. Refuses to run (logging an error) when an
 * index already exists at the configured path/prefix, and closes the
 * collection afterwards on a best-effort basis.
 */
@Override
public void index()
{
	if (! Index.existsIndex(path, prefix))
	{
		loadIndexer(path, prefix).index(new Collection[] {collectionTREC});
		// best-effort close: a failure here should not abort indexing, only warn
		try {
			collectionTREC.close();
		} catch (Exception closeError) {
			logger.warn("problem closing collection", closeError);
		}
	}
	else
	{
		logger.error("Cannot index while an index exists at "+path + ","+ prefix);
	}
}
// Two-phase indexing: build the direct index for all collections first,
// then derive the inverted index from it.
// NOTE(review): this is an interior fragment of an enclosing method —
// `collections`, `oldIndexPrefix` and `counter` are defined outside this view;
// presumably `counter` is the number of index passes built so far. TODO confirm.
this.createDirectIndex(collections);
this.createInvertedIndex();
merge(path, oldIndexPrefix, 1, counter);
/**
 * Indexes the given documents with the supplied indexer and returns the
 * resulting in-memory index.
 *
 * @param indexer the indexer to drive; must actually be a {@code MemoryIndexer},
 *        since the result is obtained via {@code MemoryIndexer.getIndex()}
 * @param fieldsExpected unused here; kept for interface compatibility with
 *        other doIndexing implementations
 * @param sourceDocs the documents to index, keyed on their "filename" property
 * @return the index built over sourceDocs
 */
protected Index doIndexing(Indexer indexer, boolean fieldsExpected, Document[] sourceDocs)
{
	final Collection wrapped = new CollectionDocumentList(sourceDocs, "filename");
	final Collection[] batch = new Collection[] { wrapped };
	indexer.createDirectIndex(batch);
	indexer.createInvertedIndex();
	final MemoryIndexer memIndexer = (MemoryIndexer) indexer;
	return memIndexer.getIndex();
}
/**
 * Loads indexer configuration, additionally picking up the block-indexing
 * settings (block size and maximum blocks) from ApplicationSetup on top of
 * whatever the superclass loads.
 */
@Override
protected void load_indexer_properties()
{
	super.load_indexer_properties();
	// block-specific settings sourced from terrier.properties via ApplicationSetup
	BLOCK_SIZE = ApplicationSetup.BLOCK_SIZE;
	MAX_BLOCKS = ApplicationSetup.MAX_BLOCKS;
}
}
// Split the comma-separated pipeline class list, trimming whitespace around commas.
// NOTE(review): interior fragment — the string being split (presumably the
// terrier.properties pipeline setting) is outside this view.
	.split("\\s*,\\s*");
// The pipeline is assembled back-to-front, starting from its terminal element.
TermPipeline next = getEndOfPipeline();
final TermPipeline last = next;
TermPipeline tmp;
/**
 * Test utility: builds an on-disk index at the given path/prefix from parallel
 * arrays of docnos and document contents, treating each document as tagged text
 * with fields, and asserts that the expected number of documents was indexed.
 *
 * @param docnos document identifiers; used as both "filename" and "docno" properties
 * @param documents document contents, one per docno
 * @param indexer the indexer to use
 * @param path directory for the new index
 * @param prefix filename prefix for the new index
 * @return the newly created index
 * @throws Exception if indexing fails
 */
public static Index makeIndexFields(String[] docnos, String[] documents, Indexer indexer, String path, String prefix) throws Exception
{
	assertFalse("Index at "+ path + "," + prefix + " already exists!", IndexOnDisk.existsIndex(path, prefix));
	assertEquals(docnos.length, documents.length);
	Document[] sourceDocs = new Document[docnos.length];
	for(int i=0;i<docnos.length;i++)
	{
		Map<String,String> docProperties = new HashMap<String,String>();
		docProperties.put("filename", docnos[i]);
		docProperties.put("docno", docnos[i]);
		// Use an explicit charset: the no-arg getBytes() uses the platform
		// default, which makes this helper behave differently across machines.
		sourceDocs[i] = new TaggedDocument(
			new ByteArrayInputStream(documents[i].getBytes(java.nio.charset.StandardCharsets.UTF_8)),
			docProperties,
			new EnglishTokeniser());
	}
	Collection col = new CollectionDocumentList(sourceDocs, "filename");
	indexer.index(new Collection[]{col});
	Index index = Index.createIndex(path, prefix);
	assertNotNull(index);
	assertEquals(sourceDocs.length, index.getCollectionStatistics().getNumberOfDocuments());
	return index;
}
// Two-phase indexing: direct index first, then the inverted index derived from it.
// NOTE(review): interior fragment — `indexer` and `col` are defined outside this view.
indexer.createDirectIndex(new Collection[]{col});
indexer.createInvertedIndex();
/**
 * Test utility: builds an on-disk index at the given path/prefix from parallel
 * arrays of docnos and document contents, then asserts that the expected number
 * of documents was indexed.
 *
 * @param docnos document identifiers; used as both "filename" and "docno" properties
 * @param documents document contents, one per docno
 * @param indexer the indexer to use
 * @param path directory for the new index
 * @param prefix filename prefix for the new index
 * @return the newly created index
 * @throws Exception if indexing fails
 */
public static Index makeIndex(String[] docnos, String[] documents, Indexer indexer, String path, String prefix) throws Exception
{
	assertFalse("Index at "+ path + "," + prefix + " already exists!", IndexOnDisk.existsIndex(path, prefix));
	assertEquals(docnos.length, documents.length);
	final int numDocs = docnos.length;
	Document[] sourceDocs = new Document[numDocs];
	for (int d = 0; d < numDocs; d++)
	{
		Map<String,String> props = new HashMap<String,String>();
		props.put("filename", docnos[d]);
		props.put("docno", docnos[d]);
		sourceDocs[d] = makeDocumentFromText(documents[d], props);
	}
	Collection col = makeCollection(docnos, documents);
	indexer.index(new Collection[]{col});
	Index index = Index.createIndex(path, prefix);
	assertNotNull(index);
	assertEquals(sourceDocs.length, index.getCollectionStatistics().getNumberOfDocuments());
	return index;
}
/** Merge a series of numbered indices in the same path/prefix area. New merged index * will be stored at mpath/mprefix_highest+1. * @param mpath Path of all indices * @param mprefix Common prefix of all indices * @param lowest lowest subfix of prefix * @param highest highest subfix of prefix */ public static void merge(String mpath, String mprefix, int lowest, int highest) { //we define the counterMerged in order to //ensure that the merged data structures will //have different names LinkedList<String[]> llist = new LinkedList<String[]>(); for (int i=lowest; i<=highest; i++) { llist.add(new String[]{mpath,mprefix+ "_" + i}); } merge(mpath, mprefix, llist, highest+1); }
/**
 * Test utility: builds an index at the default location from parallel arrays of
 * docnos and document contents, wrapping each document as a {@code FileDocument}
 * keyed on its "filename" property, and asserts that the expected number of
 * documents was indexed.
 *
 * @param docnos document identifiers; used as the "filename" property
 * @param documents document contents, one per docno
 * @param indexer the indexer to use
 * @return the newly created index
 * @throws Exception if indexing fails
 */
public static Index makeIndex(String[] docnos, String[] documents, Indexer indexer) throws Exception
{
	assertEquals(docnos.length, documents.length);
	Document[] sourceDocs = new Document[docnos.length];
	for(int i=0;i<docnos.length;i++)
	{
		Map<String,String> docProperties = new HashMap<String,String>();
		docProperties.put("filename", docnos[i]);
		// Use an explicit charset: the no-arg getBytes() uses the platform
		// default, which makes this helper behave differently across machines.
		sourceDocs[i] = new FileDocument(
			new ByteArrayInputStream(documents[i].getBytes(java.nio.charset.StandardCharsets.UTF_8)),
			docProperties,
			new EnglishTokeniser());
	}
	Collection col = new CollectionDocumentList(sourceDocs, "filename");
	indexer.index(new Collection[]{col});
	Index index = Index.createIndex();
	// Fail fast with a clear assertion (as the other makeIndex overloads do)
	// rather than an opaque NullPointerException on the next line.
	assertNotNull(index);
	assertEquals(sourceDocs.length, index.getCollectionStatistics().getNumberOfDocuments());
	return index;
}