/**
 * Supplies the component that terminates the term pipeline.
 * A {@code FieldTermProcessor} is created when
 * {@code FieldScore.USE_FIELD_INFORMATION} is set; otherwise a
 * {@code BasicTermProcessor} is created.
 * @return TermPipeline the end of the term pipeline.
 */
protected TermPipeline getEndOfPipeline() {
    final TermPipeline endOfPipeline;
    if (FieldScore.USE_FIELD_INFORMATION) {
        endOfPipeline = new FieldTermProcessor();
    } else {
        endOfPipeline = new BasicTermProcessor();
    }
    return endOfPipeline;
}
/**
 * Builds an index over the given documents with a {@code BasicIndexer}.
 * The shared {@code count} is incremented first, and the index prefix is
 * {@code ApplicationSetup.TERRIER_INDEX_PREFIX} followed by '-' and the
 * new counter value.
 * @param docnos the document identifiers
 * @param documents the document contents
 * @return the index returned by the five-argument {@code makeIndex}
 * @throws Exception propagated from the indexer or the delegate method
 */
public static Index makeIndex(String[] docnos, String[] documents) throws Exception {
    count++;
    final String indexPath = ApplicationSetup.TERRIER_INDEX_PATH;
    final String indexPrefix = ApplicationSetup.TERRIER_INDEX_PREFIX + '-' + count;
    final BasicIndexer indexer = new BasicIndexer(indexPath, indexPrefix);
    return makeIndex(docnos, documents, indexer, indexPath, indexPrefix);
}
/**
 * Builds an index over the given documents with a {@code BlockIndexer},
 * delegating to the five-argument {@code makeIndexFields}.
 * The shared {@code count} is incremented first, and the index prefix is
 * {@code ApplicationSetup.TERRIER_INDEX_PREFIX} followed by '-' and the
 * new counter value.
 * @param docnos the document identifiers
 * @param documents the document contents
 * @return the index returned by {@code makeIndexFields}
 * @throws Exception propagated from the indexer or the delegate method
 */
public static Index makeIndexFieldsBlocks(String[] docnos, String[] documents) throws Exception {
    count++;
    final String indexPath = ApplicationSetup.TERRIER_INDEX_PATH;
    final String indexPrefix = ApplicationSetup.TERRIER_INDEX_PREFIX + '-' + count;
    final BlockIndexer indexer = new BlockIndexer(indexPath, indexPrefix);
    return makeIndexFields(docnos, documents, indexer, indexPath, indexPrefix);
}
/** * Returns the object that is to be the end of the TermPipeline. * This method is used at construction time of the parent object. * @return TermPipeline the last component of the term pipeline. */ protected TermPipeline getEndOfPipeline() { // if using delimited blocks if (Boolean.parseBoolean(ApplicationSetup.getProperty("block.delimiters.enabled", "false"))) { String delim = ApplicationSetup.getProperty("block.delimiters", "").trim(); if (Boolean.parseBoolean(ApplicationSetup.getProperty("lowercase", "true"))) delim = delim.toLowerCase(); String delims[] = delim.split("\\s*,\\s*"); final boolean indexDelims = Boolean.parseBoolean(ApplicationSetup.getProperty("block.delimiters.index.terms", "false")); final boolean countDelims = Boolean.parseBoolean(ApplicationSetup.getProperty("block.delimiters.index.doclength","true")); return (FieldScore.USE_FIELD_INFORMATION) ? new DelimFieldTermProcessor(delims, indexDelims, countDelims) : new DelimTermProcessor(delims, indexDelims, countDelims); } else if (FieldScore.USE_FIELD_INFORMATION) { return new FieldTermProcessor(); } return new BasicTermProcessor(); }
invertedIndexBuilder = new BlockInvertedIndexBuilder(currentIndex, "inverted", compressionInvertedConfig); invertedIndexBuilder.createInvertedIndex(); this.finishedInvertedIndexBuild(); try{ currentIndex.flush();
invertedIndexBuilder = new InvertedIndexBuilder(currentIndex, "inverted", compressionInvertedConfig); invertedIndexBuilder.createInvertedIndex(); finishedInvertedIndexBuild();
tmpStorageStorage.add(createPointerForTerm(le)); numberOfPointersThisIteration + " pointers == "+ _processTerms +" terms"); return new IntLongTuple(_processTerms, numberOfPointersThisIteration);
/**
 * Hook method, called when the inverted index is finished - ie the lexicon is finished.
 * Closes the inverted index builder if one is set (a failure to close is
 * logged as a warning, not rethrown), then runs
 * {@code LexiconBuilder.optimise} on the "lexicon" structure of the current index.
 */
protected void finishedInvertedIndexBuild() {
    try {
        if (invertedIndexBuilder != null) {
            invertedIndexBuilder.close();
        }
    } catch (IOException ioe) {
        logger.warn("Problem closing inverted index builder", ioe);
    }
    LexiconBuilder.optimise(currentIndex, "lexicon");
}
}
/** * Constructs an instance of a BasicIndexer, using the given path name * for storing the data structures. * @param path String the path where the data structures will be created. This is assumed to be * absolute. * @param prefix String the filename component of the data structures */ public BasicIndexer(String path, String prefix) { super(path, prefix); //delay the execution of init() if we are a parent class if (this.getClass() == BasicIndexer.class) init(); compressionDirectConfig = CompressionFactory.getCompressionConfiguration("direct", FieldScore.FIELD_NAMES, 0, 0); compressionInvertedConfig = CompressionFactory.getCompressionConfiguration("inverted", FieldScore.FIELD_NAMES, 0, 0); }
/**
 * Loads the indexer properties of the superclass, then the settings used here:
 * the document check interval ({@code ApplicationSetup.DOCS_CHECK_SINGLEPASS}),
 * the maximum documents per flush (property
 * {@code indexing.singlepass.max.documents.flush}, default "0"), and a fresh
 * {@code RuntimeMemoryChecker}. The resulting values are logged at info level.
 */
@Override
protected void load_indexer_properties() {
    super.load_indexer_properties();

    docsPerCheck = ApplicationSetup.DOCS_CHECK_SINGLEPASS;

    final String maxDocsSetting =
            ApplicationSetup.getProperty("indexing.singlepass.max.documents.flush", "0");
    maxDocsPerFlush = Integer.parseInt(maxDocsSetting);

    memoryCheck = new RuntimeMemoryChecker();

    final String summary = "Checking memory usage every " + docsPerCheck
            + " maxDocPerFlush=" + maxDocsPerFlush;
    logger.info(summary);
}
/**
 * Creates a BlockIndexer whose data structures are stored in the given path,
 * with the given prefix on the filenames.
 * {@code init()} is only run here when the runtime class is exactly
 * {@code BlockIndexer}. The compression configurations are created with a
 * block size of {@code BLOCK_SIZE}, or 2 when the property
 * {@code block.delimiters.enabled} is true, and with {@code MAX_BLOCKS}.
 * @param pathname String the path in which the created data structures will be saved. This is assumed to be
 * absolute.
 * @param prefix String the prefix on the filenames of the created data structures, usually "data"
 */
public BlockIndexer(String pathname, String prefix) {
    super(pathname, prefix);

    final boolean isExactlyBlockIndexer = (getClass() == BlockIndexer.class);
    if (isExactlyBlockIndexer) {
        init();
    }

    final boolean delimitersEnabled = Boolean.parseBoolean(
            ApplicationSetup.getProperty("block.delimiters.enabled", "false"));
    // NOTE(review): the reason delimited blocks use a size of 2 is not visible here - confirm
    final int blockSize = delimitersEnabled ? 2 : BLOCK_SIZE;

    this.compressionDirectConfig = CompressionFactory.getCompressionConfiguration(
            "direct", FieldScore.FIELD_NAMES, blockSize, MAX_BLOCKS);
    this.compressionInvertedConfig = CompressionFactory.getCompressionConfiguration(
            "inverted", FieldScore.FIELD_NAMES, blockSize, MAX_BLOCKS);
}
numberOfPointersThisIteration + " pointers == "+ j +" terms"); return new IntLongTuple(j, numberOfPointersThisIteration);
/**
 * Builds an index over the given documents with a {@code BasicIndexer},
 * delegating to the five-argument {@code makeIndexFields}.
 * The shared {@code count} is incremented first, and the index prefix is
 * {@code ApplicationSetup.TERRIER_INDEX_PREFIX} followed by '-' and the
 * new counter value.
 * @param docnos the document identifiers
 * @param documents the document contents
 * @return the index returned by the delegate {@code makeIndexFields}
 * @throws Exception propagated from the indexer or the delegate method
 */
public static Index makeIndexFields(String[] docnos, String[] documents) throws Exception {
    count++;
    final String indexPath = ApplicationSetup.TERRIER_INDEX_PATH;
    final String indexPrefix = ApplicationSetup.TERRIER_INDEX_PREFIX + '-' + count;
    final BasicIndexer indexer = new BasicIndexer(indexPath, indexPrefix);
    return makeIndexFields(docnos, documents, indexer, indexPath, indexPrefix);
}
/**
 * Builds an index over the given documents with a {@code BlockIndexer},
 * delegating to the five-argument {@code makeIndex}.
 * The shared {@code count} is incremented first, and the index prefix is
 * {@code ApplicationSetup.TERRIER_INDEX_PREFIX} followed by '-' and the
 * new counter value.
 * @param docnos the document identifiers
 * @param documents the document contents
 * @return the index returned by {@code makeIndex}
 * @throws Exception propagated from the indexer or the delegate method
 */
public static Index makeIndexBlocks(String[] docnos, String[] documents) throws Exception {
    count++;
    final String indexPath = ApplicationSetup.TERRIER_INDEX_PATH;
    final String indexPrefix = ApplicationSetup.TERRIER_INDEX_PREFIX + '-' + count;
    final BlockIndexer indexer = new BlockIndexer(indexPath, indexPrefix);
    return makeIndex(docnos, documents, indexer, indexPath, indexPrefix);
}
/**
 * Hook method, called when the inverted index is finished - ie the lexicon is finished.
 * Closes the inverted index builder if one is set (a failure to close is
 * logged as a warning, not rethrown), then runs
 * {@code LexiconBuilder.optimise} on the "lexicon" structure of the current index.
 */
protected void finishedInvertedIndexBuild() {
    try {
        if (invertedIndexBuilder != null) {
            invertedIndexBuilder.close();
        }
    } catch (IOException ioe) {
        logger.warn("Problem closing inverted index builder", ioe);
    }
    LexiconBuilder.optimise(currentIndex, "lexicon");
}
/**
 * Builds an index over the given documents with a {@code BasicIndexer}
 * created from {@code ApplicationSetup.TERRIER_INDEX_PATH} and
 * {@code ApplicationSetup.TERRIER_INDEX_PREFIX}.
 * @param docnos the document identifiers
 * @param documents the document contents
 * @return the index returned by the three-argument {@code makeIndex}
 * @throws Exception propagated from the indexer or the delegate method
 */
public static Index makeIndex(String[] docnos, String[] documents) throws Exception {
    final BasicIndexer indexer = new BasicIndexer(
            ApplicationSetup.TERRIER_INDEX_PATH,
            ApplicationSetup.TERRIER_INDEX_PREFIX);
    return makeIndex(docnos, documents, indexer);
}
/**
 * Runs {@code testIndexer} against a {@code BlockIndexer} after setting the
 * {@code FieldTags.process} property to the empty string.
 * @throws Exception propagated from the indexer or {@code testIndexer}
 */
@Test
public void testBlockNoFields() throws Exception {
    // the property must be set before the indexer is constructed
    ApplicationSetup.setProperty("FieldTags.process", "");
    final BlockIndexer indexer = new BlockIndexer(
            ApplicationSetup.TERRIER_INDEX_PATH,
            ApplicationSetup.TERRIER_INDEX_PREFIX);
    // NOTE(review): the two flags presumably select direct-structure and field checks - confirm against testIndexer
    testIndexer(indexer, true, false);
}

@Test
/**
 * Runs {@code testIndexer} against a {@code BasicIndexer} after setting the
 * {@code FieldTags.process} property to the empty string.
 * @throws Exception propagated from the indexer or {@code testIndexer}
 */
@Test
public void testBasicNoFields() throws Exception {
    // the property must be set before the indexer is constructed
    ApplicationSetup.setProperty("FieldTags.process", "");
    final BasicIndexer indexer = new BasicIndexer(
            ApplicationSetup.TERRIER_INDEX_PATH,
            ApplicationSetup.TERRIER_INDEX_PREFIX);
    // NOTE(review): the two flags presumably select direct-structure and field checks - confirm against testIndexer
    testIndexer(indexer, true, false);
}
/**
 * Runs {@code testIndexer} against a {@code BlockIndexer} with the prefix
 * "fields", after setting the {@code FieldTags.process} property to
 * "TITLE,ELSE".
 * @throws Exception propagated from the indexer or {@code testIndexer}
 */
@Test
public void testBlockFields() throws Exception {
    // the property must be set before the indexer is constructed
    ApplicationSetup.setProperty("FieldTags.process", "TITLE,ELSE");
    final BlockIndexer indexer = new BlockIndexer(
            ApplicationSetup.TERRIER_INDEX_PATH, "fields");
    // NOTE(review): the two flags presumably select direct-structure and field checks - confirm against testIndexer
    testIndexer(indexer, true, true);
}
/**
 * Runs {@code testIndexer} against a {@code BasicIndexer} with the prefix
 * "fields", after setting the {@code FieldTags.process} property to
 * "TITLE,ELSE".
 * @throws Exception propagated from the indexer or {@code testIndexer}
 */
@Test
public void testBasicFields() throws Exception {
    // the property must be set before the indexer is constructed
    ApplicationSetup.setProperty("FieldTags.process", "TITLE,ELSE");
    final BasicIndexer indexer = new BasicIndexer(
            ApplicationSetup.TERRIER_INDEX_PATH, "fields");
    // NOTE(review): the two flags presumably select direct-structure and field checks - confirm against testIndexer
    testIndexer(indexer, true, true);
}