/**
 * Hook method that instantiates the in-memory postings structure and
 * stores it in <code>mp</code>. A FieldsMemoryPostings is created when
 * <code>useFieldInformation</code> is set; otherwise a plain
 * MemoryPostings is created.
 */
protected void createMemoryPostings() {
	mp = useFieldInformation
		? new FieldsMemoryPostings()
		: new MemoryPostings();
}
/**
 * Adds every term of a DocumentPostingList to the postings held in memory.
 * Each term returned by <code>docPostings.termSet()</code> is passed to
 * <code>add</code> together with the document identifier and the frequency
 * reported by <code>docPostings.getFrequency(term)</code>.
 * @param docPostings DocumentPostingList containing the term information for the denoted document.
 * @param docid identifier of the document the terms belong to.
 * @throws IOException if an I/O error occurs.
 */
public void addTerms(DocumentPostingList docPostings, int docid) throws IOException {
	for (final String currentTerm : docPostings.termSet()) {
		add(currentTerm, docid, docPostings.getFrequency(currentTerm));
	}
}
/**
 * Triggers the writing of the postings in memory to disk, using a
 * default RunWriter built from the given filenames.
 * @param file array of filenames; <code>file[0]</code> and <code>file[1]</code>
 *        are passed, in that order, to the RunWriter constructor.
 * @throws IOException if an I/O error occurs.
 */
public void finish(String[] file) throws IOException {
	final String firstFilename = file[0];
	final String secondFilename = file[1];
	finish(new RunWriter(firstFilename, secondFilename));
}
/**
 * {@inheritDoc}.
 * This implementation only places content in the runs in memory, which
 * will eventually be flushed to disk. A document whose "docno" property
 * has already been seen is skipped entirely; a document whose length is
 * not positive is recorded as seen but otherwise ignored.
 */
@Override
protected void indexDocument(Map<String,String> docProperties, DocumentPostingList termsInDocument) throws Exception
{
	// Skip documents whose docno was already encountered; otherwise remember it.
	final String docno = docProperties.get("docno");
	if (seenDocnos.contains(docno)) {
		return;
	}
	seenDocnos.add(docno);

	// Nothing to index for a document without a positive length.
	if (!(termsInDocument.getDocumentLength() > 0)) {
		return;
	}

	numberOfDocsSinceCheck++;
	numberOfDocsSinceFlush++;
	checkFlush();

	mp.addTerms(termsInDocument, currentId);

	// Use the full entry only when fields are configured; otherwise wrap it
	// in a SimpleDocumentIndexEntry.
	final DocumentIndexEntry statistics = termsInDocument.getDocumentStatistics();
	if (FieldScore.FIELDS_COUNT > 0) {
		docIndexBuilder.addEntryToBuffer(statistics);
	} else {
		docIndexBuilder.addEntryToBuffer(new SimpleDocumentIndexEntry(statistics));
	}

	metaBuilder.writeDocumentEntry(docProperties);
	currentId++;
	numberOfDocuments++;
}
/** Triggers the writing of the postings in memory to the specified * RunWriter. If the RunWriter requires that terms are written in order, * then this will happen. * @param runWriter * @throws IOException */ public void finish(RunWriter runWriter) throws IOException { logger.debug("Writing run "+runWriter.toString()); //only sort the postings if required by the RunWriter writeToWriter(runWriter, runWriter.writeSorted() ? new TreeMap<String, Posting>(postings) : postings); logger.debug(" done"); }
return; numberOfDocsSinceCheck = 0; final long consumed = mp.getMemoryConsumption(); boolean doFlush = false; final boolean memCheck = memoryCheck.checkMemory();
/**
 * {@inheritDoc}.
 * This implementation only places content in the runs in memory, which
 * will eventually be flushed to disk. Documents whose length is not
 * positive are ignored.
 */
@Override
protected void indexDocument(Map<String,String> docProperties, DocumentPostingList termsInDocument) throws Exception
{
	if (termsInDocument.getDocumentLength() > 0) {
		// Update the counters first, then give checkFlush() a chance to run
		// before this document's postings are added.
		numberOfDocsSinceCheck++;
		numberOfDocsSinceFlush++;
		checkFlush();

		mp.addTerms(termsInDocument, currentId);

		// With no fields configured, the entry is wrapped in a SimpleDocumentIndexEntry.
		final DocumentIndexEntry docStats = termsInDocument.getDocumentStatistics();
		final boolean fieldsConfigured = FieldScore.FIELDS_COUNT > 0;
		docIndexBuilder.addEntryToBuffer(fieldsConfigured ? docStats : new SimpleDocumentIndexEntry(docStats));

		metaBuilder.writeDocumentEntry(docProperties);

		currentId++;
		numberOfDocuments++;
	}
}
/**
 * Causes the posting lists built up in memory to be flushed out.
 * The current memory postings are finished using the value returned by
 * <code>finishMemoryPosting()</code>, a garbage collection is requested,
 * a fresh memory postings structure is created through
 * <code>createMemoryPostings()</code>, the memory checker is reset and the
 * count of documents since the last flush is set back to zero.
 * @throws IOException declared for I/O failures while the postings are being finished.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
	value="DM_GC",
	justification="Forcing GC is an essential part of releasing memory for further indexing")
protected void forceFlush() throws IOException
{
	mp.finish(finishMemoryPosting());
	// NOTE(review): the GC request is issued while mp still refers to the
	// postings just finished; confirm this ordering is intentional.
	System.gc();
	createMemoryPostings();
	memoryCheck.reset();
	numberOfDocsSinceFlush = 0;
}