/**
 * Indexes all new records found in the supplied data sources. This
 * method does <em>not</em> perform any matching.
 *
 * @param sources the data sources whose records are to be indexed
 * @param batch_size the batch size, passed on unchanged
 * @since 0.4
 */
public void index(Collection<DataSource> sources, int batch_size)
{
  // Forward to the three-argument overload with its first argument
  // fixed to 1.
  index(1, sources, batch_size);
}
/**
 * Announces the reindexing on standard output, then feeds the
 * configured data sources to a newly created {@code Processor}.
 * In deduplication mode the single set of data sources is indexed;
 * otherwise the data sources of group 1 and then group 2 are indexed.
 *
 * @param config the configuration supplying the mode and data sources
 * @param database the database handed to the {@code Processor}
 */
private static void reindex(Configuration config, Database database) {
  System.out.println("Reindexing all records...");
  Processor indexer = new Processor(config, database);

  if (config.isDeduplicationMode()) {
    indexer.index(config.getDataSources(), DEFAULT_BATCH_SIZE);
    return;
  }

  // not deduplicating: index group 1 first, then group 2
  for (int group = 1; group <= 2; group++) {
    indexer.index(config.getDataSources(group), DEFAULT_BATCH_SIZE);
  }
}
}
/** * Does record linkage across the two groups, but does not link * records within each group. * @param matchall If true, all matching records are accepted. If false, * only the single best match for each record is accepted. * @param batch_size The batch size to use. * @since 1.1 */ public void link(Collection<DataSource> sources1, Collection<DataSource> sources2, boolean matchall, int batch_size) { startProcessing(); // start with source 1 for (Collection<Record> batch : makeBatches(sources1, batch_size)) { index(1, batch); if (hasTwoDatabases()) linkBatch(2, batch, matchall); } // then source 2 for (Collection<Record> batch : makeBatches(sources2, batch_size)) { if (hasTwoDatabases()) index(2, batch); linkBatch(1, batch, matchall); } endProcessing(); }
/**
 * Prints a progress message to standard output and then indexes the
 * configured data sources through a fresh {@code Processor}.
 * When the configuration is in deduplication mode, its one set of data
 * sources is indexed. Otherwise group 1 is indexed, followed by group 2.
 *
 * @param config the configuration providing the mode and data sources
 * @param database the database passed to the {@code Processor}
 */
private static void reindex(Configuration config, Database database) {
  System.out.println("Reindexing all records...");
  Processor indexer = new Processor(config, database);

  if (config.isDeduplicationMode()) {
    indexer.index(config.getDataSources(), DEFAULT_BATCH_SIZE);
    return;
  }

  // two-group mode: group 1 is indexed before group 2
  for (int group = 1; group <= 2; group++) {
    indexer.index(config.getDataSources(group), DEFAULT_BATCH_SIZE);
  }
}
}
/** * Does record linkage across the two groups, but does not link * records within each group. * @param matchall If true, all matching records are accepted. If false, * only the single best match for each record is accepted. * @param batch_size The batch size to use. * @since 1.1 */ public void link(Collection<DataSource> sources1, Collection<DataSource> sources2, boolean matchall, int batch_size) { startProcessing(); // first, index up group 1 index(sources1, batch_size); // second, traverse group 2 to look for matches with group 1 linkRecords(sources2, matchall, batch_size); }