/**
 * Loads variant annotations from a specified file into the selected variant database.
 *
 * @param uri URI of the annotation file
 * @param params Specific params.
 * @throws IOException IOException thrown
 * @throws StorageEngineException if there is a problem creating or running the {@link ParallelTaskRunner}
 */
public void loadVariantAnnotation(URI uri, ObjectMap params) throws IOException, StorageEngineException {
    final int annotationBatchSize = params.getInt(DefaultVariantAnnotationManager.BATCH_SIZE, 100);
    final int writerCount = params.getInt(DefaultVariantAnnotationManager.NUM_WRITERS, 6);

    // Unsorted runner: annotation load order does not matter, abort on first failure.
    ParallelTaskRunner.Config ptrConfig = ParallelTaskRunner.Config.builder()
            .setNumTasks(writerCount)
            .setBatchSize(annotationBatchSize)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();

    DataReader<VariantAnnotation> annotationReader = newVariantAnnotationDataReader(uri);
    try {
        ProgressLogger progress = new ProgressLogger("Loaded annotations: ", numAnnotationsToLoad.get());
        ParallelTaskRunner<VariantAnnotation, ?> runner =
                buildLoadAnnotationParallelTaskRunner(annotationReader, ptrConfig, progress, params);
        runner.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error loading variant annotation", e);
    }
}
/**
 * Builds a single-threaded runner that reads {@code size} variants from the test
 * VCF fixture, transforms them into {@code VcfSlice} protos, and hands them to
 * {@code collector}.
 *
 * @param size number of variants the test reader should produce
 * @param collector destination writer for the generated slices
 * @return a configured, not-yet-started runner
 * @throws Exception if the reader or helper cannot be created
 */
public ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> createParallelRunner(int size, DataWriter<VcfSliceProtos.VcfSlice> collector) throws Exception {
    VcfVariantReader variantReader = VcfVariantReaderTest.createReader(size);
    // Minimal archive helper: study 1, file "1".
    ArchiveTableHelper archiveHelper = new ArchiveTableHelper(new Configuration(), 1, new VariantFileMetadata("1", "1"));
    ParallelTaskRunner.Task<Variant, VcfSliceProtos.VcfSlice> transformTask = new VariantHbaseTransformTask(archiveHelper);
    ParallelTaskRunner.Config runnerConfig = ParallelTaskRunner.Config.builder()
            .setNumTasks(1)
            .setBatchSize(10)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();
    // Single task instance is fine here: numTasks == 1, so no concurrent use.
    return new ParallelTaskRunner<>(variantReader, () -> transformTask, collector, runnerConfig);
}
/**
 * Runs the task runner to completion with an effectively unbounded timeout.
 *
 * @throws ExecutionException if the run is interrupted; the cause is the first
 *         recorded interruption when one exists, otherwise the
 *         {@link InterruptedException} itself.
 */
public void run() throws ExecutionException {
    try {
        run(Long.MAX_VALUE, TimeUnit.DAYS);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can observe it.
        Thread.currentThread().interrupt();
        // BUGFIX: interruptions.get(0) threw IndexOutOfBoundsException (masking the
        // real error) when no interruption had been recorded; fall back to `e`.
        Throwable cause = interruptions.isEmpty() ? e : interruptions.get(0);
        throw new ExecutionException("Error while running ParallelTaskRunner. Found "
                + interruptions.size() + " interruptions.", cause);
    }
}
/**
 * Assembles the runner used to load annotations: each worker thread gets its own
 * DB writer (created by the supplier), and there is no shared final writer.
 *
 * @param reader source of parsed {@link VariantAnnotation} objects
 * @param config runner configuration (threads, batch size, ...)
 * @param progressLogger progress reporter attached to every writer
 * @param params options forwarded to each writer
 * @return a configured, not-yet-started runner
 */
protected ParallelTaskRunner<VariantAnnotation, ?> buildLoadAnnotationParallelTaskRunner(
        DataReader<VariantAnnotation> reader, ParallelTaskRunner.Config config,
        ProgressLogger progressLogger, ObjectMap params) {
    return new ParallelTaskRunner<>(reader, () -> {
        // Fresh QueryOptions per task: writers must not share a mutable options object.
        QueryOptions writerOptions = new QueryOptions(params);
        return newVariantAnnotationDBWriter(dbAdaptor, writerOptions)
                .setProgressLogger(progressLogger);
    }, null, config);
}
/**
 * Builds the immutable {@link ParallelTaskRunner.Config} from the current builder state.
 * A negative capacity means "unset" and defaults to two pending batches per task.
 * Note: the default is written back into the builder field, so later calls to
 * {@code build()} reuse the resolved value.
 */
public ParallelTaskRunner.Config build() {
    capacity = capacity < 0 ? numTasks * 2 : capacity;
    return new ParallelTaskRunner.Config(numTasks, batchSize, capacity, abortOnFail, sorted, readQueuePutTimeout);
}
}
/**
 * Creates a runner with an explicit list of tasks; each task runs in its own
 * thread, and the list size becomes the effective "numTasks".
 *
 * @param reader Unique DataReader. If null, empty batches will be generated
 * @param tasks  Generated Tasks. Each task will be used in one thread. Will use tasks.size() as "numTasks".
 * @param writer Unique DataWriter. If null, data generated by the task will be lost.
 * @param config configuration.
 * @throws IllegalArgumentException Exception.
 */
public ParallelTaskRunner(DataReader<I> reader, List<? extends org.opencb.commons.run.Task<I, O>> tasks, DataWriter<O> writer, Config config) {
    this.reader = reader;
    this.writer = writer;
    this.config = config;
    // Defensive copy: the runner owns its task list independently of the caller's.
    this.tasks = new ArrayList<>(tasks);
    check();
}
/**
 * Creates a new, independent {@code Config} builder with default settings.
 *
 * @return a fresh builder instance
 */
public static Builder builder() {
    return new Builder();
}
@Override public List<R> drain() throws Exception { // Drain and write List<R> drain = task.drain(); writer.write(drain); return drain; }
/**
 * Reads the next batch from the reader, timing the read and assigning a
 * monotonically increasing position. On any read failure, records the
 * exception and returns the poison pill so consumers shut down.
 */
private Batch<I> readBatch() {
    final long startNanos = System.nanoTime();
    final int position = numBatches++;
    Batch<I> batch;
    try {
        batch = new Batch<>(reader.read(config.batchSize), position);
    } catch (Exception e) {
        logger.error("Error reading batch " + position, e);
        exceptions.add(e);
        batch = POISON_PILL;
    }
    timeReading += System.nanoTime() - startNanos;
    return batch;
}
@Override
public boolean pre() {
    // The wrapped task may throw checked exceptions, but this interface's pre()
    // does not declare any — wrap in an unchecked exception, preserving the cause.
    try {
        task.pre();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Delegate to the enclosing reader's own pre(); only its result is reported.
    return DataReader.this.pre();
}
@Override
public boolean post() {
    // Mirror of pre(): checked exceptions from the task are wrapped unchecked,
    // then the enclosing reader's post() result is returned.
    try {
        task.post();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return DataReader.this.post();
}
/**
 * Creates a runner where all worker threads share one task instance.
 * NOTE(review): the single task is invoked concurrently from config.numTasks
 * threads — it is assumed to be thread-safe; confirm per Task implementation.
 *
 * @param reader Unique DataReader. If null, empty batches will be generated
 * @param task   Task to be used. Will be used the same instance in all threads
 * @param writer Unique DataWriter. If null, data generated by the task will be lost.
 * @param config configuration.
 * @throws IllegalArgumentException Exception.
 */
public ParallelTaskRunner(DataReader<I> reader, org.opencb.commons.run.Task<I, O> task, DataWriter<O> writer, Config config) {
    this.reader = reader;
    this.writer = writer;
    this.config = config;
    // Same instance registered once per worker slot.
    this.tasks = new ArrayList<>(config.numTasks);
    for (int remaining = config.numTasks; remaining > 0; remaining--) {
        this.tasks.add(task);
    }
    check();
}
@Override
public List<R> apply(List<T> batch) throws Exception {
    // Transform the batch, persist the result immediately, then return it
    // so downstream stages see the same transformed data.
    List<R> transformed = task.apply(batch);
    writer.write(transformed);
    return transformed;
}
@Override
public void pre() throws Exception {
    // First thread to arrive wins the atomic flag and initializes the shared
    // writer exactly once; everyone still runs their own task.pre().
    boolean alreadyInitialized = pre.getAndSet(true);
    if (!alreadyInitialized) {
        writer.open();
        writer.pre();
    }
    task.pre();
}
@Override
public void post() throws Exception {
    task.post();
    // Symmetric to pre(): the first thread to reach post() tears down the
    // shared writer exactly once.
    boolean alreadyFinished = post.getAndSet(true);
    if (!alreadyFinished) {
        writer.post();
        writer.close();
    }
}
};
/** * @param reader Unique DataReader. If null, empty batches will be generated. * @param taskSupplier TaskGenerator. Will generate a new task for each thread. * @param writer Unique DataWriter. If null, data generated by the task will be lost. * @param config configuration. * @throws IllegalArgumentException Exception. */ public ParallelTaskRunner(DataReader<I> reader, Supplier<? extends org.opencb.commons.run.Task<I, O>> taskSupplier, DataWriter<O> writer, Config config) { this.config = config; this.reader = reader; this.writer = writer; this.tasks = new ArrayList<>(config.numTasks); for (int i = 0; i < config.numTasks; i++) { tasks.add(taskSupplier.get()); } check(); }