/**
 * Runs the default ETL using the class-level {@code inputUri} as the input file.
 *
 * <p>Convenience overload: it only forwards to the overload that takes the input URI explicitly.
 *
 * @param variantStorageManager storage engine handed to the delegate overload
 * @param studyConfiguration    study configuration handed to the delegate overload
 * @return whatever the delegate overload returns
 * @throws URISyntaxException     propagated from the delegate
 * @throws IOException            propagated from the delegate
 * @throws FileFormatException    propagated from the delegate
 * @throws StorageEngineException propagated from the delegate
 */
public static StoragePipelineResult runDefaultETL(VariantStorageEngine variantStorageManager,
                                                  StudyConfiguration studyConfiguration)
        throws URISyntaxException, IOException, FileFormatException, StorageEngineException {
    return runDefaultETL(inputUri, variantStorageManager, studyConfiguration);
}
/**
 * Runs the default ETL for the given input file without any caller-supplied options.
 *
 * <p>Convenience overload: it forwards to the overload that takes an {@link ObjectMap},
 * passing a newly created, empty one.
 *
 * @param inputUri              file to process
 * @param variantStorageManager storage engine handed to the delegate overload
 * @param studyConfiguration    study configuration handed to the delegate overload
 * @return whatever the delegate overload returns
 * @throws URISyntaxException     propagated from the delegate
 * @throws IOException            propagated from the delegate
 * @throws FileFormatException    propagated from the delegate
 * @throws StorageEngineException propagated from the delegate
 */
public static StoragePipelineResult runDefaultETL(URI inputUri, VariantStorageEngine variantStorageManager,
                                                  StudyConfiguration studyConfiguration)
        throws URISyntaxException, IOException, FileFormatException, StorageEngineException {
    ObjectMap noExtraOptions = new ObjectMap();
    return runDefaultETL(inputUri, variantStorageManager, studyConfiguration, noExtraOptions);
}
@BeforeClass public static void beforeClass() throws Exception { variantStorageEngine = externalResource.getVariantStorageEngine(); // URI inputUri = VariantStorageBaseTest.getResourceUri("sample1.genome.vcf"); URI inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyConfiguration(0, study1), new ObjectMap(VariantStorageEngine.Options.ANNOTATE.key(), true) .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) ); inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12878_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyConfiguration(0, study1), new ObjectMap(VariantStorageEngine.Options.ANNOTATE.key(), true) .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) ); inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12878_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyConfiguration(0, study2), new ObjectMap(VariantStorageEngine.Options.ANNOTATE.key(), true) .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) ); VariantHbaseTestUtils.printVariants(variantStorageEngine.getDBAdaptor(), newOutputUri()); }
public static StoragePipelineResult runDefaultETL(URI inputUri, VariantStorageEngine variantStorageManager, StudyConfiguration studyConfiguration, ObjectMap params, boolean doTransform, boolean doLoad) throws URISyntaxException, IOException, FileFormatException, StorageEngineException { ObjectMap newParams = new ObjectMap(params); // newParams.put(VariantStorageEngine.Options.STUDY_CONFIGURATION.key(), studyConfiguration); newParams.putIfAbsent(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), studyConfiguration.getAggregation()); // newParams.putIfAbsent(VariantStorageEngine.Options.STUDY_ID.key(), studyConfiguration.getStudyId()); newParams.putIfAbsent(VariantStorageEngine.Options.STUDY.key(), studyConfiguration.getStudyName()); // newParams.putIfAbsent(VariantStorageEngine.Options.FILE_ID.key(), FILE_ID); // Default value is already avro // newParams.putIfAbsent(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro"); newParams.putIfAbsent(VariantStorageEngine.Options.ANNOTATE.key(), true); newParams.putIfAbsent(VariantAnnotationManager.SPECIES, "hsapiens"); newParams.putIfAbsent(VariantAnnotationManager.ASSEMBLY, "GRch37"); newParams.putIfAbsent(VariantStorageEngine.Options.CALCULATE_STATS.key(), true); StoragePipelineResult storagePipelineResult = runETL(variantStorageManager, inputUri, outputUri, newParams, true, doTransform, doLoad); try (VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor()) { StudyConfiguration newStudyConfiguration = dbAdaptor.getStudyConfigurationManager().getStudyConfiguration(studyConfiguration.getStudyName(), null).first(); if (newStudyConfiguration != null) { studyConfiguration.copy(newStudyConfiguration); } } return storagePipelineResult; }
@BeforeClass public static void beforeClass() throws Exception { HadoopVariantStorageEngine variantStorageManager = externalResource.getVariantStorageEngine(); externalResource.clearDB(variantStorageManager.getVariantTableName()); externalResource.clearDB(variantStorageManager.getArchiveTableName(STUDY_ID)); // URI inputUri = VariantStorageBaseTest.getResourceUri("sample1.genome.vcf"); URI inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); // URI inputUri = VariantStorageManagerTestUtils.getResourceUri("variant-test-file.vcf.gz"); studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); etlResult = VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageManager, studyConfiguration, new ObjectMap(Options.TRANSFORM_FORMAT.key(), "avro") .append(Options.ANNOTATE.key(), true) .append(Options.CALCULATE_STATS.key(), false) ); fileMetadata = variantStorageManager.readVariantFileMetadata(etlResult.getTransformResult()); VariantSetStats stats = fileMetadata.getStats(); Assert.assertNotNull(stats); try (VariantHadoopDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor()) { VariantHbaseTestUtils.printVariantsFromVariantsTable(dbAdaptor); VariantHbaseTestUtils.printVariantsFromArchiveTable(dbAdaptor, studyConfiguration); } }