/**
 * Indexes the small input file using the "avro" transform format.
 *
 * <p>Verifies that the transform output file name ends with {@code variants.avro.gz}, that the
 * study configuration reports exactly one indexed file, and then delegates the content checks to
 * {@code checkTransformedVariants} and {@code checkLoadedVariants}.
 *
 * @throws Exception if any step of the pipeline or of the checks fails
 */
@Test
public void avroBasicIndex() throws Exception {
    clearDB(DB_NAME);
    StudyConfiguration studyConfiguration = newStudyConfiguration();

    ObjectMap avroOptions = new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro");
    StoragePipelineResult etlResult =
            runDefaultETL(smallInputUri, variantStorageEngine, studyConfiguration, avroOptions);

    // Build the failure message first, then inspect the name of the produced transform file.
    String wrongExtensionMessage = "Incorrect transform file extension " + etlResult.getTransformResult()
            + ". Expected 'variants.avro.gz'";
    String transformFileName = Paths.get(etlResult.getTransformResult()).toFile().getName();
    assertTrue(wrongExtensionMessage, transformFileName.endsWith("variants.avro.gz"));

    assertEquals(1, studyConfiguration.getIndexedFiles().size());

    // The metadata returned by the transform check drives the expected number of loaded variants.
    VariantFileMetadata fileMetadata =
            checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration);
    checkLoadedVariants(
            variantStorageEngine.getDBAdaptor(),
            studyConfiguration,
            true,
            false,
            true,
            getExpectedNumLoadedVariants(fileMetadata));
}
/**
 * Indexes the small input file using the "json" transform format.
 *
 * <p>Verifies that the transform output file name ends with {@code variants.json.gz}, reads the
 * file metadata back through the engine's variant reader utils, checks that exactly one file is
 * indexed, and then delegates the content checks to {@code checkTransformedVariants} and
 * {@code checkLoadedVariants}.
 *
 * @throws Exception if any step of the pipeline or of the checks fails
 */
@Test
public void basicIndex() throws Exception {
    clearDB(DB_NAME);
    StudyConfiguration studyConfiguration = newStudyConfiguration();

    ObjectMap jsonOptions = new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json");
    StoragePipelineResult etlResult =
            runDefaultETL(smallInputUri, variantStorageEngine, studyConfiguration, jsonOptions);

    // Build the failure message first, then inspect the name of the produced transform file.
    String wrongExtensionMessage = "Incorrect transform file extension " + etlResult.getTransformResult()
            + ". Expected 'variants.json.gz'";
    String transformFileName = Paths.get(etlResult.getTransformResult()).toFile().getName();
    assertTrue(wrongExtensionMessage, transformFileName.endsWith("variants.json.gz"));

    // Here the metadata is read from the transform result itself rather than taken from the check helper.
    VariantFileMetadata fileMetadata = variantStorageEngine
            .getVariantReaderUtils()
            .readVariantFileMetadata(etlResult.getTransformResult());

    assertEquals(1, studyConfiguration.getIndexedFiles().size());

    checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration);
    checkLoadedVariants(
            variantStorageEngine.getDBAdaptor(),
            studyConfiguration,
            true,
            false,
            true,
            getExpectedNumLoadedVariants(fileMetadata));
}
.append(VariantStorageEngine.Options.ANNOTATE.key(), false); runDefaultETL(getResourceUri("1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration, options); Integer defaultCohortId = studyConfiguration.getCohortIds().get(StudyEntry.DEFAULT_COHORT); checkLoadedVariants(getVariantStorageEngine().getDBAdaptor(), studyConfiguration, true, false, false, -1); runDefaultETL(getResourceUri("10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine,
.append(VariantStorageEngine.Options.ANNOTATE.key(), false); runDefaultETL(getResourceUri("1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration1, options); runDefaultETL(getResourceUri("1000g_batches/501-1000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration1, options); runDefaultETL(getResourceUri("1000g_batches/1001-1500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration2, options); runDefaultETL(getResourceUri("1000g_batches/1501-2000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration2, options); runDefaultETL(getResourceUri("1000g_batches/2001-2504.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration2, options);
/**
 * Expects the load of a second file to be rejected when split-data loading is disabled.
 *
 * <p>A chr1 file is indexed first. The chr22 file name is then registered in the study
 * configuration with file id 6, and the test expects the second ETL run (with
 * {@code LOAD_SPLIT_DATA} set to {@code false}) to fail with the same exception class and message
 * as {@code StorageEngineException.alreadyLoadedSamples(studyConfiguration, 6)}.
 *
 * @throws Exception if the first ETL run fails, or to propagate the expected failure
 */
@Test
public void multiRegionIndexFail() throws Exception {
    String chr1Resource = "1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz";
    String chr22Resource = "10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz";
    int chr22FileId = 6;

    clearDB(DB_NAME);
    StudyConfiguration studyConfiguration = new StudyConfiguration(1, "multiRegion");

    ObjectMap options = new ObjectMap();
    options.append(VariantStorageEngine.Options.STUDY_TYPE.key(), SampleSetType.CONTROL_SET);
    options.append(VariantStorageEngine.Options.CALCULATE_STATS.key(), true);
    options.append(VariantStorageEngine.Options.ANNOTATE.key(), false);

    // First region: expected to load without problems.
    runDefaultETL(getResourceUri(chr1Resource), variantStorageEngine, studyConfiguration, options);

    // Pre-register the second file so the expected exception can be built with a known file id.
    studyConfiguration.getFileIds().put(chr22Resource, chr22FileId);
    StorageEngineException expectedFailure =
            StorageEngineException.alreadyLoadedSamples(studyConfiguration, chr22FileId);
    thrown.expect(expectedFailure.getClass());
    thrown.expectMessage(expectedFailure.getMessage());

    // Second region without split-data support: this call is the one that must throw.
    runDefaultETL(
            getResourceUri(chr22Resource),
            variantStorageEngine,
            studyConfiguration,
            options.append(VariantStorageEngine.Options.LOAD_SPLIT_DATA.key(), false));
}
/**
 * Loads a previously transformed avro file by feeding it to the engine through {@code System.in}.
 *
 * <p>The small input is first run through {@code runDefaultETL}; the resulting transform file is
 * checked and then moved to {@code temp_file} under the output directory, so it no longer exists
 * at its original location. {@code System.in} is redirected to the moved file, the {@code STDIN}
 * option is enabled in the engine's configuration, and {@code index} is invoked with the original
 * (now missing) transform location. Finally the study configuration is re-read from the
 * study configuration manager and the loaded variants are checked.
 *
 * <p>Both pieces of global state touched by the test — {@code System.in} and the {@code STDIN}
 * option stored in the engine's configuration — are restored in the {@code finally} block.
 *
 * @throws Exception if any step of the pipeline or of the checks fails
 */
@Test
public void loadFromSTDIN() throws Exception {
    clearDB(DB_NAME);
    StudyConfiguration studyConfiguration = newStudyConfiguration();
    // NOTE(review): the trailing (true, false) flags presumably mean "transform, but do not load" — confirm
    // against the runDefaultETL overload.
    StoragePipelineResult etlResult = runDefaultETL(smallInputUri, variantStorageEngine, studyConfiguration,
            new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro"), true, false);

    VariantFileMetadata fileMetadata = checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration);

    // Move the transform file away and make sure it is gone from its original location.
    Path tempFile = Paths.get(outputUri).resolve("temp_file");
    Files.move(Paths.get(etlResult.getTransformResult()), tempFile);
    assertFalse(Files.exists(Paths.get(etlResult.getTransformResult())));

    InputStream in = System.in;
    Object previousStdinOption = null;
    boolean stdinOptionChanged = false;
    try (InputStream is = new FileInputStream(tempFile.toFile())) {
        System.setIn(is);
        // Remember whatever value was configured before, so it can be put back afterwards.
        previousStdinOption = variantStorageEngine.getConfiguration()
                .getStorageEngine(variantStorageEngine.getStorageEngineId()).getVariant().getOptions()
                .put(VariantStorageEngine.Options.STDIN.key(), true);
        stdinOptionChanged = true;

        // NOTE(review): the (false, false, true) flags presumably select "load only" — confirm against index().
        variantStorageEngine.index(Collections.singletonList(etlResult.getTransformResult()), outputUri, false, false, true);
    } finally {
        System.setIn(in);
        // Undo the configuration change so the STDIN flag does not outlive this test's use of it.
        if (stdinOptionChanged) {
            if (previousStdinOption == null) {
                variantStorageEngine.getConfiguration()
                        .getStorageEngine(variantStorageEngine.getStorageEngineId()).getVariant().getOptions()
                        .remove(VariantStorageEngine.Options.STDIN.key());
            } else {
                variantStorageEngine.getConfiguration()
                        .getStorageEngine(variantStorageEngine.getStorageEngineId()).getVariant().getOptions()
                        .put(VariantStorageEngine.Options.STDIN.key(), previousStdinOption);
            }
        }
    }

    // Re-read the study configuration from the manager instead of relying on the local instance.
    studyConfiguration = variantStorageEngine.getStudyConfigurationManager().getStudyConfiguration(STUDY_NAME, null).first();
    assertEquals(1, studyConfiguration.getIndexedFiles().size());
    checkLoadedVariants(variantStorageEngine.getDBAdaptor(), studyConfiguration, true, false, true,
            getExpectedNumLoadedVariants(fileMetadata));
}
.append(VariantStorageEngine.Options.ANNOTATE.key(), false); URI file1Uri = getResourceUri("1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"); runDefaultETL(file1Uri, variantStorageEngine, studyConfigurationMultiFile, options); Integer defaultCohortId = studyConfigurationMultiFile.getCohortIds().get(StudyEntry.DEFAULT_COHORT); assertTrue(studyConfigurationMultiFile.getCohorts().containsKey(defaultCohortId)); runDefaultETL(file2Uri, variantStorageEngine, studyConfigurationMultiFile, options); assertEquals(1000, studyConfigurationMultiFile.getCohorts().get(defaultCohortId).size()); assertEquals(Collections.singleton(defaultCohortId), studyConfigurationMultiFile.getCalculatedStats()); runDefaultETL(file3Uri, variantStorageEngine, studyConfigurationMultiFile, options); assertEquals(1500, studyConfigurationMultiFile.getCohorts().get(defaultCohortId).size()); assertEquals(Collections.emptySet(), studyConfigurationMultiFile.getCalculatedStats()); runDefaultETL(file4Uri, variantStorageEngine, studyConfigurationMultiFile, options); assertEquals(2000, studyConfigurationMultiFile.getCohorts().get(defaultCohortId).size()); int fileId4 = studyConfigurationMultiFile.getFileIds().get(UriUtils.fileName(file4Uri)); runDefaultETL(file5Uri, variantStorageEngine, studyConfigurationMultiFile, options); int fileId5 = studyConfigurationMultiFile.getFileIds().get(UriUtils.fileName(file5Uri)); assertEquals(2504, studyConfigurationMultiFile.getCohorts().get(defaultCohortId).size()); etlResult = runDefaultETL(fileUri, variantStorageEngine, studyConfigurationSingleFile, options); int fileId = studyConfigurationSingleFile.getFileIds().get(UriUtils.fileName(fileUri)); assertTrue(studyConfigurationSingleFile.getIndexedFiles().contains(fileId));
StoragePipelineResult etlResult = runDefaultETL(smallInputUri, getVariantStorageEngine(), studyConfiguration, new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), extraFields) .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro")
/**
 * Indexes the small input file with {@code EXTRA_GENOTYPE_FIELDS} set to
 * {@code VariantQueryUtils.NONE} and asserts that every variant returned by the DB adaptor reports
 * {@code "GT"} as its format string for {@code STUDY_NAME}.
 *
 * <p>Annotation and stats calculation are disabled for this run, and the "avro" transform format
 * is used.
 *
 * @throws Exception if the pipeline fails or a variant carries an unexpected format
 */
@Test
public void indexWithoutOtherFields() throws Exception {
    StudyConfiguration studyConfiguration = newStudyConfiguration();

    ObjectMap etlOptions = new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), VariantQueryUtils.NONE)
            .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro")
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);
    runDefaultETL(smallInputUri, getVariantStorageEngine(), studyConfiguration, etlOptions);

    // With no extra genotype fields requested, only the genotype itself should be stored.
    for (Variant loadedVariant : variantStorageEngine.getDBAdaptor()) {
        String sampleFormat = loadedVariant.getStudy(STUDY_NAME).getFormatAsString();
        assertEquals("GT", sampleFormat);
    }
}
ObjectMap fileOptions = new ObjectMap(); fileOptions.putAll(options); runDefaultETL(getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA128" + fileId + "_S1.genome.vcf.gz"), variantStorageManager, studyConfigurationMultiFile, fileOptions); studyConfigurationMultiFile = studyConfigurationManager.getStudyConfiguration(studyConfigurationMultiFile.getStudyId(), null).first();