public StudyConfiguration testPlatinumFilesOneByOne(ObjectMap otherParams, int maxFilesLoaded) throws Exception { StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); List<VariantFileMetadata> filesMetadata = new LinkedList<>(); Set<String> expectedVariants = new HashSet<>();
@Test public void removeSingleFileTest() throws Exception { StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); System.out.println("studyConfiguration = " + studyConfiguration); String studyName = studyConfiguration.getStudyName(); Map<String, Object> options = Collections.singletonMap(HadoopVariantStorageEngine.VARIANT_TABLE_INDEXES_SKIP, true); loadFile("s1.genome.vcf", studyConfiguration, options); Map<String, Variant> variants = buildVariantsIdx(); assertFalse(variants.containsKey("1:10014:A:G")); assertTrue(variants.containsKey("1:10013:T:C")); assertEquals("0/1", variants.get("1:10013:T:C").getStudy(studyName).getSampleData("s1", "GT")); VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor(); VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri()); // delete removeFile("s1.genome.vcf", studyConfiguration, options); VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri()); checkSampleIndexTable(studyConfiguration, dbAdaptor, "s1.genome.vcf"); variants = buildVariantsIdx(); assertEquals("Expected none variants", 0, variants.size()); assertEquals("Expected none indexed files", 0, studyConfiguration.getIndexedFiles().size()); }
@Test public void removeFileTestMergeBasicFillGaps() throws Exception { StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); System.out.println("studyConfiguration = " + studyConfiguration); String studyName = studyConfiguration.getStudyName();
params.put(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro"); StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor(); loadFile("s1.genome.vcf", studyConfiguration, params);
@Test public void removeFileTestMergeBasic() throws Exception { StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); System.out.println("studyConfiguration = " + studyConfiguration); String studyName = studyConfiguration.getStudyName();
StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor();
@Test @Ignore public void removeFileTestMergeAdvanced() throws Exception { StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); System.out.println("studyConfiguration = " + studyConfiguration); String studyName = studyConfiguration.getStudyName();
/**
 * Loads the two-sample file {@code s1_s2.genome.vcf} with
 * {@code VARIANT_TABLE_LOAD_REFERENCE} enabled and checks that no sample genotype is the
 * unknown genotype, that every HOM_REF genotype carries a numeric {@code DP}, and that at
 * least one HOM_REF genotype was seen.
 *
 * @throws Exception if loading or reading the variants fails
 */
@Test
public void testMultiSampleFile() throws Exception {
    StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration();
    VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor();

    ObjectMap loadParams = new ObjectMap(HadoopVariantStorageEngine.VARIANT_TABLE_LOAD_REFERENCE, true);
    loadFile("s1_s2.genome.vcf", studyConfiguration, loadParams);
    checkArchiveTableTimeStamp(dbAdaptor);
    printVariants(studyConfiguration, dbAdaptor, newOutputUri());

    int homRefCount = 0;
    for (Variant variant : dbAdaptor) {
        StudyEntry firstStudy = variant.getStudies().get(0);
        for (String sampleName : firstStudy.getSamplesName()) {
            String genotype = firstStudy.getSampleData(sampleName, "GT");
            assertNotEquals(GenotypeClass.UNKNOWN_GENOTYPE, genotype);
            if (!GenotypeClass.HOM_REF.test(genotype)) {
                continue;
            }
            // Only HOM_REF calls are counted and required to have a numeric depth.
            homRefCount++;
            assertTrue(StringUtils.isNumeric(firstStudy.getSampleData(sampleName, "DP")));
        }
    }
    assertNotEquals(0, homRefCount);
}
public void testMultipleFilesConcurrent(ObjectMap extraParams) throws Exception { StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); HadoopVariantStorageEngine variantStorageManager = getVariantStorageEngine(); VariantHadoopDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor();
/**
 * Loads {@code s1.genome.vcf} and then {@code s2.genome.vcf} into the same study, checking
 * the archive table timestamp after each load, and finally delegates the verification of
 * the result to {@code checkLoadedFilesS1S2}.
 *
 * @throws Exception if any load or check fails
 */
@Test
public void testTwoFiles() throws Exception {
    StudyConfiguration initialConfiguration = VariantStorageBaseTest.newStudyConfiguration();
    VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor();

    // First file.
    loadFile("s1.genome.vcf", initialConfiguration, new ObjectMap());
    checkArchiveTableTimeStamp(dbAdaptor);

    // Re-read the study configuration from the manager before loading the second file.
    StudyConfiguration studyConfiguration = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(initialConfiguration.getStudyId(), null)
            .first();

    // Second file.
    loadFile("s2.genome.vcf", studyConfiguration, new ObjectMap());
    checkArchiveTableTimeStamp(dbAdaptor);

    printVariants(studyConfiguration, dbAdaptor, newOutputUri());
    checkLoadedFilesS1S2(studyConfiguration, dbAdaptor);
}
/**
 * Indexes {@code s1.genome.vcf} and {@code s2.genome.vcf} in a single {@code index} call,
 * using direct load and the {@code proto} transform format, then verifies the result with
 * {@code checkLoadedFilesS1S2} and asserts that files 1 and 2 are reported as indexed.
 *
 * @throws Exception if indexing or any check fails
 */
@Test
public void testTwoFilesConcurrent() throws Exception {
    StudyConfiguration initialConfiguration = VariantStorageBaseTest.newStudyConfiguration();
    HadoopVariantStorageEngine variantStorageManager = getVariantStorageEngine();

    // The options object is taken from the engine configuration and modified in place.
    ObjectMap options = variantStorageManager.getConfiguration()
            .getStorageEngine(variantStorageManager.getStorageEngineId())
            .getVariant()
            .getOptions();
    options.put(HadoopVariantStorageEngine.HADOOP_LOAD_DIRECT, true);
    options.put(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "proto");
    options.put(VariantStorageEngine.Options.STUDY.key(), initialConfiguration.getStudyName());

    List<URI> inputFiles = Arrays.asList(
            getResourceUri("s1.genome.vcf"),
            getResourceUri("s2.genome.vcf"));
    List<StoragePipelineResult> pipelineResults =
            variantStorageManager.index(inputFiles, outputUri, true, true, true);

    VariantHadoopDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor();
    StudyConfiguration studyConfiguration = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(initialConfiguration.getStudyId(), null)
            .first();

    pipelineResults.forEach(System.out::println);

    printVariants(studyConfiguration, dbAdaptor, newOutputUri());
    for (Variant loadedVariant : dbAdaptor) {
        System.out.println("variant = " + loadedVariant);
    }

    checkLoadedFilesS1S2(studyConfiguration, dbAdaptor);
    assertThat(studyConfiguration.getIndexedFiles(), hasItems(1, 2));
}
/**
 * Loads {@code s1.genome.vcf} and {@code s2.genome.vcf} with {@code ARCHIVE_FIELDS} set to
 * {@code NONE}, direct load, BASIC merge mode and the {@code avro} transform format, runs
 * fill-gaps for samples {@code s1} and {@code s2}, and then scans archive table 1 asserting
 * that no column qualifier ends with {@code ArchiveTableHelper.REF_COLUMN_SUFIX}.
 *
 * @throws Exception if loading, fill-gaps or the table scan fails
 */
@Test
public void testTwoFilesBasicAggregateNoneArchiveRefFields() throws Exception {
    ObjectMap params = new ObjectMap();
    params.put(HadoopVariantStorageEngine.ARCHIVE_FIELDS, VariantQueryUtils.NONE);
    params.put(HadoopVariantStorageEngine.HADOOP_LOAD_DIRECT, true);
    params.put(VariantStorageEngine.Options.MERGE_MODE.key(), VariantStorageEngine.MergeMode.BASIC);
    params.put(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro");

    StudyConfiguration initialConfiguration = VariantStorageBaseTest.newStudyConfiguration();
    VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor();

    // First file.
    loadFile("s1.genome.vcf", initialConfiguration, params);
    checkArchiveTableTimeStamp(dbAdaptor);

    // Second file, loaded against the configuration re-read from the manager.
    StudyConfiguration afterFirstLoad = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(initialConfiguration.getStudyId(), null)
            .first();
    loadFile("s2.genome.vcf", afterFirstLoad, params);
    printVariants(afterFirstLoad, dbAdaptor, newOutputUri());
    checkArchiveTableTimeStamp(dbAdaptor);

    // Fill gaps for both samples.
    getVariantStorageEngine().fillGaps(
            afterFirstLoad.getStudyName(),
            Arrays.asList("s1", "s2"),
            new ObjectMap("local", true));

    StudyConfiguration afterFillGaps = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(afterFirstLoad.getStudyId(), null)
            .first();
    printVariants(afterFillGaps, dbAdaptor, newOutputUri());

    // NOTE(review): the scanner returned by getScanner is not closed here — confirm whether
    // that matters for the test cluster.
    dbAdaptor.getHBaseManager().act(dbAdaptor.getArchiveTableName(1), table -> {
        for (Result row : table.getScanner(new Scan())) {
            for (byte[] qualifier : row.getFamilyMap(dbAdaptor.getGenomeHelper().getColumnFamily()).keySet()) {
                assertFalse(Bytes.toString(qualifier).endsWith(ArchiveTableHelper.REF_COLUMN_SUFIX));
            }
        }
    });
}
/**
 * Same scenario as loading the two sample files one after the other, but in the opposite
 * order: {@code s2.genome.vcf} first, then {@code s1.genome.vcf}. The archive table
 * timestamp is checked after each load and the final state is verified by
 * {@code checkLoadedFilesS1S2}.
 *
 * @throws Exception if any load or check fails
 */
@Test
public void testTwoFiles_reverse() throws Exception {
    StudyConfiguration initialConfiguration = VariantStorageBaseTest.newStudyConfiguration();
    VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor();

    // s2 goes in first.
    loadFile("s2.genome.vcf", initialConfiguration, new ObjectMap());
    checkArchiveTableTimeStamp(dbAdaptor);

    // Re-read the study configuration from the manager before loading s1.
    StudyConfiguration studyConfiguration = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(initialConfiguration.getStudyId(), null)
            .first();

    loadFile("s1.genome.vcf", studyConfiguration, new ObjectMap());
    checkArchiveTableTimeStamp(dbAdaptor);

    printVariants(studyConfiguration, dbAdaptor, newOutputUri());
    checkLoadedFilesS1S2(studyConfiguration, dbAdaptor);
}
/**
 * Indexes the given input files into a freshly created study and returns the study
 * configuration re-read from the study configuration manager afterwards.
 *
 * <p>The engine's variant options are modified in place: study name, gVCF flag, skipping of
 * variant table indexes, an archive batch size of 1 and BASIC merge mode are set first, and
 * {@code extraParams} is applied last so that it can override any of them.
 *
 * @param extraParams additional options, applied on top of the defaults set here
 * @param inputFiles  files passed to the engine's {@code index} call
 * @return the study configuration as read back after indexing
 * @throws Exception if indexing or reading back the configuration fails
 */
private StudyConfiguration load(ObjectMap extraParams, List<URI> inputFiles) throws Exception {
    StudyConfiguration initialConfiguration = VariantStorageBaseTest.newStudyConfiguration();
    HadoopVariantStorageEngine variantStorageManager = getVariantStorageEngine();
    VariantHadoopDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor();

    ObjectMap options = variantStorageManager.getConfiguration()
            .getStorageEngine(variantStorageManager.getStorageEngineId())
            .getVariant()
            .getOptions();
    options.put(VariantStorageEngine.Options.STUDY.key(), initialConfiguration.getStudyName());
    options.put(VariantStorageEngine.Options.GVCF.key(), true);
    options.put(HadoopVariantStorageEngine.VARIANT_TABLE_INDEXES_SKIP, true);
    options.put(HadoopVariantStorageEngine.HADOOP_LOAD_ARCHIVE_BATCH_SIZE, 1);
    options.put(VariantStorageEngine.Options.MERGE_MODE.key(), VariantStorageEngine.MergeMode.BASIC);
    options.putAll(extraParams);

    // Indexing writes to the class-level outputUri; the URI used for printing below is a
    // separate one and is deliberately named differently to avoid shadowing the field.
    List<StoragePipelineResult> pipelineResults =
            variantStorageManager.index(inputFiles, outputUri, true, true, true);
    pipelineResults.forEach(System.out::println);

    URI printOutputUri = newOutputUri(1);
    StudyConfiguration studyConfiguration = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(initialConfiguration.getStudyId(), null)
            .first();
    printVariants(studyConfiguration, dbAdaptor, printOutputUri);
    return studyConfiguration;
}
@BeforeClass public static void beforeClass() throws Exception { HadoopVariantStorageEngine variantStorageManager = externalResource.getVariantStorageEngine(); externalResource.clearDB(variantStorageManager.getVariantTableName()); externalResource.clearDB(variantStorageManager.getArchiveTableName(STUDY_ID)); // URI inputUri = VariantStorageBaseTest.getResourceUri("sample1.genome.vcf"); URI inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); // URI inputUri = VariantStorageManagerTestUtils.getResourceUri("variant-test-file.vcf.gz"); studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); etlResult = VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageManager, studyConfiguration, new ObjectMap(Options.TRANSFORM_FORMAT.key(), "avro") .append(Options.ANNOTATE.key(), true) .append(Options.CALCULATE_STATS.key(), false) ); fileMetadata = variantStorageManager.readVariantFileMetadata(etlResult.getTransformResult()); VariantSetStats stats = fileMetadata.getStats(); Assert.assertNotNull(stats); try (VariantHadoopDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor()) { VariantHbaseTestUtils.printVariantsFromVariantsTable(dbAdaptor); VariantHbaseTestUtils.printVariantsFromArchiveTable(dbAdaptor, studyConfiguration); } }
/**
 * Loads {@code s1.genome.vcf} and {@code s2.genome.vcf} with direct load, BASIC merge mode
 * and the {@code avro} transform format, runs fill-missing on the study and then delegates
 * the verification of the result to {@code checkLoadedFilesS1S2}.
 *
 * @throws Exception if loading, fill-missing or any check fails
 */
@Test
public void testTwoFilesBasicFillMissing() throws Exception {
    ObjectMap params = new ObjectMap();
    params.put(HadoopVariantStorageEngine.HADOOP_LOAD_DIRECT, true);
    params.put(VariantStorageEngine.Options.MERGE_MODE.key(), VariantStorageEngine.MergeMode.BASIC);
    params.put(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro");

    StudyConfiguration initialConfiguration = VariantStorageBaseTest.newStudyConfiguration();
    VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor();

    // First file.
    loadFile("s1.genome.vcf", initialConfiguration, params);
    checkArchiveTableTimeStamp(dbAdaptor);

    // Second file, loaded against the configuration re-read from the manager.
    StudyConfiguration afterFirstLoad = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(initialConfiguration.getStudyId(), null)
            .first();
    loadFile("s2.genome.vcf", afterFirstLoad, params);
    printVariants(afterFirstLoad, dbAdaptor, newOutputUri());
    checkArchiveTableTimeStamp(dbAdaptor);

    // Fill missing, then verify against the configuration read back afterwards.
    getVariantStorageEngine().fillMissing(afterFirstLoad.getStudyName(), new ObjectMap("local", true), false);
    StudyConfiguration afterFillMissing = dbAdaptor.getStudyConfigurationManager()
            .getStudyConfiguration(afterFirstLoad.getStudyId(), null)
            .first();
    printVariants(afterFillMissing, dbAdaptor, newOutputUri());
    checkLoadedFilesS1S2(afterFillMissing, dbAdaptor);
}