/**
 * Computes the number of variants expected to end up loaded for the given file,
 * counting only the variant types the Hadoop storage engine targets.
 *
 * @param fileMetadata metadata of the transformed file, carrying per-type stats
 * @return sum of the per-type counts over {@code TARGET_VARIANT_TYPE_SET}
 */
default int getExpectedNumLoadedVariants(VariantFileMetadata fileMetadata) {
    return HadoopVariantStorageEngine.TARGET_VARIANT_TYPE_SET.stream()
            .mapToInt(type -> fileMetadata.getStats().getVariantTypeCount(type))
            .sum();
}
// Sum the expected number of loaded variants: only types in TARGET_VARIANT_TYPE_SET count.
// Per-type counts are keyed by the enum name (String); a missing key means zero.
int expectedCount = 0;
for (VariantType variantType : TARGET_VARIANT_TYPE_SET) {
    expectedCount += variantFileMetadata.getStats().getVariantTypeCounts().getOrDefault(variantType.toString(), 0);
    // NOTE(review): this breakdown loop is nested inside the outer per-type loop, so the
    // "Of which ..." lines would be logged once per target type — it looks like it should
    // sit after the accumulation loop; confirm against the full method (fragment is
    // truncated here).
    for (VariantType type : VariantType.values()) {
        if (!TARGET_VARIANT_TYPE_SET.contains(type)) {
            Integer countByType = variantFileMetadata.getStats().getVariantTypeCounts().get(type.toString());
            if (countByType != null && countByType > 0) {
                logger.info(" * Of which " + countByType + " are " + type.toString() + " variants.");
// Configure the progress logger with the total variant count when stats are available.
if (fileMetadata.getStats() != null) {
    progressLogger.setApproximateTotalCount(fileMetadata.getStats().getNumVariants());
    // NOTE(review): within this visible fragment the check below repeats the guard above,
    // making the else branch unreachable; confirm against the full method whether the outer
    // guard actually closes before this point (fragment is truncated here).
    if (fileMetadata.getStats() != null) {
        progressLogger = new ProgressLogger("Loaded variants for file \"" + input.getFileName() + "\" :", fileMetadata.getStats().getNumVariants());
    } else {
        progressLogger = new ProgressLogger("Loaded variants for file \"" + input.getFileName() + "\" :");
/**
 * Asserts that the archive table holds exactly the variants recorded for the file
 * (restricted to the types in {@code VARIANT_TYPES}) and returns them.
 *
 * @param studyConfiguration study the file belongs to
 * @param dbAdaptor          adaptor used to read the archive table
 * @param fileMetadata       metadata of the loaded file, carrying per-type stats
 * @return the set of variant identifiers read from the archive table
 */
public Set<String> checkArchiveTableLoadedVariants(StudyConfiguration studyConfiguration,
                                                   VariantHadoopDBAdaptor dbAdaptor,
                                                   VariantFileMetadata fileMetadata) {
    int fileId = Integer.valueOf(fileMetadata.getId());
    Set<String> loadedVariants = getVariants(dbAdaptor, studyConfiguration, fileId);
    // Expected count = sum of per-type counts, considering only the tracked types.
    int expectedCount = 0;
    for (Map.Entry<String, Integer> entry : fileMetadata.getStats().getVariantTypeCounts().entrySet()) {
        if (VARIANT_TYPES.contains(VariantType.valueOf(entry.getKey()))) {
            expectedCount += entry.getValue();
        }
    }
    assertEquals(expectedCount, loadedVariants.size());
    return loadedVariants;
}
@Override public boolean pre() { if (metadataPath != null) { Files.exists(metadataPath); try (InputStream inputStream = FileUtils.newInputStream(metadataPath)) { // Read global JSON file and copy its info into the already available VariantSource object VariantFileMetadata readMetadata = VariantReaderUtils.readVariantFileMetadataFromJson(inputStream); fileMetadata.setId(readMetadata.getId()); fileMetadata.setPath(readMetadata.getPath()); fileMetadata.setHeader(readMetadata.getHeader()); fileMetadata.setSamplesPosition(readMetadata.getSamplesPosition()); fileMetadata.setStats(readMetadata.getStats()); } catch (IOException ex) { throw new UncheckedIOException(ex); } } if (fileMetadata != null) { Map<String, Integer> samplesPosition = fileMetadata.getSamplesPosition(); this.samplesPosition = new LinkedHashMap<>(samplesPosition.size()); String[] samples = new String[samplesPosition.size()]; for (Map.Entry<String, Integer> entry : samplesPosition.entrySet()) { samples[entry.getValue()] = entry.getKey(); } for (int i = 0; i < samples.length; i++) { this.samplesPosition.put(samples[i], i); } } return true; }
@Test public void countVariants() { long totalCount = dbAdaptor.count(new Query()).first(); long count = TARGET_VARIANT_TYPE_SET.stream() .map(type -> fileMetadata.getStats().getVariantTypeCount(type)) .reduce((a, b) -> a + b) .orElse(0).longValue(); // count -= 1; // Deletion is in conflict with other variant: 1:10403:ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC:A assertEquals(count, totalCount); }
// Number of variants already loaded by a previous (resumed) execution; defaults to 0.
long alreadyLoadedVariants = options.getLong(ALREADY_LOADED_VARIANTS.key(), 0L);
// Accumulate how many variants are expected to be skipped: those whose type is in SKIPPED_VARIANTS.
for (Map.Entry<String, Integer> entry : fileMetadata.getStats().getVariantTypeCounts().entrySet()) {
    if (SKIPPED_VARIANTS.contains(VariantType.valueOf(entry.getKey()))) {
        expectedSkippedVariants += entry.getValue();
        // NOTE(review): the warn + per-type breakdown appears nested inside the accumulation
        // loop, so it would run once per skipped type — verify placement against the full
        // method (fragment is truncated here).
        logger.warn("There were " + writeResult.getSkippedVariants() + " skipped variants.");
        for (VariantType type : SKIPPED_VARIANTS) {
            // Counts are keyed by enum name; null means that type did not occur in the file.
            Integer countByType = fileMetadata.getStats().getVariantTypeCounts().get(type.toString());
            if (countByType != null && countByType > 0) {
                logger.info(" * Of which " + countByType + " are " + type.toString() + " variants.");
// Read the transformed file's metadata (including stats) from the sidecar meta file;
// storage-layer failures are rethrown as CatalogException so callers see a catalog-level error.
try {
    VariantFileMetadata fileMetadata = VariantReaderUtils.readVariantFileMetadata(metaFile, null);
    stats = fileMetadata.getStats();
} catch (StorageEngineException e) {
    // Cause is preserved so the original storage error stays in the stack trace.
    throw new CatalogException("Error reading file \"" + metaFile + "\"", e);
private VariantFileMetadata checkTransformedVariants(URI variantsJson, StudyConfiguration studyConfiguration, int expectedNumVariants) throws StorageEngineException { long start = System.currentTimeMillis(); VariantFileMetadata source = new VariantFileMetadata("6", VCF_TEST_FILE_NAME); VariantReader variantReader = VariantReaderUtils.getVariantReader(Paths.get(variantsJson.getPath()), source.toVariantStudyMetadata(String.valueOf(studyConfiguration.getStudyId()))); variantReader.open(); variantReader.pre(); List<Variant> read; int numVariants = 0; while ((read = variantReader.read(100)) != null && !read.isEmpty()) { numVariants += read.size(); } variantReader.post(); variantReader.close(); if (expectedNumVariants < 0) { expectedNumVariants = source.getStats().getNumVariants(); } else { assertEquals(expectedNumVariants, source.getStats().getNumVariants()); //9792 } assertEquals(expectedNumVariants, numVariants); //9792 logger.info("checkTransformedVariants time : " + (System.currentTimeMillis() - start) / 1000.0 + "s"); return source; }
@Test public void checkVariantTable() throws IOException { System.out.println("Query from HBase : " + dbAdaptor.getVariantTable()); HBaseManager hm = new HBaseManager(configuration.get()); GenomeHelper genomeHelper = dbAdaptor.getGenomeHelper(); int numVariants = hm.act(dbAdaptor.getVariantTable(), table -> { int num = 0; ResultScanner resultScanner = table.getScanner(genomeHelper.getColumnFamily()); for (Result result : resultScanner) { Variant variant = VariantPhoenixKeyFactory.extractVariantFromVariantRowKey(result.getRow()); System.out.println("Variant = " + variant); num++; } resultScanner.close(); return num; }); System.out.println("End query from HBase : " + dbAdaptor.getVariantTable()); System.out.println(fileMetadata.getStats().getVariantTypeCounts()); long count = TARGET_VARIANT_TYPE_SET.stream() .map(type -> fileMetadata.getStats().getVariantTypeCount(type)) .reduce((a, b) -> a + b).orElse(0).longValue(); // count -= 1; // Deletion is in conflict with other variant: 1:10403:ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC:A assertEquals(count, numVariants); }
/**
 * Iterates the archive table for the study/file and asserts that both the per-type
 * counts and the total count match the file's recorded stats.
 */
@Test
public void queryArchiveTable() {
    final int[] numVariants = {0};
    Map<String, Integer> variantCounts = new HashMap<>();
    System.out.println("Query from Archive table");
    Query query = new Query()
            .append(VariantQueryParam.STUDY.key(), studyConfiguration.getStudyId())
            .append(VariantQueryParam.FILE.key(), FILE_ID);
    dbAdaptor.iterator(query, new QueryOptions("archive", true)).forEachRemaining(variant -> {
        System.out.println("Variant from archive = " + variant.toJson());
        numVariants[0]++;
        // Tally one occurrence per variant type name.
        variantCounts.merge(variant.getType().toString(), 1, Integer::sum);
    });
    System.out.println("End query from Archive table");
    // Every recorded type count must match what was actually iterated (0 if absent).
    fileMetadata.getStats().getVariantTypeCounts()
            .forEach((typeName, expected) -> assertEquals(expected, variantCounts.getOrDefault(typeName, 0)));
    assertEquals(fileMetadata.getStats().getNumVariants(), numVariants[0]);
}
@Test public void testConvert() throws Exception { // Transform smallInputFile to get the expected meta file with stats StoragePipelineResult storagePipelineResult = runETL(variantStorageEngine, smallInputUri, newOutputUri(), new ObjectMap(VariantStorageEngine.Options.ISOLATE_FILE_FROM_STUDY_CONFIGURATION.key(), true), true, true, false); VariantFileMetadata expectedFileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(storagePipelineResult.getTransformResult()); // Read and convert the legacy metadata file InputStream resource = new GZIPInputStream(getClass().getResourceAsStream("/variant-test-file.vcf.gz.file_legacy.json.gz")); org.opencb.biodata.models.variant.avro.legacy.VariantSource legacy = new ObjectMapper().readValue(resource, org.opencb.biodata.models.variant.avro.legacy.VariantSource.class); VariantFileMetadata convertedFileMetadata = new VariantSourceToVariantFileMetadataConverter().convert(legacy); // Impossible to get StdDev from legacy VariantSource expectedFileMetadata.getStats().setStdDevQuality(0); assertEquals(expectedFileMetadata, convertedFileMetadata); }
/**
 * Reads the variant file metadata from a variant file through an initialized reader.
 * The reader is always closed, even if reading fails.
 *
 * @param reader   Initialized variant reader; must not be null
 * @param metadata Optional metadata instance to fill up; a fresh one is created when null
 * @return the metadata populated with header, sample ids and stats from the reader
 * @throws IOException if an I/O error occurs
 */
public static VariantFileMetadata readVariantFileMetadata(VariantReader reader, VariantFileMetadata metadata)
        throws IOException {
    Objects.requireNonNull(reader);
    VariantFileMetadata target = (metadata == null) ? new VariantFileMetadata("", "") : metadata;
    try {
        reader.open();
        reader.pre();
        VariantFileMetadata readMetadata = reader.getVariantFileMetadata();
        target.setHeader(readMetadata.getHeader());
        target.setSampleIds(readMetadata.getSampleIds());
        target.setStats(readMetadata.getStats());
        reader.post();
    } finally {
        reader.close();
    }
    return target;
}
// Total number of variants in the file, taken from the transform-time stats.
int numRecords = fileMetadata.getStats().getNumVariants();
// Batch size for load operations; falls back to the engine-wide default.
int batchSize = options.getInt(Options.LOAD_BATCH_SIZE.key(), Options.LOAD_BATCH_SIZE.defaultValue());
// Bulk size defaults to the batch size unless explicitly overridden.
int bulkSize = options.getInt(BULK_SIZE.key(), batchSize);
// Load the variant-file stats JSON and attach it to the catalog file entry.
try (InputStream is = FileUtils.newInputStream(statsFile)) {
    VariantFileMetadata fileMetadata = getDefaultObjectMapper().readValue(is, VariantFileMetadata.class);
    VariantSetStats stats = fileMetadata.getStats();
    // Nest the stats under the STATS query-param key expected by FileDBAdaptor.
    params = new ObjectMap(FileDBAdaptor.QueryParams.STATS.key(), new ObjectMap(VARIANT_FILE_STATS, stats));
    update(studyStr, vcf.getPath(), params, new QueryOptions(), sessionId);
// Total number of variants in the file, taken from the transform-time stats.
int numRecords = fileMetadata.getStats().getNumVariants();
// Batch size and thread count for the load stage, with engine-wide defaults.
int batchSize = options.getInt(Options.LOAD_BATCH_SIZE.key(), Options.LOAD_BATCH_SIZE.defaultValue());
int loadThreads = options.getInt(Options.LOAD_THREADS.key(), Options.LOAD_THREADS.defaultValue());
// Verify the transform stage produced exactly the number of variants recorded in the stats.
checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration, fileMetadata.getStats().getNumVariants());
VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor();
// Verify the load stage: only the engine's target variant types are expected in the DB.
checkLoadedVariants(dbAdaptor, studyConfiguration, true, false, false, getExpectedNumLoadedVariants(fileMetadata));
@Test public void testArchiveIterator() { int count = 0; Query query = new Query(VariantQueryParam.STUDY.key(), studyConfiguration.getStudyId()) .append(VariantQueryParam.FILE.key(), UriUtils.fileName(smallInputUri)); for (VariantDBIterator iterator = dbAdaptor.iterator(query, new QueryOptions("archive", true)); iterator.hasNext(); ) { Variant variant = iterator.next(); // System.out.println(variant.toJson()); count++; } Assert.assertEquals(fileMetadata.getStats().getNumVariants(), count); }
@BeforeClass public static void beforeClass() throws Exception { HadoopVariantStorageEngine variantStorageManager = externalResource.getVariantStorageEngine(); externalResource.clearDB(variantStorageManager.getVariantTableName()); externalResource.clearDB(variantStorageManager.getArchiveTableName(STUDY_ID)); // URI inputUri = VariantStorageBaseTest.getResourceUri("sample1.genome.vcf"); URI inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); // URI inputUri = VariantStorageManagerTestUtils.getResourceUri("variant-test-file.vcf.gz"); studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); etlResult = VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageManager, studyConfiguration, new ObjectMap(Options.TRANSFORM_FORMAT.key(), "avro") .append(Options.ANNOTATE.key(), true) .append(Options.CALCULATE_STATS.key(), false) ); fileMetadata = variantStorageManager.readVariantFileMetadata(etlResult.getTransformResult()); VariantSetStats stats = fileMetadata.getStats(); Assert.assertNotNull(stats); try (VariantHadoopDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor()) { VariantHbaseTestUtils.printVariantsFromVariantsTable(dbAdaptor); VariantHbaseTestUtils.printVariantsFromArchiveTable(dbAdaptor, studyConfiguration); } }