/**
 * Transforms a file whose content is read from the standard input (STDIN option enabled),
 * so the named input file does not need to exist on disk.
 * Fix: the original never restored {@code System.in} nor closed the opened stream,
 * leaking the stream and polluting subsequent tests.
 */
@Test
public void transformFromSTDIN() throws Exception {
    URI outputUri = newOutputUri();
    VariantStorageEngine variantStorageManager = getVariantStorageEngine();
    // Keep the original stdin so it can be restored even if the test fails.
    InputStream originalStdin = System.in;
    try (InputStream inputStream = FileUtils.newInputStream(Paths.get(smallInputUri))) {
        System.setIn(inputStream);
        ObjectMap options = variantStorageManager.getConfiguration()
                .getStorageEngine(variantStorageManager.getStorageEngineId()).getVariant().getOptions();
        options.append(VariantStorageEngine.Options.STDIN.key(), true);

        // Use just the file name, pointing at a location where the file does NOT exist;
        // the content must come exclusively from stdin.
        URI inputFile = Paths.get(smallInputUri).getFileName().toUri();
        System.out.println("inputFile = " + inputFile);
        assertFalse("File should not exist in that specific location", Paths.get(inputFile).toFile().exists());

        StoragePipelineResult storagePipelineResult =
                variantStorageManager.index(Collections.singletonList(inputFile), outputUri, true, true, false).get(0);
        assertEquals(999, countLinesFromAvro(Paths.get(storagePipelineResult.getTransformResult()).toFile()));
    } finally {
        // Restore stdin so other tests are not affected.
        System.setIn(originalStdin);
    }
}
/**
 * Ensures a StudyConfiguration is available: uses the given one, otherwise loads it from
 * the manager, and as a last resort creates a brand-new study named after the STUDY option.
 * Also records the resolved configuration and study id on this instance.
 *
 * @param studyConfiguration candidate configuration, may be null
 * @return the resolved (possibly newly created) StudyConfiguration
 * @throws StorageEngineException if the study cannot be created or loaded
 */
protected StudyConfiguration checkExistsStudyConfiguration(StudyConfiguration studyConfiguration) throws StorageEngineException {
    StudyConfiguration resolved = studyConfiguration != null ? studyConfiguration : getStudyConfiguration();
    if (resolved == null) {
        // Nothing stored yet: create a new study using the configured (or default) name.
        String studyName = options.getString(Options.STUDY.key(), Options.STUDY.defaultValue());
        logger.info("Creating a new StudyConfiguration '{}'", studyName);
        resolved = getStudyConfigurationManager().createStudy(studyName);
    }
    privateStudyConfiguration = resolved;
    setStudyId(resolved.getStudyId());
    return resolved;
}
/**
 * Indexes a file with an empty HBase namespace and verifies the variants are queryable.
 * Fix: {@code Admin} is {@code Closeable} and was never closed — wrap it in
 * try-with-resources to avoid leaking the HBase admin connection.
 */
@Test
public void testNoNamespace() throws Exception {
    runDefaultETL(smallInputUri, getVariantStorageEngine(), newStudyConfiguration(), new ObjectMap()
            .append(HadoopVariantStorageEngine.HBASE_NAMESPACE, "")
            .append(VariantStorageEngine.Options.ANNOTATE.key(), true)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), true));

    HadoopVariantStorageEngine variantStorageManager = getVariantStorageEngine();
    // Dump the namespaces/tables for debugging; close the Admin when done.
    try (Admin admin = variantStorageManager.getDBAdaptor().getConnection().getAdmin()) {
        for (NamespaceDescriptor namespaceDescriptor : admin.listNamespaceDescriptors()) {
            System.out.println("namespaceDescriptor = " + namespaceDescriptor);
            for (TableName tableName : admin.listTableNamesByNamespace(namespaceDescriptor.getName())) {
                System.out.println("\ttableName = " + tableName);
            }
        }
    }
    assertTrue(variantStorageManager.getDBAdaptor().count(null).first() > 0);
}
/**
 * Lazily prepares the test fixture: on first run clears the database, creates a fresh
 * study configuration and indexes every VCF test file (annotation and stats disabled).
 * Cached results in {@code etlResult} make repeated invocations cheap.
 */
@Override
@Before
public void before() throws Exception {
    if (studyConfiguration == null) {
        clearDB(DB_NAME);
        studyConfiguration = newStudyConfiguration();
    }
    for (int fileIdx = 0; fileIdx < VCF_TEST_FILE_NAMES.length; fileIdx++) {
        if (etlResult[fileIdx] != null) {
            continue; // already indexed on a previous run
        }
        ObjectMap params = new ObjectMap(VariantStorageEngine.Options.ANNOTATE.key(), false)
                .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);
        etlResult[fileIdx] = runDefaultETL(inputUri[fileIdx], getVariantStorageEngine(), studyConfiguration, params);
    }
    dbAdaptor = getVariantStorageEngine().getDBAdaptor();
}
/**
 * Indexes platinum files one by one with BASIC merge mode, then fills gaps in three
 * passes (first half of the samples, second half, then all), printing the variants
 * after each pass.
 */
@Test
public void testPlatinumFilesOneByOne_MergeBasic() throws Exception {
    ObjectMap params = new ObjectMap()
            .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro")
            .append(VariantStorageEngine.Options.MERGE_MODE.key(), VariantStorageEngine.MergeMode.BASIC);
    StudyConfiguration studyConfiguration = testPlatinumFilesOneByOne(params, 4);

    HadoopVariantStorageEngine variantStorageEngine = getVariantStorageEngine();
    VariantHadoopDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor();

    List<Integer> sampleIds = new ArrayList<>(studyConfiguration.getSampleIds().values());
    int half = sampleIds.size() / 2;

    // Pass 1: fill gaps for the first half of the samples only.
    FillGapsTaskTest.fillGaps(variantStorageEngine, studyConfiguration, sampleIds.subList(0, half));
    printVariants(studyConfiguration, dbAdaptor, newOutputUri());

    // Pass 2: fill gaps for the remaining samples.
    FillGapsTaskTest.fillGaps(variantStorageEngine, studyConfiguration, sampleIds.subList(half, sampleIds.size()));
    printVariants(studyConfiguration, dbAdaptor, newOutputUri());

    // Pass 3: fill gaps for every sample.
    FillGapsTaskTest.fillGaps(variantStorageEngine, studyConfiguration, sampleIds);
    printVariants(studyConfiguration, dbAdaptor, newOutputUri());
}
/**
 * Verifies that computing stats on a cohort stuck in CALCULATING fails with the expected
 * error, and that the computation succeeds when retried with the RESUME option.
 */
@Test
public void testResumeCalculateStats() throws Exception {
    before();

    calculateStats(coh[0]);

    // Simulate a previous run that was interrupted mid-calculation.
    catalogManager.getCohortManager().setStatus(studyId, coh[1], Cohort.CohortStatus.CALCULATING, "", sessionId);
    Cohort calculatingCohort = catalogManager.getCohortManager().get(studyId, coh[1], null, sessionId).first();
    Exception expected = VariantStatsStorageOperation.unableToCalculateCohortCalculating(calculatingCohort);

    try {
        calculateStats(coh[1]);
        fail();
    } catch (Exception e) {
        assertThat(e, instanceOf(expected.getClass()));
        assertThat(e, hasMessage(is(expected.getMessage())));
    }

    // RESUME must allow the stalled calculation to be retried.
    calculateStats(coh[1], new QueryOptions(VariantStorageEngine.Options.RESUME.key(), true));
}
@Test public void calculateAggregatedStatsTest() throws Exception { //Calculate stats for 2 cohorts at one time VariantStatisticsManager vsm = variantStorageEngine.newVariantStatisticsManager(); checkAggregatedCohorts(dbAdaptor, studyConfiguration); QueryOptions options = new QueryOptions(); options.put(VariantStorageEngine.Options.LOAD_BATCH_SIZE.key(), 100); options.put(DefaultVariantStatisticsManager.OUTPUT, outputUri.resolve("aggregated.stats").getPath()); if (getAggregationMappingFile() != null) { options.put(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), getAggregationMappingFile()); } //Calculate stats List<String> cohorts = Collections.singletonList(StudyEntry.DEFAULT_COHORT); vsm.calculateStatistics(studyConfiguration.getStudyName(), cohorts, options); checkAggregatedCohorts(dbAdaptor, studyConfiguration); }
public File beforeAggregated(String fileName, Aggregation aggregation) throws Exception { Map<String, Object> attributes; if (aggregation != null) { attributes = Collections.singletonMap(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), aggregation); } else { attributes = Collections.emptyMap(); } catalogManager.getStudyManager().update(studyId, new ObjectMap(StudyDBAdaptor.QueryParams.ATTRIBUTES.key(), attributes), null, sessionId); File file1 = opencga.createFile(studyId, fileName, sessionId); // coh0 = catalogManager.createCohort(studyId, "coh0", Cohort.Type.CONTROL_SET, "", file1.getSampleIds(), null, sessionId).first().getId(); QueryOptions queryOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false); queryOptions.putIfNotNull(StorageOperation.CATALOG_PATH, outputId); variantManager.index(studyId, file1.getId(), createTmpOutdir(file1), queryOptions, sessionId); return file1; }
/**
 * Resolves the aggregation type: the AGGREGATED_TYPE option wins, otherwise the
 * study configuration's own aggregation is used.
 */
private static Aggregation getAggregation(StudyConfiguration studyConfiguration, ObjectMap options) {
    String fallback = studyConfiguration.getAggregation().toString();
    String value = options.getString(Options.AGGREGATED_TYPE.key(), fallback);
    return AggregationUtils.valueOf(value);
}
@Test public void testConvert() throws Exception { // Transform smallInputFile to get the expected meta file with stats StoragePipelineResult storagePipelineResult = runETL(variantStorageEngine, smallInputUri, newOutputUri(), new ObjectMap(VariantStorageEngine.Options.ISOLATE_FILE_FROM_STUDY_CONFIGURATION.key(), true), true, true, false); VariantFileMetadata expectedFileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(storagePipelineResult.getTransformResult()); // Read and convert the legacy metadata file InputStream resource = new GZIPInputStream(getClass().getResourceAsStream("/variant-test-file.vcf.gz.file_legacy.json.gz")); org.opencb.biodata.models.variant.avro.legacy.VariantSource legacy = new ObjectMapper().readValue(resource, org.opencb.biodata.models.variant.avro.legacy.VariantSource.class); VariantFileMetadata convertedFileMetadata = new VariantSourceToVariantFileMetadataConverter().convert(legacy); // Impossible to get StdDev from legacy VariantSource expectedFileMetadata.getStats().setStdDevQuality(0); assertEquals(expectedFileMetadata, convertedFileMetadata); }
/**
 * Reads the aggregation mapping Properties from the options, or null when absent.
 */
static Properties getAggregationMappingProperties(QueryOptions options) {
    String key = VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key();
    return options.get(key, Properties.class, null);
}
@Test @Override public void multiIndexPlatinum() throws Exception { super.multiIndexPlatinum(new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), "DP,AD,PL")); checkPlatinumDatabase(d -> ((List) d.get(FILES_FIELD)).size(), Collections.singleton("0/0")); // StudyConfiguration studyConfiguration = variantStorageEngine.getStudyConfigurationManager() // .getStudyConfiguration(1, null).first(); // Iterator<BatchFileOperation> iterator = studyConfiguration.getBatches().iterator(); // assertEquals(MongoDBVariantOptions.DIRECT_LOAD.key(), iterator.next().getOperationName()); // while (iterator.hasNext()) { // BatchFileOperation batchFileOperation = iterator.next(); // assertNotEquals(MongoDBVariantOptions.DIRECT_LOAD.key(), batchFileOperation.getOperationName()); // } }
/**
 * Checks that indexing still works after both the transformed file and its companion
 * metadata file have been deleted from catalog (skipping the trash).
 */
@Test
public void testDeleteTransformedFile() throws Exception {
    QueryOptions indexOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    File inputFile = getFile(0);
    File transformedFile = transformFile(inputFile, indexOptions);

    // Remove the transformed file and its metadata companion, bypassing the trash.
    ObjectMap skipTrash = new ObjectMap(FileManager.SKIP_TRASH, true);
    catalogManager.getFileManager().delete(studyFqn,
            new Query(FileDBAdaptor.QueryParams.NAME.key(), transformedFile.getName()), skipTrash, sessionId);
    catalogManager.getFileManager().delete(studyFqn,
            new Query(FileDBAdaptor.QueryParams.NAME.key(),
                    VariantReaderUtils.getMetaFromTransformedFile(transformedFile.getName())), skipTrash, sessionId);

    indexFile(inputFile, indexOptions, outputId);
}
/**
 * Tells whether the RESUME flag is set in the options, falling back to its default.
 */
public static boolean isResume(ObjectMap options) {
    String resumeKey = Options.RESUME.key();
    return options.getBoolean(resumeKey, Options.RESUME.defaultValue());
}
/**
 * With a sampling size smaller than the real count, approximateCount must report an
 * approximate result within 25% of the exact value.
 */
@Test
public void testApproxCount() throws Exception {
    Query query = new Query(SAMPLE.key(), "NA19660")
            .append(ANNOT_CONSERVATION.key(), "gerp>0.1");
    long realCount = dbAdaptor.count(query).first();

    // Sampling size is 10% of the real count, forcing an approximation.
    QueryOptions samplingOptions =
            new QueryOptions(VariantStorageEngine.Options.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), realCount * 0.1);
    VariantQueryResult<Long> result = variantStorageEngine.approximateCount(query, samplingOptions);
    long approxCount = result.first();

    System.out.println("approxCount = " + approxCount);
    System.out.println("realCount = " + realCount);
    assertTrue(result.getApproximateCount());
    // Accept a 25% relative error band around the exact count.
    assertThat(approxCount, lte(realCount * 1.25));
    assertThat(approxCount, gte(realCount * 0.75));
}
/**
 * Deleting a file that is already indexed must fail: no file is modified and the
 * failure message refers to the index status.
 */
@Test
public void testDeleteIndexedFile() throws Exception {
    QueryOptions indexOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);

    File inputFile = getFile(0);
    indexFile(inputFile, indexOptions, outputId);
    Study study = catalogManager.getFileManager().getStudy(inputFile, sessionId);

    Query pathQuery = new Query(FileDBAdaptor.QueryParams.PATH.key(), inputFile.getPath());
    WriteResult result = catalogManager.getFileManager().delete(study.getFqn(), pathQuery, null, sessionId);

    assertEquals(0, result.getNumModified());
    assertTrue(result.getFailed().get(0).getMessage().contains("index status"));
}
/**
 * Indexing with EXTRA_GENOTYPE_FIELDS set to NONE must leave GT as the only format
 * field on every stored variant.
 */
@Test
public void indexWithoutOtherFields() throws Exception {
    StudyConfiguration studyConfiguration = newStudyConfiguration();
    ObjectMap params = new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), VariantQueryUtils.NONE)
            .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro")
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);
    runDefaultETL(smallInputUri, getVariantStorageEngine(), studyConfiguration, params);

    for (Variant variant : variantStorageEngine.getDBAdaptor()) {
        assertEquals("GT", variant.getStudy(STUDY_NAME).getFormatAsString());
    }
}
/**
 * Extra storage options for this test: skip variant-table index creation and
 * disable annotation.
 */
@Override
public Map<String, ?> getOtherStorageConfigurationOptions() {
    ObjectMap extraOptions = new ObjectMap(HadoopVariantStorageEngine.VARIANT_TABLE_INDEXES_SKIP, true);
    extraOptions.append(VariantStorageEngine.Options.ANNOTATE.key(), false);
    return extraOptions;
}
/**
 * Fixture setup: creates the 1000g test file, splits its samples evenly into
 * {@code coh.length} control cohorts, indexes the file (without annotation) and
 * captures the id of the default cohort.
 */
public void before() throws Exception {
    File file = opencga.createFile(studyId,
            "1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz",
            sessionId);

    // Evenly sized sample chunks; NOTE(review): integer division drops any remainder
    // samples from the last cohort — confirm this is intended for the fixture.
    int chunkSize = file.getSamples().size() / coh.length;
    for (int cohortIdx = 0; cohortIdx < coh.length; cohortIdx++) {
        Cohort cohort = catalogManager.getCohortManager().create(studyId, "coh" + cohortIdx, Study.Type.CONTROL_SET, "",
                file.getSamples().subList(chunkSize * cohortIdx, chunkSize * (cohortIdx + 1)),
                null, null, sessionId).first();
        coh[cohortIdx] = cohort.getId();
    }

    QueryOptions indexOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false);
    indexOptions.putIfNotNull(StorageOperation.CATALOG_PATH, outputId);
    variantManager.index(studyId, file.getId(), createTmpOutdir(file), indexOptions, sessionId);

    all = catalogManager.getCohortManager()
            .get(studyId, new Query(CohortDBAdaptor.QueryParams.ID.key(), DEFAULT_COHORT), new QueryOptions(), sessionId)
            .first().getId();
}
/**
 * With a sampling size as large as the whole result set, approximateCount must fall
 * back to an exact (non-approximate) count.
 * Fix: {@code assertEquals} arguments were swapped — JUnit expects
 * {@code assertEquals(expected, actual)}, so failure messages were misleading.
 */
@Test
public void testExactApproxCount() throws Exception {
    Query query = new Query(SAMPLE.key(), "NA19660")
            .append(ANNOT_CONSERVATION.key(), "gerp>0.1");
    long realCount = dbAdaptor.count(query).first();

    // Sampling size covers every variant, so the count should be exact.
    VariantQueryResult<Long> result = variantStorageEngine.approximateCount(query,
            new QueryOptions(VariantStorageEngine.Options.APPROXIMATE_COUNT_SAMPLING_SIZE.key(),
                    allVariants.getNumResults()));
    long approxCount = result.first();

    System.out.println("approxCount = " + approxCount);
    System.out.println("realCount = " + realCount);
    assertFalse(result.getApproximateCount());
    assertEquals(realCount, approxCount);
}