/**
 * Reads the {@link VariantFileMetadata} of a variant file, without supplying an
 * existing metadata object to fill.
 *
 * <p>Accepted formats: Avro, Json and VCF.
 *
 * @param input Variant file to read (avro, json, vcf)
 * @return The {@link VariantFileMetadata} that was read
 * @throws StorageEngineException if the format is not valid or there is an error reading
 */
public static VariantFileMetadata readVariantFileMetadata(Path input) throws StorageEngineException {
    // No metadata object to fill: hand an explicitly typed null to the two-argument overload.
    final VariantFileMetadata noMetadata = null;
    return readVariantFileMetadata(input, noMetadata);
}
/**
 * Reads the {@link VariantFileMetadata} that belongs to the given variant file.
 *
 * <p>Inputs without a scheme, or whose scheme starts with {@code "file"}, are read
 * through {@link VariantReaderUtils#readVariantFileMetadata}. For any other scheme the
 * location of the metadata file is derived from the input with
 * {@code VariantReaderUtils.getMetaFromTransformedFile} and read as gzip-compressed JSON
 * from the Hadoop {@link FileSystem} that owns that path.
 *
 * @param input URI of the variant file
 * @return Read {@link VariantFileMetadata}
 * @throws StorageEngineException if the FileSystem can not be obtained or the metadata can not be read
 */
@Override
public VariantFileMetadata readVariantFileMetadata(URI input) throws StorageEngineException {
    if (input.getScheme() == null || input.getScheme().startsWith("file")) {
        return VariantReaderUtils.readVariantFileMetadata(Paths.get(input.getPath()), null);
    }

    Path metaPath = new Path(VariantReaderUtils.getMetaFromTransformedFile(input.toString()));

    // Resolve the FileSystem from the path itself instead of FileSystem.get(conf):
    // the latter always yields the default filesystem, which rejects paths whose
    // scheme/authority differ from it. For paths on the default filesystem the
    // result is the same as before.
    FileSystem fs;
    try {
        fs = metaPath.getFileSystem(conf);
    } catch (IOException e) {
        throw new StorageEngineException("Unable to get FileSystem", e);
    }

    // The metadata file is stored gzip-compressed; the stream is closed by try-with-resources.
    try (InputStream inputStream = new GZIPInputStream(fs.open(metaPath))) {
        return VariantReaderUtils.readVariantFileMetadataFromJson(inputStream);
    } catch (IOException e) {
        throw new StorageEngineException("Unable to read VariantFileMetadata", e);
    }
}
}
/**
 * Reads the {@link VariantFileMetadata} of a variant file, optionally filling an
 * existing metadata object.
 *
 * <p>Accepted formats: Avro, Json and VCF.
 *
 * @param input    Variant file to read (avro, json, vcf)
 * @param metadata {@link VariantFileMetadata} to fill. Can be null
 * @return The {@link VariantFileMetadata} that was read
 * @throws StorageEngineException if the format is not valid or there is an error reading
 */
public static VariantFileMetadata readVariantFileMetadata(Path input, VariantFileMetadata metadata)
        throws StorageEngineException {
    // Third argument of the three-argument overload is always false from this entry point.
    // NOTE(review): presumably a "read from stdin" flag — confirm against that overload.
    final boolean stdin = false;
    return readVariantFileMetadata(input, metadata, stdin);
}
/**
 * Reads the {@link VariantFileMetadata} of a variant file given as a URI.
 *
 * <p>Only URIs without a scheme, or whose scheme starts with {@code "file"}, are
 * supported; the URI path is then read as a local file.
 *
 * @param input URI of the variant file
 * @return Read {@link VariantFileMetadata}
 * @throws StorageEngineException if the scheme is not supported, or if reading fails
 */
public VariantFileMetadata readVariantFileMetadata(URI input) throws StorageEngineException {
    final String scheme = input.getScheme();

    // Guard: anything that is neither scheme-less nor "file*" can not be read here.
    if (scheme != null && !scheme.startsWith("file")) {
        throw new StorageEngineException("Can not read files from " + scheme);
    }

    return readVariantFileMetadata(Paths.get(input.getPath()), null);
}
public VariantFileMetadata readVariantFileMetadata(URI input) throws StorageEngineException { VariantFileMetadata variantFileMetadata = variantReaderUtils.readVariantFileMetadata(input); // Ensure correct fileId // FIXME // variantFileMetadata.setId(String.valueOf(getFileId())); variantFileMetadata.setId(null); return variantFileMetadata; }
// Take the file name from the input path, read the metadata from inputUri, and set the metadata id to fileId (as a String).
String fileName = input.getFileName().toString(); VariantFileMetadata fileMetadata = variantReaderUtils.readVariantFileMetadata(inputUri); fileMetadata.setId(String.valueOf(fileId));
VariantFileMetadata fileMetadata = VariantReaderUtils.readVariantFileMetadata(metaFile, null); stats = fileMetadata.getStats(); } catch (StorageEngineException e) {
/**
 * Builds the fixtures shared by the tests: a study with seven samples, two indexed
 * files, one cohort with calculated stats and the header of a platinum VCF, plus the
 * converter, JSON writer and project metadata.
 */
@Before
public void setUp() throws Exception {
    studyConfiguration = new StudyConfiguration(1, "study");

    // Samples s1..s7 are registered with ids 1..7.
    for (int sampleId = 1; sampleId <= 7; sampleId++) {
        studyConfiguration.getSampleIds().put("s" + sampleId, sampleId);
    }

    // First indexed file: id 10, samples 1-4.
    studyConfiguration.getIndexedFiles().add(10);
    studyConfiguration.getFileIds().put("file1.vcf", 10);
    studyConfiguration.getSamplesInFiles().put(10, new LinkedHashSet<>(Arrays.asList(1, 2, 3, 4)));

    // Second indexed file: id 11, samples 4-6.
    studyConfiguration.getIndexedFiles().add(11);
    studyConfiguration.getFileIds().put("file2.vcf", 11);
    studyConfiguration.getSamplesInFiles().put(11, new LinkedHashSet<>(Arrays.asList(4, 5, 6)));

    // Cohort "ALL" (id 20) with calculated stats, covering samples 1-6.
    studyConfiguration.getCalculatedStats().add(20);
    studyConfiguration.getCohortIds().put("ALL", 20);
    studyConfiguration.getCohorts().put(20, new HashSet<>(Arrays.asList(1, 2, 3, 4, 5, 6)));

    // Add the header of a real VCF to the study configuration.
    URI vcfUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz");
    VariantFileMetadata vcfMetadata = VariantReaderUtils.readVariantFileMetadata(Paths.get(vcfUri), null);
    studyConfiguration.addVariantFileHeader(vcfMetadata.getHeader(), null);

    variantMetadataConverter = new VariantMetadataConverter();

    // Pretty-printing writer that skips empty values.
    ObjectMapper mapper = new ObjectMapper();
    mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
    mapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
    objectWriter = mapper.writerWithDefaultPrettyPrinter();

    projectMetadata = new ProjectMetadata("hsapiens", "37", 1);
}
@Test public void testConvert() throws Exception { // Transform smallInputFile to get the expected meta file with stats StoragePipelineResult storagePipelineResult = runETL(variantStorageEngine, smallInputUri, newOutputUri(), new ObjectMap(VariantStorageEngine.Options.ISOLATE_FILE_FROM_STUDY_CONFIGURATION.key(), true), true, true, false); VariantFileMetadata expectedFileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(storagePipelineResult.getTransformResult()); // Read and convert the legacy metadata file InputStream resource = new GZIPInputStream(getClass().getResourceAsStream("/variant-test-file.vcf.gz.file_legacy.json.gz")); org.opencb.biodata.models.variant.avro.legacy.VariantSource legacy = new ObjectMapper().readValue(resource, org.opencb.biodata.models.variant.avro.legacy.VariantSource.class); VariantFileMetadata convertedFileMetadata = new VariantSourceToVariantFileMetadataConverter().convert(legacy); // Impossible to get StdDev from legacy VariantSource expectedFileMetadata.getStats().setStdDevQuality(0); assertEquals(expectedFileMetadata, convertedFileMetadata); }
// Read the metadata of the input, handing metadataTemplate and the stdin flag to VariantReaderUtils.
final VariantFileMetadata metadata = VariantReaderUtils.readVariantFileMetadata(input, metadataTemplate, stdin);
/**
 * Runs the default ETL on the small input file with a JSON transform format and
 * checks the transformed file name, the indexed files, and the transformed and
 * loaded variants.
 */
@Test
public void basicIndex() throws Exception {
    clearDB(DB_NAME);

    StudyConfiguration study = newStudyConfiguration();
    ObjectMap etlOptions = new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json");
    StoragePipelineResult pipelineResult = runDefaultETL(smallInputUri, variantStorageEngine, study, etlOptions);

    // The transformed file must carry the JSON extension.
    String wrongExtensionMessage = "Incorrect transform file extension " + pipelineResult.getTransformResult()
            + ". Expected 'variants.json.gz'";
    String transformedFileName = Paths.get(pipelineResult.getTransformResult()).toFile().getName();
    assertTrue(wrongExtensionMessage, transformedFileName.endsWith("variants.json.gz"));

    VariantFileMetadata transformedMetadata = variantStorageEngine.getVariantReaderUtils()
            .readVariantFileMetadata(pipelineResult.getTransformResult());

    assertEquals(1, study.getIndexedFiles().size());
    checkTransformedVariants(pipelineResult.getTransformResult(), study);
    checkLoadedVariants(variantStorageEngine.getDBAdaptor(), study, true, false, true,
            getExpectedNumLoadedVariants(transformedMetadata));
}
public void checkLoadedFilesS1S2(StudyConfiguration studyConfiguration, VariantHadoopDBAdaptor dbAdaptor) throws IOException, StorageEngineException { VariantFileMetadata fileMetadata = VariantReaderUtils.readVariantFileMetadata(path, null);
// Read the metadata of the transformed file (its path converted back to a URI) and derive the expected number of loaded variants from it.
VariantFileMetadata fileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(Paths.get(etlResult.getTransformResult().getPath()).toUri()); numVariants = getExpectedNumLoadedVariants(fileMetadata);
// Read the metadata of the transformed file, then take the first indexed file id from the study configuration.
fileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(Paths.get(etlResult.getTransformResult().getPath()).toUri()); Integer indexedFileId = studyConfiguration.getIndexedFiles().iterator().next();
// Read the metadata of the transformed file, store the expected number of loaded variants, and flag the file as indexed.
fileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(Paths.get(etlResult.getTransformResult().getPath()).toUri()); NUM_VARIANTS = getExpectedNumLoadedVariants(fileMetadata); fileIndexed = true;
// Register the samples of fileId2 in studyConfiguration1, using the metadata read from file2Uri.
variantStorageEngineExpected.getStudyConfigurationManager().registerFileSamples(studyConfiguration1, fileId2, variantStorageEngineExpected.getVariantReaderUtils().readVariantFileMetadata(file2Uri), null);
); VariantFileMetadata fileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(etlResult.getTransformResult()); checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration, fileMetadata.getStats().getNumVariants()); VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor();