/**
 * Deserializes a {@link VariantFileMetadata} from a JSON stream.
 *
 * The stream must be positioned at a single JSON object describing the file
 * metadata model. The object is parsed with a Jackson {@code ObjectMapper}
 * that has {@code REQUIRE_SETTERS_FOR_GETTERS} enabled, and the resulting
 * model is wrapped into a {@link VariantFileMetadata}.
 *
 * @param inputStream Stream containing the JSON encoded file metadata
 * @return The file metadata read from the stream
 * @throws IOException if there is an error reading or parsing the stream
 */
public static VariantFileMetadata readVariantFileMetadataFromJson(InputStream inputStream) throws IOException {
    ObjectMapper jsonMapper = new ObjectMapper();
    jsonMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);

    org.opencb.biodata.models.variant.metadata.VariantFileMetadata model = jsonMapper.readValue(
            inputStream, org.opencb.biodata.models.variant.metadata.VariantFileMetadata.class);

    return new VariantFileMetadata(model);
}
/**
 * Creates a {@link VariantFileMetadata} for the given file without reading its content.
 *
 * The first constructor argument is the last element of the path (the file name)
 * and the second one is the absolute path, both as strings.
 *
 * @param input Path to the variant file
 * @return A new metadata object built only from the path
 */
public static VariantFileMetadata createEmptyVariantFileMetadata(Path input) {
    String fileName = input.getFileName().toString();
    String absolutePath = input.toAbsolutePath().toString();
    return new VariantFileMetadata(fileName, absolutePath);
}
/**
 * Creates a reader that takes its data from a file path.
 *
 * The file metadata is built from the first entry of {@code metadata.getFiles()}.
 *
 * @param input      Path to the input file, must not be null
 * @param metadata   Study metadata, must not be null
 * @param normalizer Normalizer to use; stored as given, no null check is done here
 * @throws NullPointerException if {@code input} or {@code metadata} is null
 */
public VariantVcfHtsjdkReader(Path input, VariantStudyMetadata metadata, VariantNormalizer normalizer) {
    // Mandatory arguments are validated first, in declaration order
    this.input = Objects.requireNonNull(input);
    this.metadata = Objects.requireNonNull(metadata);

    // Path based reader: there is no caller supplied stream, and the close flag is enabled
    this.inputStream = null;
    this.closeInputStream = true;

    this.normalizer = normalizer;
    this.fileMetadata = new VariantFileMetadata(this.metadata.getFiles().get(0));
}
/**
 * Creates a reader that takes its data from an already opened stream.
 *
 * The file metadata is built from the first entry of {@code metadata.getFiles()}.
 *
 * @param inputStream Stream to read from, must not be null
 * @param metadata    Study metadata, must not be null
 * @param normalizer  Normalizer to use; stored as given, no null check is done here
 * @throws NullPointerException if {@code inputStream} or {@code metadata} is null
 */
public VariantVcfHtsjdkReader(InputStream inputStream, VariantStudyMetadata metadata, VariantNormalizer normalizer) {
    // Mandatory arguments are validated first, in declaration order
    this.inputStream = Objects.requireNonNull(inputStream);
    this.metadata = Objects.requireNonNull(metadata);

    // Stream based reader: there is no path, and the close flag is disabled
    this.input = null;
    this.closeInputStream = false;

    this.normalizer = normalizer;
    this.fileMetadata = new VariantFileMetadata(this.metadata.getFiles().get(0));
}
/**
 * Returns the metadata of the file being read.
 *
 * A new {@link VariantFileMetadata} wrapper is created on every call around
 * the first entry of {@code metadata.getFiles()}.
 *
 * @return File metadata wrapping the first file of the study metadata
 */
@Override
public VariantFileMetadata getVariantFileMetadata() {
    org.opencb.biodata.models.variant.metadata.VariantFileMetadata firstFile = metadata.getFiles().get(0);
    return new VariantFileMetadata(firstFile);
}
/**
 * Initializes the reader with the location of its metadata and the study metadata.
 *
 * If the study metadata already contains files, the first one is wrapped as the
 * file metadata of this reader. Otherwise a new file metadata with empty
 * constructor arguments is created and its underlying model is added to the
 * study metadata.
 *
 * @param metadataPath Path to the metadata file; stored as given
 * @param metadata     Study metadata; may be modified when it has no files
 */
public AbstractVariantReader(Path metadataPath, VariantStudyMetadata metadata) {
    this.samplesPositions = null;
    this.metadataPath = metadataPath;

    if (!metadata.getFiles().isEmpty()) {
        // Reuse the first file already registered in the study
        this.fileMetadata = new VariantFileMetadata(metadata.getFiles().get(0));
    } else {
        // No file registered yet: create a blank one and register it
        this.fileMetadata = new VariantFileMetadata("", "");
        metadata.getFiles().add(this.fileMetadata.getImpl());
    }
}
/**
 * Builds a study metadata for the given VCF path.
 *
 * A file metadata is created from the file name of the path and an empty
 * second argument, and then converted into a study metadata for
 * {@code STUDY_NAME}.
 *
 * @param vcfPath Path to the VCF file
 * @return Study metadata derived from the file name
 */
protected static VariantStudyMetadata getMetadata(Path vcfPath) {
    String fileName = vcfPath.getFileName().toString();
    VariantFileMetadata fileMetadata = new VariantFileMetadata(fileName, "");
    return fileMetadata.toVariantStudyMetadata(STUDY_NAME);
}
/**
 * Reads the variant file metadata of a file, if it is a variant file.
 *
 * The file is considered a variant file when either its declared format or the
 * format detected from the URI is VCF, GVCF or BCF.
 *
 * @param file    File entry; its format, uid and name are used
 * @param fileUri Location of the file
 * @return The metadata read from the file, or {@code null} if it is not a variant file
 * @throws IOException if the format detection or the metadata reading fails
 */
public static VariantFileMetadata readVariantFileMetadata(File file, URI fileUri) throws IOException {
    File.Format declared = file.getFormat();
    File.Format detected = FileUtils.detectFormat(fileUri);

    boolean declaredIsVariant = declared == File.Format.VCF
            || declared == File.Format.GVCF
            || declared == File.Format.BCF;
    boolean detectedIsVariant = detected == File.Format.VCF
            || detected == File.Format.GVCF
            || detected == File.Format.BCF;

    if (!declaredIsVariant && !detectedIsVariant) {
        return null;
    }

    String uid = String.valueOf(file.getUid());
    VariantFileMetadata metadata = new VariantFileMetadata(uid, file.getName());
    metadata.setId(uid);
    return VariantMetadataUtils.readVariantFileMetadata(Paths.get(fileUri.getPath()), metadata);
}
/**
 * Registers a file called "fileName" in the study configuration and creates
 * its file metadata with six samples, "s0" to "s5", at positions 0 to 5.
 *
 * @param studyConfiguration Study configuration where the file id is registered
 * @param fileId             Id of the file
 * @return The new file metadata
 */
protected VariantFileMetadata createVariantFileMetadata(StudyConfiguration studyConfiguration, Integer fileId) {
    studyConfiguration.getFileIds().put("fileName", fileId);

    // Sample "s<i>" is placed at position i
    Map<String, Integer> positions = new HashMap<>();
    for (int i = 0; i < 6; i++) {
        positions.put("s" + i, i);
    }

    VariantFileMetadata fileMetadata = new VariantFileMetadata("fileName", fileId.toString());
    fileMetadata.setSamplesPosition(positions);
    return fileMetadata;
}
@Override public VariantFileMetadata convert(VariantSource legacy) { VariantFileMetadata fileMetadata = new VariantFileMetadata(legacy.getFileId(), legacy.getFileName()); fileMetadata.setSampleIds(legacy.getSamples());
/**
 * Creates a runnable that reads, transforms and writes all the variants in the
 * calling thread.
 *
 * Variants are read in batches of 100 until an empty batch is returned. Each
 * batch is transformed into slices, and non empty results are written to the
 * collector. Finally, whatever the task returns from {@code drain()} is written.
 * Any exception is propagated with {@code Throwables.propagate}.
 *
 * @param size      Size passed to {@code VcfVariantReaderTest.createReader}
 * @param collector Writer that receives the generated slices
 * @return The runnable that executes the whole process
 * @throws Exception if the reader or the task can not be created
 */
public Runnable createSerialRunner(int size, DataWriter<VcfSliceProtos.VcfSlice> collector) throws Exception {
    VcfVariantReader reader = VcfVariantReaderTest.createReader(size);
    Configuration conf = new Configuration();
    ArchiveTableHelper helper = new ArchiveTableHelper(conf, 1, new VariantFileMetadata("1", ""));
    ParallelTaskRunner.Task<Variant, VcfSliceProtos.VcfSlice> task = new VariantHbaseTransformTask(helper);

    return () -> {
        try {
            List<Variant> batch = reader.read(100);
            while (!batch.isEmpty()) {
                List<VcfSliceProtos.VcfSlice> slices = task.apply(batch);
                if (!slices.isEmpty()) {
                    collector.write(slices);
                }
                batch = reader.read(100);
            }
            // Flush whatever the task is still holding
            collector.write(task.drain());
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    };
}
private VariantFileMetadata checkTransformedVariants(URI variantsJson, StudyConfiguration studyConfiguration, int expectedNumVariants) throws StorageEngineException { long start = System.currentTimeMillis(); VariantFileMetadata source = new VariantFileMetadata("6", VCF_TEST_FILE_NAME); VariantReader variantReader = VariantReaderUtils.getVariantReader(Paths.get(variantsJson.getPath()), source.toVariantStudyMetadata(String.valueOf(studyConfiguration.getStudyId()))); variantReader.open(); variantReader.pre(); List<Variant> read; int numVariants = 0; while ((read = variantReader.read(100)) != null && !read.isEmpty()) { numVariants += read.size(); } variantReader.post(); variantReader.close(); if (expectedNumVariants < 0) { expectedNumVariants = source.getStats().getNumVariants(); } else { assertEquals(expectedNumVariants, source.getStats().getNumVariants()); //9792 } assertEquals(expectedNumVariants, numVariants); //9792 logger.info("checkTransformedVariants time : " + (System.currentTimeMillis() - start) / 1000.0 + "s"); return source; }
/**
 * Creates a {@code ParallelTaskRunner} that reads variants, transforms them
 * into slices and writes them to the collector.
 *
 * The runner is configured with one task, a batch size of 10, abort on fail
 * enabled and sorting disabled. The supplier always returns the same task
 * instance.
 *
 * @param size      Size passed to {@code VcfVariantReaderTest.createReader}
 * @param collector Writer that receives the generated slices
 * @return The configured runner; it is not started here
 * @throws Exception if any of the components can not be created
 */
public ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> createParallelRunner(int size, DataWriter<VcfSliceProtos.VcfSlice> collector) throws Exception {
    VcfVariantReader reader = VcfVariantReaderTest.createReader(size);

    Configuration conf = new Configuration();
    VariantFileMetadata fileMetadata = new VariantFileMetadata("1", "1");
    ArchiveTableHelper helper = new ArchiveTableHelper(conf, 1, fileMetadata);
    ParallelTaskRunner.Task<Variant, VcfSliceProtos.VcfSlice> task = new VariantHbaseTransformTask(helper);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(1)
            .setBatchSize(10)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();

    return new ParallelTaskRunner<>(reader, () -> task, collector, config);
}
variantStorageManager = getVariantStorageEngine();

// First file: study metadata and study configuration for study 1
metadata1 = new VariantFileMetadata(fileId1.toString(), getFileName(fileId1)).toVariantStudyMetadata(studyId1.toString());
studyConfiguration = new StudyConfiguration(studyId1, studyName1);
studyConfiguration.getAttributes().append(DEFAULT_GENOTYPE.key(), defaultGenotype);
studyConfiguration.getSamplesInFiles().put(fileId1, file1SampleIds);

// Second file: study metadata and study configuration for study 2
metadata2 = new VariantFileMetadata(fileId2.toString(), getFileName(fileId2)).toVariantStudyMetadata(studyId2.toString());
studyConfiguration2 = new StudyConfiguration(studyId2, studyName2);
studyConfiguration2.getAttributes().append(DEFAULT_GENOTYPE.key(), defaultGenotype);
studyConfiguration2.getSamplesInFiles().put(fileId2, file2SampleIds);

// Third file: its study metadata is also created for study 2
metadata3 = new VariantFileMetadata(fileId3.toString(), getFileName(fileId3)).toVariantStudyMetadata(studyId2.toString());
// Two more samples are registered in the second study configuration, with ids 5 and 6
studyConfiguration2.getSampleIds().put("NA00001.X", 5);
studyConfiguration2.getSampleIds().put("NA00002.X", 6);
/**
 * Reads the file metadata from a variant file using the given reader.
 *
 * The reader is opened, {@code pre()} and {@code post()} are executed around
 * the copy of the header, the sample ids and the stats, and the reader is
 * always closed, even if any of the previous steps fails.
 *
 * @param reader   Variant reader to use, must not be null
 * @param metadata Optional metadata to fill up. If null, a new one is created
 * @return The metadata given (or the newly created one) with header, sample ids and stats set
 * @throws IOException if an I/O error occurs
 * @throws NullPointerException if {@code reader} is null
 */
public static VariantFileMetadata readVariantFileMetadata(VariantReader reader, VariantFileMetadata metadata) throws IOException {
    Objects.requireNonNull(reader);

    VariantFileMetadata target = metadata == null ? new VariantFileMetadata("", "") : metadata;
    try {
        reader.open();
        reader.pre();

        // Copy the relevant fields from the metadata exposed by the reader
        target.setHeader(reader.getVariantFileMetadata().getHeader());
        target.setSampleIds(reader.getVariantFileMetadata().getSampleIds());
        target.setStats(reader.getVariantFileMetadata().getStats());

        reader.post();
    } finally {
        reader.close();
    }
    return target;
}
/**
 * Reads up to 1000000 variants from a VCF resource and registers its file and
 * samples in the study configuration.
 *
 * The resource is opened twice: once to read the VCF header used to configure
 * the normalizer, and once for reading the variants. Every sample name of the
 * file gets {@code sampleSufix} appended. New sample names are added to the
 * study configuration with the id {@code size + 1}. The samples position of
 * the first study of each variant is replaced with the suffixed names.
 *
 * @param sc          Study configuration to update
 * @param fileName    Name of the class path resource with the VCF
 * @param fileId      Id of the file
 * @param sampleSufix Suffix appended to every sample name
 * @return The variants read
 */
protected List<Variant> readVariants(StudyConfiguration sc, String fileName, Integer fileId, String sampleSufix) {
    // Read the header first, as it is required to configure the normalizer
    FullVcfCodec headerCodec = new FullVcfCodec();
    LineIterator headerLines = headerCodec.makeSourceFromStream(getClass().getResourceAsStream(fileName));
    VCFHeader vcfHeader = (VCFHeader) headerCodec.readActualHeader(headerLines);
    VariantNormalizer normalizer = new VariantNormalizer().configure(vcfHeader);

    // Read the variants from a second stream over the same resource
    VariantFileMetadata file = new VariantFileMetadata(fileId.toString(), "file");
    VariantStudyMetadata studyMetadata = file.toVariantStudyMetadata(String.valueOf(sc.getStudyId()));
    VariantVcfHtsjdkReader reader =
            new VariantVcfHtsjdkReader(getClass().getResourceAsStream(fileName), studyMetadata, normalizer);
    reader.open();
    reader.pre();
    List<Variant> variants = reader.read(1000000);
    reader.post();
    reader.close();

    sc.getAttributes().append(DEFAULT_GENOTYPE.key(), defaultGenotype);

    // Register the suffixed samples, keeping the order of the file
    LinkedHashMap<String, Integer> samplesPosition = new LinkedHashMap<>();
    LinkedHashSet<Integer> sampleIds = new LinkedHashSet<>();
    for (String originalName : file.getSampleIds()) {
        String sampleName = originalName + sampleSufix;
        sc.getSampleIds().putIfAbsent(sampleName, sc.getSampleIds().size() + 1);
        sampleIds.add(sc.getSampleIds().get(sampleName));
        samplesPosition.put(sampleName, samplesPosition.size());
    }

    sc.getFileIds().put(getFileName(fileId), fileId);
    sc.getSamplesInFiles().put(fileId, sampleIds);

    variants.forEach(variant -> variant.getStudies().get(0).setSortedSamplesPosition(samplesPosition));
    return variants;
}
/**
 * Replaces the "stats" field of the documents matching the study id and file
 * id of the given stats.
 *
 * The file stats are converted to the storage type through a temporary
 * {@link VariantFileMetadata}, and only the resulting "stats" document is used.
 *
 * @param variantSourceStats Stats to store. Provides the study id, the file id and the file stats
 * @param studyConfiguration Not used by this method
 * @param queryOptions       Not used by this method
 * @return Result of the update operation
 */
@Override
public QueryResult updateStats(VariantSourceStats variantSourceStats, StudyConfiguration studyConfiguration, QueryOptions queryOptions) {
    MongoDBCollection collection = db.getCollection(collectionName);

    // Temporary holder, only needed to run the stats through the converter
    VariantFileMetadata statsHolder = new VariantFileMetadata("", "");
    statsHolder.setStats(variantSourceStats.getFileStats());
    Document statsDocument = variantFileMetadataConverter
            .convertToStorageType("", statsHolder)
            .get("stats", Document.class);

    Bson filter = parseQuery(
            new Query(VariantFileMetadataQueryParam.STUDY_ID.key(), variantSourceStats.getStudyId())
                    .append(VariantFileMetadataQueryParam.FILE_ID.key(), variantSourceStats.getFileId()));
    Bson setStats = Updates.set("stats", statsDocument);

    return collection.update(filter, setStats, null);
}
private void stageVariants(StudyConfiguration study, int fileId, List<Variant> variants) throws Exception { String archiveTableName = engine.getArchiveTableName(study.getStudyId()); ArchiveTableHelper.createArchiveTableIfNeeded(dbAdaptor.getGenomeHelper(), archiveTableName); // Create empty VariantFileMetadata VariantFileMetadata fileMetadata = new VariantFileMetadata(String.valueOf(fileId), String.valueOf(fileId)); fileMetadata.setSampleIds(variants.get(0).getStudies().get(0).getOrderedSamplesName()); dbAdaptor.getStudyConfigurationManager().updateVariantFileMetadata(String.valueOf(study.getStudyId()), fileMetadata); // Create dummy reader VariantSliceReader reader = getVariantSliceReader(variants, study.getStudyId(), fileId); // Task supplier Supplier<ParallelTaskRunner.Task<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice>> taskSupplier = () -> { VariantToVcfSliceConverter converter = new VariantToVcfSliceConverter(); return list -> { System.out.println("list.size() = " + list.size()); List<VcfSliceProtos.VcfSlice> vcfSlice = new ArrayList<>(list.size()); for (ImmutablePair<Long, List<Variant>> pair : list) { vcfSlice.add(converter.convert(pair.getRight(), pair.getLeft().intValue())); } return vcfSlice; }; }; // Writer VariantHBaseArchiveDataWriter writer = new VariantHBaseArchiveDataWriter(dbAdaptor.getArchiveHelper(study.getStudyId(), fileId), archiveTableName, dbAdaptor.getHBaseManager()); ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(1).build(); ParallelTaskRunner<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice> ptr = new ParallelTaskRunner<>(reader, taskSupplier, writer, config); // Execute stage System.out.println("Stage start!"); ptr.run(); System.out.println("Stage finished!"); }
new VariantFileMetadata(fileId, "").toVariantStudyMetadata(String.valueOf(studyConfiguration.getStudyId())));
// Create a file metadata using the file id for both constructor arguments
VariantFileMetadata fileMetadata = new VariantFileMetadata(String.valueOf(fileId), String.valueOf(fileId));
// Sample ids are taken from the first study of the first variant
fileMetadata.setSampleIds(variants.get(0).getStudies().get(0).getOrderedSamplesName());
// Register the file metadata in the study given by sc
dbAdaptor.getStudyConfigurationManager().updateVariantFileMetadata(String.valueOf(sc.getStudyId()), fileMetadata);