/**
 * Print to the standard output a summary of the variant metadata manager.
 * <p>
 * The summary lists the number of studies and, for every study, its ID, its files and
 * its cohorts, each with the number of samples it contains. A missing (null) study list
 * is reported as zero studies instead of failing, matching how
 * {@link #getVariantStudyMetadata(String)} treats a null study list.
 */
public void printSummary() {
    StringBuilder res = new StringBuilder();

    // The study list may not have been initialised yet; treat that as "no studies".
    boolean hasStudies = variantMetadata.getStudies() != null;
    res.append("Num. studies: ").append(hasStudies ? variantMetadata.getStudies().size() : 0).append("\n");

    if (hasStudies) {
        int studyCounter = 0;
        for (VariantStudyMetadata study : variantMetadata.getStudies()) {
            studyCounter++;
            res.append("\tStudy #").append(studyCounter).append(": ").append(study.getId()).append("\n");

            // Files of the study, numbered from 1.
            res.append("\tNum. files: ").append(study.getFiles().size()).append("\n");
            int fileCounter = 0;
            for (VariantFileMetadata file : study.getFiles()) {
                fileCounter++;
                res.append("\t\tFile #").append(fileCounter).append(": ").append(file.getId());
                res.append(" (").append(file.getSampleIds().size()).append(" samples)\n");
            }

            // Cohorts of the study, numbered from 1.
            res.append("\tNum. cohorts: ").append(study.getCohorts().size()).append("\n");
            int cohortCounter = 0;
            for (Cohort cohort : study.getCohorts()) {
                cohortCounter++;
                res.append("\t\tCohort #").append(cohortCounter).append(": ").append(cohort.getId());
                res.append(" (").append(cohort.getSampleIds().size()).append(" samples)\n");
            }
        }
    }

    System.out.println(res.toString());
}
/**
 * Look up the metadata of a study by its ID.
 * <p>
 * When the ID is not null and the underlying study list is null, the list is replaced
 * by a new empty list before searching.
 *
 * @param studyId Study ID
 * @return The first VariantStudyMetadata whose ID equals {@code studyId}; null when the
 *         ID is null (an error is logged) or when no study matches
 */
public VariantStudyMetadata getVariantStudyMetadata(String studyId) {
    if (studyId == null) {
        logger.error("Study ID is null");
        return null;
    }

    // Lazily create the study list so that the search below never sees null.
    if (variantMetadata.getStudies() == null) {
        variantMetadata.setStudies(new ArrayList<>());
    }

    return variantMetadata.getStudies().stream()
            .filter(candidate -> studyId.equals(candidate.getId()))
            .findFirst()
            .orElse(null);
}
/** * Remove a variant study metadata (from study ID). * * @param studyId Study ID */ public void removeVariantStudyMetadata(String studyId) { // Sanity check if (StringUtils.isEmpty(studyId)) { logger.error("Variant study metadata ID {} is null or empty.", studyId); return; } VariantStudyMetadata variantStudyMetadata = getVariantStudyMetadata(studyId); if (variantStudyMetadata == null) { logger.error("Dataset not found. Check your study ID: '{}'", studyId); return; } for (int i = 0; i < variantMetadata.getStudies().size(); i++) { if (studyId.equals(variantMetadata.getStudies().get(i).getId())) { variantMetadata.getStudies().remove(i); return; } } }
public void open(OutputStream os, Options... writerOptions) { variantConverter = new VariantAvroToVariantContextConverter(metadata.getId(), sampleNames, formats, annotations); // create the variant context writer outputStream = Objects.requireNonNull(os); writer = VcfUtils.createVariantContextWriter(outputStream, vcfHeader.getSequenceDictionary(), writerOptions); // write VCF header writer.writeHeader(vcfHeader); }
/**
 * Set up the VCF output from the first study of the metadata.
 * <p>
 * Collects the sample IDs of all individuals of that study, converts the study metadata
 * into a VCF header, creates the variant context writer, writes the header and finally
 * builds the variant converter.
 *
 * @return always {@code true}
 */
@Override
public boolean pre() {
    final String studyId = metadata.getStudies().get(0).getId();

    // Sample IDs, in the order individuals (and their samples) are listed.
    final List<String> sampleIds = metadata.getStudies().get(0).getIndividuals()
            .stream()
            .flatMap(individual -> individual.getSamples().stream().map(Sample::getId))
            .collect(Collectors.toList());

    final VCFHeader header = new VariantStudyMetadataToVCFHeaderConverter()
            .convert(metadata.getStudies().get(0), annotations);
    variantContextWriter = VcfUtils.createVariantContextWriter(
            outputStream, header.getSequenceDictionary(), Options.ALLOW_MISSING_FIELDS_IN_HEADER);
    variantContextWriter.writeHeader(header);

    converter = newConverter(studyId, sampleIds, annotations);
    return true;
}
/** * Add a variant study metadata. Study ID must not exist. * * @param variantStudyMetadata Variant study metadata to add */ public void addVariantDatasetMetadata(VariantStudyMetadata variantStudyMetadata) { if (variantStudyMetadata != null) { VariantStudyMetadata found = getVariantStudyMetadata(variantStudyMetadata.getId()); // if there is not any study with that ID then we add the new one // TODO we need to think what to do when it exists, should we throw an exception? if (found == null) { if (variantMetadata.getStudies() == null) { variantMetadata.setStudies(new ArrayList<>()); } variantMetadata.getStudies().add(variantStudyMetadata); } else { logger.error("Study ID already exists"); } } }
/**
 * Build a converter from study metadata, delegating to the three-argument constructor.
 * <p>
 * Only the first file of the study is consulted: its sample IDs are turned into a
 * samples-position map via {@code VariantFileMetadata.getSamplesPositionMap}, and its ID
 * is passed along together with the study ID.
 *
 * @param metadata Study metadata; its file list is accessed at index 0
 */
public VcfSliceToVariantListConverter(VariantStudyMetadata metadata) { this(VariantFileMetadata.getSamplesPositionMap(metadata.getFiles().get(0).getSampleIds()), metadata.getFiles().get(0).getId(), metadata.getId()); }
.map(a -> new AlternateCoordinate(chromosome, null, null, null, a, null)) .collect(Collectors.toList()); StudyEntry entry = new StudyEntry(metadata.getId(), secondaryAlternatesMap, Arrays.asList(format.split(":"))); VariantFileMetadata fileMetadata = new VariantFileMetadata(metadata.getFiles().get(0)); entry.setFileId(fileMetadata.getId());
Variant variant = iterator.next(); List<List<String>> sampleData = variant.getStudiesMap().get(studyMetadata.getId()).getSamplesData(); assert(sampleData.size() == sampleNames.size()); for (int i = 0; i < sampleData.size(); i++) {
/**
 * Create a calculator of global statistics for the whole study, i.e. cohort ALL.
 * <p>
 * Collects the IDs of all files and samples of the study, and registers a fresh
 * {@code VariantSetStats} under the default cohort in the study statistics, creating the
 * statistics containers in the metadata when they are missing.
 *
 * @param metadata VariantStudyMetadata
 */
public VariantSetStatsCalculator(VariantStudyMetadata metadata) {
    this.studyId = metadata.getId();
    this.metadata = metadata;

    // IDs of every file in the study.
    this.files = metadata.getFiles()
            .stream()
            .map(org.opencb.biodata.models.variant.metadata.VariantFileMetadata::getId)
            .collect(Collectors.toSet());

    // Union of the sample IDs of every file in the study.
    this.samples = metadata.getFiles()
            .stream()
            .flatMap(file -> file.getSampleIds().stream())
            .collect(Collectors.toSet());

    this.header = metadata.getAggregatedHeader();
    this.stats = new VariantSetStats();

    // Make sure the metadata has somewhere to hold the cohort statistics.
    if (metadata.getStats() == null) {
        metadata.setStats(new VariantStudyStats(new HashMap<>(), new HashMap<>()));
    }
    if (metadata.getStats().getCohortStats() == null) {
        metadata.getStats().setCohortStats(new HashMap<>());
    }

    metadata.getStats().getCohortStats().put(StudyEntry.DEFAULT_COHORT, this.stats.getImpl());
}
try { for (VariantStudyMetadata studyMetadata : metadata.getStudies()) { String studyId = studyMetadata.getId(); fillStudy(studyId, studyMetadata);
/**
 * Build the variant metadata for the given study configurations and replace the file
 * entries of every study by the file metadata read through {@code scm}.
 * <p>
 * For each study, the numeric file IDs are resolved from the study configuration (first
 * by file ID, then by file path). When at least one file is listed, the matching file
 * metadata is queried and each result replaces the entry with the same ID in the study.
 *
 * @param studyConfigurations Study configurations to convert
 * @param projectMetadata     Project metadata passed to the converter
 * @param returnedSamples     Returned samples passed to the converter
 * @param returnedFiles       Returned files passed to the converter
 * @param queryOptions        Query options (not read by this method)
 * @return The resulting variant metadata
 * @throws StorageEngineException declared by this method's signature
 */
protected VariantMetadata makeVariantMetadata(List<StudyConfiguration> studyConfigurations, ProjectMetadata projectMetadata,
                                              Map<Integer, List<Integer>> returnedSamples,
                                              Map<Integer, List<Integer>> returnedFiles,
                                              QueryOptions queryOptions) throws StorageEngineException {
    VariantMetadata variantMetadata = new VariantMetadataConverter()
            .toVariantMetadata(studyConfigurations, projectMetadata, returnedSamples, returnedFiles);

    Map<String, StudyConfiguration> configurationsByName = studyConfigurations.stream()
            .collect(Collectors.toMap(StudyConfiguration::getStudyName, Function.identity()));

    for (VariantStudyMetadata study : variantMetadata.getStudies()) {
        StudyConfiguration configuration = configurationsByName.get(study.getId());

        // Resolve each file to its numeric ID: by file ID first, by path as a fallback.
        List<Integer> fileIds = study.getFiles().stream()
                .map(file -> {
                    Integer byId = configuration.getFileIds().get(file.getId());
                    return byId != null ? byId : configuration.getFileIds().get(file.getPath());
                })
                .collect(Collectors.toList());

        if (fileIds.isEmpty()) {
            continue;
        }

        Query query = new Query()
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.STUDY_ID.key(), configuration.getStudyId())
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.FILE_ID.key(), fileIds);

        // Swap every entry of the study for the one returned by the query with the same ID.
        scm.variantFileMetadataIterator(query, new QueryOptions()).forEachRemaining(stored -> {
            study.getFiles().removeIf(existing -> existing.getId().equals(stored.getId()));
            study.getFiles().add(stored.getImpl());
        });
    }

    return variantMetadata;
}
/**
 * Register file 2 in {@code studyConfiguration2} (file name and samples, unless already
 * present), print what is about to be loaded, and load the file-2 variants.
 *
 * @param chromosome  Chromosome used to create the variants
 * @param fileId      Numeric ID of the file
 * @param chromosomes Chromosomes forwarded to {@code loadFile}
 * @return Result of {@code loadFile}
 * @throws StorageEngineException declared by this method's signature
 */
public MongoDBVariantWriteResult loadFile2(String chromosome, Integer fileId, List<String> chromosomes)
        throws StorageEngineException {
    studyConfiguration2.getFileIds().putIfAbsent(getFileName(fileId), fileId);
    studyConfiguration2.getSamplesInFiles().putIfAbsent(fileId, file2SampleIds);

    System.out.println("chromosome = " + chromosome);
    System.out.println("fileId = " + fileId);

    // Translate the numeric sample IDs back to their names for the trace output.
    final List<String> sampleNames = file2SampleIds.stream()
            .map(sampleId -> studyConfiguration2.getSampleIds().inverse().get(sampleId))
            .collect(Collectors.toList());
    System.out.println("samples = " + sampleNames + " : " + file2SampleIds);

    final List<Variant> variants = createFile2Variants(chromosome, fileId.toString(), metadata2.getId());
    return loadFile(studyConfiguration2, variants, fileId, chromosomes);
}
/**
 * Register file 3 in {@code studyConfiguration2} (file name and samples, unless already
 * present), print what is about to be loaded, and load the file-3 variants.
 * <p>
 * NOTE(review): file 3 is registered and loaded with {@code studyConfiguration2} while the
 * study ID comes from {@code metadata3} - confirm this pairing is intended.
 *
 * @param chromosome  Chromosome used to create the variants
 * @param fileId      Numeric ID of the file
 * @param chromosomes Chromosomes forwarded to {@code loadFile}
 * @return Result of {@code loadFile}
 * @throws StorageEngineException declared by this method's signature
 */
public MongoDBVariantWriteResult loadFile3(String chromosome, Integer fileId, List<String> chromosomes)
        throws StorageEngineException {
    studyConfiguration2.getFileIds().putIfAbsent(getFileName(fileId), fileId);
    studyConfiguration2.getSamplesInFiles().putIfAbsent(fileId, file3SampleIds);

    System.out.println("chromosome = " + chromosome);
    System.out.println("fileId = " + fileId);

    // Translate the numeric sample IDs back to their names for the trace output.
    final List<String> sampleNames = file3SampleIds.stream()
            .map(sampleId -> studyConfiguration2.getSampleIds().inverse().get(sampleId))
            .collect(Collectors.toList());
    System.out.println("samples = " + sampleNames + " : " + file3SampleIds);

    final List<Variant> variants = createFile3Variants(chromosome, fileId.toString(), metadata3.getId());
    return loadFile(studyConfiguration2, variants, fileId, chromosomes);
}
/**
 * Create the file-1 variants with default arguments: chromosome "X", the ID of the first
 * file listed in {@code metadata1}, and the ID of {@code metadata1} itself.
 *
 * @return The list produced by the three-argument {@code createFile1Variants} overload
 */
public List<Variant> createFile1Variants() { return createFile1Variants("X", metadata1.getFiles().get(0).getId(), metadata1.getId()); } public List<Variant> createFile2Variants() {
/**
 * Create the file-2 variants with default arguments: chromosome "X", the ID of the first
 * file listed in {@code metadata2}, and the ID of {@code metadata2} itself.
 *
 * @return The list produced by the three-argument {@code createFile2Variants} overload
 */
public List<Variant> createFile2Variants() { return createFile2Variants("X", metadata2.getFiles().get(0).getId(), metadata2.getId()); } public List<Variant> createFile3Variants() {
/**
 * Create the file-3 variants with default arguments: chromosome "X", the ID of the first
 * file listed in {@code metadata3}, and the ID of {@code metadata3} itself.
 *
 * @return The list produced by the three-argument {@code createFile3Variants} overload
 */
public List<Variant> createFile3Variants() {
    final String fileId = metadata3.getFiles().get(0).getId();
    final String studyId = metadata3.getId();
    return createFile3Variants("X", fileId, studyId);
}
/**
 * Convert every study of a variant metadata into a {@code StudyConfiguration}.
 * <p>
 * Numeric IDs are handed out from a single counter starting at 1, in this order for each
 * study: the study itself, its samples, its files, its cohorts. Files are registered as
 * indexed and keyed by their path; cohorts are registered as having calculated stats.
 * The aggregated header, aggregation and attributes of the study are copied over.
 *
 * @param variantMetadata Variant metadata to convert
 * @return One study configuration per study, in the order of the input studies
 */
public List<StudyConfiguration> toStudyConfigurations(VariantMetadata variantMetadata) {
    List<StudyConfiguration> configurations = new ArrayList<>(variantMetadata.getStudies().size());
    // Single counter shared by studies, samples, files and cohorts.
    int nextId = 1;
    VariantMetadataManager metadataManager = new VariantMetadataManager().setVariantMetadata(variantMetadata);

    for (VariantStudyMetadata studyMetadata : variantMetadata.getStudies()) {
        StudyConfiguration sc = new StudyConfiguration(nextId++, studyMetadata.getId());
        configurations.add(sc);

        // Samples first, so that files and cohorts below can refer to their numeric IDs.
        for (Sample sample : metadataManager.getSamples(studyMetadata.getId())) {
            sc.getSampleIds().put(sample.getId(), nextId++);
        }

        for (VariantFileMetadata fileMetadata : studyMetadata.getFiles()) {
            int fileId = nextId++;
            sc.getIndexedFiles().add(fileId);
            sc.getFileIds().put(fileMetadata.getPath(), fileId);
            sc.getSamplesInFiles().put(fileId, new LinkedHashSet<>(toSampleIds(sc, fileMetadata.getSampleIds())));
        }

        for (Cohort cohort : studyMetadata.getCohorts()) {
            int cohortId = nextId++;
            sc.getCohortIds().put(cohort.getId(), cohortId);
            sc.getCalculatedStats().add(cohortId);
            List<Integer> cohortSampleIds = toSampleIds(sc, cohort.getSampleIds());
            sc.getCohorts().put(cohortId, new HashSet<>(cohortSampleIds));
        }

        sc.setVariantHeader(studyMetadata.getAggregatedHeader());
        sc.setAggregation(studyMetadata.getAggregation());
        studyMetadata.getAttributes().forEach(sc.getAttributes()::put);
    }

    return configurations;
}
converter = new VariantContextToVariantConverter(metadata.getId(), fileMetadata.getId(), header.getSampleNamesInOrder()); fileMetadata.setHeader(new VCFHeaderToVariantFileHeaderConverter().convert(header)); fileMetadata.setSampleIds(header.getSampleNamesInOrder());