/**
 * Prepares this writer before any variant is written.
 * <p>
 * Takes the first study of the metadata, collects the IDs of all the samples of its individuals,
 * converts the study metadata into a VCF header, creates the variant context writer on the output
 * stream, writes the header and finally creates the converter used by this writer.
 *
 * @return always {@code true}
 */
@Override
public boolean pre() {
    final String studyId = metadata.getStudies().get(0).getId();

    // Sample IDs in the order given by the individuals of the first study and their samples.
    final List<String> sampleIds = metadata.getStudies().get(0).getIndividuals()
            .stream()
            .flatMap(individual -> individual.getSamples().stream().map(Sample::getId))
            .collect(Collectors.toList());

    VariantStudyMetadataToVCFHeaderConverter headerConverter = new VariantStudyMetadataToVCFHeaderConverter();
    VCFHeader header = headerConverter.convert(metadata.getStudies().get(0), annotations);

    // The header must be written before any record.
    variantContextWriter = VcfUtils.createVariantContextWriter(
            outputStream, header.getSequenceDictionary(), Options.ALLOW_MISSING_FIELDS_IN_HEADER);
    variantContextWriter.writeHeader(header);

    converter = newConverter(studyId, sampleIds, annotations);
    return true;
}
/** * Calculate global statistics for the whole study. i.e. cohort ALL * @param metadata VariantStudyMetadata */ public VariantSetStatsCalculator(VariantStudyMetadata metadata) { this.studyId = metadata.getId(); this.metadata = metadata; files = metadata.getFiles() .stream() .map(org.opencb.biodata.models.variant.metadata.VariantFileMetadata::getId) .collect(Collectors.toSet()); samples = metadata.getFiles() .stream() .flatMap(fileMetadata -> fileMetadata.getSampleIds().stream()) .collect(Collectors.toSet()); header = metadata.getAggregatedHeader(); stats = new VariantSetStats(); if (metadata.getStats() == null) { metadata.setStats(new VariantStudyStats(new HashMap<>(), new HashMap<>())); } if (metadata.getStats().getCohortStats() == null) { metadata.getStats().setCohortStats(new HashMap<>()); } metadata.getStats().getCohortStats().put(StudyEntry.DEFAULT_COHORT, stats.getImpl()); }
/**
 * Print to the standard output a summary of the variant metadata: the number of studies and, for
 * each study, its files and cohorts together with their number of samples.
 * <p>
 * Lists that have not been initialised (i.e. are {@code null}) are reported as empty instead of
 * failing with a {@link NullPointerException}.
 */
public void printSummary() {
    StringBuilder res = new StringBuilder();
    res.append("Num. studies: ").append(sizeOrZero(variantMetadata.getStudies())).append("\n");

    if (variantMetadata.getStudies() != null) {
        int studyCounter = 0;
        for (VariantStudyMetadata study: variantMetadata.getStudies()) {
            studyCounter++;
            res.append("\tStudy #").append(studyCounter).append(": ").append(study.getId()).append("\n");

            res.append("\tNum. files: ").append(sizeOrZero(study.getFiles())).append("\n");
            if (study.getFiles() != null) {
                int fileCounter = 0;
                for (VariantFileMetadata file: study.getFiles()) {
                    fileCounter++;
                    res.append("\t\tFile #").append(fileCounter).append(": ").append(file.getId());
                    res.append(" (").append(sizeOrZero(file.getSampleIds())).append(" samples)\n");
                }
            }

            res.append("\tNum. cohorts: ").append(sizeOrZero(study.getCohorts())).append("\n");
            if (study.getCohorts() != null) {
                int cohortCounter = 0;
                for (Cohort cohort: study.getCohorts()) {
                    cohortCounter++;
                    res.append("\t\tCohort #").append(cohortCounter).append(": ").append(cohort.getId());
                    res.append(" (").append(sizeOrZero(cohort.getSampleIds())).append(" samples)\n");
                }
            }
        }
    }
    System.out.println(res);
}

/** Returns the size of the collection, or 0 when the collection is {@code null}. */
private static int sizeOrZero(java.util.Collection<?> collection) {
    return collection == null ? 0 : collection.size();
}
/**
 * Builds one {@link StudyConfiguration} per study found in the given variant metadata.
 * <p>
 * Numeric IDs are taken from a single counter starting at 1 that is shared by studies, samples,
 * files and cohorts, and that keeps growing across studies. For each study the samples, the files
 * (marked as indexed, keyed by their path), the cohorts (marked as having calculated stats), the
 * aggregated header, the aggregation and the attributes are copied over.
 *
 * @param variantMetadata Variant metadata to convert
 * @return List of study configurations, in the same order as the studies
 */
public List<StudyConfiguration> toStudyConfigurations(VariantMetadata variantMetadata) {
    List<StudyConfiguration> configurations = new ArrayList<>(variantMetadata.getStudies().size());
    int nextId = 1;
    VariantMetadataManager manager = new VariantMetadataManager().setVariantMetadata(variantMetadata);

    for (VariantStudyMetadata study : variantMetadata.getStudies()) {
        StudyConfiguration configuration = new StudyConfiguration(nextId++, study.getId());
        configurations.add(configuration);

        // Samples first: files and cohorts below resolve their samples against these IDs.
        for (Sample sample : manager.getSamples(study.getId())) {
            configuration.getSampleIds().put(sample.getId(), nextId++);
        }

        for (VariantFileMetadata file : study.getFiles()) {
            int fileId = nextId++;
            configuration.getIndexedFiles().add(fileId);
            configuration.getFileIds().put(file.getPath(), fileId);
            List<Integer> fileSampleIds = toSampleIds(configuration, file.getSampleIds());
            configuration.getSamplesInFiles().put(fileId, new LinkedHashSet<>(fileSampleIds));
        }

        for (Cohort cohort : study.getCohorts()) {
            int cohortId = nextId++;
            configuration.getCohortIds().put(cohort.getId(), cohortId);
            configuration.getCalculatedStats().add(cohortId);
            List<Integer> cohortSampleIds = toSampleIds(configuration, cohort.getSampleIds());
            configuration.getCohorts().put(cohortId, new HashSet<>(cohortSampleIds));
        }

        configuration.setVariantHeader(study.getAggregatedHeader());
        configuration.setAggregation(study.getAggregation());
        study.getAttributes().forEach(configuration.getAttributes()::put);
    }

    return configurations;
}
/**
 * Creates a reader over the file at the given path.
 * <p>
 * No input stream is provided with this constructor ({@code inputStream} is left as {@code null})
 * and the reader is flagged to close the input stream. The file metadata wraps the first file of
 * the given study metadata.
 *
 * @param input      Path of the input file. Must not be null
 * @param metadata   Study metadata; its first file is used as file metadata. Must not be null
 * @param normalizer Variant normalizer, stored as given
 * @throws NullPointerException if {@code input} or {@code metadata} is null
 */
public VariantVcfHtsjdkReader(Path input, VariantStudyMetadata metadata, VariantNormalizer normalizer) {
    // Validate in the same order as the arguments are consumed.
    Objects.requireNonNull(input);
    Objects.requireNonNull(metadata);

    this.input = input;
    this.inputStream = null;
    this.metadata = metadata;
    this.fileMetadata = new VariantFileMetadata(metadata.getFiles().get(0));
    this.normalizer = normalizer;
    // Close input stream
    this.closeInputStream = true;
}
public VCFHeader convert(VariantStudyMetadata variantStudyMetadata, List<String> annotations) { VariantFileHeader header = variantStudyMetadata.getAggregatedHeader(); if (header == null) { if (variantStudyMetadata.getFiles() != null && variantStudyMetadata.getFiles().size() == 1) { header = variantStudyMetadata.getFiles().get(0).getHeader(); for (Individual individual : variantStudyMetadata.getIndividuals()) { for (Sample sample : individual.getSamples()) { samples.add(sample.getId()); vcfHeader.addMetaDataLine(new VCFFilterHeaderLine(VCFConstants.PASSES_FILTERS_v4, "All filters passed")); for (Cohort cohort : variantStudyMetadata.getCohorts()) { String cohortName = cohort.getId(); if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) {
/** * Remove a variant study metadata (from study ID). * * @param studyId Study ID */ public void removeVariantStudyMetadata(String studyId) { // Sanity check if (StringUtils.isEmpty(studyId)) { logger.error("Variant study metadata ID {} is null or empty.", studyId); return; } VariantStudyMetadata variantStudyMetadata = getVariantStudyMetadata(studyId); if (variantStudyMetadata == null) { logger.error("Dataset not found. Check your study ID: '{}'", studyId); return; } for (int i = 0; i < variantMetadata.getStudies().size(); i++) { if (studyId.equals(variantMetadata.getStudies().get(i).getId())) { variantMetadata.getStudies().remove(i); return; } } }
for (Cohort cohort : variantStudyMetadata.getCohorts()) { cohort.setSampleIds(Collections.emptyList()); try { for (VariantStudyMetadata studyMetadata : metadata.getStudies()) { String studyId = studyMetadata.getId(); fillStudy(studyId, studyMetadata); Iterator<org.opencb.biodata.models.metadata.Individual> iterator = studyMetadata.getIndividuals().iterator(); while (iterator.hasNext()) { org.opencb.biodata.models.metadata.Individual individual = iterator.next();
return; if (variantStudyMetadata.getFiles() == null) { variantStudyMetadata.setFiles(new ArrayList<>()); for (VariantFileMetadata file: variantStudyMetadata.getFiles()) { if (file.getId() != null && file.getId().equals(fileMetadata.getId())) { logger.error("Variant file metadata with id '{}' already exists in study '{}'", fileMetadata.getId(), if (variantStudyMetadata.getIndividuals() == null) { variantStudyMetadata.setIndividuals(new ArrayList<>()); if (!variantStudyMetadata.getIndividuals().isEmpty()) { for (org.opencb.biodata.models.metadata.Individual individual: variantStudyMetadata.getIndividuals()) { for (Sample sample: individual.getSamples()) { if (sampleId.equals(sample.getId())) { individual.setSamples(samples); variantStudyMetadata.getIndividuals().add(individual); variantStudyMetadata.getFiles().add(fileMetadata);
/** * Remove an individual (from individual ID) of a given variant study metadata (from study ID). * * @param individualId Individual ID * @param studyId Study ID */ public void removeIndividual(String individualId, String studyId) { // Sanity check if (StringUtils.isEmpty(individualId)) { logger.error("Individual ID {} is null or empty.", individualId); return; } VariantStudyMetadata variantStudyMetadata = getVariantStudyMetadata(studyId); if (variantStudyMetadata == null) { logger.error("Study not found. Check your study ID: '{}'", studyId); return; } if (variantStudyMetadata.getIndividuals() != null) { for (int i = 0; i < variantStudyMetadata.getIndividuals().size(); i++) { if (individualId.equals(variantStudyMetadata.getIndividuals().get(i).getId())) { variantStudyMetadata.getIndividuals().remove(i); return; } } } }
/** * Add an individual to a given variant study metadata (from study ID). * * @param individual Individual to add * @param studyId Study ID */ public void addIndividual(org.opencb.biodata.models.metadata.Individual individual, String studyId) { // Sanity check if (individual == null || StringUtils.isEmpty(individual.getId())) { logger.error("Individual (or its ID) is null or empty."); return; } VariantStudyMetadata variantStudyMetadata = getVariantStudyMetadata(studyId); if (variantStudyMetadata == null) { logger.error("Study not found. Check your study ID: '{}'", studyId); return; } if (variantStudyMetadata.getIndividuals() == null) { variantStudyMetadata.setIndividuals(new ArrayList<>()); } for (org.opencb.biodata.models.metadata.Individual indi: variantStudyMetadata.getIndividuals()) { if (indi.getId() != null && indi.getId().equals(individual.getId())) { logger.error("Individual with id '{}' already exists in study '{}'", individual.getId(), studyId); return; } } variantStudyMetadata.getIndividuals().add(individual); }
/** * Add a cohort to a given variant study metadata (from study ID). * * @param cohort Cohort to add * @param studyId Study ID */ public void addCohort(Cohort cohort, String studyId) { // Sanity check if (cohort == null || StringUtils.isEmpty(cohort.getId())) { logger.error("Cohort (or its ID) is null or empty."); return; } VariantStudyMetadata variantStudyMetadata = getVariantStudyMetadata(studyId); if (variantStudyMetadata == null) { logger.error("Study not found. Check your study ID: '{}'", studyId); return; } if (variantStudyMetadata.getCohorts() == null) { variantStudyMetadata.setCohorts(new ArrayList<>()); } for (Cohort coho: variantStudyMetadata.getCohorts()) { if (coho.getId() != null && coho.getId().equals(cohort.getId())) { logger.error("Cohort with id '{}' already exists in study '{}'", cohort.getId(), studyId); return; } } variantStudyMetadata.getCohorts().add(cohort); }
/**
 * Builds a study metadata that contains only this file.
 * <p>
 * The resulting study has the given ID, a single-element file list holding the object returned
 * by {@code getImpl()}, and a sample set type of {@code SampleSetType.UNKNOWN}.
 *
 * @param studyId ID to give to the new study metadata
 * @return A new VariantStudyMetadata
 */
public VariantStudyMetadata toVariantStudyMetadata(String studyId) {
    VariantStudyMetadata.Builder builder = VariantStudyMetadata.newBuilder();
    builder.setId(studyId);
    builder.setFiles(Collections.singletonList(getImpl()));
    builder.setSampleSetType(SampleSetType.UNKNOWN);
    return builder.build();
}
/** * Remove a cohort (from cohort ID) of a given variant study metadata (from study ID). * * @param cohortId Cohort ID * @param studyId Study ID */ public void removeCohort(String cohortId, String studyId) { // Sanity check if (StringUtils.isEmpty(cohortId)) { logger.error("Cohort ID {} is null or empty.", cohortId); return; } VariantStudyMetadata variantStudyMetadata = getVariantStudyMetadata(studyId); if (variantStudyMetadata == null) { logger.error("Study not found. Check your study ID: '{}'", studyId); return; } if (variantStudyMetadata.getCohorts() != null) { for (int i = 0; i < variantStudyMetadata.getCohorts().size(); i++) { if (cohortId.equals(variantStudyMetadata.getCohorts().get(i).getId())) { variantStudyMetadata.getCohorts().remove(i); return; } } } }
/**
 * Creates a reader bound to the given study metadata.
 * <p>
 * The file metadata wraps the first file of the study. When the study has no files, a new file
 * metadata with empty ID and path is created and its underlying object is added to the study, so
 * the given {@code metadata} is modified in that case.
 *
 * @param metadataPath Path stored as the metadata path of this reader
 * @param metadata     Study metadata; its list of files must not be null
 */
public AbstractVariantReader(Path metadataPath, VariantStudyMetadata metadata) {
    this.metadataPath = metadataPath;
    this.samplesPositions = null;

    if (!metadata.getFiles().isEmpty()) {
        this.fileMetadata = new VariantFileMetadata(metadata.getFiles().get(0));
    } else {
        // No file registered yet: create a blank one and attach it to the study.
        this.fileMetadata = new VariantFileMetadata("", "");
        metadata.getFiles().add(this.fileMetadata.getImpl());
    }
}
/**
 * Look up a variant study metadata by its study ID.
 * <p>
 * As a side effect, when the study ID is not null and the variant metadata has no list of studies
 * yet, an empty list is created and set.
 *
 * @param studyId Study ID
 * @return The first study whose ID equals {@code studyId}; {@code null} when the ID is null (an
 *         error is logged) or when no study matches
 */
public VariantStudyMetadata getVariantStudyMetadata(String studyId) {
    if (studyId == null) {
        logger.error("Study ID is null");
        return null;
    }

    if (variantMetadata.getStudies() == null) {
        variantMetadata.setStudies(new ArrayList<>());
    }

    return variantMetadata.getStudies().stream()
            .filter(candidate -> studyId.equals(candidate.getId()))
            .findFirst()
            .orElse(null);
}
/**
 * Finds the individual that has a sample with the given name.
 *
 * @param sampleName    Sample name, compared against the ID of each sample
 * @param studyMetadata Study metadata whose individuals are searched, in order
 * @return The first individual having a sample whose ID equals {@code sampleName}, or
 *         {@code null} when there is none
 */
private Individual getIndividualBySampleName(String sampleName, VariantStudyMetadata studyMetadata) {
    return studyMetadata.getIndividuals().stream()
            .filter(individual -> individual.getSamples().stream()
                    .anyMatch(sample -> sampleName.equals(sample.getId())))
            .findFirst()
            .orElse(null);
}
}
headerSimpleLines); return VariantStudyMetadata.newBuilder() .setId(studyConfiguration.getStudyName()) .setDescription(null)