public VariantVcfHtsjdkReader(Path input, VariantStudyMetadata metadata, VariantNormalizer normalizer) { this.input = Objects.requireNonNull(input); this.inputStream = null; this.metadata = Objects.requireNonNull(metadata); this.fileMetadata = new VariantFileMetadata(metadata.getFiles().get(0)); this.normalizer = normalizer; this.closeInputStream = true; // Close input stream }
public VariantVcfHtsjdkReader(InputStream inputStream, VariantStudyMetadata metadata, VariantNormalizer normalizer) { this.input = null; this.inputStream = Objects.requireNonNull(inputStream); this.metadata = Objects.requireNonNull(metadata); this.fileMetadata = new VariantFileMetadata(metadata.getFiles().get(0)); this.normalizer = normalizer; this.closeInputStream = false; // Do not close input stream }
/**
 * Initialises the reader with the metadata path and the file metadata to work on.
 *
 * If the study metadata already lists files, the first one is wrapped and used.
 * Otherwise a file metadata built from two empty strings is created, and its
 * underlying implementation is appended to the study's file list.
 *
 * @param metadataPath Path stored as the metadata path of this reader.
 * @param metadata     Study metadata whose file list is read (and extended when empty).
 */
public AbstractVariantReader(Path metadataPath, VariantStudyMetadata metadata) {
    this.metadataPath = metadataPath;
    this.samplesPositions = null;

    if (!metadata.getFiles().isEmpty()) {
        // Reuse the first file already registered in the study.
        this.fileMetadata = new VariantFileMetadata(metadata.getFiles().get(0));
    } else {
        // No file registered yet: create a placeholder and register it in the study.
        this.fileMetadata = new VariantFileMetadata("", "");
        metadata.getFiles().add(this.fileMetadata.getImpl());
    }
}
/**
 * Returns the file metadata of this reader, built from the first file entry of the study metadata.
 *
 * A new {@code VariantFileMetadata} wrapper is created on every call. The first
 * entry of the file list is accessed unconditionally, so the study metadata is
 * expected to contain at least one file.
 *
 * @return a new wrapper around the first file entry of the study metadata.
 */
@Override public VariantFileMetadata getVariantFileMetadata() { return new VariantFileMetadata(metadata.getFiles().get(0)); }
/** * Remove a variant file metadata (from file ID) of a given variant study metadata (from study ID). * * @param fileId File ID * @param studyId Study ID */ public void removeFile(String fileId, String studyId) { // Sanity check if (StringUtils.isEmpty(fileId)) { logger.error("Variant file metadata ID {} is null or empty.", fileId); return; } VariantStudyMetadata variantStudyMetadata = getVariantStudyMetadata(studyId); if (variantStudyMetadata == null) { logger.error("Study not found. Check your study ID: '{}'", studyId); return; } if (variantStudyMetadata.getFiles() != null) { for (int i = 0; i < variantStudyMetadata.getFiles().size(); i++) { if (fileId.equals(variantStudyMetadata.getFiles().get(i).getId())) { variantStudyMetadata.getFiles().remove(i); return; } } } }
/**
 * Print to the standard output a summary of the variant metadata manager.
 *
 * The summary lists the number of studies and, for each study, its ID, its files
 * and its cohorts, each with the number of sample IDs it holds.
 */
public void printSummary() {
    final StringBuilder summary = new StringBuilder();
    summary.append("Num. studies: ").append(variantMetadata.getStudies().size()).append("\n");

    int studyNumber = 0;
    for (VariantStudyMetadata study : variantMetadata.getStudies()) {
        summary.append("\tStudy #").append(++studyNumber).append(": ").append(study.getId()).append("\n");

        // Files of the study, numbered from 1.
        summary.append("\tNum. files: ").append(study.getFiles().size()).append("\n");
        int fileNumber = 0;
        for (VariantFileMetadata file : study.getFiles()) {
            summary.append("\t\tFile #").append(++fileNumber).append(": ").append(file.getId());
            summary.append(" (").append(file.getSampleIds().size()).append(" samples)\n");
        }

        // Cohorts of the study, numbered from 1.
        summary.append("\tNum. cohorts: ").append(study.getCohorts().size()).append("\n");
        int cohortNumber = 0;
        for (Cohort cohort : study.getCohorts()) {
            summary.append("\t\tCohort #").append(++cohortNumber).append(": ").append(cohort.getId());
            summary.append(" (").append(cohort.getSampleIds().size()).append(" samples)\n");
        }
    }

    System.out.println(summary.toString());
}
@Override public boolean pre() { try { processHeader(); // Copy all the read metadata to the VariantSource object // TODO May it be that Vcf4 wasn't necessary anymore? // This Vcf4 object is not necessary anymore. Do not include it's information. // The header parser contains bugs and misses information. // Use htsjdk parser instead // source.addMetadata("fileformat", vcf4.getFileFormat()); // source.addMetadata("INFO", vcf4.getInfo().values()); // source.addMetadata("FILTER", vcf4.getFilter().values()); // source.addMetadata("FORMAT", vcf4.getFormat().values()); // for (Map.Entry<String, String> otherMeta : vcf4.getMetaInformation().entrySet()) { // source.addMetadata(otherMeta.getKey(), otherMeta.getValue()); // } metadata.getFiles().get(0).setSampleIds(vcf4.getSampleNames()); } catch (IOException | FileFormatException ex) { Logger.getLogger(VariantVcfReader.class.getName()).log(Level.SEVERE, null, ex); return false; } return true; }
/**
 * Calculate global statistics for the whole study. i.e. cohort ALL
 *
 * Collects the IDs of all files and all sample IDs of the study, and registers
 * a new stats object under {@code StudyEntry.DEFAULT_COHORT} in the study
 * metadata, creating the stats containers first when they are missing.
 *
 * @param metadata VariantStudyMetadata
 */
public VariantSetStatsCalculator(VariantStudyMetadata metadata) {
    this.studyId = metadata.getId();
    this.metadata = metadata;

    // IDs of every file in the study.
    this.files = metadata.getFiles()
            .stream()
            .map(file -> file.getId())
            .collect(Collectors.toSet());

    // Union of the sample IDs of every file in the study.
    this.samples = metadata.getFiles()
            .stream()
            .flatMap(file -> file.getSampleIds().stream())
            .collect(Collectors.toSet());

    this.header = metadata.getAggregatedHeader();
    this.stats = new VariantSetStats();

    // Make sure the study has somewhere to store the cohort stats before registering ours.
    if (metadata.getStats() == null) {
        metadata.setStats(new VariantStudyStats(new HashMap<>(), new HashMap<>()));
    }
    if (metadata.getStats().getCohortStats() == null) {
        metadata.getStats().setCohortStats(new HashMap<>());
    }
    metadata.getStats().getCohortStats().put(StudyEntry.DEFAULT_COHORT, this.stats.getImpl());
}
.collect(Collectors.toList()); StudyEntry entry = new StudyEntry(metadata.getId(), secondaryAlternatesMap, Arrays.asList(format.split(":"))); VariantFileMetadata fileMetadata = new VariantFileMetadata(metadata.getFiles().get(0)); entry.setFileId(fileMetadata.getId()); variant.addStudyEntry(entry);
/**
 * Builds the variant metadata for the given studies and replaces each file entry
 * with the file metadata returned by the file metadata iterator.
 *
 * For every study in the converted metadata, the numeric file IDs are resolved from
 * the study configuration (first by file ID, then by file path). When at least one
 * file is listed, the stored file metadata are queried and each result replaces the
 * entry with the same ID in the study.
 *
 * @param studyConfigurations Study configurations to convert.
 * @param projectMetadata     Project metadata passed to the converter.
 * @param returnedSamples     Samples passed to the converter.
 * @param returnedFiles       Files passed to the converter.
 * @param queryOptions        Query options (not read by this method).
 * @return the variant metadata with the file entries replaced.
 * @throws StorageEngineException declared by this method.
 */
protected VariantMetadata makeVariantMetadata(List<StudyConfiguration> studyConfigurations, ProjectMetadata projectMetadata,
                                              Map<Integer, List<Integer>> returnedSamples,
                                              Map<Integer, List<Integer>> returnedFiles, QueryOptions queryOptions)
        throws StorageEngineException {
    VariantMetadata metadata = new VariantMetadataConverter()
            .toVariantMetadata(studyConfigurations, projectMetadata, returnedSamples, returnedFiles);

    Map<String, StudyConfiguration> configurationsByName = studyConfigurations.stream()
            .collect(Collectors.toMap(StudyConfiguration::getStudyName, Function.identity()));

    for (VariantStudyMetadata studyMetadata : metadata.getStudies()) {
        StudyConfiguration studyConfiguration = configurationsByName.get(studyMetadata.getId());

        // Resolve the numeric ID of each file: look it up by file ID, fall back to the file path.
        List<Integer> fileIds = studyMetadata.getFiles().stream()
                .map(file -> {
                    Integer byId = studyConfiguration.getFileIds().get(file.getId());
                    return byId != null ? byId : studyConfiguration.getFileIds().get(file.getPath());
                })
                .collect(Collectors.toList());

        if (fileIds.isEmpty()) {
            continue;
        }

        Query query = new Query()
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.STUDY_ID.key(), studyConfiguration.getStudyId())
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.FILE_ID.key(), fileIds);

        // Swap every entry for the stored file metadata with the same ID.
        scm.variantFileMetadataIterator(query, new QueryOptions()).forEachRemaining(stored -> {
            studyMetadata.getFiles().removeIf(current -> current.getId().equals(stored.getId()));
            studyMetadata.getFiles().add(stored.getImpl());
        });
    }
    return metadata;
}
/**
 * Loads file 1 by delegating to the three-argument {@code loadFile1} with "X",
 * the ID of the first file in {@code metadata1} parsed as an integer, and an empty list.
 *
 * @return the result returned by the delegate.
 * @throws StorageEngineException declared by this method.
 */
public MongoDBVariantWriteResult loadFile1() throws StorageEngineException {
    final int fileId = Integer.parseInt(metadata1.getFiles().get(0).getId());
    return loadFile1("X", fileId, Collections.emptyList());
}
/**
 * Loads file 2 by delegating to the three-argument {@code loadFile2} with "X",
 * the ID of the first file in {@code metadata2} parsed as an integer, and an empty list.
 *
 * @return the result returned by the delegate.
 * @throws StorageEngineException declared by this method.
 */
public MongoDBVariantWriteResult loadFile2() throws StorageEngineException {
    final int fileId = Integer.parseInt(metadata2.getFiles().get(0).getId());
    return loadFile2("X", fileId, Collections.emptyList());
}
/**
 * Loads file 3 by delegating to the three-argument {@code loadFile3} with "X",
 * the ID of the first file in {@code metadata3} parsed as an integer, and an empty list.
 *
 * @return the result returned by the delegate.
 * @throws StorageEngineException declared by this method.
 */
public MongoDBVariantWriteResult loadFile3() throws StorageEngineException {
    final int fileId = Integer.parseInt(metadata3.getFiles().get(0).getId());
    return loadFile3("X", fileId, Collections.emptyList());
}
/**
 * Builds the converter and the study/file ID remapping tables.
 *
 * For every study configuration, the study name is mapped to its numeric study ID
 * and every file name is mapped to its numeric file ID (both stored as strings).
 * Additionally, for each file listed in the matching study metadata, the file
 * metadata ID is mapped to the numeric ID registered under the file's path, when
 * such an entry exists.
 *
 * A study configuration without matching study metadata, or whose study metadata
 * has no file list, is tolerated: only the mappings derived from the study
 * configuration itself are registered for it.
 *
 * @param studies        Study configurations to take the numeric IDs from.
 * @param metadata       Variant metadata used to resolve the file metadata of each study.
 * @param progressLogger Progress logger stored for later use.
 */
VariantToDocumentConverter(List<StudyConfiguration> studies, VariantMetadata metadata, ProgressLogger progressLogger) {
    DocumentToSamplesConverter samplesConverter = new DocumentToSamplesConverter(studies);
    DocumentToStudyVariantEntryConverter studyConverter = new DocumentToStudyVariantEntryConverter(false, samplesConverter);
    DocumentToVariantStatsConverter statsConverter = new DocumentToVariantStatsConverter(studies);
    variantConverter = new DocumentToVariantConverter(studyConverter, statsConverter);

    this.studiesIdRemap = new HashMap<>();
    this.fileIdRemap = new HashMap<>();

    VariantMetadataManager metadataManager = new VariantMetadataManager().setVariantMetadata(metadata);
    studies.forEach((sc) -> {
        VariantStudyMetadata studyMetadata = metadataManager.getVariantStudyMetadata(sc.getStudyName());

        this.studiesIdRemap.put(sc.getStudyName(), String.valueOf(sc.getStudyId()));
        sc.getFileIds().forEach((name, id) -> fileIdRemap.put(name, String.valueOf(id)));

        // The manager may not know this study, and a study may carry no file list.
        // In both cases there is no file metadata ID to alias, so skip that step
        // instead of failing with a NullPointerException.
        if (studyMetadata == null || studyMetadata.getFiles() == null) {
            return;
        }

        // Alias each file metadata ID to the numeric ID already registered under the file path.
        for (VariantFileMetadata fileMetadata : studyMetadata.getFiles()) {
            String id = fileIdRemap.get(fileMetadata.getPath());
            if (id != null) {
                fileIdRemap.put(fileMetadata.getId(), id);
            }
        }
    });

    this.progressLogger = progressLogger;
}
/**
 * Creates the variants of file 1 by delegating to the three-argument
 * {@code createFile1Variants} with "X", the ID of the first file in
 * {@code metadata1}, and the ID of {@code metadata1}.
 *
 * @return the list of variants returned by the delegate.
 */
public List<Variant> createFile1Variants() { return createFile1Variants("X", metadata1.getFiles().get(0).getId(), metadata1.getId()); } public List<Variant> createFile2Variants() {
/**
 * Creates the variants of file 2 by delegating to the three-argument
 * {@code createFile2Variants} with "X", the ID of the first file in
 * {@code metadata2}, and the ID of {@code metadata2}.
 *
 * @return the list of variants returned by the delegate.
 */
public List<Variant> createFile2Variants() { return createFile2Variants("X", metadata2.getFiles().get(0).getId(), metadata2.getId()); } public List<Variant> createFile3Variants() {
/**
 * Creates the variants of file 3 by delegating to the three-argument
 * {@code createFile3Variants} with "X", the ID of the first file in
 * {@code metadata3}, and the ID of {@code metadata3}.
 *
 * @return the list of variants returned by the delegate.
 */
public List<Variant> createFile3Variants() {
    final String fileId = metadata3.getFiles().get(0).getId();
    final String studyId = metadata3.getId();
    return createFile3Variants("X", fileId, studyId);
}
public VCFHeader convert(VariantStudyMetadata variantStudyMetadata, List<String> annotations) { VariantFileHeader header = variantStudyMetadata.getAggregatedHeader(); if (header == null) { if (variantStudyMetadata.getFiles() != null && variantStudyMetadata.getFiles().size() == 1) { header = variantStudyMetadata.getFiles().get(0).getHeader();
/**
 * Converts every study of the given variant metadata into a {@code StudyConfiguration}.
 *
 * Numeric IDs are taken from a single counter that starts at 1 and is shared by
 * studies, samples, files and cohorts, assigned in that order within each study.
 * Files are registered as indexed under their path, and cohorts are registered as
 * having calculated stats. The aggregated header, the aggregation and the
 * attributes of the study are copied over as well.
 *
 * @param variantMetadata Variant metadata to convert.
 * @return one study configuration per study, in the order of the input studies.
 */
public List<StudyConfiguration> toStudyConfigurations(VariantMetadata variantMetadata) {
    List<StudyConfiguration> result = new ArrayList<>(variantMetadata.getStudies().size());
    VariantMetadataManager metadataManager = new VariantMetadataManager().setVariantMetadata(variantMetadata);

    // Single counter shared by every kind of entity; allocation order matters.
    int nextId = 1;
    for (VariantStudyMetadata studyMetadata : variantMetadata.getStudies()) {
        StudyConfiguration sc = new StudyConfiguration(nextId++, studyMetadata.getId());
        result.add(sc);

        // Samples first, so that files and cohorts can resolve their sample IDs below.
        for (Sample sample : metadataManager.getSamples(studyMetadata.getId())) {
            sc.getSampleIds().put(sample.getId(), nextId++);
        }

        for (VariantFileMetadata fileMetadata : studyMetadata.getFiles()) {
            int fileId = nextId++;
            sc.getIndexedFiles().add(fileId);
            sc.getFileIds().put(fileMetadata.getPath(), fileId);
            sc.getSamplesInFiles().put(fileId, new LinkedHashSet<>(toSampleIds(sc, fileMetadata.getSampleIds())));
        }

        for (Cohort cohort : studyMetadata.getCohorts()) {
            int cohortId = nextId++;
            sc.getCohortIds().put(cohort.getId(), cohortId);
            sc.getCalculatedStats().add(cohortId);
            sc.getCohorts().put(cohortId, new HashSet<>(toSampleIds(sc, cohort.getSampleIds())));
        }

        sc.setVariantHeader(studyMetadata.getAggregatedHeader());
        sc.setAggregation(studyMetadata.getAggregation());
        studyMetadata.getAttributes().forEach(sc.getAttributes()::put);
    }
    return result;
}