StudyEntry se = new StudyEntry(); se.setStudyId(vse.getStudyId()); if (null != vse.getSamplesPosition()) { se.setSamplesPosition(new HashMap<>(vse.getSamplesPosition())); } else { se.setSamplesPosition(new HashMap<>()); if (null != vse.getFormat()) { se.setFormat(new ArrayList<>(vse.getFormat())); } else { se.setFormat(new ArrayList<>()); List<FileEntry> files = new ArrayList<>(vse.getFiles().size()); for (FileEntry file : vse.getFiles()) { HashMap<String, String> attributes = new HashMap<>(file.getAttributes()); //TODO: Check file attributes files.add(new FileEntry(file.getFileId(), file.getCall(), attributes)); se.setFiles(files); int samplesSize = vse.getSamplesData().size(); List<List<String>> newSampleData = new ArrayList<>(samplesSize); for (int i = 0; i < samplesSize; i++) { List<String> sd = vse.getSamplesData().get(i); newSampleData.add(new ArrayList<>(sd)); se.setSamplesData(newSampleData);
/**
 * Checks whether every conflicting variant has the given type and carries the same genotype.
 *
 * <p>The reference genotype is read from the first study of an arbitrary variant of the collection, using the
 * first sample name of that study. Only the first study ({@code getStudies().get(0)}) of each variant is
 * inspected.
 *
 * @param conflicts variants to compare; must not be empty
 * @param type      variant type that every element is required to have
 * @return {@code true} if all variants are of {@code type} and share the reference genotype, {@code false} otherwise
 * @throws java.util.NoSuchElementException if {@code conflicts} is empty or the reference study has no sample names
 */
private boolean allSameTypeAndGT(Collection<Variant> conflicts, VariantType type) {
    // A single variant of another type settles the answer.
    if (conflicts.stream().anyMatch(v -> !v.getType().equals(type))) {
        return false;
    }
    StudyEntry studyEntry = conflicts.stream().findAny().get().getStudies().get(0);
    String sample = studyEntry.getSamplesName().stream().findFirst().get();
    String gt = studyEntry.getSampleData(sample, GENOTYPE_KEY);
    // All types were verified above, so only the genotype still has to be compared.
    // StringUtils.equals keeps the comparison null-safe on both sides.
    return conflicts.stream()
            .allMatch(v -> StringUtils.equals(gt, v.getStudies().get(0).getSampleData(sample, GENOTYPE_KEY)));
}
/**
 * Looks up a file entry inside one study of a variant.
 *
 * @param variant variant holding the studies
 * @param studyId study to search in
 * @param fileId  file to return, or {@code null} to select the only file of the study
 * @return {@code null} when the study is not found; otherwise the single file of the study (for a {@code null}
 *         {@code fileId}) or the result of {@code StudyEntry.getFile(fileId)}
 * @throws IllegalArgumentException if {@code fileId} is {@code null} and the study does not hold exactly one file
 */
private static FileEntry getFileEntry(Variant variant, String studyId, String fileId) {
    final StudyEntry study = getStudyEntry(variant, studyId);
    if (study == null) {
        return null;
    }
    if (fileId != null) {
        return study.getFile(fileId);
    }
    // Without an explicit file ID the choice is only unambiguous for single-file studies.
    final int numFiles = study.getFiles().size();
    if (numFiles != 1) {
        throw new IllegalArgumentException("Required one File per variant. Found " + numFiles + " files instead");
    }
    return study.getFiles().get(0);
}
}
/**
 * Returns the data of the sample with the given name.
 *
 * @param sampleName name to look up in the samples position map
 * @return the data stored at the position registered for {@code sampleName}, or {@code null} when the name has
 *         no registered position
 */
public List<String> getSampleData(String sampleName) {
    requireSamplesPosition();
    final Integer position = samplesPosition.get(sampleName);
    return position == null ? null : getSampleData(position);
}
/**
 * Converts a study entry to its storage document using every file of the study and its ordered sample names.
 *
 * <p>Convenience overload: delegates to the four-argument {@code convertToStorageType}.
 *
 * @param variant    variant the study entry belongs to
 * @param studyEntry study entry to convert
 * @return the document produced by the four-argument overload
 */
public Document convertToStorageType(Variant variant, StudyEntry studyEntry) {
    // Copy the ordered names into an insertion-ordered set before delegating.
    LinkedHashSet<String> orderedSamples = new LinkedHashSet<>(studyEntry.getOrderedSamplesName());
    return convertToStorageType(variant, studyEntry, studyEntry.getFiles(), orderedSamples);
}
Variant normalizedVariant = newVariant(variant, keyFields, sv); if (keyFields.getPhaseSet() != null) { StudyEntry studyEntry = new StudyEntry(); studyEntry.setSamplesData( Collections.singletonList(Collections.singletonList(keyFields.getPhaseSet()))); studyEntry.setFormat(Collections.singletonList("PS")); studyEntry.setFiles(Collections.singletonList(new FileEntry(keyFields.getPhaseSet(), call, null))); normalizedVariant.setStudies(Collections.singletonList(studyEntry)); List<String> originalAlternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); List<String> alternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); alternates.add(alternate); originalAlternates.add(alternate); for (String secondaryAlternatesAllele : entry.getSecondaryAlternatesAlleles()) { alternates.add(normalizeNoVariationAlternate(secondaryAlternatesAllele)); originalAlternates.add(secondaryAlternatesAllele); if (entry.getFiles() != null && StringUtils.isNotEmpty(entry.getFiles().get(0).getCall())) { String call = entry.getFiles().get(0).getCall(); entry.getFiles().forEach(fileEntry -> fileEntry.setCall(sameVariant ? null : call)); samplesData = entry.getSamplesData(); } else { normalizedVariant = newVariant(variant, keyFields, sv); normalizedEntry = new StudyEntry(); normalizedEntry.setStudyId(entry.getStudyId()); normalizedEntry.setSamplesPosition(entry.getSamplesPosition());
variant.getReference(), variant.getAlternate()); StudyEntry studyEntry = new StudyEntry(); studyEntry.setFormat(archiveVariant.getStudies().get(0).getFormat()); studyEntry.setSortedSamplesPosition(new LinkedHashMap<>()); studyEntry.setSamplesData(new ArrayList<>()); if (studyEntry.getFormatPositions().containsKey("GT")) { int samplePosition = 0; Integer gtIdx = studyEntry.getFormatPositions().get("GT"); for (String sampleName : studyEntry.getOrderedSamplesName()) { Integer sampleId = studyConfiguration.getSampleIds().get(sampleName); if (missingSamples.contains(sampleId)) { String gt = studyEntry.getSamplesData().get(samplePosition).get(gtIdx);
// Fixture for v1: study "1", one file with an empty call, sample S1 at position 0
// with values "1/2" and "LowGQXHetDel" for the two format keys.
StudyEntry se = new StudyEntry("1");
se.setFiles(Collections.singletonList(new FileEntry("1", "", new HashMap<>())));
v1.setStudies(Collections.singletonList(se));
se.setFormat(Arrays.asList(GENOTYPE_KEY, GENOTYPE_FILTER_KEY));
se.setSamplesPosition(asMap("S1", 0));
se.setSamplesData(Collections.singletonList(Arrays.asList("1/2", "LowGQXHetDel")));
// v1 additionally carries a secondary alternate (INDEL at 328, CTT -> CTTTC).
se.getSecondaryAlternates().add(new AlternateCoordinate(null, null, 328, "CTT", "CTTTC", INDEL));
addAttribute(v1, FILTER, "LowGQXHetDel");
// Fixture for v2: the variable is re-assigned to a fresh study entry with the same study/file layout,
// sample S1 with values "0/1" and "PASS".
se = new StudyEntry("1");
se.setFiles(Collections.singletonList(new FileEntry("1", "", new HashMap<>())));
v2.setStudies(Collections.singletonList(se));
se.setSamplesPosition(asMap("S1", 0));
se.setFormat(Arrays.asList(GENOTYPE_KEY, GENOTYPE_FILTER_KEY));
se.setSamplesData(Collections.singletonList(Arrays.asList("0/1", "PASS")));
addAttribute(v2, FILTER, "PASS");
System.out.println();
// Resolve the conflicts and print each resulting variant with the call of its first file.
Collection<Variant> resolved = new VariantLocalConflictResolver().resolveConflicts(variants);
resolved.forEach(res -> System.out.println("res = " + res.toString() + " call: " + res.getStudies().get(0).getFiles().get(0).getCall()));
public Variant convert(VcfSliceProtos.VcfRecord vcfRecord, String chromosome, int slicePosition) { int start = getStart(vcfRecord, slicePosition); int end = getEnd(vcfRecord, slicePosition); Variant variant = new Variant(chromosome, start, end, vcfRecord.getReference(), vcfRecord.getAlternate()); variant.setType(getVariantType(vcfRecord.getType())); variant.setIds(vcfRecord.getIdNonDefaultList()); variant.resetLength(); FileEntry fileEntry = new FileEntry(); fileEntry.setFileId(fileId); Map<String, String> attributes = getFileAttributes(vcfRecord); fileEntry.setAttributes(attributes); fileEntry.setCall(vcfRecord.getCall().isEmpty() ? null : vcfRecord.getCall()); if (vcfRecord.getType().equals(VariantProto.VariantType.NO_VARIATION)) { attributes.put("END", Integer.toString(end)); } StudyEntry studyEntry = new StudyEntry(studyId); studyEntry.setFiles(Collections.singletonList(fileEntry)); studyEntry.setFormat(getFormat(vcfRecord)); studyEntry.setSamplesData(getSamplesData(vcfRecord, studyEntry.getFormatPositions())); studyEntry.setSamplesPosition(retrieveSamplePosition()); studyEntry.getFormatPositions(); // Initialize the map List<VariantProto.AlternateCoordinate> alts = vcfRecord.getSecondaryAlternatesList(); studyEntry.setSecondaryAlternates(getAlternateCoordinates(alts)); variant.addStudyEntry(studyEntry); studyEntry.getFormatPositions(); // Initialize the map return variant; }
/**
 * Creates a sample-less variant that mirrors the coordinates and study layout of a template.
 *
 * <p>The new variant takes chromosome, start, end, reference, alternate and type from {@code target}. For every
 * study of {@code target} it receives a study entry with the same study ID, a single file entry with empty file ID
 * and call, a format made of {@code getGtKey()} followed by {@code getFilterKey()}, and empty samples position and
 * samples data.
 *
 * @param target Variant to take as a template
 * @return new variant with the template's position, alleles, type and study IDs, but no samples
 */
public Variant createFromTemplate(Variant target) {
    Variant copy = new Variant(
            target.getChromosome(),
            target.getStart(),
            target.getEnd(),
            target.getReference(),
            target.getAlternate());
    copy.setType(target.getType());

    for (StudyEntry templateStudy : target.getStudies()) {
        StudyEntry emptyStudy = new StudyEntry(templateStudy.getStudyId());
        // Placeholder file entry: empty ID, empty call, no attributes.
        FileEntry placeholderFile = new FileEntry("", "", new HashMap<>());
        emptyStudy.setFiles(Collections.singletonList(placeholderFile));
        emptyStudy.setFormat(Arrays.asList(getGtKey(), getFilterKey()));
        emptyStudy.setSamplesPosition(new HashMap<>());
        emptyStudy.setSamplesData(new ArrayList<>());
        copy.addStudyEntry(emptyStudy);
    }
    return copy;
}
VariantOverlappingStatus overlappingStatus = REFERENCE; FileEntry fileEntry = archiveVariant.getStudies().get(0).getFiles().get(0); fileEntry.getAttributes().remove(VCFConstants.END_KEY); if (StringUtils.isEmpty(fileEntry.getCall())) { variant.getAlternate()); StudyEntry studyEntry = new StudyEntry(); studyEntry.setFormat(archiveVariant.getStudies().get(0).getFormat()); studyEntry.setSortedSamplesPosition(new LinkedHashMap<>()); studyEntry.setSamplesData(new ArrayList<>()); mergedVariant.addStudyEntry(studyEntry); mergedVariant.setType(variant.getType());
variant.setSv(sv); if (hasStudyId()) { StudyEntry studyEntry = new StudyEntry(studyId); if (fileId != null) { FileEntry fileEntry = new FileEntry(fileId, call, attributes); studyEntry.setFiles(Collections.singletonList(fileEntry)); studyEntry.setFormat(format); if (alternates.size() > 0) { List<AlternateCoordinate> secondaryAlternates = new ArrayList<>(alternates.size() - 1); secondaryAlternates.add(new AlternateCoordinate(chromosome, start, end, reference, alternates.get(i), inferType(reference, alternates.get(i)))); studyEntry.setSecondaryAlternates(secondaryAlternates); studyEntry.setSortedSamplesPosition(samplesPosition); studyEntry.setSamplesData(samplesData); variant.addStudyEntry(studyEntry); } else {
/**
 * Converts a {@link StudyEntry} into a protobuf study entry builder.
 *
 * <p>Copies the study ID, the format, every sample data row, every stats entry (keyed as in
 * {@code study.getStats()}) and every file entry.
 *
 * @param study study entry to convert
 * @return builder populated from {@code study}
 */
private VariantProto.StudyEntry.Builder toProto(StudyEntry study) {
    VariantProto.StudyEntry.Builder studyBuilder = VariantProto.StudyEntry.newBuilder();
    // The study ID is transferred once, through set(...), like the format below. A direct
    // studyBuilder.setStudyId(study.getStudyId()) in addition would be redundant, and protobuf string
    // setters reject null values.
    // NOTE(review): set(...) is presumably the null-tolerant transfer helper of this class - confirm.
    set(study::getStudyId, studyBuilder::setStudyId);
    set(study::getFormat, studyBuilder::addAllFormat);
    for (List<String> sampleData : study.getSamplesData()) {
        studyBuilder.addSamplesData(VariantProto.StudyEntry.SamplesDataInfoEntry.newBuilder().addAllInfo(sampleData));
    }
    for (Map.Entry<String, VariantStats> entry : study.getStats().entrySet()) {
        VariantProto.VariantStats.Builder variantStats = toProto(entry.getValue());
        studyBuilder.putStats(entry.getKey(), variantStats.build());
    }
    for (FileEntry fileEntry : study.getFiles()) {
        studyBuilder.addFiles(toProto(fileEntry));
    }
    return studyBuilder;
}
/**
 * Generates a list with a single study entry holding {@code n} generated sample rows.
 *
 * <p>The entry uses the fixed study ID {@code "2"} and file ID {@code "3"}, the attributes from
 * {@code genAttributes()} and the format from {@code getFormat()}.
 *
 * @param n number of sample rows to generate; values below 1 produce a study entry without samples
 * @return list containing the one generated study entry
 */
public List<StudyEntry> getStudies(int n) {
    int studyID = 2;
    int fileID = 3;
    List<StudyEntry> studyEntryList = new ArrayList<>();
    StudyEntry studyEntry = new StudyEntry();
    studyEntry.setStudyId(Integer.toString(studyID));
    studyEntry.setFileId(Integer.toString(fileID));
    Map<String, String> attributes = genAttributes();
    studyEntry.setAttributes(attributes);
    studyEntry.setFormat(getFormat());
    // One row per sample: size the list by the sample count, not by the number of format fields.
    // Math.max guards against a negative capacity, which ArrayList rejects.
    List<List<String>> sampleList = new ArrayList<>(Math.max(n, 0));
    for (int i = 0; i < n; i++) {
        sampleList.add(getRandomample());
    }
    studyEntry.setSamplesData(sampleList);
    studyEntryList.add(studyEntry);
    return studyEntryList;
}
/**
 * Verifies that every variant of the unfiltered result comes back with studies, stats, files, samples data
 * and an annotation.
 */
@Test
public void testIncludeAll() {
    for (Variant variant : allVariants.getResult()) {
        assertThat(variant.getStudies(), not(is(Collections.emptyList())));
        // Stats are a map: comparing against an empty list could never be equal, so that check always passed.
        assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyMap())));
        assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList())));
        assertThat(variant.getStudies().get(0).getSamplesData(), not(is(Collections.emptyList())));
        assertNotNull(variant.getAnnotation());
    }
}
public Put convert(Variant variant, Put put, Set<Integer> sampleIds, VariantOverlappingStatus overlappingStatus) { StudyEntry studyEntry = variant.getStudies().get(0); Integer gtIdx = studyEntry.getFormatPositions().get(VariantMerger.GT_KEY); int[] formatReMap = buildFormatRemap(studyEntry); int sampleIdx = 0; List<String> samplesName = studyEntry.getOrderedSamplesName(); if (sampleIds == null || sampleIds.contains(sampleId)) { byte[] column = VariantPhoenixHelper.buildSampleColumnKey(studyConfiguration.getStudyId(), sampleId); List<String> sampleData = studyEntry.getSamplesData().get(sampleIdx); for (FileEntry fileEntry : studyEntry.getFiles()) { int fileId = Integer.parseInt(fileEntry.getFileId()); if (writeAllFileAttributes || filesToWrite.contains(fileId)) {
if (variant != null && !variant.getStudies().isEmpty()) { StudyEntry studyEntry = variant.getStudies().get(0); Integer psIdx = studyEntry.getFormatPositions().get(VCFConstants.PHASE_SET_KEY); if (psIdx != null) { String ps = studyEntry.getSamplesData().get(0).get(psIdx); if (!ps.equals(DocumentToSamplesConverter.UNKNOWN_FIELD)) { sampleName = studyEntry.getOrderedSamplesName().get(0); Variant next = iterator.next(); if (!next.getStudies().isEmpty()) { if (!ps.equals(next.getStudies().get(0).getSampleData(sampleName, VCFConstants.PHASE_SET_KEY))) { iterator.remove();
/**
 * Creates an empty study entry prepared for the samples returned for a study.
 *
 * <p>The study ID is the study name when {@code studyNameAsStudyId} is set, otherwise the numeric study ID as a
 * string. The format is a copy of {@code expectedFormat}, or of {@code fixedFormat} when no expected format is
 * configured. The samples data list starts empty.
 *
 * @param studyConfiguration configuration providing the study name/ID and the returned samples position
 * @param fixedFormat        format to use when {@code expectedFormat} is {@code null}
 * @return the new study entry
 */
protected StudyEntry newStudyEntry(StudyConfiguration studyConfiguration, List<String> fixedFormat) {
    String id = studyNameAsStudyId
            ? studyConfiguration.getStudyName()
            : String.valueOf(studyConfiguration.getStudyId());
    StudyEntry entry = new StudyEntry(id);

    entry.setFormat(new ArrayList<>(expectedFormat == null ? fixedFormat : expectedFormat));

    LinkedHashMap<String, Integer> positions = getReturnedSamplesPosition(studyConfiguration);
    if (mutableSamplesPosition) {
        // Hand out a private copy so callers can modify it.
        positions = new LinkedHashMap<>(positions);
    }
    entry.setSamplesData(new ArrayList<>(positions.size()));
    entry.setSortedSamplesPosition(positions);
    return entry;
}
/**
 * Asserts, for every variant of the adaptor, that the first field of each sample (reported as GT in the failure
 * messages) matches the expected fill-missing state.
 *
 * <p>A variant counts as "new" when the missing genotypes have not been updated for the study and all of its files
 * map to IDs contained in {@code newFiles}. For a variant that is not new, samples listed in
 * {@code processedSamples} must not have {@code ?/?}; every other sample/variant combination must not have
 * {@code 0/0}.
 *
 * @param dbAdaptor        adaptor to iterate and to read the study configuration from
 * @param newFiles         IDs of the newly loaded files
 * @param processedSamples names of the samples whose missing genotypes were processed
 */
protected void checkFillMissing(VariantHadoopDBAdaptor dbAdaptor, List<Integer> newFiles, String... processedSamples) {
    Set<Integer> newFileIds = new HashSet<>(newFiles);
    Set<String> processed = new HashSet<>(Arrays.asList(processedSamples));
    StudyConfiguration studyConfiguration = dbAdaptor.getStudyConfigurationManager().getStudyConfiguration(STUDY_ID, null).first();
    boolean missingGenotypesUpdated = studyConfiguration.getAttributes().getBoolean(MISSING_GENOTYPES_UPDATED);

    for (Variant variant : dbAdaptor) {
        StudyEntry study = variant.getStudies().get(0);

        boolean newVariant = false;
        if (!missingGenotypesUpdated) {
            newVariant = study.getFiles().stream()
                    .map(FileEntry::getFileId)
                    .map(studyConfiguration.getFileIds()::get)
                    .allMatch(newFileIds::contains);
        }
        String prefix = newVariant ? "new variant " : "";

        List<List<String>> samplesData = study.getSamplesData();
        int sampleIdx = 0;
        for (List<String> data : samplesData) {
            String sampleName = study.getOrderedSamplesName().get(sampleIdx++);
            // Processed samples of an existing variant must be filled; everything else must not be 0/0.
            boolean expectFilled = !newVariant && processed.contains(sampleName);
            String forbiddenGt = expectFilled ? "?/?" : "0/0";
            assertFalse(prefix + variant + " _ " + sampleName + " should not have GT=" + forbiddenGt,
                    data.get(0).equals(forbiddenGt));
        }
    }
}
private void fillStudyEntryFields(StudyEntry study, LinkedHashMap<String, Integer> samplesPositionToReturn, List<String> extraFields, List<List<String>> samplesData, boolean excludeGenotypes) { if (study != null) { //Set FORMAT if (extraFields.isEmpty()) { if (excludeGenotypes) { study.setFormat(Collections.emptyList()); } else { study.setFormat(Collections.singletonList("GT")); } } else { List<String> format = new ArrayList<>(1 + extraFields.size()); if (!excludeGenotypes) { format.add("GT"); } format.addAll(extraFields); study.setFormat(format); } //Set Samples Position study.setSamplesPosition(samplesPositionToReturn); //Set Samples Data study.setSamplesData(samplesData); } }