/**
 * Converts a model {@link ConsequenceType} into its protobuf counterpart.
 *
 * <p>Generated protobuf setters reject {@code null} arguments, and the cDNA/CDS positions are
 * boxed {@code Integer}s that would be unboxed on the way in. Any of those fields that is
 * {@code null} on the input is therefore left unset on the builder instead of triggering a
 * {@link NullPointerException}.
 *
 * @param type consequence type to convert; must not be {@code null}
 * @return the protobuf message built from the non-null fields of {@code type}
 */
public static VariantAnnotationProto.ConsequenceType createConsequenceType(ConsequenceType type) {
    VariantAnnotationProto.ConsequenceType.Builder builder = VariantAnnotationProto.ConsequenceType.newBuilder();

    if (type.getGeneName() != null) {
        builder.setGeneName(type.getGeneName());
    }
    if (type.getEnsemblGeneId() != null) {
        builder.setEnsemblGeneId(type.getEnsemblGeneId());
    }
    if (type.getEnsemblTranscriptId() != null) {
        builder.setEnsemblTranscriptId(type.getEnsemblTranscriptId());
    }
    if (type.getStrand() != null) {
        builder.setStrand(type.getStrand());
    }
    if (type.getBiotype() != null) {
        builder.setBiotype(type.getBiotype());
    }
    if (type.getCdnaPosition() != null) {
        builder.setCDnaPosition(type.getCdnaPosition());
    }
    if (type.getCdsPosition() != null) {
        builder.setCdsPosition(type.getCdsPosition());
    }
    if (type.getCodon() != null) {
        builder.setCodon(type.getCodon());
    }

    // NOTE(review): null handling of the protein annotation is delegated to
    // createProteinVariantAnnotation, exactly as before - confirm that helper tolerates null.
    builder.setProteinVariantAnnotation(createProteinVariantAnnotation(type.getProteinVariantAnnotation()));

    List<SequenceOntologyTerm> sequenceOntologyTerms = type.getSequenceOntologyTerms();
    if (sequenceOntologyTerms != null) {
        for (SequenceOntologyTerm so : sequenceOntologyTerms) {
            VariantAnnotationProto.SequenceOntologyTerm.Builder soBuilder =
                    VariantAnnotationProto.SequenceOntologyTerm.newBuilder();
            if (so.getAccession() != null) {
                soBuilder.setAccession(so.getAccession());
            }
            if (so.getName() != null) {
                soBuilder.setName(so.getName());
            }
            builder.addSequenceOntologyTerms(soBuilder.build());
        }
    }
    return builder.build();
}
/**
 * Appends a single intergenic consequence type to the list when nothing else was annotated.
 *
 * <p>The list is only modified when it is empty and {@code isIntergenic} is {@code true}; the
 * added entry carries just the sequence ontology terms resolved for
 * {@code VariantAnnotationUtils.INTERGENIC_VARIANT}.
 *
 * @param consequenceTypeList list of consequence types collected so far; modified in place
 * @param isIntergenic        whether the variant was found to be intergenic
 */
protected void solveIntergenic(List<ConsequenceType> consequenceTypeList, boolean isIntergenic) {
    // Nothing to do when annotations already exist or the variant is not intergenic
    if (!consequenceTypeList.isEmpty() || !isIntergenic) {
        return;
    }

    HashSet<String> soNames = new HashSet<>();
    soNames.add(VariantAnnotationUtils.INTERGENIC_VARIANT);

    ConsequenceType intergenic = new ConsequenceType();
    intergenic.setSequenceOntologyTerms(getSequenceOntologyTerms(soNames));
    consequenceTypeList.add(intergenic);
}
/**
 * Looks for a coding consequence type in {@code consequenceTypeList} that refers to the same
 * transcript and the same protein position as {@code consequenceType}.
 *
 * <p>A missing transcript id, protein variant annotation or protein position (on either side)
 * is treated as "no match" rather than raising a {@link NullPointerException}.
 *
 * @param consequenceType     consequence type to match against
 * @param consequenceTypeList candidates to search
 * @return the first matching coding consequence type, or {@code null} if there is none
 */
private ConsequenceType findCodingOverlappingConsequenceType(ConsequenceType consequenceType,
                                                             List<ConsequenceType> consequenceTypeList) {
    // Resolve the comparison keys once; without both of them nothing can match
    String transcriptId = consequenceType.getEnsemblTranscriptId();
    if (transcriptId == null || consequenceType.getProteinVariantAnnotation() == null) {
        return null;
    }
    Integer proteinPosition = consequenceType.getProteinVariantAnnotation().getPosition();
    if (proteinPosition == null) {
        return null;
    }

    for (ConsequenceType candidate : consequenceTypeList) {
        if (isCoding(candidate)
                && transcriptId.equals(candidate.getEnsemblTranscriptId())
                && candidate.getProteinVariantAnnotation() != null
                && proteinPosition.equals(candidate.getProteinVariantAnnotation().getPosition())) {
            return candidate;
        }
    }
    return null;
}
private Set<SequenceOntologyTermComparisonObject> getSequenceOntologySet(List<ConsequenceType> consequenceTypeList) { if (consequenceTypeList != null) { Set<SequenceOntologyTermComparisonObject> set = new HashSet<>(consequenceTypeList.size()); for (ConsequenceType consequenceType : consequenceTypeList) { for (SequenceOntologyTerm sequenceOntologyTerm : consequenceType.getSequenceOntologyTerms()) { // // Expected many differences depending on the regulatory source databases used by the annotators. // // Better skip regulatory_region_variant annotations // if (!(sequenceOntologyTerm.getName().equals(VariantAnnotationUtils.REGULATORY_REGION_VARIANT) // || sequenceOntologyTerm.getName().equals(VariantAnnotationUtils.TF_BINDING_SITE_VARIANT))) { set.add(new SequenceOntologyTermComparisonObject(consequenceType.getEnsemblTranscriptId(), sequenceOntologyTerm)); // } } } return set; } else { return null; } }
isIntergenic = isIntergenic && (variantEnd < currentTranscript.getStart() || variantStart > currentTranscript.getEnd()); transcript = currentTranscript; consequenceType = new ConsequenceType(); consequenceType.setGeneName(gene.getName()); consequenceType.setEnsemblGeneId(gene.getId()); consequenceType.setEnsemblTranscriptId(transcript.getId()); consequenceType.setStrand(transcript.getStrand()); consequenceType.setBiotype(transcript.getBiotype()); consequenceType.setTranscriptAnnotationFlags(transcript.getAnnotationFlags() != null ? new ArrayList<>(transcript.getAnnotationFlags()) : null); if (SoNames.size() > 0) { // Variant does not overlap gene region, just may have upstream/downstream annotations consequenceType.setSequenceOntologyTerms(getSequenceOntologyTerms(SoNames)); consequenceTypeList.add(consequenceType); if (SoNames.size() > 0) { // Variant does not overlap gene region, just has upstream/downstream annotations consequenceType.setSequenceOntologyTerms(getSequenceOntologyTerms(SoNames)); consequenceTypeList.add(consequenceType);
&& !transcriptAnnotationUpdated(variant0, consequenceType1.getEnsemblTranscriptId())) { variant1 = (Variant) variantArray[1]; ConsequenceType consequenceType2 int cdnaPosition = consequenceType1.getCdnaPosition(); int cdsPosition = consequenceType1.getCdsPosition(); String codon = null; String referenceCodon = consequenceType1.getCodon().split("/")[0].toUpperCase(); soTerms = updatePhasedSoTerms(consequenceType1.getSequenceOntologyTerms(), String.valueOf(referenceCodon), String.valueOf(alternateCodon), variant1.getChromosome().equals("MT")); consequenceType3.setCdnaPosition(cdnaPosition); consequenceType3.setCdsPosition(cdsPosition); consequenceType3.setCodon(codon); consequenceType3.setProteinVariantAnnotation(newProteinVariantAnnotation); consequenceType3.setSequenceOntologyTerms(soTerms); flagTranscriptAnnotationUpdated(variant2, consequenceType1.getEnsemblTranscriptId()); int codonIdx1 = getUpperCaseLetterPosition(consequenceType1.getCodon().split("/")[0]); int codonIdx2 = getUpperCaseLetterPosition(consequenceType2.getCodon().split("/")[0]); char[] referenceCodonArray = consequenceType1.getCodon().split("/")[0].toLowerCase().toCharArray(); referenceCodonArray[codonIdx1] = Character.toUpperCase(referenceCodonArray[codonIdx1]); referenceCodonArray[codonIdx2] = Character.toUpperCase(referenceCodonArray[codonIdx2]);
if (consequenceType.getGeneName() != null) { stringBuilder.append(consequenceType.getGeneName()); if (consequenceType.getEnsemblGeneId() != null) { stringBuilder.append(consequenceType.getEnsemblGeneId()); if (consequenceType.getEnsemblTranscriptId() != null) { stringBuilder.append(consequenceType.getEnsemblTranscriptId()); if (consequenceType.getBiotype() != null) { stringBuilder.append(consequenceType.getBiotype()); stringBuilder.append(consequenceType.getSequenceOntologyTerms().stream() .map(SequenceOntologyTerm::getName) .collect(Collectors.joining(","))); if (consequenceType.getProteinVariantAnnotation() != null) { stringBuilder.append(consequenceType.getProteinVariantAnnotation().getPosition()); stringBuilder.append(FIELD_SEPARATOR); stringBuilder.append(consequenceType.getProteinVariantAnnotation().getReference()) .append("/") .append(consequenceType.getProteinVariantAnnotation().getAlternate()); stringBuilder.append(FIELD_SEPARATOR); if (consequenceType.getProteinVariantAnnotation().getSubstitutionScores() != null) { List<String> sift = consequenceType.getProteinVariantAnnotation().getSubstitutionScores().stream() .filter(t -> t.getSource().equalsIgnoreCase("sift")) .map(Score::getDescription) List<String> polyphen = consequenceType.getProteinVariantAnnotation().getSubstitutionScores().stream()
/**
 * Builds a {@link ReportedEvent} with id {@code "OPENCB-" + id}, copying over every optional
 * argument that is not {@code null} and attaching the ACMG classification computed for the variant.
 *
 * @param id         numeric suffix of the event id
 * @param phenotype  phenotype to attach as a single-element list; skipped when {@code null}
 * @param ct         consequence type providing the consequence type id and genomic feature; skipped when {@code null}
 * @param panelId    panel identifier; skipped when {@code null}
 * @param moi        mode of inheritance; skipped when {@code null}
 * @param penetrance penetrance; skipped when {@code null}
 * @param variant    variant passed to the ACMG classification
 * @return the populated reported event
 */
private ReportedEvent newReportedEvent(int id, Phenotype phenotype, ConsequenceType ct, String panelId,
                                       ModeOfInheritance moi, Penetrance penetrance, Variant variant) {
    ReportedEvent event = new ReportedEvent().setId("OPENCB-" + id);

    if (phenotype != null) {
        event.setPhenotypes(Collections.singletonList(phenotype));
    }

    if (ct != null) {
        // NOTE(review): the consequence type id list is filled with the biotype - confirm this is intended
        event
                .setConsequenceTypeIds(Collections.singletonList(ct.getBiotype()))
                .setGenomicFeature(
                        new GenomicFeature(ct.getEnsemblGeneId(), ct.getEnsemblTranscriptId(), ct.getGeneName(), null, null));
    }

    if (panelId != null) {
        event.setPanelId(panelId);
    }

    if (moi != null) {
        event.setModeOfInheritance(moi);
    }

    if (penetrance != null) {
        event.setPenetrance(penetrance);
    }

    // The classification is computed from the event as populated so far
    List<String> acmg = VariantClassification.calculateAcmgClassification(variant, event);
    event.setClassification(new VariantClassification().setAcmg(acmg));

    return event;
}
}
ConsequenceType consequenceType = new ConsequenceType(); if (fields.length > 2) { consequenceType.setEnsemblTranscriptId(fields[1]); consequenceType.setBiotype(fields[2]); consequenceType.setCdnaPosition(Integer.parseInt(fields[3])); consequenceType.setCdsPosition(Integer.parseInt(fields[4])); if (fields.length > 5) { consequenceType.setCodon(fields[5]); consequenceType.setProteinVariantAnnotation(protVarAnnotation); consequenceType.setGeneName(gene); consequenceType.setEnsemblGeneId(ensGene); sequenceOntologyTerm.setAccession("SO:" + String.format("%07d", soAcc)); sequenceOntologyTerm.setName(ConsequenceTypeMappings.accessionToTerm.get(soAcc)); if (consequenceTypeMap.get(fields[0]).getSequenceOntologyTerms() == null) { consequenceTypeMap.get(fields[0]).setSequenceOntologyTerms(new ArrayList<>()); consequenceTypeMap.get(fields[0]).getSequenceOntologyTerms().add(sequenceOntologyTerm); ConsequenceType consequenceType = new ConsequenceType(); consequenceType.setEnsemblGeneId(""); consequenceType.setGeneName(""); consequenceType.setEnsemblTranscriptId(""); consequenceType.setSequenceOntologyTerms(Collections.singletonList(sequenceOntologyTerm)); consequenceTypes.add(consequenceType);
System.out.println(inCT.getBiotype() + " vs " + outCT.getBiotype()); assert(inCT.getBiotype().equals(outCT.getBiotype())); int inCdnaPosition = inCT.getCdnaPosition() == null ? 0 : inCT.getCdnaPosition(); int inCdsPosition = inCT.getCdsPosition() == null ? 0 : inCT.getCdsPosition(); int outCdnaPosition = outCT.getCdnaPosition() == null ? 0 : outCT.getCdnaPosition(); int outCdsPosition = outCT.getCdsPosition() == null ? 0 : outCT.getCdsPosition(); String inCodon = inCT.getCodon() == null ? "" : inCT.getCodon().trim(); String outCodon = outCT.getCodon() == null ? "" : outCT.getCodon().trim(); System.out.println(inCdnaPosition + " vs " + outCdnaPosition + " ; " + inCdsPosition + " vs " + outCdsPosition if (inCT.getProteinVariantAnnotation() != null && outCT.getProteinVariantAnnotation() != null) { checkScore(inCT.getProteinVariantAnnotation().getSubstitutionScores(), outCT.getProteinVariantAnnotation().getSubstitutionScores(), "sift"); checkScore(inCT.getProteinVariantAnnotation().getSubstitutionScores(), outCT.getProteinVariantAnnotation().getSubstitutionScores(), "polyphen"); String inUniprotAccession = inCT.getProteinVariantAnnotation().getUniprotAccession() == null ? "" : inCT.getProteinVariantAnnotation().getUniprotAccession(); String outUniprotAccession = outCT.getProteinVariantAnnotation().getUniprotAccession() == null ? "" : outCT.getProteinVariantAnnotation().getUniprotAccession(); String inUniprotName = inCT.getProteinVariantAnnotation().getUniprotName() == null ? "" : inCT.getProteinVariantAnnotation().getUniprotName(); String outUniprotName = outCT.getProteinVariantAnnotation().getUniprotName() == null ? "" : outCT.getProteinVariantAnnotation().getUniprotName(); String inUniprotVariantId = inCT.getProteinVariantAnnotation().getUniprotVariantId() == null ? "" : inCT.getProteinVariantAnnotation().getUniprotVariantId(); String outUniprotVariantId = outCT.getProteinVariantAnnotation().getUniprotVariantId() == null ? 
"" : outCT.getProteinVariantAnnotation().getUniprotVariantId(); System.out.println(inUniprotAccession + " vs " + outUniprotAccession + " ; " + inUniprotName + " vs " + outUniprotName int inPosition = inCT.getProteinVariantAnnotation().getPosition() == null ? 0 : inCT.getProteinVariantAnnotation().getPosition(); int outPosition = outCT.getProteinVariantAnnotation().getPosition() == null ? 0 : outCT.getProteinVariantAnnotation().getPosition(); String inRef = inCT.getProteinVariantAnnotation().getReference() == null ? "" : inCT.getProteinVariantAnnotation().getReference();
protected boolean setInsertionAlleleAminoacidChange(String referenceCodon, char[] modifiedCodonArray, char[] formattedReferenceCodonArray, char[] formattedModifiedCodonArray, boolean useMitochondrialCode, boolean firstCodon) { // Set codon str, protein ref and protein alt ONLY for the first codon mofified by the insertion if (firstCodon) { firstCodon = false; // Only the exact codon where the deletion starts is set consequenceType.setCodon(String.valueOf(formattedReferenceCodonArray) + "/" + String.valueOf(formattedModifiedCodonArray)); // Assumes proteinVariantAnnotation attribute is already initialized consequenceType .getProteinVariantAnnotation() .setReference(VariantAnnotationUtils.getAminoacid(useMitochondrialCode, referenceCodon)); consequenceType .getProteinVariantAnnotation() .setAlternate(VariantAnnotationUtils.getAminoacid(useMitochondrialCode, String.valueOf(modifiedCodonArray))); } return firstCodon; }
for (SequenceOntologyTerm so: consequenceType.getSequenceOntologyTerms()) { if (PROTEIN_LENGTH_CHANGING.contains(so.getName()) && "protein_coding".equals(consequenceType.getBiotype())) { acmg.add("PM4"); if (consequenceType.getProteinVariantAnnotation() != null && ListUtils.isNotEmpty(consequenceType.getProteinVariantAnnotation().getSubstitutionScores()) && ListUtils.isNotEmpty(variant.getAnnotation().getFunctionalScore()) && ListUtils.isNotEmpty(variant.getAnnotation().getConservation())) { double scaledCadd = Double.MIN_VALUE; double gerp = Double.MIN_VALUE; for (Score score: consequenceType.getProteinVariantAnnotation().getSubstitutionScores()) { switch (score.getSource()) { case "sift":
/**
 * Updates the variant-set statistics with the biotypes and sequence ontology accessions found
 * in the given annotation.
 *
 * <p>Each non-empty biotype adds one to its biotype counter and each sequence ontology term
 * adds one to the counter of its accession. A {@code null} annotation, a {@code null}
 * consequence type list or a {@code null} term list is silently ignored.
 *
 * @param annotation variant annotation to account for; may be {@code null}
 */
private void updateVariantSetStats(VariantAnnotation annotation) {
    if (annotation == null || annotation.getConsequenceTypes() == null) {
        return;
    }

    for (ConsequenceType consequenceType : annotation.getConsequenceTypes()) {
        String biotype = consequenceType.getBiotype();
        if (StringUtils.isNotEmpty(biotype)) {
            stats.addVariantBiotypeCounts(biotype, 1);
        }

        if (consequenceType.getSequenceOntologyTerms() != null) {
            for (SequenceOntologyTerm term : consequenceType.getSequenceOntologyTerms()) {
                stats.addConsequenceTypeCounts(term.getAccession(), 1);
            }
        }
    }
}
/**
 * Produces one stub annotation per input variant, or fails when the {@code fail} flag is set.
 *
 * @param variants variants to annotate
 * @return stub annotations, in the same order as {@code variants}
 * @throws VariantAnnotatorException when {@code fail} is {@code true}
 */
@Override
public List<VariantAnnotation> annotate(List<Variant> variants) throws VariantAnnotatorException {
    if (fail) {
        throw new VariantAnnotatorException("Fail because reasons");
    }
    return variants.stream()
            .map(this::buildStubAnnotation)
            .collect(Collectors.toList());
}

/**
 * Builds the stub annotation for one variant: coordinates and alleles copied from the variant,
 * a single placeholder consequence type, and the variant's string form stored as an
 * additional attribute.
 */
private VariantAnnotation buildStubAnnotation(Variant v) {
    VariantAnnotation annotation = new VariantAnnotation();
    annotation.setChromosome(v.getChromosome());
    annotation.setStart(v.getStart());
    annotation.setEnd(v.getEnd());
    annotation.setReference(v.getReference());
    annotation.setAlternate(v.getAlternate());
    annotation.setId("an id -- " + key);

    ConsequenceType placeholder = new ConsequenceType();
    placeholder.setGeneName("a gene");
    placeholder.setSequenceOntologyTerms(Collections.emptyList());
    placeholder.setExonOverlap(Collections.emptyList());
    placeholder.setTranscriptAnnotationFlags(Collections.emptyList());
    annotation.setConsequenceTypes(Collections.singletonList(placeholder));

    annotation.setAdditionalAttributes(
            Collections.singletonMap(GROUP_NAME.key(),
                    new AdditionalAttribute(Collections.singletonMap(VARIANT_ID.key(), v.toString()))));
    return annotation;
}
/**
 * Tells whether a consequence type carries at least one sequence ontology term whose name is
 * listed in {@code VariantAnnotationUtils.CODING_SO_NAMES}.
 *
 * @param consequenceType consequence type to inspect
 * @return {@code true} if any of its terms is a coding term; {@code false} otherwise, including
 *         when the consequence type has no term list at all
 */
private boolean isCoding(ConsequenceType consequenceType) {
    if (consequenceType.getSequenceOntologyTerms() == null) {
        // No terms annotated, hence nothing that could be coding
        return false;
    }
    for (SequenceOntologyTerm sequenceOntologyTerm : consequenceType.getSequenceOntologyTerms()) {
        if (VariantAnnotationUtils.CODING_SO_NAMES.contains(sequenceOntologyTerm.getName())) {
            return true;
        }
    }
    return false;
}
/**
 * Collects, across all consequence types of the annotation, the mapped values of every
 * substitution score whose source equals {@code source}.
 *
 * <p>Null consequence types, null protein annotations, null score lists and null scores are skipped.
 *
 * @param actual annotation under inspection
 * @return the set of mapped values, or an empty list when the annotation has no consequence types
 */
@Override
protected Iterable<T> featureValueOf(VariantAnnotation actual) {
    if (actual.getConsequenceTypes() == null) {
        return Collections.emptyList();
    }

    Set<T> values = new HashSet<>();
    for (ConsequenceType ct : actual.getConsequenceTypes()) {
        if (ct == null
                || ct.getProteinVariantAnnotation() == null
                || ct.getProteinVariantAnnotation().getSubstitutionScores() == null) {
            continue;
        }
        for (Score score : ct.getProteinVariantAnnotation().getSubstitutionScores()) {
            if (score == null || !source.equals(score.getSource())) {
                continue;
            }
            values.add(mapper.apply(score));
        }
    }
    return values;
}
};
HashSet<String> genesInVariant = new HashSet<>(); for (ConsequenceType consequenceType : variant.getAnnotation().getConsequenceTypes()) { String geneName = consequenceType.getGeneName(); if (geneName != null) { genesInVariant.add(geneName); geneName = consequenceType.getEnsemblGeneId(); if (geneName != null) { genesInVariant.add(geneName);
/**
 * Appends a substitution score to the protein variant annotation of a consequence type.
 *
 * <p>Both the protein variant annotation and its substitution score list are created on demand,
 * so an existing annotation whose score list is still {@code null} no longer causes a
 * {@link NullPointerException}.
 *
 * @param consequenceType consequence type to update in place
 * @param score           score to append
 */
private void addProteinSubstitutionScore(ConsequenceType consequenceType, Score score) {
    ProteinVariantAnnotation proteinVariantAnnotation = consequenceType.getProteinVariantAnnotation();
    if (proteinVariantAnnotation == null) {
        proteinVariantAnnotation = new ProteinVariantAnnotation();
        consequenceType.setProteinVariantAnnotation(proteinVariantAnnotation);
    }

    List<Score> proteinSubstitutionScores = proteinVariantAnnotation.getSubstitutionScores();
    if (proteinSubstitutionScores == null) {
        // Annotation exists (or was just created) without a score list yet
        proteinSubstitutionScores = new ArrayList<>();
        proteinVariantAnnotation.setSubstitutionScores(proteinSubstitutionScores);
    }

    proteinSubstitutionScores.add(score);
}
/**
 * Indexes the consequence types of a variant by Ensembl transcript id.
 *
 * <p>Consequence types with an empty transcript id are left out; when several share the same
 * id the last one in the list wins.
 *
 * @param variant variant whose annotation is indexed
 * @return a map from transcript id to consequence type; empty when the variant has no
 *         annotation or no consequence types
 */
private Map<String, ConsequenceType> getConsequenceTypeMap(Variant variant) {
    Map<String, ConsequenceType> byTranscriptId = new HashMap<>();

    if (variant.getAnnotation() == null
            || !ListUtils.isNotEmpty(variant.getAnnotation().getConsequenceTypes())) {
        return byTranscriptId;
    }

    for (ConsequenceType ct : variant.getAnnotation().getConsequenceTypes()) {
        String transcriptId = ct.getEnsemblTranscriptId();
        if (StringUtils.isNotEmpty(transcriptId)) {
            byTranscriptId.put(transcriptId, ct);
        }
    }
    return byTranscriptId;
}
/**
 * Assembles a {@link ConsequenceType} from its individual stored fields.
 *
 * <p>Sequence ontology names are expanded into {@link SequenceOntologyTerm}s (accession resolved
 * through {@code ConsequenceTypeMappings.getSoAccessionString}) and exon overlap documents are
 * converted into {@link ExonOverlap} objects, with the percentage defaulting to {@code 0F}.
 * A {@code null} {@code soNameList} or {@code exonOverlap} is treated as an empty list instead
 * of raising a {@link NullPointerException}.
 *
 * @param geneName                  gene name
 * @param ensemblGeneId             Ensembl gene id
 * @param ensemblTranscriptId       Ensembl transcript id
 * @param strand                    strand
 * @param biotype                   biotype
 * @param exonOverlap               exon overlap documents; may be {@code null}
 * @param transcriptAnnotationFlags transcript annotation flags, passed through unchanged
 * @param cDnaPosition              cDNA position
 * @param cdsPosition               CDS position
 * @param codon                     codon string
 * @param soNameList                sequence ontology term names; may be {@code null}
 * @param proteinVariantAnnotation  protein variant annotation, passed through unchanged
 * @return the assembled consequence type
 */
private ConsequenceType buildConsequenceType(String geneName, String ensemblGeneId, String ensemblTranscriptId,
                                             String strand, String biotype, List<Document> exonOverlap,
                                             List<String> transcriptAnnotationFlags, Integer cDnaPosition,
                                             Integer cdsPosition, String codon, List<String> soNameList,
                                             ProteinVariantAnnotation proteinVariantAnnotation) {
    List<SequenceOntologyTerm> soTerms = new ArrayList<>(soNameList == null ? 0 : soNameList.size());
    if (soNameList != null) {
        for (String soName : soNameList) {
            soTerms.add(new SequenceOntologyTerm(ConsequenceTypeMappings.getSoAccessionString(soName), soName));
        }
    }

    List<ExonOverlap> exonOverlapList = new ArrayList<>(exonOverlap == null ? 0 : exonOverlap.size());
    if (exonOverlap != null) {
        for (Document document : exonOverlap) {
            ExonOverlap e = new ExonOverlap(
                    document.getString(CT_EXON_OVERLAP_NUMBER_FIELD),
                    getDefault(document, CT_EXON_OVERLAP_PERCENTAGE_FIELD, 0F));
            exonOverlapList.add(e);
        }
    }

    return new ConsequenceType(geneName, ensemblGeneId, ensemblTranscriptId, strand, biotype, exonOverlapList,
            transcriptAnnotationFlags, cDnaPosition, cdsPosition, codon, proteinVariantAnnotation, soTerms);
}