VariantAnnotationProto.ProteinVariantAnnotation.Builder builder = VariantAnnotationProto.ProteinVariantAnnotation.newBuilder() .setUniprotAccession(proteinAnnotation.getUniprotAccession()) .setUniprotName(proteinAnnotation.getUniprotName()) .setPosition(proteinAnnotation.getPosition()) .setReference(proteinAnnotation.getReference()) .setAlternate(proteinAnnotation.getAlternate()) .setUniprotVariantId(proteinAnnotation.getUniprotVariantId()) .setFunctionalDescription(proteinAnnotation.getFunctionalDescription()); List<Score> substitutionScores = proteinAnnotation.getSubstitutionScores(); if (substitutionScores != null) { for (Score score: substitutionScores) { List<String> keywords = proteinAnnotation.getKeywords(); if (keywords != null && keywords.size() > 0) { builder.addAllKeywords(keywords); List<ProteinFeature> features = proteinAnnotation.getFeatures(); if (features != null) { for (ProteinFeature feature : features) {
private void parseRemainingFields(ConsequenceType consequenceType, String[] lineFields) { consequenceType.setEnsemblGeneId(lineFields[3]); // fill Ensembl gene id consequenceType.setEnsemblTranscriptId(lineFields[4]); // fill Ensembl transcript id ProteinVariantAnnotation proteinVariantAnnotation = new ProteinVariantAnnotation(); if(!lineFields[7].equals("-")) { consequenceType.setCdnaPosition(parseStringInterval(lineFields[7])); // fill cdna position } if(!lineFields[8].equals("-")) { consequenceType.setCdsPosition(parseStringInterval(lineFields[8])); // fill cds position } if(!lineFields[9].equals("-")) { proteinVariantAnnotation.setPosition(parseStringInterval(lineFields[9])); // fill aa position } if(!lineFields[10].equals("-")) { String parts[] = lineFields[10].split("/"); if(parts.length == 2) { proteinVariantAnnotation.setReference(parts[0]); // fill aa change proteinVariantAnnotation.setAlternate(parts[1]); // fill aa change } } consequenceType.setProteinVariantAnnotation(proteinVariantAnnotation); consequenceType.setCodon(lineFields[11]); // fill codon change if(!lineFields[6].equals("") && !lineFields.equals("-")) { // VEP may leave this field empty consequenceType.setSequenceOntologyTerms( getSequenceOntologyTerms(Arrays.asList(lineFields[6].split(",")))); // fill so terms // consequenceType.setSoTermsFromSoNames(Arrays.asList(lineFields[6].split(","))); // fill so terms } }
/**
 * Appends {@code score} to the substitution scores of the protein variant annotation
 * attached to {@code consequenceType}.
 *
 * <p>Both the annotation and its score list are created on demand, so the call is safe
 * when the consequence type has no annotation yet and also when an annotation exists
 * whose score list has not been initialised.
 *
 * @param consequenceType consequence type whose protein annotation receives the score
 * @param score           score to append
 */
private void addProteinSubstitutionScore(ConsequenceType consequenceType, Score score) {
    ProteinVariantAnnotation proteinVariantAnnotation = consequenceType.getProteinVariantAnnotation();
    if (proteinVariantAnnotation == null) {
        proteinVariantAnnotation = new ProteinVariantAnnotation();
        consequenceType.setProteinVariantAnnotation(proteinVariantAnnotation);
    }

    List<Score> proteinSubstitutionScores = proteinVariantAnnotation.getSubstitutionScores();
    if (proteinSubstitutionScores == null) {
        // An existing annotation may have been created with only some fields filled in;
        // without this guard the add() below would throw a NullPointerException.
        proteinSubstitutionScores = new ArrayList<>();
        proteinVariantAnnotation.setSubstitutionScores(proteinSubstitutionScores);
    }
    proteinSubstitutionScores.add(score);
}
protected int setCdsAndProteinPosition(int cdnaVariantPosition, int firstCdsPhase, int cdnaCodingStart) { if (cdnaVariantPosition != -1) { // cdnaVariantStart may be null if variantEnd falls in an intron if (transcript.unconfirmedStart()) { cdnaCodingStart -= ((3 - firstCdsPhase) % 3); } int cdsVariantStart = cdnaVariantPosition - cdnaCodingStart + 1; consequenceType.setCdsPosition(cdsVariantStart); // First place where protein variant annotation is added to the Consequence type, // must create the ProteinVariantAnnotation object ProteinVariantAnnotation proteinVariantAnnotation = new ProteinVariantAnnotation(); proteinVariantAnnotation.setPosition(((cdsVariantStart - 1) / 3) + 1); consequenceType.setProteinVariantAnnotation(proteinVariantAnnotation); } return cdnaCodingStart; }
/**
 * Looks up the stored protein annotation that matches the protein change described
 * by {@code consequenceType}.
 *
 * <p>The lookup is keyed on the Ensembl transcript id plus the position, reference and
 * alternate values of the consequence type's protein variant annotation.
 *
 * @param consequenceType consequence type carrying the protein change to look up
 * @return the first result of the query, or {@code null} when the consequence type has
 *         no protein variant annotation or the query yields no results
 */
private ProteinVariantAnnotation getProteinAnnotation(ConsequenceType consequenceType) {
    ProteinVariantAnnotation proteinChange = consequenceType.getProteinVariantAnnotation();
    if (proteinChange == null) {
        return null;
    }

    QueryResult<ProteinVariantAnnotation> queryResult = proteinDBAdaptor.getVariantAnnotation(
            consequenceType.getEnsemblTranscriptId(),
            proteinChange.getPosition(),
            proteinChange.getReference(),
            proteinChange.getAlternate(),
            new QueryOptions());

    return queryResult.getNumResults() > 0 ? queryResult.getResult().get(0) : null;
}
checkScore(inCT.getProteinVariantAnnotation().getSubstitutionScores(), outCT.getProteinVariantAnnotation().getSubstitutionScores(), "sift"); checkScore(inCT.getProteinVariantAnnotation().getSubstitutionScores(), outCT.getProteinVariantAnnotation().getSubstitutionScores(), "polyphen"); String inUniprotAccession = inCT.getProteinVariantAnnotation().getUniprotAccession() == null ? "" : inCT.getProteinVariantAnnotation().getUniprotAccession(); String outUniprotAccession = outCT.getProteinVariantAnnotation().getUniprotAccession() == null ? "" : outCT.getProteinVariantAnnotation().getUniprotAccession(); String inUniprotName = inCT.getProteinVariantAnnotation().getUniprotName() == null ? "" : inCT.getProteinVariantAnnotation().getUniprotName(); String outUniprotName = outCT.getProteinVariantAnnotation().getUniprotName() == null ? "" : outCT.getProteinVariantAnnotation().getUniprotName(); String inUniprotVariantId = inCT.getProteinVariantAnnotation().getUniprotVariantId() == null ? "" : inCT.getProteinVariantAnnotation().getUniprotVariantId(); String outUniprotVariantId = outCT.getProteinVariantAnnotation().getUniprotVariantId() == null ? "" : outCT.getProteinVariantAnnotation().getUniprotVariantId(); System.out.println(inUniprotAccession + " vs " + outUniprotAccession + " ; " + inUniprotName + " vs " + outUniprotName int inPosition = inCT.getProteinVariantAnnotation().getPosition() == null ? 0 : inCT.getProteinVariantAnnotation().getPosition(); int outPosition = outCT.getProteinVariantAnnotation().getPosition() == null ? 0 : outCT.getProteinVariantAnnotation().getPosition(); String inRef = inCT.getProteinVariantAnnotation().getReference() == null ? "" : inCT.getProteinVariantAnnotation().getReference(); String outRef = outCT.getProteinVariantAnnotation().getReference() == null ? "" : outCT.getProteinVariantAnnotation().getReference(); String inAlt = inCT.getProteinVariantAnnotation().getAlternate() == null ? 
"" : inCT.getProteinVariantAnnotation().getAlternate(); String outAlt = outCT.getProteinVariantAnnotation().getAlternate() == null ? "" : outCT.getProteinVariantAnnotation().getAlternate(); System.out.println(inPosition + " vs " + outPosition + " ; " + inRef + " vs " + outRef
stringBuilder.append(consequenceType.getProteinVariantAnnotation().getPosition()); stringBuilder.append(FIELD_SEPARATOR); stringBuilder.append(consequenceType.getProteinVariantAnnotation().getReference()) .append("/") .append(consequenceType.getProteinVariantAnnotation().getAlternate()); stringBuilder.append(FIELD_SEPARATOR); if (consequenceType.getProteinVariantAnnotation().getSubstitutionScores() != null) { List<String> sift = consequenceType.getProteinVariantAnnotation().getSubstitutionScores().stream() .filter(t -> t.getSource().equalsIgnoreCase("sift")) .map(Score::getDescription) List<String> polyphen = consequenceType.getProteinVariantAnnotation().getSubstitutionScores().stream() .filter(t -> t.getSource().equalsIgnoreCase("polyphen")) .map(Score::getDescription)
if (StringUtils.isNotEmpty(proteinVariantAnnotation.getUniprotAccession())) { gnSo.add(buildGeneSO(proteinVariantAnnotation.getUniprotAccession(), so)); if (StringUtils.isNotEmpty(proteinVariantAnnotation.getUniprotName())) { gnSo.add(buildGeneSO(proteinVariantAnnotation.getUniprotName(), so)); if (proteinVariantAnnotation.getSubstitutionScores() != null) { for (Score score : proteinVariantAnnotation.getSubstitutionScores()) { if (score.getSource().equalsIgnoreCase("sift")) { addNotNull(sift, score.getScore()); if (proteinVariantAnnotation.getKeywords() != null) { proteinKeywords.addAll(proteinVariantAnnotation.getKeywords()); addNotNull(xrefs, proteinVariantAnnotation.getUniprotName()); addNotNull(xrefs, proteinVariantAnnotation.getUniprotAccession()); addNotNull(xrefs, proteinVariantAnnotation.getUniprotVariantId());
ProteinVariantAnnotation protVarAnnotation = new ProteinVariantAnnotation(); scores.add(score); protVarAnnotation.setSubstitutionScores(scores); protVarAnnotation.setUniprotAccession(fields[10]); protVarAnnotation.setUniprotName(fields[11]); protVarAnnotation.setUniprotVariantId(fields[12]); if (StringUtils.isNotEmpty(fields[13])) { try { protVarAnnotation.setPosition(Integer.parseInt(fields[13])); } catch (NumberFormatException e) { logger.warn("Parsing position: " + e.getMessage()); protVarAnnotation.setReference(refAlt[0]); protVarAnnotation.setAlternate(refAlt[1]);
private ProteinVariantAnnotation processProteinVariantData(ProteinVariantAnnotation proteinVariantAnnotation, String shortAlternativeAa, Document proteinVariantData) { proteinVariantAnnotation.setUniprotAccession(proteinVariantData.get("_id", ArrayList.class).get(0).toString()); proteinVariantAnnotation.setKeywords(new ArrayList<>()); ArrayList keywordList = (ArrayList) proteinVariantData.get("keyword", ArrayList.class).get(0); for (Object keywordObject : keywordList) { proteinVariantAnnotation.getKeywords().add((String) ((Document) keywordObject).get("value")); proteinVariantAnnotation.setFeatures(new ArrayList<>()); ArrayList featureList = proteinVariantData.get("feature", ArrayList.class); for (Object featureObject : featureList) { proteinVariantAnnotation.setUniprotVariantId((String) featureDocument.get("id")); proteinVariantAnnotation.setFunctionalDescription((String) featureDocument.get("description")); proteinVariantAnnotation.getFeatures().add(proteinFeature);
case "proteinPosition": if (consequenceType.getProteinVariantAnnotation() != null) { stringBuilder.append(consequenceType.getProteinVariantAnnotation().getPosition()); && consequenceType.getProteinVariantAnnotation().getSubstitutionScores() != null) { List<Double> sift = consequenceType.getProteinVariantAnnotation().getSubstitutionScores().stream() .filter(t -> t.getSource().equalsIgnoreCase("sift")) .map(Score::getScore) case "polyphen": if (consequenceType.getProteinVariantAnnotation() != null && consequenceType.getProteinVariantAnnotation().getSubstitutionScores() != null) { List<Double> polyphen = consequenceType.getProteinVariantAnnotation().getSubstitutionScores().stream() .filter(t -> t.getSource().equalsIgnoreCase("polyphen")) .map(Score::getScore)
/**
 * Collects the mapped values of every substitution score whose source equals
 * {@code source}, across all consequence types of the annotation.
 *
 * <p>Null consequence types, null protein annotations, null score lists and null
 * scores are skipped.
 *
 * @param actual annotation under inspection
 * @return a set of mapped values, or an empty list when the annotation has no
 *         consequence type list at all
 */
@Override
protected Iterable<T> featureValueOf(VariantAnnotation actual) {
    if (actual.getConsequenceTypes() == null) {
        return Collections.emptyList();
    }

    Set<T> mappedValues = new HashSet<>();
    for (ConsequenceType ct : actual.getConsequenceTypes()) {
        if (ct == null
                || ct.getProteinVariantAnnotation() == null
                || ct.getProteinVariantAnnotation().getSubstitutionScores() == null) {
            continue;
        }
        for (Score score : ct.getProteinVariantAnnotation().getSubstitutionScores()) {
            if (score == null || !source.equals(score.getSource())) {
                continue;
            }
            mappedValues.add(mapper.apply(score));
        }
    }
    return mappedValues;
}
};
/**
 * Builds a {@link ProteinVariantAnnotation} from its individual fields.
 *
 * @return a new annotation holding the given values, or {@code null} when
 *         {@code areAllEmpty} reports that every supplied value is empty
 */
private ProteinVariantAnnotation buildProteinVariantAnnotation(String uniprotAccession, String uniprotName,
                                                               int aaPosition, String aaReference,
                                                               String aaAlternate, String uniprotVariantId,
                                                               String functionalDescription,
                                                               List<Score> proteinSubstitutionScores,
                                                               List<String> keywords,
                                                               List<ProteinFeature> features) {
    boolean nothingToStore = areAllEmpty(uniprotAccession, uniprotName, aaPosition, aaReference, aaAlternate,
            uniprotVariantId, proteinSubstitutionScores, keywords, features, functionalDescription);
    if (nothingToStore) {
        return null;
    }

    return new ProteinVariantAnnotation(uniprotAccession, uniprotName, aaPosition, aaReference, aaAlternate,
            uniprotVariantId, functionalDescription, proteinSubstitutionScores, keywords, features);
}
/**
 * Finds the first coding consequence type in {@code consequenceTypeList} that refers to
 * the same Ensembl transcript and the same protein position as {@code consequenceType}.
 *
 * <p>Consequence types without a protein variant annotation, or without a protein
 * position, can never match: if the query lacks them the method returns {@code null}
 * immediately, and candidates lacking an annotation are skipped rather than
 * dereferenced.
 *
 * @param consequenceType     consequence type to find a counterpart for
 * @param consequenceTypeList candidates to search, in order
 * @return the first matching candidate, or {@code null} when there is none
 */
private ConsequenceType findCodingOverlappingConsequenceType(ConsequenceType consequenceType,
                                                             List<ConsequenceType> consequenceTypeList) {
    // Without a protein position on the query there is nothing to compare against.
    if (consequenceType.getProteinVariantAnnotation() == null
            || consequenceType.getProteinVariantAnnotation().getPosition() == null) {
        return null;
    }

    for (ConsequenceType candidate : consequenceTypeList) {
        if (isCoding(candidate)
                && consequenceType.getEnsemblTranscriptId().equals(candidate.getEnsemblTranscriptId())
                // A candidate may carry no protein annotation; treat that as "no match".
                && candidate.getProteinVariantAnnotation() != null
                && consequenceType.getProteinVariantAnnotation().getPosition()
                        .equals(candidate.getProteinVariantAnnotation().getPosition())) {
            return candidate;
        }
    }
    return null;
}
/**
 * Converts the fixture document into a {@code VariantAnnotation} and spot-checks two
 * values: the protein reference of the third consequence type and the primary site of
 * the first COSMIC trait association.
 */
@Test
public void testConvertToDataModelType() throws Exception {
    DocumentToVariantAnnotationConverter converter = new DocumentToVariantAnnotationConverter();
    VariantAnnotation convertedVariantAnnotation = converter.convertToDataModelType(dbObject);

    String proteinReference = convertedVariantAnnotation.getConsequenceTypes().get(2)
            .getProteinVariantAnnotation().getReference();
    assertEquals(proteinReference, "CYS");

    assertEquals(
            convertedVariantAnnotation.getVariantTraitAssociation().getCosmic().get(0).getPrimarySite(),
            "large_intestine");
}
if (variant.getAnnotation().getConsequenceTypes() != null) { for (ConsequenceType consequenceType : variant.getAnnotation().getConsequenceTypes()) { if (consequenceType.getProteinVariantAnnotation() != null && consequenceType.getProteinVariantAnnotation().getKeywords() != null) { keywordsInVariant.addAll(consequenceType.getProteinVariantAnnotation().getKeywords());
/**
 * Queries the protein adaptor for the annotation that corresponds to the protein
 * change carried by {@code consequenceType}.
 *
 * <p>The query uses the Ensembl transcript id together with the position, reference
 * and alternate values held in the consequence type's protein variant annotation.
 *
 * @param consequenceType consequence type describing the protein change
 * @return the first query result, or {@code null} if the consequence type carries no
 *         protein variant annotation or the query returns nothing
 */
private ProteinVariantAnnotation getProteinAnnotation(ConsequenceType consequenceType) {
    ProteinVariantAnnotation change = consequenceType.getProteinVariantAnnotation();
    if (change == null) {
        return null;
    }

    QueryResult<ProteinVariantAnnotation> lookup = proteinDBAdaptor.getVariantAnnotation(
            consequenceType.getEnsemblTranscriptId(),
            change.getPosition(),
            change.getReference(),
            change.getAlternate(),
            new QueryOptions());

    if (lookup.getNumResults() > 0) {
        return lookup.getResult().get(0);
    }
    return null;
}
protected int setCdsAndProteinPosition(int cdnaVariantPosition, int firstCdsPhase, int cdnaCodingStart) { if (cdnaVariantPosition != -1) { // cdnaVariantStart may be null if variantEnd falls in an intron if (transcript.unconfirmedStart()) { cdnaCodingStart -= ((3 - firstCdsPhase) % 3); } int cdsVariantStart = cdnaVariantPosition - cdnaCodingStart + 1; consequenceType.setCdsPosition(cdsVariantStart); // First place where protein variant annotation is added to the Consequence type, // must create the ProteinVariantAnnotation object ProteinVariantAnnotation proteinVariantAnnotation = new ProteinVariantAnnotation(); proteinVariantAnnotation.setPosition(((cdsVariantStart - 1) / 3) + 1); consequenceType.setProteinVariantAnnotation(proteinVariantAnnotation); } return cdnaCodingStart; }
private ProteinVariantAnnotation processProteinVariantData(ProteinVariantAnnotation proteinVariantAnnotation, String shortAlternativeAa, Document proteinVariantData) { proteinVariantAnnotation.setUniprotAccession(proteinVariantData.get("_id", ArrayList.class).get(0).toString()); proteinVariantAnnotation.setKeywords(new ArrayList<>()); ArrayList keywordList = (ArrayList) proteinVariantData.get("keyword", ArrayList.class).get(0); for (Object keywordObject : keywordList) { proteinVariantAnnotation.getKeywords().add((String) ((Document) keywordObject).get("value")); proteinVariantAnnotation.setFeatures(new ArrayList<>()); ArrayList featureList = proteinVariantData.get("feature", ArrayList.class); for (Object featureObject : featureList) { proteinVariantAnnotation.setUniprotVariantId((String) featureDocument.get("id")); proteinVariantAnnotation.setFunctionalDescription((String) featureDocument.get("description")); proteinVariantAnnotation.getFeatures().add(proteinFeature);
case "proteinPosition": if (consequenceType.getProteinVariantAnnotation() != null) { stringBuilder.append(consequenceType.getProteinVariantAnnotation().getPosition()); && consequenceType.getProteinVariantAnnotation().getSubstitutionScores() != null) { List<Double> sift = consequenceType.getProteinVariantAnnotation().getSubstitutionScores().stream() .filter(t -> t.getSource().equalsIgnoreCase("sift")) .map(Score::getScore) case "polyphen": if (consequenceType.getProteinVariantAnnotation() != null && consequenceType.getProteinVariantAnnotation().getSubstitutionScores() != null) { List<Double> polyphen = consequenceType.getProteinVariantAnnotation().getSubstitutionScores().stream() .filter(t -> t.getSource().equalsIgnoreCase("polyphen")) .map(Score::getScore)