/**
 * Removes the CDS (coding sequence) with the given accession ID from this
 * transcript sequence.
 * <p>
 * The first entry of the CDS list whose accession ID equals {@code accession}
 * is removed from the list, and the mapping for {@code accession} is removed
 * from the accession lookup map.
 *
 * @param accession accession ID of the CDS to remove
 * @return the removed CDS sequence, or {@code null} if no CDS has that accession ID
 */
public CDSSequence removeCDS(String accession) {
    CDSSequence match = null;
    for (CDSSequence candidate : cdsSequenceList) {
        if (candidate.getAccession().getID().equals(accession)) {
            match = candidate;
            break;
        }
    }
    if (match == null) {
        return null;
    }
    // Mutate the collections only after iteration has finished.
    cdsSequenceList.remove(match);
    cdsSequenceHashMap.remove(accession);
    return match;
}
/** * A CDS sequence if negative stranded needs to be reverse complement * to represent the actual coding sequence. When getting a ProteinSequence * from a TranscriptSequence this method is callled for each CDSSequence * {@link http://www.sequenceontology.org/gff3.shtml} * {@link http://biowiki.org/~yam/bioe131/GFF.ppt} * @return coding sequence */ public String getCodingSequence() { String sequence = this.getSequenceAsString(getBioBegin(), getBioEnd(), getStrand()); if (getStrand() == Strand.NEGATIVE) { //need to take complement of sequence because it is negative and we are returning a coding sequence StringBuilder b = new StringBuilder(getLength()); CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet(); for (int i = 0; i < sequence.length(); i++) { String nucleotide = String.valueOf(sequence.charAt(i)); NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide); b.append(nucleotideCompound.getComplement().getShortName()); } sequence = b.toString(); } // sequence = sequence.substring(phase); return sequence; } }
/**
 * Orders two CDS sequences by their biological begin coordinate.
 * <p>
 * When both sequences are on the negative strand the order is descending by
 * begin coordinate; in every other case (same non-negative strand, or
 * differing strands) the order is ascending.
 * <p>
 * Coordinates are compared with {@link Integer#compare(int, int)} rather than
 * by subtraction, so the result cannot be corrupted by integer overflow.
 *
 * @param o1 first CDS sequence
 * @param o2 second CDS sequence
 * @return a negative value, zero, or a positive value as {@code o1} sorts
 *         before, equal to, or after {@code o2}
 */
@Override
public int compare(CDSSequence o1, CDSSequence o2) {
    boolean bothNegative = o1.getStrand() == o2.getStrand()
            && o1.getStrand() == Strand.NEGATIVE;
    int begin1 = o1.getBioBegin();
    int begin2 = o2.getBioBegin();
    if (bothNegative) {
        // Reverse the arguments to get descending order on the negative strand.
        return Integer.compare(begin2, begin1);
    }
    return Integer.compare(begin1, begin2);
}
/**
 * Returns the number of positions spanned by this sequence, counting both the
 * begin and the end coordinate.
 *
 * @return absolute difference between the end and begin coordinates, plus one
 */
@Override
public int getLength() {
    final int end = this.getBioEnd();
    final int begin = this.getBioBegin();
    // +1 because both endpoints are counted.
    return Math.abs(end - begin) + 1;
}
Collections.sort(cdsSequenceList, new SequenceComparator()); for (CDSSequence cdsSequence : cdsSequenceList) { gff3line = key + "\t" + cdsSequence.getSource() + "\t" + "CDS" + "\t" + cdsSequence.getBioBegin() + "\t" + cdsSequence.getBioEnd() + "\t"; score = cdsSequence.getSequenceScore(); if (score == null) { gff3line = gff3line + ".\t"; gff3line = gff3line + score + "\t"; gff3line = gff3line + cdsSequence.getStrand().getStringRepresentation() + "\t"; gff3line = gff3line + cdsSequence.getPhase() + "\t"; gff3line = gff3line + "ID=" + cdsSequence.getAccession().getID() + ";Parent=" + transcriptParentName; gff3line = gff3line + getGFF3Note(cdsSequence.getNotesList());
for (int i = 0; i < cdsSequenceList.size(); i++) { CDSSequence cdsSequence = cdsSequenceList.get(i); String codingSequence = cdsSequence.getCodingSequence(); CDSSequence nextCDSSequence = cdsSequenceList.get(i + 1); if (nextCDSSequence.phase == 1) { String nextCodingSequence = nextCDSSequence.getCodingSequence(); codingSequence = codingSequence + nextCodingSequence.substring(0, 1); } else if (nextCDSSequence.phase == 2) { String nextCodingSequence = nextCDSSequence.getCodingSequence(); codingSequence = codingSequence + nextCodingSequence.substring(0, 2); CDSSequence nextCDSSequence = cdsSequenceList.get(i + 1); if (nextCDSSequence.phase == 1) { String nextCodingSequence = nextCDSSequence.getCodingSequence(); codingSequence = codingSequence + nextCodingSequence.substring(0, 1); } else if (nextCDSSequence.phase == 2) { String nextCodingSequence = nextCDSSequence.getCodingSequence(); codingSequence = codingSequence + nextCodingSequence.substring(0, 2); proteinSequence.setAccession(new AccessionID(cdsSequence.getAccession().getID())); proteinSequence.setParentDNASequence(cdsSequence, 1, cdsSequence.getLength()); proteinSequenceList.add(proteinSequence);
/** * Add a Coding Sequence region with phase to the transcript sequence * @param accession * @param begin * @param end * @param phase 0,1,2 * @return */ public CDSSequence addCDS(AccessionID accession, int begin, int end, int phase) throws Exception { if (cdsSequenceHashMap.containsKey(accession.getID())) { throw new Exception("Duplicate accesion id " + accession.getID()); } CDSSequence cdsSequence = new CDSSequence(this, begin, end, phase); //sense should be the same as parent cdsSequence.setAccession(accession); cdsSequenceList.add(cdsSequence); Collections.sort(cdsSequenceList, new CDSComparator()); cdsSequenceHashMap.put(accession.getID(), cdsSequence); return cdsSequence; }
/** * Get the stitched together CDS sequences then maps to the cDNA * @return */ public DNASequence getDNACodingSequence() { StringBuilder sb = new StringBuilder(); for (CDSSequence cdsSequence : cdsSequenceList) { sb.append(cdsSequence.getCodingSequence()); } DNASequence dnaSequence = null; try { dnaSequence = new DNASequence(sb.toString().toUpperCase()); } catch (CompoundNotFoundException e) { // if I understand this should not happen, please correct if I'm wrong - JD 2014-10-24 logger.error("Could not create DNA coding sequence, {}. This is most likely a bug.", e.getMessage()); } dnaSequence.setAccession(new AccessionID(this.getAccession().getID())); return dnaSequence; }
/**
 * Returns the number of positions spanned by this sequence, counting both the
 * begin and the end coordinate.
 *
 * @return absolute difference between the end and begin coordinates, plus one
 */
@Override
public int getLength() {
    final int end = this.getBioEnd();
    final int begin = this.getBioBegin();
    // +1 because both endpoints are counted.
    return Math.abs(end - begin) + 1;
}
/**
 * Orders two CDS sequences by their biological begin coordinate.
 * <p>
 * When both sequences are on the negative strand the order is descending by
 * begin coordinate; in every other case (same non-negative strand, or
 * differing strands) the order is ascending.
 * <p>
 * Coordinates are compared with {@link Integer#compare(int, int)} rather than
 * by subtraction, so the result cannot be corrupted by integer overflow.
 *
 * @param o1 first CDS sequence
 * @param o2 second CDS sequence
 * @return a negative value, zero, or a positive value as {@code o1} sorts
 *         before, equal to, or after {@code o2}
 */
@Override
public int compare(CDSSequence o1, CDSSequence o2) {
    boolean bothNegative = o1.getStrand() == o2.getStrand()
            && o1.getStrand() == Strand.NEGATIVE;
    int begin1 = o1.getBioBegin();
    int begin2 = o2.getBioBegin();
    if (bothNegative) {
        // Reverse the arguments to get descending order on the negative strand.
        return Integer.compare(begin2, begin1);
    }
    return Integer.compare(begin1, begin2);
}
for (int i = 0; i < cdsSequenceList.size(); i++) { CDSSequence cdsSequence = cdsSequenceList.get(i); String codingSequence = cdsSequence.getCodingSequence(); CDSSequence nextCDSSequence = cdsSequenceList.get(i + 1); if (nextCDSSequence.phase == 1) { String nextCodingSequence = nextCDSSequence.getCodingSequence(); codingSequence = codingSequence + nextCodingSequence.substring(0, 1); } else if (nextCDSSequence.phase == 2) { String nextCodingSequence = nextCDSSequence.getCodingSequence(); codingSequence = codingSequence + nextCodingSequence.substring(0, 2); CDSSequence nextCDSSequence = cdsSequenceList.get(i + 1); if (nextCDSSequence.phase == 1) { String nextCodingSequence = nextCDSSequence.getCodingSequence(); codingSequence = codingSequence + nextCodingSequence.substring(0, 1); } else if (nextCDSSequence.phase == 2) { String nextCodingSequence = nextCDSSequence.getCodingSequence(); codingSequence = codingSequence + nextCodingSequence.substring(0, 2); proteinSequence.setAccession(new AccessionID(cdsSequence.getAccession().getID())); proteinSequence.setParentDNASequence(cdsSequence, 1, cdsSequence.getLength()); proteinSequenceList.add(proteinSequence);
/** * Add a Coding Sequence region with phase to the transcript sequence * @param accession * @param begin * @param end * @param phase 0,1,2 * @return */ public CDSSequence addCDS(AccessionID accession, int begin, int end, int phase) throws Exception { if (cdsSequenceHashMap.containsKey(accession.getID())) { throw new Exception("Duplicate accesion id " + accession.getID()); } CDSSequence cdsSequence = new CDSSequence(this, begin, end, phase); //sense should be the same as parent cdsSequence.setAccession(accession); cdsSequenceList.add(cdsSequence); Collections.sort(cdsSequenceList, new CDSComparator()); cdsSequenceHashMap.put(accession.getID(), cdsSequence); return cdsSequence; }
/** * Get the stitched together CDS sequences then maps to the cDNA * @return */ public DNASequence getDNACodingSequence() { StringBuilder sb = new StringBuilder(); for (CDSSequence cdsSequence : cdsSequenceList) { sb.append(cdsSequence.getCodingSequence()); } DNASequence dnaSequence = null; try { dnaSequence = new DNASequence(sb.toString().toUpperCase()); } catch (CompoundNotFoundException e) { // if I understand this should not happen, please correct if I'm wrong - JD 2014-10-24 logger.error("Could not create DNA coding sequence, {}. This is most likely a bug.", e.getMessage()); } dnaSequence.setAccession(new AccessionID(this.getAccession().getID())); return dnaSequence; }
/** * A CDS sequence if negative stranded needs to be reverse complement * to represent the actual coding sequence. When getting a ProteinSequence * from a TranscriptSequence this method is callled for each CDSSequence * {@link http://www.sequenceontology.org/gff3.shtml} * {@link http://biowiki.org/~yam/bioe131/GFF.ppt} * @return coding sequence */ public String getCodingSequence() { String sequence = this.getSequenceAsString(getBioBegin(), getBioEnd(), getStrand()); if (getStrand() == Strand.NEGATIVE) { //need to take complement of sequence because it is negative and we are returning a coding sequence StringBuilder b = new StringBuilder(getLength()); CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet(); for (int i = 0; i < sequence.length(); i++) { String nucleotide = String.valueOf(sequence.charAt(i)); NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide); b.append(nucleotideCompound.getComplement().getShortName()); } sequence = b.toString(); } // sequence = sequence.substring(phase); return sequence; } }
hitLabel = uniprotBestHit + "_" + endIndex + "_" + startIndex; int dnaBeginIndex = cdsSequence.getBioBegin() + (3 * offsetStartIndex); int dnaEndIndex = cdsSequence.getBioEnd() - (3 * offsetEndIndex); String scaffold = geneSequence.getParentChromosomeSequence().getAccession().getID();
/**
 * Removes the CDS (coding sequence) with the given accession ID from this
 * transcript sequence.
 * <p>
 * The first entry of the CDS list whose accession ID equals {@code accession}
 * is removed from the list, and the mapping for {@code accession} is removed
 * from the accession lookup map.
 *
 * @param accession accession ID of the CDS to remove
 * @return the removed CDS sequence, or {@code null} if no CDS has that accession ID
 */
public CDSSequence removeCDS(String accession) {
    CDSSequence match = null;
    for (CDSSequence candidate : cdsSequenceList) {
        if (candidate.getAccession().getID().equals(accession)) {
            match = candidate;
            break;
        }
    }
    if (match == null) {
        return null;
    }
    // Mutate the collections only after iteration has finished.
    cdsSequenceList.remove(match);
    cdsSequenceHashMap.remove(accession);
    return match;
}