/**
 * Flattens the genes of all supplied chromosomes into one map keyed by gene accession ID.
 * <p>
 * Entries appear in the order the genes are first encountered. If two genes report the same
 * accession ID, the later one replaces the value stored for that ID.
 *
 * @param chromosomeSequences chromosomes whose gene sequences are collected
 * @return insertion-ordered map from gene accession ID to gene sequence
 * @throws Exception declared by the signature; this body contains no explicit throw
 */
static public LinkedHashMap<String, GeneSequence> getGeneSequences(Collection<ChromosomeSequence> chromosomeSequences) throws Exception {
    LinkedHashMap<String, GeneSequence> genesById = new LinkedHashMap<String, GeneSequence>();
    for (ChromosomeSequence chromosome : chromosomeSequences) {
        for (GeneSequence gene : chromosome.getGeneSequences().values()) {
            String geneId = gene.getAccession().getID();
            genesById.put(geneId, gene);
        }
    }
    return genesById;
}
static public LinkedHashMap<String, ProteinSequence> getProteinSequences(Collection<ChromosomeSequence> chromosomeSequences) throws Exception { LinkedHashMap<String, ProteinSequence> proteinSequenceHashMap = new LinkedHashMap<String, ProteinSequence>(); for (ChromosomeSequence dnaSequence : chromosomeSequences) { for (GeneSequence geneSequence : dnaSequence.getGeneSequences().values()) { for (TranscriptSequence transcriptSequence : geneSequence.getTranscripts().values()) { //TODO remove? // DNASequence dnaCodingSequence = transcriptSequence.getDNACodingSequence(); // logger.info("CDS={}", dnaCodingSequence.getSequenceAsString()); try { ProteinSequence proteinSequence = transcriptSequence.getProteinSequence(); // logger.info("{} {}", proteinSequence.getAccession().getID(), proteinSequence); if (proteinSequenceHashMap.containsKey(proteinSequence.getAccession().getID())) { throw new Exception("Duplicate protein sequence id=" + proteinSequence.getAccession().getID() + " found at Gene id=" + geneSequence.getAccession().getID()); } else { proteinSequenceHashMap.put(proteinSequence.getAccession().getID(), proteinSequence); } } catch (Exception e) { logger.error("Exception: ", e); } } } } return proteinSequenceHashMap; }
ExonSequence exon1 = exonSequenceList.get(i); ExonSequence exon2 = exonSequenceList.get(i + 1); this.addIntron(new AccessionID(this.getAccession().getID() + "-" + "intron" + intronIndex), exon1.getBioEnd() - shift, exon2.getBioBegin() + shift); intronIndex++;
logger.warn("Bad Feature, Accession: {}, Sequence Strand: {}, Gene Begin: {}, Gene End: {}, Exon Begin: {}, Exon End: {}", sequence.getAccession().toString(), sequence.getStrand(), geneBioBegin, geneBioEnd, exonSequence.getBioBegin(), exonSequence.getBioEnd()); } else { for (int i = featureBioBegin; i <= featureBioEnd; i++) {
ExonSequence exon1 = exonSequenceList.get(i); ExonSequence exon2 = exonSequenceList.get(i + 1); this.addIntron(new AccessionID(this.getAccession().getID() + "-" + "intron" + intronIndex), exon1.getBioEnd() - shift, exon2.getBioBegin() + shift); intronIndex++;
logger.warn("Bad Feature, Accession: {}, Sequence Strand: {}, Gene Begin: {}, Gene End: {}, Exon Begin: {}, Exon End: {}", sequence.getAccession().toString(), sequence.getStrand(), geneBioBegin, geneBioEnd, exonSequence.getBioBegin(), exonSequence.getBioEnd()); } else { for (int i = featureBioBegin; i <= featureBioEnd; i++) {
/** * Try to give method clarity where you want a DNASequence coding in the 5' to 3' direction * Returns the DNASequence representative of the 5' and 3' reading based on strand * @return dna sequence */ public DNASequence getSequence5PrimeTo3Prime() { String sequence = getSequenceAsString(this.getBioBegin(), this.getBioEnd(), this.getStrand()); if (getStrand() == Strand.NEGATIVE) { //need to take complement of sequence because it is negative and we are returning the gene sequence from the opposite strand StringBuilder b = new StringBuilder(getLength()); CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet(); for (int i = 0; i < sequence.length(); i++) { String nucleotide = String.valueOf(sequence.charAt(i)); NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide); b.append(nucleotideCompound.getComplement().getShortName()); } sequence = b.toString(); } DNASequence dnaSequence = null; try { dnaSequence = new DNASequence(sequence.toUpperCase()); } catch (CompoundNotFoundException e) { // this should not happen, the sequence is DNA originally, if it does, there's a bug somewhere logger.error("Could not create new DNA sequence in getSequence5PrimeTo3Prime(). Error: {}",e.getMessage()); } dnaSequence.setAccession(new AccessionID(this.getAccession().getID())); return dnaSequence; } }
/** * Try to give method clarity where you want a DNASequence coding in the 5' to 3' direction * Returns the DNASequence representative of the 5' and 3' reading based on strand * @return dna sequence */ public DNASequence getSequence5PrimeTo3Prime() { String sequence = getSequenceAsString(this.getBioBegin(), this.getBioEnd(), this.getStrand()); if (getStrand() == Strand.NEGATIVE) { //need to take complement of sequence because it is negative and we are returning the gene sequence from the opposite strand StringBuilder b = new StringBuilder(getLength()); CompoundSet<NucleotideCompound> compoundSet = this.getCompoundSet(); for (int i = 0; i < sequence.length(); i++) { String nucleotide = String.valueOf(sequence.charAt(i)); NucleotideCompound nucleotideCompound = compoundSet.getCompoundForString(nucleotide); b.append(nucleotideCompound.getComplement().getShortName()); } sequence = b.toString(); } DNASequence dnaSequence = null; try { dnaSequence = new DNASequence(sequence.toUpperCase()); } catch (CompoundNotFoundException e) { // this should not happen, the sequence is DNA originally, if it does, there's a bug somewhere logger.error("Could not create new DNA sequence in getSequence5PrimeTo3Prime(). Error: {}",e.getMessage()); } dnaSequence.setAccession(new AccessionID(this.getAccession().getID())); return dnaSequence; } }
gff3line = gff3line + "ID=" + geneSequence.getAccession().getID() + ";Name=" + geneSequence.getAccession().getID(); gff3line = gff3line + getGFF3Note(geneSequence.getNotesList()); gff3line = gff3line + "\n"; String id = geneSequence.getAccession().getID() + "." + transcriptIndex; gff3line = gff3line + "ID=" + id + ";Parent=" + geneSequence.getAccession().getID() + ";Name=" + id; gff3line = gff3line + getGFF3Note(transcriptSequence.getNotesList()); outputStream.write(gff3line.getBytes()); String transcriptParentName = geneSequence.getAccession().getID() + "." + transcriptIndex; ArrayList<CDSSequence> cdsSequenceList = new ArrayList<CDSSequence>(transcriptSequence.getCDSSequences().values()); Collections.sort(cdsSequenceList, new SequenceComparator());