/**
 * Tells whether the given string can be turned into a {@link DNASequence}.
 *
 * @param str candidate sequence text
 * @return {@code true} when constructing a {@link DNASequence} from {@code str} succeeds,
 *         {@code false} when the constructor reports a {@link CompoundNotFoundException}
 */
private static boolean isDNASequence(String str) {
	boolean parsable;
	try {
		// The instance itself is not needed; only whether construction succeeds.
		new DNASequence(str);
		parsable = true;
	} catch (CompoundNotFoundException notDna) {
		parsable = false;
	}
	return parsable;
}
/**
 * Build a {@link DNASequence} from the specified FASTQ formatted sequence and attach
 * its quality scores as a feature. The feature is whatever {@code createQualityScores}
 * produces for the same FASTQ record, and it is added over positions 1 through the
 * sequence length, i.e. it covers the whole sequence.
 *
 * @param fastq FASTQ formatted sequence, must not be null
 * @return a new {@link DNASequence} carrying a quality score feature for the specified FASTQ formatted sequence
 * @throws CompoundNotFoundException if DNA sequence in fastq contains unrecognised compounds
 */
public static DNASequence createDNASequenceWithQualityScores(final Fastq fastq) throws CompoundNotFoundException
{
	final DNASequence dna = createDNASequence(fastq);
	final int lastPosition = dna.getLength();
	dna.addFeature(1, lastPosition, createQualityScores(fastq));
	return dna;
}
static public LinkedHashMap<String, ChromosomeSequence> getChromosomeSequenceFromDNASequence(LinkedHashMap<String, DNASequence> dnaSequenceList) { LinkedHashMap<String, ChromosomeSequence> chromosomeSequenceList = new LinkedHashMap<String, ChromosomeSequence>(); for (String key : dnaSequenceList.keySet()) { DNASequence dnaSequence = dnaSequenceList.get(key); ChromosomeSequence chromosomeSequence = new ChromosomeSequence(dnaSequence.getProxySequenceReader()); //we want the underlying sequence but don't need storage chromosomeSequence.setAccession(dnaSequence.getAccession()); chromosomeSequenceList.put(key, chromosomeSequence); } return chromosomeSequenceList; }
/**
 * Small demonstration: builds one {@link DNASequence} directly from a string and a
 * second one through a {@link StringProxySequenceReader}, logging both.
 *
 * @param args command line arguments (not used)
 * @throws Exception if either sequence cannot be created
 */
public static void main(String[] args) throws Exception {
	// Sequence created straight from its string representation.
	DNASequence fromString = new DNASequence("ATCG");
	logger.info("DNA Sequence: {}", fromString.toString());

	// Sequence backed by a proxy reader that is given the DNA compound set.
	StringProxySequenceReader<NucleotideCompound> proxyReader =
			new StringProxySequenceReader<NucleotideCompound>("GCTA", DNACompoundSet.getDNACompoundSet());
	DNASequence fromProxy = new DNASequence(proxyReader);
	logger.info("DNA Sequence from Proxy: {}", fromProxy.toString());
}
}
/**
 * Collects the cDNA entries found under {@code prediction/gene/cDNA} in the geneid
 * document and returns them as DNA sequences.
 * <p>
 * For each cDNA element the text content is stripped of every non-word character
 * (regex {@code \W}) before the sequence is built. The {@code idGene} attribute of the
 * element's parent is used both as the accession of the sequence and as its map key.
 *
 * @return DNA sequences keyed by {@code idGene}, in document order
 * @throws Exception if the elements cannot be selected or a sequence cannot be created
 */
public LinkedHashMap<String, DNASequence> getDNACodingSequences() throws Exception {
	LinkedHashMap<String, DNASequence> codingSequences = new LinkedHashMap<String, DNASequence>();
	ArrayList<Element> cdnaElements = XMLHelper.selectElements(geneidDoc.getDocumentElement(), "prediction/gene/cDNA");
	logger.info("{} hits", cdnaElements.size());

	for (Element cdna : cdnaElements) {
		Element gene = (Element) cdna.getParentNode();
		String residues = cdna.getTextContent().replaceAll("\\W","");
		DNASequence coding = new DNASequence(residues);

		String geneId = gene.getAttribute("idGene");
		coding.setAccession(new AccessionID(geneId));
		codingSequences.put(geneId, coding);
	}

	return codingSequences;
}
DNASequence dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); dnaSequence.setAccession(new AccessionID("NM_001126")); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), 
dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords());
dnaCodingSequence = new DNASequence(codingSequence.toUpperCase()); } catch (CompoundNotFoundException e) { RNASequence rnaCodingSequence = dnaCodingSequence.getRNASequence(TranscriptionEngine.getDefault()); ProteinSequence proteinSequence = rnaCodingSequence.getProteinSequence(TranscriptionEngine.getDefault()); proteinSequence.setAccession(new AccessionID(cdsSequence.getAccession().getID()));
/**
 * Build a {@link DNASequence} from the sequence text of the specified FASTQ formatted
 * sequence. The FASTQ description is stored as the original header of the result.
 *
 * @param fastq FASTQ formatted sequence, must not be null
 * @return a new {@link DNASequence} from the specified FASTQ formatted sequence
 * @throws IllegalArgumentException if {@code fastq} is null
 * @throws CompoundNotFoundException if DNA sequence in fastq contains unrecognised compounds
 */
public static DNASequence createDNASequence(final Fastq fastq) throws CompoundNotFoundException
{
	if (fastq != null)
	{
		final DNASequence dna = new DNASequence(fastq.getSequence());
		dna.setOriginalHeader(fastq.getDescription());
		return dna;
	}
	throw new IllegalArgumentException("fastq must not be null");
}
/**
 * Converts a DNA sequence to a protein sequence by first obtaining its RNA sequence
 * and then asking that RNA sequence for its protein sequence.
 *
 * @param dnaSequence the DNA sequence
 * @return the protein sequence
 * @throws CompoundNotFoundException declared by this method's signature
 */
public static ProteinSequence convertDNAtoProteinSequence(DNASequence dnaSequence) throws CompoundNotFoundException {
	return dnaSequence.getRNASequence().getProteinSequence();
}
}
/**
 * Extracts the DNA sequence for the coding regions described by the input genetic coordinates.
 * <p>
 * The regions are obtained from {@code getCDSRegions}, fetched one by one from the 2bit
 * facade and concatenated in the order returned. When the orientation is {@code '-'} the
 * concatenated sequence is reversed and complemented. The result is upper-cased before
 * the final {@link DNASequence} is built.
 *
 * @param twoBitFacade the facade used to read the sequence of each region
 * @param chromosome the name of the chromosome
 * @param exonStarts The list holding the genetic coordinates pointing to the start positions of the exons (including UTR regions)
 * @param exonEnds The list holding the genetic coordinates pointing to the end positions of the exons (including UTR regions)
 * @param cdsStart The start position of a coding region
 * @param cdsEnd The end position of a coding region
 * @param orientation The orientation of the strand where the gene is living; must not be null
 *
 * @return the DNA sequence assembled from the coding regions of the input genetic coordinates
 * @throws Exception if a region cannot be read or the assembled text is not a valid DNA sequence
 */
public static DNASequence getTranscriptDNASequence(TwoBitFacade twoBitFacade, String chromosome, List<Integer> exonStarts, List<Integer> exonEnds, int cdsStart, int cdsEnd, Character orientation) throws Exception {
	List<Range<Integer>> cdsRegion = getCDSRegions(exonStarts, exonEnds, cdsStart, cdsEnd);

	// Accumulate in a builder: repeated String concatenation copies the whole
	// transcript on every iteration.
	StringBuilder transcript = new StringBuilder();
	for (Range<Integer> range : cdsRegion) {
		String exonSequence = twoBitFacade.getSequence(chromosome, range.lowerEndpoint(), range.upperEndpoint());
		transcript.append(exonSequence);
	}

	String dnaSequence;
	if (orientation.equals('-')) {
		// Reverse strand: reverse the text, then complement it.
		DNASequence reversed = new DNASequence(transcript.reverse().toString());
		SequenceView<NucleotideCompound> complement = reversed.getComplement();
		dnaSequence = complement.getSequenceAsString();
	} else {
		dnaSequence = transcript.toString();
	}
	return new DNASequence(dnaSequence.toUpperCase());
}
}
for (String accession : dnaSequenceList.keySet()) { DNASequence contigSequence = dnaSequenceList.get(accession); ChromosomeSequence chromsomeSequence = new ChromosomeSequence(contigSequence.getSequenceAsString()); chromsomeSequence.setAccession(contigSequence.getAccession()); chromosomeSequenceList.put(accession, chromsomeSequence); String geneSequence = dnaSequence.getSequenceAsString(); String lcGeneSequence = geneSequence.toLowerCase(); String reverseGeneSequence = dnaSequence.getReverse().getSequenceAsString(); String lcReverseGeneSequence = reverseGeneSequence.toLowerCase(); Integer bioStart = null; accession = id; contigDNASequence = dnaSequenceList.get(id); String contigSequence = contigDNASequence.getSequenceAsString().toLowerCase(); bioStart = contigSequence.indexOf(lcGeneSequence); if (bioStart != -1) { dnaSequence.getAccession().toString(), contigDNASequence.getAccession().toString(), bioStart, bioEnd, strand); ChromosomeSequence chromosomeSequence = chromosomeSequenceList.get(accession); String geneaccession = dnaSequence.getAccession().getID(); String note = geneaccession; String[] values = geneaccession.split(" "); throw new Exception(dnaSequence.getAccession().toString() + " not found"); logger.info("Gene not found {}", dnaSequence.getAccession().toString());
/**
 * Output a gff3 feature file that will give the length of each scaffold/chromosome in the fasta file.
 * Used for gbrowse so it knows length.
 * <p>
 * The file starts with a {@code ##gff-version 3} line, followed by one {@code contig}
 * feature per sequence. Each feature starts at 1, ends at the sequence's bio end, uses the
 * fasta file name as source and the accession id as both sequence id and {@code Name}.
 * The writer is closed even when writing fails part way through.
 *
 * @param fastaSequenceFile fasta file whose DNA sequences are measured
 * @param gffFile gff3 file to write
 * @throws Exception if the fasta file cannot be read or the gff3 file cannot be written
 */
static public void outputFastaSequenceLengthGFF3(File fastaSequenceFile, File gffFile) throws Exception {
	LinkedHashMap<String, DNASequence> dnaSequenceList = FastaReaderHelper.readFastaDNASequence(fastaSequenceFile);
	String fileName = fastaSequenceFile.getName();
	String newLine = System.getProperty("line.separator");

	FileWriter fw = new FileWriter(gffFile);
	try {
		fw.write("##gff-version 3" + newLine);
		for (DNASequence dnaSequence : dnaSequenceList.values()) {
			String id = dnaSequence.getAccession().getID();
			String gff3line = id + "\t" + fileName + "\t" + "contig" + "\t" + "1" + "\t" + dnaSequence.getBioEnd() + "\t.\t.\t.\tName=" + id + newLine;
			fw.write(gff3line);
		}
	} finally {
		// Release the file handle on every path, including write failures.
		fw.close();
	}
}
seq = sequence.getSequence5PrimeTo3Prime().getSequenceAsString(); if (showExonUppercase) { StringBuilder sb = new StringBuilder(seq.toLowerCase());
/**
 * Computes the percentage of G and C characters over all given sequences.
 * <p>
 * Every character of each sequence's string form is counted towards the total;
 * {@code G}, {@code C}, {@code g} and {@code c} are counted as GC.
 *
 * @param sequences the sequences to inspect, must not be null
 * @return GC content as a percentage in the range 0-100, or {@code 0.0} when the
 *         sequences contain no characters at all (instead of the {@code NaN} a 0/0
 *         division would give)
 */
public static double getGCStats(Collection<DNASequence> sequences) {
	long gcCount = 0;
	long total = 0;
	for (DNASequence sequence : sequences) {
		for (char d : sequence.toString().toCharArray()) {
			if (d == 'G' || d == 'C' || d == 'g' || d == 'c') {
				gcCount++;
			}
			total++;
		}
	}
	if (total == 0) {
		// Nothing to measure: avoid dividing zero by zero.
		return 0.0;
	}
	return ((double) gcCount / total) * 100.0;
}
/**
 * Splits a nucleotide fasta file into one {@code .fna} file per sequence.
 * <p>
 * Each output file is written to {@code outputDirectory} and named
 * {@code <uniqueid><accession id>.fna}. The output directory is created when it does
 * not exist yet.
 *
 * @param fastaFileName the fasta file to split
 * @param uniqueid prefix placed in front of every accession id in the file name;
 *        null or empty means no prefix
 * @param outputDirectory directory receiving the per-sequence files
 * @throws Exception if the fasta file cannot be read or an output file cannot be written
 */
public void processNucleotides(File fastaFileName,String uniqueid, File outputDirectory ) throws Exception{
	if(!outputDirectory.exists())
		outputDirectory.mkdirs();

	// The previous length check was inverted, so a non-empty uniqueid never reached
	// the file name. An empty prefix contributes nothing, so no branch is needed.
	String prefix = (uniqueid == null) ? "" : uniqueid;
	String directoryPath = outputDirectory.getAbsolutePath() + File.separatorChar;

	LinkedHashMap<String,DNASequence> dnaSequenceHashMap = FastaReaderHelper.readFastaDNASequence(fastaFileName);
	for(DNASequence dnaSequence : dnaSequenceHashMap.values()){
		String fileName = directoryPath + prefix + dnaSequence.getAccession().getID() + ".fna";
		ArrayList<DNASequence> dnaList = new ArrayList<DNASequence>();
		dnaList.add(dnaSequence);
		FastaWriterHelper.writeNucleotideSequence(new File(fileName), dnaList);
	}
}
DNASequence dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); dnaSequence.setAccession(new AccessionID("NM_001126")); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), 
dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords());
/** * Get the stitched together CDS sequences then maps to the cDNA * @return */ public DNASequence getDNACodingSequence() { StringBuilder sb = new StringBuilder(); for (CDSSequence cdsSequence : cdsSequenceList) { sb.append(cdsSequence.getCodingSequence()); } DNASequence dnaSequence = null; try { dnaSequence = new DNASequence(sb.toString().toUpperCase()); } catch (CompoundNotFoundException e) { // if I understand this should not happen, please correct if I'm wrong - JD 2014-10-24 logger.error("Could not create DNA coding sequence, {}. This is most likely a bug.", e.getMessage()); } dnaSequence.setAccession(new AccessionID(this.getAccession().getID())); return dnaSequence; }
/**
 * Small demonstration: builds one {@link DNASequence} directly from a string and a
 * second one through a {@link StringProxySequenceReader}, logging both.
 *
 * @param args command line arguments (not used)
 * @throws Exception if either sequence cannot be created
 */
public static void main(String[] args) throws Exception {
	// Sequence created straight from its string representation.
	DNASequence fromString = new DNASequence("ATCG");
	logger.info("DNA Sequence: {}", fromString.toString());

	// Sequence backed by a proxy reader that is given the DNA compound set.
	StringProxySequenceReader<NucleotideCompound> proxyReader =
			new StringProxySequenceReader<NucleotideCompound>("GCTA", DNACompoundSet.getDNACompoundSet());
	DNASequence fromProxy = new DNASequence(proxyReader);
	logger.info("DNA Sequence from Proxy: {}", fromProxy.toString());
}
}
dnaCodingSequence = new DNASequence(codingSequence.toUpperCase()); } catch (CompoundNotFoundException e) { RNASequence rnaCodingSequence = dnaCodingSequence.getRNASequence(TranscriptionEngine.getDefault()); ProteinSequence proteinSequence = rnaCodingSequence.getProteinSequence(TranscriptionEngine.getDefault()); proteinSequence.setAccession(new AccessionID(cdsSequence.getAccession().getID()));