static public LinkedHashMap<String, ChromosomeSequence> getChromosomeSequenceFromDNASequence(LinkedHashMap<String, DNASequence> dnaSequenceList) { LinkedHashMap<String, ChromosomeSequence> chromosomeSequenceList = new LinkedHashMap<String, ChromosomeSequence>(); for (String key : dnaSequenceList.keySet()) { DNASequence dnaSequence = dnaSequenceList.get(key); ChromosomeSequence chromosomeSequence = new ChromosomeSequence(dnaSequence.getProxySequenceReader()); //we want the underlying sequence but don't need storage chromosomeSequence.setAccession(dnaSequence.getAccession()); chromosomeSequenceList.put(key, chromosomeSequence); } return chromosomeSequenceList; }
/**
 * Splits a FASTA file into one {@code .fna} file per DNA sequence.
 * <p>
 * Every sequence read from {@code fastaFileName} is written to its own file inside
 * {@code outputDirectory}, named {@code <uniqueid><accession id>.fna}. The output
 * directory is created when it does not exist yet.
 *
 * @param fastaFileName   the FASTA file to read
 * @param uniqueid        prefix placed in front of each accession id in the output file
 *                        name; {@code null} or an empty string means no prefix
 * @param outputDirectory directory that receives the per-sequence files
 * @throws Exception propagated from the FASTA reader/writer helpers
 */
public void processNucleotides(File fastaFileName, String uniqueid, File outputDirectory) throws Exception {
    if (!outputDirectory.exists()) {
        outputDirectory.mkdirs();
    }

    // An empty prefix concatenates to nothing, so the prefix can be applied unconditionally;
    // no branch on the length of uniqueid is needed.
    String prefix = (uniqueid == null) ? "" : uniqueid;
    String directoryPath = outputDirectory.getAbsolutePath() + File.separatorChar;

    LinkedHashMap<String, DNASequence> dnaSequenceHashMap = FastaReaderHelper.readFastaDNASequence(fastaFileName);
    for (DNASequence dnaSequence : dnaSequenceHashMap.values()) {
        String fileName = directoryPath + prefix + dnaSequence.getAccession().getID() + ".fna";

        // The writer helper takes a collection, so wrap the single sequence.
        ArrayList<DNASequence> dnaList = new ArrayList<DNASequence>();
        dnaList.add(dnaSequence);
        FastaWriterHelper.writeNucleotideSequence(new File(fileName), dnaList);
    }
}
/**
 * Output a gff3 feature file that will give the length of each scaffold/chromosome in the fasta file.
 * Used for gbrowse so it knows length.
 * <p>
 * The file starts with a {@code ##gff-version 3} header, followed by one tab-separated
 * {@code contig} line per sequence: accession id, the fasta file name as source, start
 * {@code 1}, the sequence's bio end as end, and a {@code Name=<accession id>} attribute.
 *
 * @param fastaSequenceFile the fasta file whose sequences are listed
 * @param gffFile           the gff3 file to write; it is opened with {@link FileWriter}
 * @throws Exception propagated from reading the fasta file or writing the gff3 file
 */
static public void outputFastaSequenceLengthGFF3(File fastaSequenceFile, File gffFile) throws Exception {
    LinkedHashMap<String, DNASequence> dnaSequenceList = FastaReaderHelper.readFastaDNASequence(fastaSequenceFile);
    String fileName = fastaSequenceFile.getName();
    String newLine = System.getProperty("line.separator");

    FileWriter fw = new FileWriter(gffFile);
    try {
        fw.write("##gff-version 3" + newLine);
        for (DNASequence dnaSequence : dnaSequenceList.values()) {
            String id = dnaSequence.getAccession().getID();
            String gff3line = id + "\t" + fileName + "\t" + "contig" + "\t" + "1" + "\t" + dnaSequence.getBioEnd() + "\t.\t.\t.\tName=" + id + newLine;
            fw.write(gff3line);
        }
    } finally {
        // Close the writer even when a write or a sequence accessor throws,
        // so the file handle is not leaked.
        fw.close();
    }
}
DNASequence contigSequence = dnaSequenceList.get(accession); ChromosomeSequence chromsomeSequence = new ChromosomeSequence(contigSequence.getSequenceAsString()); chromsomeSequence.setAccession(contigSequence.getAccession()); chromosomeSequenceList.put(accession, chromsomeSequence); dnaSequence.getAccession().toString(), contigDNASequence.getAccession().toString(), bioStart, bioEnd, strand); ChromosomeSequence chromosomeSequence = chromosomeSequenceList.get(accession); String geneaccession = dnaSequence.getAccession().getID(); String note = geneaccession; String[] values = geneaccession.split(" "); throw new Exception(dnaSequence.getAccession().toString() + " not found"); logger.info("Gene not found {}", dnaSequence.getAccession().toString());
genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); dnaSequence.setAccession(new AccessionID("NM_001126")); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); 
logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences());
genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); dnaSequence.setAccession(new AccessionID("NM_001126")); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences()); dnaSequence = new DNASequence(genbankDNAReader); genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); logger.info("Sequence ({},{})={}...", dnaSequence.getAccession(), dnaSequence.getLength(), dnaSequence.getSequenceAsString().substring(0, 10)); 
logger.info("Keywords: {}", genbankDNAReader.getKeyWords()); logger.info("DatabaseReferences: {}", genbankDNAReader.getDatabaseReferences());