/**
 * Reports whether the given string can be turned into a {@link ProteinSequence}.
 *
 * @param str candidate sequence string
 * @return {@code true} when the {@code ProteinSequence} constructor accepts the string,
 *         {@code false} when it throws {@link CompoundNotFoundException}
 */
private static boolean isProteinSequence(String str) {
	boolean accepted;
	try {
		// The instance itself is not needed; only whether construction succeeds.
		new ProteinSequence(str);
		accepted = true;
	} catch (CompoundNotFoundException e) {
		accepted = false;
	}
	return accepted;
}
/**
 * Demo entry point: builds a {@link UniprotProxySequenceReader} for the entry
 * {@code YA745_GIBZE}, wraps it in a {@link ProteinSequence} and logs the accession ID
 * and the sequence string. Any exception is logged rather than propagated.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
	try {
		UniprotProxySequenceReader<AminoAcidCompound> reader =
				new UniprotProxySequenceReader<AminoAcidCompound>(
						"YA745_GIBZE", AminoAcidCompoundSet.getAminoAcidCompoundSet());
		ProteinSequence protein = new ProteinSequence(reader);

		String accessionId = protein.getAccession().getID();
		logger.info("Accession: {}", accessionId);

		String residues = protein.getSequenceAsString();
		logger.info("Sequence: {}", residues);
	} catch (Exception e) {
		logger.error("Exception: ", e);
	}
}
/**
 * Counts how many characters of the sequence match the short name of the given
 * amino acid and divides that count by {@code sequence.getLength()}.
 *
 * @param sequence      the protein sequence to inspect
 * @param aminoAcidCode the amino acid whose share is computed
 * @return number of matching characters divided by the sequence length
 */
@Override
public double getEnrichment(ProteinSequence sequence, AminoAcidCompound aminoAcidCode) {
	// Characters come from the class's own getSequence helper (second argument true).
	char[] residues = this.getSequence(sequence.getSequenceAsString(), true);
	double matches = 0.0;
	for (int i = 0; i < residues.length; i++) {
		String residue = String.valueOf(residues[i]);
		if (aminoAcidCode.getShortName().equals(residue)) {
			matches += 1.0;
		}
	}
	return matches / sequence.getLength();
}
/**
 * Demo entry point: creates one {@link ProteinSequence} directly from a string and a
 * second one through a {@link StringProxySequenceReader}, logging each.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if either sequence cannot be created
 */
public static void main(String[] args) throws Exception {
	// Sequence built straight from the residue string.
	ProteinSequence direct = new ProteinSequence("ARNDCEQGHILKMFPSTWYVBZJX");
	logger.info("Protein Sequence: {}", direct.toString());

	// Sequence built through a string-backed proxy reader.
	StringProxySequenceReader<AminoAcidCompound> proxyReader =
			new StringProxySequenceReader<AminoAcidCompound>(
					"XRNDCEQGHILKMFPSTWYVBZJA", AminoAcidCompoundSet.getAminoAcidCompoundSet());
	ProteinSequence viaProxy = new ProteinSequence(proxyReader);
	logger.info("Protein Sequence from Proxy: {}", viaProxy.toString());
}
}
/**
 * String-based convenience overload. Builds a {@link ProteinSequence} from each input
 * string, attaches the collection returned by {@code getAlignedUserCollection} for that
 * same string as the sequence's user collection, and delegates to the
 * {@code ProteinSequence}-based overload.
 *
 * @param first     first sequence string
 * @param second    second sequence string
 * @param structure structure passed through to the delegate
 * @param cpSite    value passed through to the delegate
 * @return the result of the {@code ProteinSequence}-based overload
 * @throws StructureException        declared by this method's signature
 * @throws CompoundNotFoundException if a string cannot be turned into a ProteinSequence
 */
public static AFPChain cpFastaToAfpChain(String first, String second, Structure structure, int cpSite)
		throws StructureException, CompoundNotFoundException {
	ProteinSequence firstSequence = new ProteinSequence(first);
	firstSequence.setUserCollection(getAlignedUserCollection(first));

	ProteinSequence secondSequence = new ProteinSequence(second);
	secondSequence.setUserCollection(getAlignedUserCollection(second));

	return cpFastaToAfpChain(firstSequence, secondSequence, structure, cpSite);
}
// Build the sequence from the GenBank reader and let the reader's header parser
// populate it from the header.
ProteinSequence proteinSequence = new ProteinSequence(genbankProteinReader);
genbankProteinReader.getHeaderParser().parseHeader(genbankProteinReader.getHeader(), proteinSequence);
// Log at most the first 10 characters; clamping avoids a
// StringIndexOutOfBoundsException when the sequence is shorter than that.
String sequencePreview = proteinSequence.getSequenceAsString();
if (sequencePreview.length() > 10) {
	sequencePreview = sequencePreview.substring(0, 10);
}
logger.info("Sequence ({},{})={}...", proteinSequence.getAccession(), proteinSequence.getLength(), sequencePreview);
logger.info("Keywords: {}", genbankProteinReader.getKeyWords());
logger.info("DatabaseReferences: {}", genbankProteinReader.getDatabaseReferences());
proteinSequence.getFeatures();
// NOTE(review): excerpt of a larger method; the braces are unbalanced in this view, so
// the exact nesting of the branches below should be confirmed against the full file.
// - cpSite <= 0: c is the character of `second` at index gappedCpShift.
// - otherwise:   c is the character of `second` at index first.getLength()-1 - gappedCpShift.
// `antipermuted` is built from SequenceTools.permuteCyclic(second, gappedCpShift); the
// ProteinSequence constructor is presumably the source of the caught CompoundNotFoundException.
// When a sequence carries a user collection, setLowercaseToNull is applied to the matching
// array (first -> residues, second -> nonpermutedResidues).
try { if (cpSite <= 0) { c = second.getSequenceAsString().charAt(gappedCpShift); } else { c = second.getSequenceAsString().charAt(first.getLength()-1 - gappedCpShift); antipermuted = new ProteinSequence(SequenceTools.permuteCyclic(second.getSequenceAsString(), gappedCpShift)); } catch (CompoundNotFoundException e) { if (first.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(first, residues); if (second.getUserCollection() != null) { CasePreservingProteinSequenceCreator.setLowercaseToNull(second, nonpermutedResidues);
/**
 * ProteinSequence-based overload: gathers the charged amino acid counts and the first
 * and last characters of the sequence string, then delegates to the overload that
 * takes those values directly.
 *
 * @param sequence the protein sequence
 * @param pHPoint  the pH value passed through to the delegate
 * @return the value computed by the delegate overload
 */
private double getNetChargeInnovagen(ProteinSequence sequence, double pHPoint) {
	Map<AminoAcidCompound, Integer> chargedCounts = this.getChargedAACount(sequence);
	String residues = sequence.getSequenceAsString();
	char firstResidue = residues.charAt(0);
	char lastResidue = residues.charAt(residues.length() - 1);
	return getNetChargeInnovagen(chargedCounts, pHPoint, firstResidue, lastResidue);
}
// NOTE(review): excerpt of a larger method; the loop and if braces are unbalanced in this
// view, so the real nesting must be confirmed against the full file.
// Visible steps:
// - build a ProteinSequence for `uniprotBestHit` through a UniprotProxySequenceReader and
//   keep its sequence string as `hitSequence`;
// - for each transcript of `geneSequence`, read the predicted protein sequence string and
//   the list of CDS protein sequences, concatenating the CDS strings into `testSequence`;
// - walk the CDS list by index to get each peptide's sequence string;
// - read the hit's feature keywords and database references when they are non-null.
UniprotProxySequenceReader<AminoAcidCompound> uniprotSequence = new UniprotProxySequenceReader<AminoAcidCompound>(uniprotBestHit, AminoAcidCompoundSet.getAminoAcidCompoundSet()); ProteinSequence proteinSequence = new ProteinSequence(uniprotSequence); String hitSequence = proteinSequence.getSequenceAsString(); for (TranscriptSequence transcriptSequence : geneSequence.getTranscripts().values()) { String predictedProteinSequence = transcriptSequence.getProteinSequence().getSequenceAsString(); ArrayList<ProteinSequence> cdsProteinList = transcriptSequence.getProteinCDSSequences(); String testSequence = ""; for (ProteinSequence cdsProteinSequence : cdsProteinList) { testSequence = testSequence + cdsProteinSequence.getSequenceAsString(); for (int i = 0; i < cdsProteinList.size(); i++) { ProteinSequence peptideSequence = cdsProteinList.get(i); String seq = peptideSequence.getSequenceAsString(); Integer startIndex = null; int offsetStartIndex = 0; FeaturesKeyWordInterface featureKeyWords = proteinSequence.getFeaturesKeyWord(); String notes = ""; if (featureKeyWords != null) { DatabaseReferenceInterface databaseReferences = proteinSequence.getDatabaseReferences(); if (databaseReferences != null) { LinkedHashMap<String, ArrayList<DBReferenceInfo>> databaseReferenceHashMap = databaseReferences.getDatabaseReferences();
/**
 * Collects the protein sequences found under {@code prediction/gene/protein} in the
 * parsed document. Each sequence is keyed by, and given as accession, the
 * {@code idGene} attribute of the protein element's parent node. Non-word characters
 * are stripped from the element text before the sequence is built.
 *
 * @return map from gene id to protein sequence, in document order
 * @throws Exception if the elements cannot be selected or a sequence cannot be built
 */
public LinkedHashMap<String, ProteinSequence> getProteinSequences() throws Exception {
	ArrayList<Element> proteinElements =
			XMLHelper.selectElements(geneidDoc.getDocumentElement(), "prediction/gene/protein");
	logger.info("{} hits", proteinElements.size());

	LinkedHashMap<String, ProteinSequence> sequencesByGeneId = new LinkedHashMap<String, ProteinSequence>();
	for (int index = 0; index < proteinElements.size(); index++) {
		Element proteinNode = proteinElements.get(index);
		Element geneNode = (Element) proteinNode.getParentNode();

		String residues = proteinNode.getTextContent().replaceAll("\\W","");
		ProteinSequence protein = new ProteinSequence(residues);

		String geneId = geneNode.getAttribute("idGene");
		protein.setAccession(new AccessionID(geneId));
		sequencesByGeneId.put(geneId, protein);
	}
	return sequencesByGeneId;
}
// NOTE(review): excerpt of a larger method with unbalanced braces. Because it is collapsed
// onto one line, the inline "//1-indexed" comment swallows everything after it on this
// line; confirm the original line breaks against the full file.
// Visible intent: take the compound set of `gapped`, allocate one AminoAcidCompound slot
// per entry of `gapStrings`, walk `gapped` by 1-based position checking each compound
// against the gap compounds, and finally build `ungapped` from the accumulated `seq` text.
CompoundSet<AminoAcidCompound> aaSet = gapped.getCompoundSet(); AminoAcidCompound[] gaps = new AminoAcidCompound[gapStrings.length]; for(int i=1; i<=gapped.getLength();i++) { //1-indexed AminoAcidCompound aa = gapped.getCompoundAt(i); boolean isGap = false; for(AminoAcidCompound gap : gaps) { ungapped = new ProteinSequence(seq.toString()); } catch (CompoundNotFoundException e) {
// Demo excerpt: logs the accession and its data source next to the expected DataSource
// constant for several header styles.
// NOTE(review): as this excerpt reads, the logging statements sit inside the catch block,
// where `sequence` is still null; the catch block is also not closed within this view.
// Confirm the real structure against the full file.
ProteinSequence sequence = null;
try {
	sequence = new ProteinSequence("");
} catch (CompoundNotFoundException e) {
	logger.info("accession = {}", sequence.getAccession());
	logger.info("Data source: {} = {}", sequence.getAccession().getDataSource(), DataSource.GENBANK);
	logger.info("accession = {}", sequence.getAccession());
	logger.info("Data source: {} = {}", sequence.getAccession().getDataSource(), DataSource.ENA);
	logger.info("accession = {}", sequence.getAccession());
	logger.info("Data source: {} = {}", sequence.getAccession().getDataSource(), DataSource.DDBJ);
	logger.info("entry = {}", sequence.getAccession());
	logger.info("Data source: {} = {}", sequence.getAccession().getDataSource(), DataSource.NBRF);
	logger.info("name = {}", sequence.getAccession());
	// Fixed: the format string had one placeholder for two arguments, so the expected
	// data source was silently dropped.
	logger.info("Data source: {} = {}", sequence.getAccession().getDataSource(), DataSource.PRF);
	// Fixed: missing "{}" placeholder meant the accession was never printed.
	logger.info("accession = {}", sequence.getAccession());
	logger.info("Data source: {} = {}", sequence.getAccession().getDataSource(), DataSource.UNIPROT);
	// Fixed: missing "{}" placeholder meant the accession was never printed.
	logger.info("entry:chain = {}", sequence.getAccession());
	logger.info("Data source: {} = {}", sequence.getAccession().getDataSource(), DataSource.PDB1);
	logger.info("entry:chain = {}", sequence.getAccession());
	logger.info("Data source: {} = {}", sequence.getAccession().getDataSource(), DataSource.PDB2);
/**
 * Computes the net charge of a sequence at the given pH, using either the Expasy-based
 * or the Innovagen-based calculation.
 *
 * @param sequence        the protein sequence
 * @param useExpasyValues {@code true} to delegate to {@code getNetChargeExpasy} with the
 *                        upper-cased {@code toString()} of the sequence, {@code false} to
 *                        delegate to {@code getNetChargeInnovagen}
 * @param pHPoint         the pH value passed through to the delegate
 * @return the value computed by the selected delegate
 */
@Override
public double getNetCharge(ProteinSequence sequence, boolean useExpasyValues, double pHPoint) {
	if (useExpasyValues) {
		// Locale.ROOT keeps the upper-casing independent of the JVM default locale
		// (under a Turkish locale, for example, 'i' would become a dotted capital 'İ').
		return getNetChargeExpasy(sequence.toString().toUpperCase(java.util.Locale.ROOT), pHPoint);
	}
	return getNetChargeInnovagen(sequence, pHPoint);
}
/** * Parses the fasta file and loads it into memory. * * Information can be subsequently accessed through * {@link #getSequences()}, * {@link #getStructures()}, * {@link #getResidues()}, and * {@link #getAccessions()}. * * @throws IOException * @throws StructureException */ public void process() throws IOException, StructureException { if(sequences == null) { // only process once, then return cached values LinkedHashMap<String, ProteinSequence> sequenceMap = reader.process(); sequences = sequenceMap.values().toArray(new ProteinSequence[0]); accessions = new String[sequences.length]; structures = new Structure[sequences.length]; residues = new ResidueNumber[sequences.length][]; // Match each sequence to a series of PDB Residue numbers for(int i=0;i<sequences.length;i++) { accessions[i] = sequences[i].getAccession().getID(); //System.out.println("Fetching "+accession); structures[i] = cache.getStructure(accessions[i]); residues[i] = StructureSequenceMatcher.matchSequenceToStructure(sequences[i], structures[i]); assert( residues[i].length == sequences[i].getLength()); } } }
private static void readInputAndGenerateOutput(String outputLocation, List<Character> propertyList, List<Character> specificList, String delimiter, String inputLocation, AminoAcidCompositionTable aaTable, int decimalPlace) throws Exception{ PrintStream output; if(outputLocation != null) output = new PrintStream(new File(outputLocation)); else output = System.out; printHeader(output, propertyList, specificList, delimiter); LinkedHashMap<String, ProteinSequence> a = readInputFile(inputLocation, aaTable); //Need for the last sequence for(Entry<String, ProteinSequence> entry:a.entrySet()){ compute(output, entry.getValue().getOriginalHeader(), entry.getValue().getSequenceAsString().trim(), delimiter, aaTable, propertyList, specificList, decimalPlace); } output.close(); }
/**
 * Checks whether an array of sequences contains at least two sequences of equal length.
 * {@code null} entries are skipped.
 *
 * @param sequences the array of {@link org.biojava.nbio.core.sequence.ProteinSequence} sequences
 * @return true if any two non-null sequences are of an equal length
 */
public static boolean equalLengthSequences(ProteinSequence[] sequences) {
	final int count = sequences.length;
	for (int first = 0; first < count - 1; first++) {
		ProteinSequence candidate = sequences[first];
		if (candidate != null) {
			// Only later entries need checking; earlier pairs were already compared.
			for (int second = first + 1; second < count; second++) {
				ProteinSequence other = sequences[second];
				if (other != null && candidate.getLength() == other.getLength()) {
					return true;
				}
			}
		}
	}
	return false;
}
}
/**
 * Utility method to convert a BioJava ProteinSequence object to the FastaSequence
 * object used internally in JRonn.
 *
 * <p>Each compound contributes its short name, except that any short name matched by
 * {@code SequenceUtil.NON_AA} is replaced with {@code "X"}.
 *
 * @param sequence the protein sequence to convert
 * @return a FastaSequence carrying the accession ID of {@code sequence} and the
 *         converted residue string
 */
public static FastaSequence convertProteinSequencetoFasta(ProteinSequence sequence){
	// Method-local buffer: the unsynchronized StringBuilder replaces StringBuffer.
	StringBuilder buf = new StringBuilder();
	for (AminoAcidCompound compound : sequence) {
		String c = compound.getShortName();
		buf.append(SequenceUtil.NON_AA.matcher(c).find() ? "X" : c);
	}
	return new FastaSequence(sequence.getAccession().getID(), buf.toString());
}
// Give the protein sequence a new AccessionID built from the CDS sequence's accession ID,
// register the CDS sequence as its parent DNA sequence over the range 1..cdsSequence.getLength(),
// and add it to the result list.
proteinSequence.setAccession(new AccessionID(cdsSequence.getAccession().getID())); proteinSequence.setParentDNASequence(cdsSequence, 1, cdsSequence.getLength()); proteinSequenceList.add(proteinSequence);
/**
 * Get the protein sequence with a user defined TranscriptionEngine.
 *
 * <p>The DNA coding sequence is converted to RNA and then to protein with the supplied
 * engine; the resulting protein is given a new AccessionID built from this object's
 * accession ID.
 *
 * @param engine the engine used for both conversion steps
 * @return the protein sequence produced by the engine
 */
public ProteinSequence getProteinSequence(TranscriptionEngine engine) {
	DNASequence codingDna = getDNACodingSequence();
	RNASequence codingRna = codingDna.getRNASequence(engine);
	ProteinSequence translated = codingRna.getProteinSequence(engine);

	String accessionId = this.getAccession().getID();
	translated.setAccession(new AccessionID(accessionId));
	return translated;
}
/** * Takes a {@link ProteinSequence} which was created by a * {@link CasePreservingProteinSequenceCreator}. Uses the case info * stored in the user collection to modify the output array. * * <p>Sets elements of the output array which correspond to lowercase letters * to null. * * @param seq Input sequence with case stored as the user collection * @param out */ public static void setLowercaseToNull( ProteinSequence seq, Object[] out) { // should have been set by seq creator Collection<Object> userCollection = seq.getUserCollection(); if(userCollection == null) throw new IllegalArgumentException("Sequence doesn't contain valid case info"); if(userCollection.size() != out.length) throw new IllegalArgumentException("Sequence length doesn't math output array length"); int pos = 0; for(Object isAligned : userCollection) { assert(isAligned instanceof Boolean); if(!(Boolean)isAligned) { out[pos] = null; } pos++; } } }