/**
 * Clamps the end coordinate of a raw match to the length of the protein sequence.
 * <p>
 * Some algorithms may report an end coordinate that lies beyond the end of the
 * sequence (PRINTS is one known case).
 *
 * @param protein  the protein that the match is on
 * @param rawMatch the raw match for which a sensible end location is required
 * @return the smaller of the match end coordinate and the protein sequence length
 */
protected int boundedLocationEnd(Protein protein, RawMatch rawMatch) {
    return Math.min(rawMatch.getLocationEnd(), protein.getSequenceLength());
}
/**
 * Checks whether a raw match location lies within the protein sequence length.
 * <p>
 * Only the upper bound is tested: neither the end nor the start coordinate may
 * exceed the sequence length.
 *
 * @param protein  the protein whose sequence length is the upper bound
 * @param rawMatch the raw match whose start and end coordinates are tested
 * @return {@code true} if both coordinates are less than or equal to the
 *         sequence length, {@code false} otherwise
 */
public boolean pantherLocationWithinRange(Protein protein, RawMatch rawMatch) {
    final int sequenceLength = protein.getSequenceLength();
    return rawMatch.getLocationEnd() <= sequenceLength
            && rawMatch.getLocationStart() <= sequenceLength;
}
/**
 * Tells whether the coordinates of a raw match fit inside the protein sequence.
 * <p>
 * The match is accepted when neither its end nor its start coordinate is greater
 * than the sequence length; no lower bound is checked.
 *
 * @param protein  the protein providing the sequence length
 * @param rawMatch the raw match providing the start and end coordinates
 * @return {@code false} if either coordinate exceeds the sequence length,
 *         {@code true} otherwise
 */
public boolean isLocationWithinRange(Protein protein, RawMatch rawMatch) {
    final int upperBound = protein.getSequenceLength();
    final boolean endFits = rawMatch.getLocationEnd() <= upperBound;
    return endFits && rawMatch.getLocationStart() <= upperBound;
}
/**
 * Builds a lookup of protein sequence lengths keyed by protein ID.
 * <p>
 * The proteins are fetched through {@code proteinDAO.getProteinsBetweenIds}
 * using the two supplied IDs.
 *
 * @param bottomProtein Protein Id to start from
 * @param topProtein    Protein Id to end on
 * @return map from protein ID to that protein's sequence length
 */
private Map<Long, Integer> getProteinSequenceLengths(long bottomProtein, long topProtein) {
    final Map<Long, Integer> lengthsById = new HashMap<Long, Integer>();
    for (Protein current : proteinDAO.getProteinsBetweenIds(bottomProtein, topProtein)) {
        lengthsById.put(current.getId(), current.getSequenceLength());
    }
    return lengthsById;
}
}
/**
 * Writes out a Protein object to a GFF version 3 file.
 * <p>
 * For every accession returned by {@code getProteinAccessions(protein)}, and only
 * when the protein has at least one match, the accession is converted to a valid
 * GFF3 sequence ID, the sequence-region part is written and the matches are
 * processed against the nucleotide ID.
 *
 * @param protein containing matches to be written out
 * @return always {@code 0} in this implementation
 * @throws java.io.IOException in the event of I/O problem writing out the file.
 */
public int write(Protein protein) throws IOException {
    final int proteinLength = protein.getSequenceLength();
    final String checksum = protein.getMd5();
    final String formattedDate = dmyFormat.format(new Date());
    final Set<Match> proteinMatches = protein.getMatches();
    // This method never assigns a GFF protein ID, so null is handed to processMatches.
    final String proteinIdForGFF = null;

    for (String accession : getProteinAccessions(protein)) {
        if (proteinMatches.isEmpty()) {
            // Nothing is written for a protein without matches.
            continue;
        }
        final String validSeqId = getValidGFF3SeqId(accession);
        writeSequenceRegionPart(protein, proteinLength, checksum, validSeqId);
        processMatches(proteinMatches, proteinIdForGFF, formattedDate, protein, getNucleotideId());
    }
    return 0;
}
/**
 * Writes out a Protein object to a GFF version 3 file.
 * <p>
 * Each accession obtained from {@code getProteinAccessions(protein)} is handled in
 * turn: if the protein has any matches, the accession is made GFF3-valid, the
 * sequence-region part is written, and the matches are processed together with
 * the nucleotide ID.
 *
 * @param protein containing matches to be written out
 * @return always {@code 0} in this implementation
 * @throws java.io.IOException in the event of I/O problem writing out the file.
 */
public int write(Protein protein) throws IOException {
    final int seqLength = protein.getSequenceLength();
    final String proteinMd5 = protein.getMd5();
    final String today = dmyFormat.format(new Date());
    final Set<Match> matchSet = protein.getMatches();
    // No GFF protein ID is set in this method; processMatches receives null for it.
    final String proteinIdForGFF = null;

    for (String rawAccession : getProteinAccessions(protein)) {
        if (matchSet.isEmpty()) {
            // Proteins without matches produce no output.
            continue;
        }
        final String gffSeqId = getValidGFF3SeqId(rawAccession);
        writeSequenceRegionPart(protein, seqLength, proteinMd5, gffSeqId);
        processMatches(matchSet, proteinIdForGFF, today, protein, getNucleotideId());
    }
    return 0;
}
// Start the location counter at zero and collect the per-protein values used below: accessions, sequence length, MD5 and the current date formatted with dmyFormat.
int locationCount = 0; List<String> proteinAcs = getProteinAccessions(protein); int length = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
// Start the location counter at zero and collect the per-protein values used below: accessions, sequence length, MD5 and the current date formatted with dmyFormat.
int locationCount = 0; List<String> proteinAcs = getProteinAccessions(protein); int length = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
// Collect the protein's accessions, sequence length and sequence, and compute the value returned by getCrc64 for that sequence.
List<String> proteinAcs = getProteinAccessions(protein); final int length = protein.getSequenceLength(); final String sequence = protein.getSequence(); final String crc64 = getCrc64(sequence);
// Start the location counter at zero and collect the per-protein values used below: accessions, sequence length, MD5 and the current date formatted with dmyFormat.
int locationCount = 0; List<String> proteinAcs = getProteinAccessions(protein); int length = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
// Collect the protein's accessions, sequence length and sequence, and compute the value returned by getCrc64 for that sequence.
List<String> proteinAcs = getProteinAccessions(protein); final int length = protein.getSequenceLength(); final String sequence = protein.getSequence(); final String crc64 = getCrc64(sequence);
for (T rawMatch: rp.getMatches()){ if (! isLocationWithinRange(protein, rawMatch)){ LOGGER.error("Location coordinates Error - sequenceLength: " + protein.getSequenceLength() + " Location : " + rawMatch.getLocationStart() + "-" + rawMatch.getLocationEnd()); throw new IllegalStateException("Attempting to persist a match location outside sequence range " +
// Start the location counter at zero and collect the per-protein values used below: accessions, sequence length, MD5 and the current date formatted with dmyFormat.
int locationCount = 0; List<String> proteinAcs = getProteinAccessions(protein); int length = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
/** * Writes out all protein matches for the specified protein (GFF formatted). * * @param protein containing matches to be written out * @return the number of rows printed (i.e. the number of Locations on Matches). * @throws java.io.IOException in the event of I/O problem writing out the file. */ public int write(Protein protein) throws IOException { List<String> proteinIdsForGFF = getProteinAccessions(protein); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); //Write sequence region information for (String proteinIdForGFF: proteinIdsForGFF) { if (matches.size() > 0) { //Check if protein accessions are GFF3 valid proteinIdForGFF = ProteinMatchesGFFResultWriter.getValidGFF3SeqId(proteinIdForGFF); //Write sequence-region super.gffWriter.write("##sequence-region " + proteinIdForGFF + " 1 " + sequenceLength); if (writeFullGFF) { writeReferenceLine(proteinIdForGFF, sequenceLength, md5); addFASTASeqToMap(proteinIdForGFF, protein.getSequence()); } processMatches(matches, proteinIdForGFF, date, protein, proteinIdForGFF, writeFullGFF); }//end match size check } return 0; }
/** * Writes out all protein matches for the specified protein (GFF formatted). * * @param protein containing matches to be written out * @return the number of rows printed (i.e. the number of Locations on Matches). * @throws java.io.IOException in the event of I/O problem writing out the file. */ public int write(Protein protein) throws IOException { List<String> proteinIdsForGFF = getProteinAccessions(protein); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); //Write sequence region information for (String proteinIdForGFF: proteinIdsForGFF) { if (matches.size() > 0) { //Check if protein accessions are GFF3 valid proteinIdForGFF = ProteinMatchesGFFResultWriter.getValidGFF3SeqId(proteinIdForGFF); //Write sequence-region super.gffWriter.write("##sequence-region " + proteinIdForGFF + " 1 " + sequenceLength); if (writeFullGFF) { writeReferenceLine(proteinIdForGFF, sequenceLength, md5); addFASTASeqToMap(proteinIdForGFF, protein.getSequence()); } processMatches(matches, proteinIdForGFF, date, protein, proteinIdForGFF, writeFullGFF); }//end match size check } return 0; }
LOGGER.debug(" protein length = " + protein.getSequenceLength() + " start location of raw match : " + rawMatch.getLocationStart() + " end location of raw match : " + rawMatch.getLocationEnd()); + " protein length = " + protein.getSequenceLength() + " raw match : " + rawMatch.toString()); throw new IllegalStateException("PANTHER match location is out of range " + currentSignatureAc + " protein length = " + protein.getSequenceLength() + " raw match : " + rawMatch.toString());
// Read the protein's sequence length and MD5, and format the current date with dmyFormat.
int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
// Read the protein's sequence length and MD5, and format the current date with dmyFormat.
int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());