for (Protein protein : matchingProteins) { LOGGER.trace("Protein ID: " + protein.getId() + " MD5: " + protein.getMd5()); LOGGER.trace("Has " + protein.getMatches().size() + " matches"); for (ProteinXref xref : protein.getCrossReferences()) { LOGGER.trace("Xref: " + xref.getIdentifier());
/**
 * Nulls out the site collections held by match locations so that they are not present on the
 * objects afterwards (the original note states this keeps them out of the XML output).
 * <p>
 * Only locations that are instances of {@code LocationWithSites} and whose site set is
 * non-null are touched. Proteins without matches and matches without locations are skipped.
 *
 * @param proteins The proteins whose match locations are inspected
 * @param all      {@code true} to clear every non-null site set; {@code false} to clear only
 *                 site sets that are empty
 */
private void removeSites(List<Protein> proteins, boolean all) {
    for (Protein protein : proteins) {
        final Set<Match> proteinMatches = protein.getMatches();
        if (proteinMatches == null || proteinMatches.isEmpty()) {
            continue;
        }
        for (Match match : proteinMatches) {
            final Set<Location> matchLocations = match.getLocations();
            if (matchLocations == null || matchLocations.isEmpty()) {
                continue;
            }
            for (Location location : matchLocations) {
                if (!(location instanceof LocationWithSites)) {
                    continue;
                }
                final LocationWithSites siteLocation = (LocationWithSites) location;
                final Set<Site> sites = siteLocation.getSites();
                // A null site set is already in the target state; otherwise clear it when asked to
                // remove everything, or when it holds no sites at all.
                if (sites != null && (all || sites.isEmpty())) {
                    siteLocation.setSites(null);
                }
            }
        }
    }
}
// NOTE(review): unmatched closing brace carried over from the original line - presumably it
// closes the enclosing class, whose opening brace is outside this excerpt.
}
private IMatchesHolder getMatchesHolder(StepInstance stepInstance, String sequenceType, List<Protein> proteins, boolean isSlimOutput) { IMatchesHolder matchesHolder; if (sequenceType.equalsIgnoreCase("n")) { matchesHolder = new NucleicAcidMatchesHolder(interProScanVersion); } else { matchesHolder = new ProteinMatchesHolder(interProScanVersion); } final Map<String, String> parameters = stepInstance.getParameters(); final boolean excludeSites = Boolean.TRUE.toString().equals(parameters.get(StepInstanceCreatingStep.EXCLUDE_SITES)); if (excludeSites || this.excludeSites) { // Command line argument takes preference over proprties file config removeSites(proteins, true); } else if (isSlimOutput) { removeSites(proteins, false); } if (isSlimOutput) { // Only include a protein in the output if it has at least one match for (Protein protein : proteins) { Set<Match> matches = protein.getMatches(); if (matches != null && matches.size() > 0) { matchesHolder.addProtein(protein); } } } else { // Include all proteins in the output, whether they have any matches or not matchesHolder.addProteins(proteins); } return matchesHolder; }
/**
 * Writes out a Protein object to a GFF version 3 file.
 * <p>
 * For each accession returned by {@code getProteinAccessions(protein)} the accession is passed
 * through {@code getValidGFF3SeqId}, then {@code writeSequenceRegionPart} and
 * {@code processMatches} are called. Nothing is written when the protein has no matches
 * (a {@code null} match set is treated the same as an empty one).
 *
 * @param protein containing matches to be written out
 * @return always 0; this implementation does not count the rows it writes
 * @throws java.io.IOException in the event of I/O problem writing out the file.
 */
public int write(Protein protein) throws IOException {
    int sequenceLength = protein.getSequenceLength();
    String md5 = protein.getMd5();
    String date = dmyFormat.format(new Date());
    Set<Match> matches = protein.getMatches();
    // NOTE(review): this identifier is never assigned, so processMatches always receives null
    // for it - confirm that is intended rather than the validated accession computed below.
    String proteinIdForGFF = null;
    List<String> proteinIdsFromGetOrf = getProteinAccessions(protein);

    // The match check does not depend on the accession, so do it once up front.
    if (matches == null || matches.isEmpty()) {
        return 0;
    }

    for (String proteinIdFromGetorf : proteinIdsFromGetOrf) {
        final String validSeqId = getValidGFF3SeqId(proteinIdFromGetorf);
        writeSequenceRegionPart(protein, sequenceLength, md5, validSeqId);
        processMatches(matches, proteinIdForGFF, date, protein, getNucleotideId());
    }
    return 0;
}
/**
 * Writes out a Protein object to a GFF version 3 file.
 * <p>
 * For each accession returned by {@code getProteinAccessions(protein)} the accession is passed
 * through {@code getValidGFF3SeqId}, then {@code writeSequenceRegionPart} and
 * {@code processMatches} are called. Nothing is written when the protein has no matches
 * (a {@code null} match set is treated the same as an empty one).
 *
 * @param protein containing matches to be written out
 * @return always 0; this implementation does not count the rows it writes
 * @throws java.io.IOException in the event of I/O problem writing out the file.
 */
public int write(Protein protein) throws IOException {
    int sequenceLength = protein.getSequenceLength();
    String md5 = protein.getMd5();
    String date = dmyFormat.format(new Date());
    Set<Match> matches = protein.getMatches();
    // NOTE(review): this identifier is never assigned, so processMatches always receives null
    // for it - confirm that is intended rather than the validated accession computed below.
    String proteinIdForGFF = null;
    List<String> proteinIdsFromGetOrf = getProteinAccessions(protein);

    // The match check does not depend on the accession, so do it once up front.
    if (matches == null || matches.isEmpty()) {
        return 0;
    }

    for (String proteinIdFromGetorf : proteinIdsFromGetOrf) {
        final String validSeqId = getValidGFF3SeqId(proteinIdFromGetorf);
        writeSequenceRegionPart(protein, sequenceLength, md5, validSeqId);
        processMatches(matches, proteinIdForGFF, date, protein, getNucleotideId());
    }
    return 0;
}
String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); for (String proteinAc: proteinAcs) { for (Match match : matches) {
String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches();
String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); for (String proteinAc: proteinAcs) { for (Match match : matches) {
String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); for (String proteinAc: proteinAcs) { for (Match match : matches) {
String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches();
/** * Writes out all protein matches for the specified protein (GFF formatted). * * @param protein containing matches to be written out * @return the number of rows printed (i.e. the number of Locations on Matches). * @throws java.io.IOException in the event of I/O problem writing out the file. */ public int write(Protein protein) throws IOException { List<String> proteinIdsForGFF = getProteinAccessions(protein); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); //Write sequence region information for (String proteinIdForGFF: proteinIdsForGFF) { if (matches.size() > 0) { //Check if protein accessions are GFF3 valid proteinIdForGFF = ProteinMatchesGFFResultWriter.getValidGFF3SeqId(proteinIdForGFF); //Write sequence-region super.gffWriter.write("##sequence-region " + proteinIdForGFF + " 1 " + sequenceLength); if (writeFullGFF) { writeReferenceLine(proteinIdForGFF, sequenceLength, md5); addFASTASeqToMap(proteinIdForGFF, protein.getSequence()); } processMatches(matches, proteinIdForGFF, date, protein, proteinIdForGFF, writeFullGFF); }//end match size check } return 0; }
String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches();
/** * Writes out all protein matches for the specified protein (GFF formatted). * * @param protein containing matches to be written out * @return the number of rows printed (i.e. the number of Locations on Matches). * @throws java.io.IOException in the event of I/O problem writing out the file. */ public int write(Protein protein) throws IOException { List<String> proteinIdsForGFF = getProteinAccessions(protein); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); //Write sequence region information for (String proteinIdForGFF: proteinIdsForGFF) { if (matches.size() > 0) { //Check if protein accessions are GFF3 valid proteinIdForGFF = ProteinMatchesGFFResultWriter.getValidGFF3SeqId(proteinIdForGFF); //Write sequence-region super.gffWriter.write("##sequence-region " + proteinIdForGFF + " 1 " + sequenceLength); if (writeFullGFF) { writeReferenceLine(proteinIdForGFF, sequenceLength, md5); addFASTASeqToMap(proteinIdForGFF, protein.getSequence()); } processMatches(matches, proteinIdForGFF, date, protein, proteinIdForGFF, writeFullGFF); }//end match size check } return 0; }
String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); List<String> proteinIdsFromGetOrf = getProteinAccessions(protein); for (String proteinIdFromGetorf : proteinIdsFromGetOrf) { processMatches(protein.getMatches(), proteinIdForGFF, date, protein, getNucleotideId()); } else { throw new IllegalStateException("Cannot find the ORF object that maps to protein with MD5: " + protein.getMd5());
String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); List<String> proteinIdsFromGetOrf = getProteinAccessions(protein); for (String proteinIdFromGetorf : proteinIdsFromGetOrf) { processMatches(protein.getMatches(), proteinIdForGFF, date, protein, getNucleotideId()); } else { throw new IllegalStateException("Cannot find the ORF object that maps to protein with PK / MD5: " + protein.getId() + " / " + protein.getMd5());