/**
 * Post-processes {@code current_protein} and passes it on to
 * {@code actuallyAddProtein( proteins, current_protein )}.
 * <p>
 * Visible steps:
 * <ul>
 * <li>If {@code getMaxAllowedOverlap()} differs from
 * {@code HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT}, or
 * {@code isIgnoreEngulfedDomains()} is true, the protein is replaced by the
 * result of {@code ForesterUtil.removeOverlappingDomains(...)} and the drop in
 * {@code getNumberOfProteinDomains()} is added to
 * {@code _domains_ignored_due_to_overlap}.</li>
 * <li>For filter types {@code POSITIVE_PROTEIN} and {@code NEGATIVE_PROTEIN},
 * the ids of the protein's domains are collected into a set which is then
 * reduced with {@code retainAll( getFilter() )}.</li>
 * <li>For {@code POSITIVE_PROTEIN}, the protein is added when that set is
 * non-empty.</li>
 * </ul>
 * NOTE(review): this excerpt is truncated. Closing braces are missing, so the
 * exact nesting (e.g. whether the filter check sits inside the overlap check,
 * or whether {@code retainAll} runs inside the loop) cannot be confirmed from
 * here. The second and third {@code actuallyAddProtein} calls presumably belong
 * to branches whose conditions are not visible (likely the NEGATIVE_PROTEIN
 * case and the no-protein-filter case) -- TODO confirm against the full file.
 *
 * @param proteins
 *            list handed through to {@code actuallyAddProtein}
 * @param current_protein
 *            protein to process; reassigned locally when overlapping domains
 *            are removed
 */
private void addProtein( final List<Protein> proteins, Protein current_protein ) { if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT ) || isIgnoreEngulfedDomains() ) { final int domains_count = current_protein.getNumberOfProteinDomains(); current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(), isIgnoreEngulfedDomains(), current_protein ); final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains(); _domains_ignored_due_to_overlap += domains_removed; if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) { final Set<String> domain_ids_in_protein = new HashSet<String>(); for( final Domain d : current_protein.getProteinDomains() ) { domain_ids_in_protein.add( d.getDomainId() ); domain_ids_in_protein.retainAll( getFilter() ); if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) { if ( domain_ids_in_protein.size() > 0 ) { actuallyAddProtein( proteins, current_protein ); actuallyAddProtein( proteins, current_protein ); actuallyAddProtein( proteins, current_protein );
/**
 * Reads the file returned by {@code getInputFile()} and builds a list of
 * {@code Protein} objects from it.
 * <p>
 * Visible steps:
 * <ul>
 * <li>Fails fast when {@code getIndividualCutoffAppliesTo()} is not
 * {@code INDIVIDUAL_SCORE_CUTOFF.NONE} but {@code getIndividualScoreCutoffs()}
 * is null or empty.</li>
 * <li>Calls {@code intitCounts()} before any input is read.</li>
 * <li>Checks the input with {@code ForesterUtil.isReadableFile}; a non-empty
 * error string is thrown as an {@code IOException}.</li>
 * <li>Opens a {@code BufferedReader} over a {@code FileReader} on the input
 * file.</li>
 * <li>Reads these fields from the {@code tokens} array: [0] target id,
 * [1] target accession, [2] tlen, [3] query, [4] query accession, [5] qlen,
 * [6] full-sequence E-value, [7] full-sequence score, [9] domain number,
 * [10] total domains, [11] c-Evalue, [12] i-Evalue, [13] domain score,
 * [15] hmm from, [16] hmm to. Indices 8 and 14 are not read in the visible
 * part.</li>
 * </ul>
 * NOTE(review): this excerpt is truncated. The read loop, the declarations of
 * {@code tokens} and {@code line_number}, the closing braces and the return
 * statement are not visible; presumably the {@code proteins} list is what is
 * returned -- TODO confirm. No close of the reader is visible here either;
 * verify it is closed in the full method.
 * NOTE(review): the RuntimeException message reads "with having set them";
 * presumably "without having set them" is meant. Left unchanged because it is
 * a runtime string.
 *
 * @return list of parsed proteins (return statement not visible in this
 *         excerpt)
 * @throws IOException
 *             if {@code ForesterUtil.isReadableFile} reports an error for the
 *             input file
 * @throws RuntimeException
 *             if an individual cutoff mode is set but no individual score
 *             cutoffs are available
 */
public List<Protein> parse() throws IOException { if ( ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) && ( ( getIndividualScoreCutoffs() == null ) || ( getIndividualScoreCutoffs().size() < 1 ) ) ) { throw new RuntimeException( "attempt to use individual cuttoffs with having set them" ); intitCounts(); final Set<String> prev_queries = new HashSet<String>(); final String error = ForesterUtil.isReadableFile( getInputFile() ); if ( !ForesterUtil.isEmpty( error ) ) { throw new IOException( error ); final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) ); String line; final List<Protein> proteins = new ArrayList<Protein>(); final String target_id = tokens[ 0 ]; final String target_acc = tokens[ 1 ]; final int tlen = parseInt( tokens[ 2 ], line_number, "tlen" ); final String query = tokens[ 3 ]; final String query_acc = tokens[ 4 ]; final int qlen = parseInt( tokens[ 5 ], line_number, "qlen" ); final double fs_e_value = parseDouble( tokens[ 6 ], line_number, "E-value" ); final double fs_score = parseDouble( tokens[ 7 ], line_number, "score" ); final int domain_number = parseInt( tokens[ 9 ], line_number, "count" ); final int total_domains = parseInt( tokens[ 10 ], line_number, "total" ); final double c_e_value = parseDouble( tokens[ 11 ], line_number, "c-Evalue" ); final double i_e_value = parseDouble( tokens[ 12 ], line_number, "i-Evalue" ); final double domain_score = parseDouble( tokens[ 13 ], line_number, "score" ); final int hmm_from = parseInt( tokens[ 15 ], line_number, "hmm from" ); final int hmm_to = parseInt( tokens[ 16 ], line_number, "hmm to" );
parser = new HmmscanPerDomainTableParser( new File( input_file_properties[ i ][ 0 ] ), input_file_properties[ i ][ 1 ], filter, parser = new HmmscanPerDomainTableParser( new File( input_file_properties[ i ][ 0 ] ), input_file_properties[ i ][ 1 ], ind_score_cutoff, parser.setFsEValueMaximum( fs_e_value_max ); parser.setIEValueMaximum( ie_value_max ); parser.setIgnoreDufs( ignore_dufs ); parser.setIgnoreVirusLikeIds( ignore_virus_like_ids ); parser.setIgnoreEngulfedDomains( no_engulfing_overlaps ); if ( max_allowed_overlap != surfacing.MAX_ALLOWED_OVERLAP_DEFAULT ) { parser.setMaxAllowedOverlap( max_allowed_overlap ); parser.setReturnType( HmmscanPerDomainTableParser.ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ); if ( individual_score_cutoffs != null ) { parser.setIndividualScoreCutoffs( individual_score_cutoffs ); protein_list = parser.parse(); final double coverage = ( double ) protein_list.size() / parser.getProteinsEncountered(); protein_coverage_stats.addValue( coverage ); int distinct_das = -1; System.out.println( "Number of proteins encountered : " + parser.getProteinsEncountered() );
/**
 * Self-test for {@code HmmscanPerDomainTableParser}, using the files
 * {@code hmmscan30b3_output_1} and {@code hmmscan30b3_output_2} under
 * {@code Test.PATH_TO_TEST_DATA}.
 * <p>
 * Both parsers are built with species tag "MONBR" and
 * {@code INDIVIDUAL_SCORE_CUTOFF.NONE}. The first file is only parsed; its
 * result is not inspected in the visible code. For the second file the test
 * returns false unless the parser reports 4 proteins encountered, 69 domains
 * encountered, and 0 domains ignored due to DUF, full-sequence E-value and
 * i-E-value respectively.
 * <p>
 * NOTE(review): this excerpt is truncated. The {@code catch} clause, closing
 * braces, any later checks and the success return are not visible. The
 * doubled {@code return false;} presumably belongs to a check whose condition
 * is missing from this excerpt -- TODO confirm against the full file.
 *
 * @return false when a visible check fails (success path not visible in this
 *         excerpt)
 */
private static boolean testHmmscanOutputParser() { final String test_dir = Test.PATH_TO_TEST_DATA; try { final HmmscanPerDomainTableParser parser1 = new HmmscanPerDomainTableParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_1" ), "MONBR", INDIVIDUAL_SCORE_CUTOFF.NONE ); parser1.parse(); final HmmscanPerDomainTableParser parser2 = new HmmscanPerDomainTableParser( new File( test_dir + ForesterUtil.getFileSeparator() + "hmmscan30b3_output_2" ), "MONBR", INDIVIDUAL_SCORE_CUTOFF.NONE ); final List<Protein> proteins = parser2.parse(); if ( parser2.getProteinsEncountered() != 4 ) { return false; return false; if ( parser2.getDomainsEncountered() != 69 ) { return false; if ( parser2.getDomainsIgnoredDueToDuf() != 0 ) { return false; if ( parser2.getDomainsIgnoredDueToFsEval() != 0 ) { return false; if ( parser2.getDomainsIgnoredDueToIEval() != 0 ) { return false;