/**
 * Uses two sequences, each with a corresponding structure, to create an AFPChain corresponding to the
 * alignment. Provided only for convenience since FastaReaders return such maps.
 * <p>
 * The first map entry (in the map's iteration order) is paired with {@code structure1} and the second with
 * {@code structure2}, so callers should pass a map with a well-defined iteration order.
 *
 * @param sequences
 *            A Map containing exactly two entries from sequence names as Strings to gapped
 *            ProteinSequences; the name is ignored
 * @param structure1
 *            The structure matching the first sequence; must not be null
 * @param structure2
 *            The structure matching the second sequence; must not be null
 * @return The result of the {@code ProteinSequence} overload applied to the two map values
 * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
 * @throws IllegalArgumentException
 *             If {@code sequences} does not contain exactly two entries, or if either structure is null
 * @throws StructureException
 */
public static AFPChain fastaToAfpChain(Map<String, ProteinSequence> sequences, Structure structure1,
        Structure structure2) throws StructureException {

    if (sequences.size() != 2) {
        throw new IllegalArgumentException("There must be exactly 2 sequences, but there were "
                + sequences.size());
    }

    if (structure1 == null || structure2 == null) {
        throw new IllegalArgumentException("A structure is null");
    }

    // Only the values are needed: the names (keys) are documented as ignored, so they are not collected.
    List<ProteinSequence> seqs = new ArrayList<ProteinSequence>(sequences.values());

    return fastaToAfpChain(seqs.get(0), seqs.get(1), structure1, structure2);
}
/**
 * Builds the AFPChain for the alignment between {@code structure1} and {@code structure2} described by the
 * gapped protein sequences {@code sequence1} and {@code sequence2}. The sequences need not correspond to the
 * entire structures, since local alignment is performed to match the sequences to structures.
 * <p>
 * Each string is wrapped in a {@code ProteinSequence} and the work is delegated to
 * {@link #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)}.
 *
 * @param sequence1
 *            Gapped sequence for {@code structure1}
 * @param sequence2
 *            Gapped sequence for {@code structure2}
 * @param structure1
 *            Structure matching {@code sequence1}
 * @param structure2
 *            Structure matching {@code sequence2}
 * @return The AFPChain produced by the {@code ProteinSequence} overload
 * @throws StructureException
 * @throws CompoundNotFoundException
 *             If a {@code ProteinSequence} cannot be created from one of the strings
 */
public static AFPChain fastaStringToAfpChain(String sequence1, String sequence2, Structure structure1,
        Structure structure2) throws StructureException, CompoundNotFoundException {
    // Arguments are evaluated left to right, so sequence1 is still parsed before sequence2.
    return fastaToAfpChain(
            new ProteinSequence(sequence1),
            new ProteinSequence(sequence2),
            structure1,
            structure2);
}
/**
 * Converts a pairwise alignment into an AFPChain for the two given structures. Provided only for
 * convenience.
 * <p>
 * The two aligned sequences are rebuilt as {@code ProteinSequence}s from the base of every compound in
 * alignment order; the first aligned sequence is paired with {@code structure1} and the second with
 * {@code structure2}.
 *
 * @param alignment
 *            The pairwise alignment; its first two aligned sequences are used
 * @param structure1
 *            The structure matching the first aligned sequence; must not be null
 * @param structure2
 *            The structure matching the second aligned sequence; must not be null
 * @return The AFPChain produced by the {@code ProteinSequence} overload, or {@code null} if a
 *         {@code ProteinSequence} could not be created from the aligned compounds (the error is logged)
 * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
 * @throws IllegalArgumentException
 *             If either structure is null
 * @throws StructureException
 */
public static AFPChain fastaToAfpChain(SequencePair<Sequence<AminoAcidCompound>, AminoAcidCompound> alignment,
        Structure structure1, Structure structure2) throws StructureException {

    if (structure1 == null || structure2 == null) {
        throw new IllegalArgumentException("A structure is null");
    }

    List<AlignedSequence<Sequence<AminoAcidCompound>, AminoAcidCompound>> seqs = alignment.getAlignedSequences();

    StringBuilder sb1 = new StringBuilder();
    for (AminoAcidCompound a : seqs.get(0)) {
        sb1.append(a.getBase());
    }

    // Each sequence gets its own builder: appending the second sequence to sb1 would leave sb2 empty.
    StringBuilder sb2 = new StringBuilder();
    for (AminoAcidCompound a : seqs.get(1)) {
        sb2.append(a.getBase());
    }

    try {
        ProteinSequence seq1 = new ProteinSequence(sb1.toString());
        ProteinSequence seq2 = new ProteinSequence(sb2.toString());
        // Delegate directly instead of going through a map keyed by structure name: two structures
        // sharing a name would collapse into a single entry and be rejected by the map overload.
        return fastaToAfpChain(seq1, seq2, structure1, structure2);
    } catch (CompoundNotFoundException e) {
        // The compounds came from an existing alignment, so failing to re-parse them is unexpected.
        // The exception is passed as the last argument so its stack trace is logged as well.
        logger.error("Unexpected error while creating protein sequences: {}. This is most likely a bug.",
                e.getMessage(), e);
        return null;
    }
}
/**
 * Reads the file {@code fastaFile}, expecting exactly two sequences which give a pairwise alignment. Uses
 * this and two structures to create an AFPChain corresponding to the alignment. Uses a
 * {@link CasePreservingProteinSequenceCreator} and assumes that a residue is aligned if and only if it is
 * given by an uppercase letter.
 * <p>
 * The file stream is always closed before this method returns, including when reading fails.
 *
 * @param fastaFile
 *            The FASTA file holding the two gapped sequences
 * @param structure1
 *            The structure matching the first sequence in the file
 * @param structure2
 *            The structure matching the second sequence in the file
 * @return The AFPChain produced by the map overload from the sequences that were read
 * @see #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)
 * @throws IOException
 *             If the file cannot be opened, read or closed
 * @throws StructureException
 */
public static AFPChain fastaFileToAfpChain(File fastaFile, Structure structure1, Structure structure2)
        throws IOException, StructureException {

    InputStream inStream = new FileInputStream(fastaFile);
    LinkedHashMap<String, ProteinSequence> sequences;
    try {
        SequenceCreatorInterface<AminoAcidCompound> creator = new CasePreservingProteinSequenceCreator(
                AminoAcidCompoundSet.getAminoAcidCompoundSet());
        SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser =
                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
        FastaReader<ProteinSequence, AminoAcidCompound> fastaReader =
                new FastaReader<ProteinSequence, AminoAcidCompound>(inStream, headerParser, creator);
        sequences = fastaReader.process();
    } finally {
        // Close in finally so the file handle is released even if setting up the reader or parsing throws.
        inStream.close();
    }

    return fastaToAfpChain(sequences, structure1, structure2);
}
/**
 * Builds an AFPChain from two gapped sequences given as plain strings and their two structures.
 * <p>
 * Each string is wrapped in a {@code ProteinSequence} whose user collection is set to the result of
 * {@code getAlignedUserCollection} for that same string, and the pair is then handed to
 * {@link #fastaToAfpChain(ProteinSequence, ProteinSequence, Structure, Structure)}.
 * NOTE(review): the user collection presumably records which residues count as aligned; confirm against
 * {@code getAlignedUserCollection}.
 *
 * @param sequence1
 *            Gapped sequence for {@code structure1}
 * @param sequence2
 *            Gapped sequence for {@code structure2}
 * @param structure1
 *            Structure matching {@code sequence1}
 * @param structure2
 *            Structure matching {@code sequence2}
 * @return The AFPChain produced by the {@code ProteinSequence} overload
 * @throws StructureException
 * @throws CompoundNotFoundException
 *             If a {@code ProteinSequence} cannot be created from one of the strings
 */
public static AFPChain fastaToAfpChain(String sequence1, String sequence2, Structure structure1,
        Structure structure2) throws StructureException, CompoundNotFoundException {
    // Arguments are evaluated left to right, so sequence1 is fully prepared before sequence2 is touched.
    return fastaToAfpChain(
            withAlignedUserCollection(sequence1),
            withAlignedUserCollection(sequence2),
            structure1,
            structure2);
}

/**
 * Wraps {@code gapped} in a {@code ProteinSequence} and attaches the user collection that
 * {@code getAlignedUserCollection} returns for the same string.
 */
private static ProteinSequence withAlignedUserCollection(String gapped) throws CompoundNotFoundException {
    ProteinSequence wrapped = new ProteinSequence(gapped);
    wrapped.setUserCollection(getAlignedUserCollection(gapped));
    return wrapped;
}