/** * Returns a fingerprint from a Base64 encoded Pubchem fingerprint. * * @param enc The Base64 encoded fingerprint * @return A BitSet corresponding to the input fingerprint */ public static BitSet decode(String enc) { byte[] fp = base64Decode(enc); if (fp.length < 4) { throw new IllegalArgumentException("Input is not a proper PubChem base64 encoded fingerprint"); } int len = (fp[0] << 24) | (fp[1] << 16) | (fp[2] << 8) | (fp[3] & 0xff); if (len != FP_SIZE) { throw new IllegalArgumentException("Input is not a proper PubChem base64 encoded fingerprint"); } // note the IChemObjectBuilder is passed as null because the SMARTSQueryTool // isn't needed when decoding PubchemFingerprinter pc = new PubchemFingerprinter(null); for (int i = 0; i < pc.m_bits.length; ++i) { pc.m_bits[i] = fp[i + 4]; } BitSet ret = new BitSet(FP_SIZE); for (int i = 0; i < FP_SIZE; i++) { if (pc.isBitOn(i)) ret.set(i); } return ret; }
/**
 * Zeroes the internal bit buffer and then delegates to
 * {@code _generateFp} to populate it for the given molecule.
 *
 * @param mol the molecule to fingerprint
 * @throws CDKException propagated from {@code _generateFp}
 */
private void generateFp(IAtomContainer mol) throws CDKException {
    // every byte of the buffer is reset to zero before the bits are regenerated
    java.util.Arrays.fill(m_bits, (byte) 0);
    _generateFp(m_bits, mol);
}
/**
 * Serialises the internal bit buffer to Base64.
 *
 * <p>The encoded payload is a 4-byte big-endian representation of
 * {@code FP_SIZE} followed by the bytes of the bit buffer.
 *
 * @return the Base64 string produced by {@code base64Encode}
 */
private String encode() {
    final int headerLength = 4;
    byte[] packed = new byte[headerLength + m_bits.length];

    // The narrowing cast keeps only the low eight bits of each shifted value,
    // which yields the header bytes from most to least significant.
    packed[0] = (byte) (FP_SIZE >>> 24);
    packed[1] = (byte) (FP_SIZE >>> 16);
    packed[2] = (byte) (FP_SIZE >>> 8);
    packed[3] = (byte) FP_SIZE;

    System.arraycopy(m_bits, 0, packed, headerLength, m_bits.length);
    return base64Encode(packed);
}
/**
 * Populates the supplied byte array for a molecule by calling
 * {@code SmartsPattern.prepare} and then the element, ring and substructure
 * counting helpers, in that order.
 *
 * @param fp  the byte array handed to each counting helper
 * @param mol the molecule being processed
 * @throws CDKException declared by this method; which of the called helpers
 *         actually raise it is not visible from here
 */
private void _generateFp(byte[] fp, IAtomContainer mol) throws CDKException {
    // NOTE(review): presumably prepares mol for the SMARTS matching done by
    // countSubstructures, so it must stay first - confirm before reordering
    SmartsPattern.prepare(mol);
    countElements(fp, mol);
    countRings(fp, mol);
    countSubstructures(fp, mol);
}
/**
 * Calculate 881 bit Pubchem fingerprint for a molecule.
 *
 * See
 * <a href="ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt">here</a>
 * for a description of each bit position.
 *
 * @param atomContainer the molecule to consider
 * @return the fingerprint
 * @throws CDKException if there is an error during substructure
 * searching or atom typing
 * @see #getFingerprintAsBytes()
 */
@Override
public IBitFingerprint getBitFingerprint(IAtomContainer atomContainer) throws CDKException {
    generateFp(atomContainer);

    // mirror the packed internal bits into a BitSet, one position at a time
    BitSet bits = new BitSet(FP_SIZE);
    for (int position = 0; position < FP_SIZE; position++) {
        bits.set(position, isBitOn(position));
    }
    return new BitSetFingerprint(bits);
}
/**
 * Creates a new {@code PubchemFingerprinter} constructed with the builder
 * returned by {@code DefaultChemObjectBuilder.getInstance()}.
 *
 * @return a freshly constructed PubchemFingerprinter
 */
@Override
public IFingerprinter getBitFingerprinter() {
    return new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
}
/**
 * Checks that the fingerprint computed for benzene (SMILES {@code c1ccccc1})
 * equals the fingerprint decoded from the Base64 reference string.
 *
 * @throws CDKException declared for the parsing, typing and fingerprinting calls
 */
@Test
public void testBenzene() throws CDKException {
    IAtomContainer benzene = parser.parseSmiles("c1ccccc1");

    // atom typing, explicit hydrogens and aromaticity are applied before fingerprinting
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(benzene);
    CDKHydrogenAdder.getInstance(benzene.getBuilder()).addImplicitHydrogens(benzene);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(benzene);
    Aromaticity.cdkLegacy().apply(benzene);

    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet computed = fingerprinter.getBitFingerprint(benzene).asBitSet();

    final String referenceEncoding =
            "AAADcYBgAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGAAAAAAACACAEAAwAIAAAACAACBCAAACAAAgAAAIiAAAAIgIICKAERCAIAAggAAIiAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";
    BitSet reference = PubchemFingerprinter.decode(referenceEncoding);

    Assert.assertEquals(reference, computed);
}
/**
 * Checks that {@code getFingerprintAsBytes()} agrees with the bytes obtained
 * by converting the BitSet fingerprint via {@code toByteArray}, after that
 * conversion is resized to the length of the fingerprinter's byte array.
 *
 * @throws CDKException declared for the parsing, typing and fingerprinting calls
 */
@Test
public void testGetFingerprintAsBytes() throws CDKException {
    IAtomContainer molecule = parser.parseSmiles("C=C(C1=CC=C(C=C1)O)NNC2=C(C(=NC(=C2Cl)Cl)C(=O)O)Cl");

    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder.getInstance(molecule.getBuilder()).addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    PubchemFingerprinter fingerprinter = new PubchemFingerprinter(molecule.getBuilder());
    // the fingerprint must be generated before the raw bytes are requested
    BitSet asBitSet = fingerprinter.getBitFingerprint(molecule).asBitSet();
    byte[] bytesFromFingerprinter = fingerprinter.getFingerprintAsBytes();

    byte[] bytesFromBitSet = toByteArray(asBitSet);
    byte[] resized = Arrays.copyOf(bytesFromBitSet, bytesFromFingerprinter.length);

    Assert.assertArrayEquals(resized, bytesFromFingerprinter);
}
/**
 * Checks that decoding the string {@code "a"} is rejected with an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testDecode_invalid() {
    PubchemFingerprinter.decode("a");
}
/**
 * Builds the "extended" fingerprinter list: OpenBabel, substructure, MACCS,
 * PubChem, Klekota-Roth and spherical fingerprinters, in that order.
 *
 * @return a fixed-size list (as produced by {@link Arrays#asList}) of newly
 *         created fingerprinters
 */
public static List<IFingerprinter> createExtendedListOfFingerprints() {
    // the explicit type argument replaces a cast on every element
    return Arrays.<IFingerprinter>asList(
            new OpenBabelFingerprinter(),
            new SubstructureFingerprinter(),
            new MACCSFingerprinter(),
            new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()),
            new KlekotaRothFingerprinter(),
            new SphericalFingerprint());
}
/**
 * Test case for Pubchem CID 25181308.
 *
 * <p>Computes the fingerprint for the parsed SMILES and compares it with the
 * fingerprint decoded from the Base64 reference string.
 *
 * @throws InvalidSmilesException
 * @cdk.inchi InChI=1S/C13H24O10S/c1-20-12-8(18)6(16)10(4(2-14)21-12)23-13-9(19)7(17)11(24)5(3-15)22-13/h4-19,24H,2-3H2,1H3/t4-,5-,6-,7-,8-,9-,10-,11-,12-,13+/m1/s1
 */
@Test
public void testCID2518130() throws CDKException {
    IAtomContainer molecule = parser.parseSmiles("COC1C(C(C(C(O1)CO)OC2C(C(C(C(O2)CO)S)O)O)O)O");

    // atom typing, explicit hydrogens and aromaticity are applied before fingerprinting
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder.getInstance(molecule.getBuilder()).addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet computed = fingerprinter.getBitFingerprint(molecule).asBitSet();

    final String referenceEncoding =
            "AAADceBwPABAAAAAAAAAAAAAAAAAAAAAAAAkSAAAAAAAAAAAAAAAGgQACAAACBS0wAOCCAAABgQAAAAAAAAAAAAAAAAAAAAAAAAREAIAAAAiQAAFAAAHAAHAYAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";
    BitSet reference = PubchemFingerprinter.decode(referenceEncoding);

    Assert.assertEquals(reference, computed);
}
/**
 * Decodes a Base64 reference fingerprint and checks that each listed bit
 * position is set in the resulting BitSet.
 */
@Test
public void testDecode() {
    final String encoded =
            "AAADcYBgAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGAAAAAAACACAEAAwAIAAAACAACBCAAACAAAgAAAIiAAAAIgIICKAERCAIAAggAAIiAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";
    BitSet decoded = PubchemFingerprinter.decode(encoded);

    int[] expectedOnBits = {
        0, 9, 10, 178, 179, 255, 283, 284, 332, 344, 355, 370, 371, 384, 416, 434, 441, 446, 470, 490,
        516, 520, 524, 552, 556, 564, 570, 578, 582, 584, 595, 599, 603, 608, 618, 634, 640, 660, 664,
        668, 677, 678, 679
    };

    for (int k = 0; k < expectedOnBits.length; k++) {
        int bit = expectedOnBits[k];
        Assert.assertTrue("bit " + bit + " was not set", decoded.get(bit));
    }
}
/**
 * Builds a fingerprinter list containing OpenBabel, substructure, MACCS,
 * PubChem, Klekota-Roth and ECFP fingerprinters, in that order.
 *
 * @return a fixed-size list (as produced by {@link Arrays#asList}) of newly
 *         created fingerprinters
 */
public static List<IFingerprinter> createListOfFingerprints() {
    // the explicit type argument replaces a cast on every element
    return Arrays.<IFingerprinter>asList(
            new OpenBabelFingerprinter(),
            new SubstructureFingerprinter(),
            new MACCSFingerprinter(),
            new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()),
            new KlekotaRothFingerprinter(),
            new ECFPFingerprinter());
}
/**
 * Test case for Pubchem CID 25181289.
 *
 * <p>Computes the fingerprint for the parsed SMILES and compares it with the
 * fingerprint decoded from the Base64 reference string.
 *
 * @throws InvalidSmilesException
 * @cdk.inchi InChI=1S/C14H10Cl3N3O3/c1-6(7-2-4-8(21)5-3-7)19-20-11-9(15)12(14(22)23)18-13(17)10(11)16/h2-5,19,21H,1H2,(H,18,20)(H,22,23)
 */
@Test
public void testCID25181289() throws CDKException {
    IAtomContainer molecule = parser.parseSmiles("C=C(C1=CC=C(C=C1)O)NNC2=C(C(=NC(=C2Cl)Cl)C(=O)O)Cl");

    // atom typing, explicit hydrogens and aromaticity are applied before fingerprinting
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder.getInstance(molecule.getBuilder()).addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet computed = fingerprinter.getBitFingerprint(molecule).asBitSet();

    final String referenceEncoding =
            "AAADccBzMAAGAAAAAAAAAAAAAAAAAAAAAAA8QAAAAAAAAAABwAAAHgIYCAAADA6BniAwzpJqEgCoAyTyTASChCAnJiIYumGmTtgKJnLD1/PEdQhkwBHY3Qe82AAOIAAAAAAAAABAAAAAAAAAAAAAAAAAAA==";
    BitSet reference = PubchemFingerprinter.decode(referenceEncoding);

    Assert.assertEquals(reference, computed);
}
public static List<IFingerprinter> createListOfAllFingerprints() { return Arrays.asList( (IFingerprinter) new OpenBabelFingerprinter(), // 55 (0..54) (IFingerprinter) new SubstructureFingerprinter(), // 307 (55..361) (IFingerprinter) new MACCSFingerprinter(),// 166 (362..527) (IFingerprinter) new PubchemFingerprinter( DefaultChemObjectBuilder.getInstance()), // 881 (528..1408) (IFingerprinter) new KlekotaRothFingerprinter(), // 4860 (1409..6269) (IFingerprinter) new SphericalFingerprint(), (IFingerprinter) new ECFPFingerprinter() ); }
/**
 * Test case for Pubchem CID 5934166.
 *
 * <p>Computes the fingerprint for the parsed SMILES and compares it with the
 * fingerprint decoded from the Base64 reference string.
 *
 * @throws InvalidSmilesException
 * @cdk.inchi InChI=1S/C32H26N/c1-5-13-26(14-6-1)21-22-31-23-30(28-17-9-3-10-18-28)24-32(29-19-11-4-12-20-29)33(31)25-27-15-7-2-8-16-27/h1-24H,25H2/q+1/b22-21+
 */
@Test
public void testCID5934166() throws CDKException {
    IAtomContainer molecule =
            parser.parseSmiles("C1=CC=C(C=C1)C[N+]2=C(C=C(C=C2C=CC3=CC=CC=C3)C4=CC=CC=C4)C5=CC=CC=C5");

    // atom typing, explicit hydrogens and aromaticity are applied before fingerprinting
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder.getInstance(molecule.getBuilder()).addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet computed = fingerprinter.getBitFingerprint(molecule).asBitSet();

    final String referenceEncoding =
            "AAADceB+AAAAAAAAAAAAAAAAAAAAAAAAAAA8YMGCAAAAAAAB1AAAHAAAAAAADAjBHgQwgJMMEACgAyRiRACCgCAhAiAI2CA4ZJgIIOLAkZGEIAhggADIyAcQgMAOgAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";
    BitSet reference = PubchemFingerprinter.decode(referenceEncoding);

    Assert.assertEquals(reference, computed);
}
/**
 * Builds a {@code Fingerprinter} from the fingerprint types selected in the
 * given options.
 *
 * <p>Each selected option contributes one fingerprinter, added in the fixed
 * order OpenBabel, MACCS, PubChem, Klekota-Roth, path, CFM, neighbourhood.
 * When none of these options is selected, the no-argument
 * {@code Fingerprinter} is returned instead.
 *
 * <p>Previously the path and neighbourhood options only took effect when at
 * least one of the other options was also selected; on their own they were
 * silently ignored. They are now honoured individually.
 *
 * @param opts the options describing which fingerprints to compute
 * @return a Fingerprinter wrapping the selected fingerprinters, or the
 *         default Fingerprinter when nothing is selected
 * @throws CDKException declared by this method's signature
 */
public Fingerprinter getFingerprinter(ComputeOpts opts) throws CDKException {
    ArrayList<IFingerprinter> fingerprinters = new ArrayList<IFingerprinter>();

    if (opts.isOpenbabel()) {
        fingerprinters.add(new OpenBabelFingerprinter());
    }
    if (opts.isMaccs()) {
        fingerprinters.add(new MACCSFingerprinter());
    }
    if (opts.isPubchem()) {
        fingerprinters.add(new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()));
    }
    if (opts.isKlekotha()) {
        fingerprinters.add(new KlekotaRothFingerprinter());
    }
    if (opts.isPath()) {
        fingerprinters.add(new MarcusPathFingerprinter());
    }
    if (opts.isCfm()) {
        fingerprinters.add(new CFMFingerprinter());
    }
    if (opts.isNeighbourhood()) {
        fingerprinters.add(new NeighbourhoodFingerprinter());
    }

    // Decide on the collected list rather than on a separate, easily outdated
    // copy of the option checks.
    if (fingerprinters.isEmpty()) {
        return new Fingerprinter();
    }
    return new Fingerprinter(fingerprinters);
}
/**
 * Resolves the options into a list of fingerprinters.
 *
 * <p>The "all" and "extended" options take precedence and return the
 * corresponding predefined lists. Otherwise one or more fingerprinters are
 * added per selected option (the OpenBabel option adds both the OpenBabel and
 * the substructure fingerprinter). If nothing was selected, the list from
 * {@code Fingerprinter.createListOfFingerprints()} is returned.
 *
 * @param opts the options describing which fingerprints to compute
 * @return the fingerprinters to use
 */
private static List<IFingerprinter> getFingerprintList(ComputeOpts opts) {
    if (opts.isAll()) {
        return Fingerprinter.createListOfAllFingerprints();
    }
    if (opts.isExtended()) {
        return Fingerprinter.createExtendedListOfFingerprints();
    }

    final List<IFingerprinter> selected = new ArrayList<IFingerprinter>();
    if (opts.isOpenbabel()) {
        selected.add(new OpenBabelFingerprinter());
        selected.add(new SubstructureFingerprinter());
    }
    if (opts.isMaccs()) {
        selected.add(new MACCSFingerprinter());
    }
    if (opts.isPubchem()) {
        selected.add(new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()));
    }
    if (opts.isKlekotha()) {
        selected.add(new KlekotaRothFingerprinter());
    }
    if (opts.isPath()) {
        selected.add(new MarcusPathFingerprinter());
    }
    if (opts.isNeighbourhood()) {
        selected.add(new NeighbourhoodFingerprinter());
    }
    if (opts.isSpherical()) {
        selected.add(new SphericalFingerprint());
    }

    return selected.isEmpty() ? Fingerprinter.createListOfFingerprints() : selected;
}
/**
 * Creates a fingerprinter from its textual name.
 *
 * <p>Matching is case-insensitive. Recognised names are {@code openbabel},
 * {@code substructure}, {@code maccs}, {@code pubchem}, {@code klekota} /
 * {@code klekota_roth}, {@code path}, {@code neighbours}, {@code spheres}
 * and {@code ecfp}.
 *
 * @param name the fingerprinter name
 * @return a newly created fingerprinter for that name
 * @throws IllegalArgumentException if the name is not recognised
 * @throws NullPointerException if {@code name} is {@code null}
 */
public static IFingerprinter getFingerprinterByName(String name) {
    // Lower-case with a fixed locale: with the default locale, an upper-case
    // 'I' becomes a dotless 'ı' under e.g. a Turkish locale, so "NEIGHBOURS"
    // would not match its case label.
    switch (name.toLowerCase(java.util.Locale.ROOT)) {
        case "openbabel":
            return new OpenBabelFingerprinter();
        case "substructure":
            return new SubstructureFingerprinter();
        case "maccs":
            return new MACCSFingerprinter();
        case "pubchem":
            return new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
        case "klekota":
        case "klekota_roth":
            return new KlekotaRothFingerprinter();
        case "path":
            return new MarcusPathFingerprinter();
        case "neighbours":
            return new NeighbourhoodFingerprinter();
        case "spheres":
            return new SphericalFingerprint();
        case "ecfp":
            return new ECFPFingerprinter();
        default:
            throw new IllegalArgumentException("Unknown fingerprinter: " + name);
    }
}
/**
 * Creates the fingerprinter that corresponds to a
 * {@code CdkFingerprintVersion.USED_FINGERPRINTS} constant.
 *
 * @param fp the fingerprint type
 * @return a newly created fingerprinter for that type
 * @throws IllegalArgumentException if the constant has no case below; the
 *         message names the offending constant
 */
public static IFingerprinter getFingerprinter(CdkFingerprintVersion.USED_FINGERPRINTS fp) {
    switch (fp) {
        case OPENBABEL:
            return new OpenBabelFingerprinter();
        case SUBSTRUCTURE:
            return new SubstructureFingerprinter();
        case MACCS:
            return new MACCSFingerprinter();
        case PUBCHEM:
            return new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
        case KLEKOTA_ROTH:
            return new KlekotaRothFingerprinter();
        case ECFP:
            return new ECFPFingerprinter();
        case CLASSYFIRE_SMARTS:
            return new ClassyFireSmartsFingerprint();
        case SHORTEST_PATH:
            return new ShortestPathFingerprinter();
        case BIOSMARTS:
            return new BiosmartsFingerprinter();
        case RINGSYSTEMS:
            return new RingsystemFingerprinter();
        default:
            // name the constant so that a newly added but unhandled type is
            // easy to diagnose
            throw new IllegalArgumentException("Unknown fingerprinter: " + fp);
    }
}