/**
 * Runs every fingerprinter held in {@code fingerprinters} over the given
 * molecule and collects the resulting bit sets.
 *
 * @param mol the structure handed to each fingerprinter
 * @return one {@link BitSet} per fingerprinter, in the iteration order of
 *         {@code fingerprinters}
 * @throws CDKException declared for the fingerprint calculation
 */
public BitSet[] computeFingerprints(IAtomContainer mol) throws CDKException {
    final BitSet[] result = new BitSet[fingerprinters.size()];
    int slot = 0;
    for (IFingerprinter fingerprinter : fingerprinters) {
        final BitSet bits = fingerprinter.getBitFingerprint(mol).asBitSet();
        result[slot] = bits;
        // Sanity check, only evaluated when assertions are enabled: the bit set
        // must have capacity for the declared size and no set bit beyond it.
        assert !(bits.size() < fingerprinter.getSize() || bits.length() > fingerprinter.getSize());
        slot++;
    }
    return result;
}
/**
 * Checks that {@code GraphOnlyFingerprinter} yields equal fingerprints for
 * "C=C-C#N" and "CCCN", two SMILES that differ only in their bond orders.
 */
@Test
public void testFingerprint() throws Exception {
    SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
    IFingerprinter graphOnly = new GraphOnlyFingerprinter();

    IBitFingerprint unsaturated = graphOnly.getBitFingerprint(smilesParser.parseSmiles("C=C-C#N"));
    IBitFingerprint saturated = graphOnly.getBitFingerprint(smilesParser.parseSmiles("CCCN"));

    Assert.assertEquals(unsaturated, saturated);
}
@Test @Override public void testBug706786() throws Exception { IAtomContainer superStructure = bug706786_1(); IAtomContainer subStructure = bug706786_2(); addImplicitHydrogens(superStructure); addImplicitHydrogens(subStructure); // SMARTS is now correct and D will include H atoms, CDK had this wrong // for years (had it has non-H count). Whilst you can set the optional // SMARTS flavor CDK_LEGACY this is not correct AtomContainerManipulator.suppressHydrogens(superStructure); AtomContainerManipulator.suppressHydrogens(subStructure); IFingerprinter fpr = getBitFingerprinter(); IBitFingerprint superBits = fpr.getBitFingerprint(superStructure); IBitFingerprint subBits = fpr.getBitFingerprint(subStructure); assertThat(superBits.asBitSet(), is(asBitSet(0, 11, 13, 17, 40, 48, 136, 273, 274, 278, 286, 294, 299, 301, 304, 306))); assertThat(subBits.asBitSet(), is(asBitSet(1, 17, 273, 274, 278, 294, 306))); }
/**
 * Loads the two ChEBI mol files from the classpath, fingerprints both with
 * {@code ExtendedFingerprinter} and asserts that
 * {@code FingerprinterTool.isSubset} is false in both directions.
 *
 * @throws Exception if a resource cannot be read or fingerprinting fails
 */
@Test
public void testChebi() throws Exception {
    IAtomContainer searchmol;
    IAtomContainer findmol;

    String filename = "data/mdl/chebisearch.mol";
    InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
    // Fail with a readable message instead of an opaque NPE inside the reader.
    Assert.assertNotNull("Test resource not found: " + filename, ins);
    MDLV2000Reader reader = new MDLV2000Reader(ins);
    try {
        searchmol = reader.read(new AtomContainer());
    } finally {
        // Close even when read() throws so the reader is never leaked.
        reader.close();
    }

    filename = "data/mdl/chebifind.mol";
    ins = this.getClass().getClassLoader().getResourceAsStream(filename);
    Assert.assertNotNull("Test resource not found: " + filename, ins);
    reader = new MDLV2000Reader(ins);
    try {
        findmol = reader.read(new AtomContainer());
    } finally {
        reader.close();
    }

    IFingerprinter fingerprinter = new ExtendedFingerprinter();
    BitSet superBS = fingerprinter.getBitFingerprint(findmol).asBitSet();
    BitSet subBS = fingerprinter.getBitFingerprint(searchmol).asBitSet();

    boolean isSubset = FingerprinterTool.isSubset(superBS, subBS);
    boolean isSubset2 = FingerprinterTool.isSubset(subBS, superBS);

    Assert.assertFalse(isSubset);
    Assert.assertFalse(isSubset2);
}
/**
 * Exercises the two-argument {@code ExtendedFingerprinter(512, 7)}
 * constructor: {@code isSubset(indole, pyrrole)} must hold and the reverse
 * must not.
 */
@Test
public void testExtendedFingerprinter_int_int() throws java.lang.Exception {
    IFingerprinter extended = new ExtendedFingerprinter(512, 7);
    Assert.assertNotNull(extended);

    IAtomContainer indole = TestMoleculeFactory.makeIndole();
    BitSet indoleBits = extended.getBitFingerprint(indole).asBitSet();

    IAtomContainer pyrrole = TestMoleculeFactory.makePyrrole();
    BitSet pyrroleBits = extended.getBitFingerprint(pyrrole).asBitSet();

    Assert.assertTrue(FingerprinterTool.isSubset(indoleBits, pyrroleBits));
    Assert.assertFalse(FingerprinterTool.isSubset(pyrroleBits, indoleBits));
}
/**
 * Exercises the one-argument {@code ExtendedFingerprinter(512)} constructor:
 * {@code isSubset(indole, pyrrole)} must hold and the reverse must not.
 */
@Test
public void testExtendedFingerprinter_int() throws java.lang.Exception {
    IFingerprinter extended = new ExtendedFingerprinter(512);
    Assert.assertNotNull(extended);

    IAtomContainer indole = TestMoleculeFactory.makeIndole();
    BitSet indoleBits = extended.getBitFingerprint(indole).asBitSet();

    IAtomContainer pyrrole = TestMoleculeFactory.makePyrrole();
    BitSet pyrroleBits = extended.getBitFingerprint(pyrrole).asBitSet();

    Assert.assertTrue(FingerprinterTool.isSubset(indoleBits, pyrroleBits));
    Assert.assertFalse(FingerprinterTool.isSubset(pyrroleBits, indoleBits));
}
/**
 * Exercises {@code getBitFingerprint(IAtomContainer)} on a default
 * {@code ExtendedFingerprinter}: {@code isSubset(indole, pyrrole)} must hold
 * and the reverse must not.
 */
@Test
public void testgetBitFingerprint_IAtomContainer() throws java.lang.Exception {
    IFingerprinter extended = new ExtendedFingerprinter();
    Assert.assertNotNull(extended);

    IAtomContainer indole = TestMoleculeFactory.makeIndole();
    BitSet indoleBits = extended.getBitFingerprint(indole).asBitSet();

    IAtomContainer pyrrole = TestMoleculeFactory.makePyrrole();
    BitSet pyrroleBits = extended.getBitFingerprint(pyrrole).asBitSet();

    Assert.assertTrue(FingerprinterTool.isSubset(indoleBits, pyrroleBits));
    Assert.assertFalse(FingerprinterTool.isSubset(pyrroleBits, indoleBits));
}
/**
 * Fingerprints "C=C-C#N" and "C=CCC(O)CC#N" with the fingerprinter under
 * test, expects a declared size of 4860 and expects
 * {@code isSubset(larger, smaller)} to hold.
 */
@Test
public void testFingerprint() throws Exception {
    SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
    IFingerprinter fingerprinter = getBitFingerprinter();

    BitSet smallerBits = fingerprinter.getBitFingerprint(smilesParser.parseSmiles("C=C-C#N")).asBitSet();
    BitSet largerBits = fingerprinter.getBitFingerprint(smilesParser.parseSmiles("C=CCC(O)CC#N")).asBitSet();

    Assert.assertEquals(4860, fingerprinter.getSize());
    Assert.assertTrue(FingerprinterTool.isSubset(largerBits, smallerBits));
}
// NOTE(review): the brace below appears to close the enclosing class, whose
// header lies outside this chunk - confirm before moving this method.
}
/** * @cdk.bug 706786 */ @Test public void testBug706786() throws Exception { // inlined molecules - note this test fails if implicit hydrogens are // included. generally MACCS and ESTATE can't be used for substructure filter // check those subclasses which check the bits are set IAtomContainer superStructure = bug706786_1(); IAtomContainer subStructure = bug706786_2(); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(superStructure); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(subStructure); addImplicitHydrogens(superStructure); addImplicitHydrogens(subStructure); IFingerprinter fingerprinter = getBitFingerprinter(); BitSet superBS = fingerprinter.getBitFingerprint(superStructure).asBitSet(); BitSet subBS = fingerprinter.getBitFingerprint(subStructure).asBitSet(); Assert.assertThat(and(superBS, subBS), is(subBS)); }
/** * Fingerprint not subset. * * @cdk.bug 934819 */ @Test public void testBug934819() throws Exception { // inlined molecules - note this test fails if implicit hydrogens are // included. generally PubCheMFingerprint can't be used for substructure filter IAtomContainer superStructure = bug934819_2(); IAtomContainer subStructure = bug934819_1(); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(superStructure); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(subStructure); addImplicitHydrogens(superStructure); addImplicitHydrogens(subStructure); IFingerprinter fingerprinter = getBitFingerprinter(); BitSet superBS = fingerprinter.getBitFingerprint(superStructure).asBitSet(); BitSet subBS = fingerprinter.getBitFingerprint(subStructure).asBitSet(); Assert.assertThat(and(superBS, subBS), is(subBS)); }
/**
 * Fingerprints "C=C-C#N" and "C=CCC(O)CC#N" with {@code EStateFingerprinter}.
 * Expects a declared size of 79, bits 7 and 10 set for the smaller molecule,
 * and {@code isSubset(larger, smaller)} to hold.
 */
@Test
public void testFingerprint() throws Exception {
    SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
    IFingerprinter estate = new EStateFingerprinter();

    IBitFingerprint smaller = estate.getBitFingerprint(smilesParser.parseSmiles("C=C-C#N"));
    IBitFingerprint larger = estate.getBitFingerprint(smilesParser.parseSmiles("C=CCC(O)CC#N"));

    Assert.assertEquals(79, estate.getSize());

    Assert.assertTrue(smaller.get(7));
    Assert.assertTrue(smaller.get(10));

    Assert.assertTrue(FingerprinterTool.isSubset(larger.asBitSet(), smaller.asBitSet()));
}
/**
 * Asserts that bit 136 (vinylogous ester) is not set for benzaldehyde.
 *
 * <p>The test fails, but Daylight says the SMARTS pattern used for
 * vinylogous ester should match benzaldehyde twice, so according to the
 * supplied definition the current answer is actually correct.
 *
 * <p>NOTE(review): the method carries {@code @Ignore} but no {@code @Test};
 * presumably JUnit 4 never discovers it, so it would not even be reported as
 * ignored - confirm whether that is intended.
 *
 * @cdk.bug 2871303
 */
@Ignore("the SMARTS pattern vinylogous ester is not strict enough - we can not fix this")
public void testVinylogousEster() throws Exception {
    String benzaldehyde = "c1ccccc1C=O";

    IFingerprinter substructure = new SubstructureFingerprinter();
    SmilesParser smilesParser = new SmilesParser(DefaultChemObjectBuilder.getInstance());

    IBitFingerprint bits = substructure.getBitFingerprint(smilesParser.parseSmiles(benzaldehyde));

    Assert.assertFalse("Bit 136 (vinylogous ester) is set to true", bits.get(136));
}
/**
 * Fingerprints the series "CC(N)CCCN", "CC(N)CCC", "CCCC" with
 * {@code PubchemFingerprinter} and asserts
 * {@code isSubset(bits[i], bits[i + 1])} for each consecutive pair.
 */
@Test
public void testfp2() throws Exception {
    IFingerprinter pubchem = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());

    // Ordered so that each entry is followed by the one expected to be its subset.
    IAtomContainer[] series = {
        parser.parseSmiles("CC(N)CCCN"),
        parser.parseSmiles("CC(N)CCC"),
        parser.parseSmiles("CCCC")
    };

    for (IAtomContainer mol : series) {
        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
    }
    for (IAtomContainer mol : series) {
        Aromaticity.cdkLegacy().apply(mol);
    }

    BitSet[] bits = new BitSet[series.length];
    for (int i = 0; i < series.length; i++) {
        bits[i] = pubchem.getBitFingerprint(series[i]).asBitSet();
    }

    for (int i = 0; i + 1 < bits.length; i++) {
        Assert.assertTrue(FingerprinterTool.isSubset(bits[i], bits[i + 1]));
    }
}
/**
 * Builds a {@code SubstructureFingerprinter} from three user-supplied SMARTS
 * and checks which bits are set for "c1ccccc1CCC" (bits 0 and 1, not 2) and
 * for "C=C=C" (none of the three).
 */
@Test
public void testUserFunctionalGroups() throws Exception {
    String[] patterns = {"c1ccccc1", "[CX4H3][#6]", "[CX2]#[CX2]"};
    IFingerprinter substructure = new SubstructureFingerprinter(patterns);
    Assert.assertEquals(3, substructure.getSize());

    SmilesParser smilesParser = new SmilesParser(DefaultChemObjectBuilder.getInstance());

    IAtomContainer propylbenzene = smilesParser.parseSmiles("c1ccccc1CCC");
    IBitFingerprint propylbenzeneBits = substructure.getBitFingerprint(propylbenzene);
    Assert.assertNotNull(propylbenzeneBits);
    Assert.assertTrue(propylbenzeneBits.get(0));
    Assert.assertTrue(propylbenzeneBits.get(1));
    Assert.assertFalse(propylbenzeneBits.get(2));

    IAtomContainer allene = smilesParser.parseSmiles("C=C=C");
    IBitFingerprint alleneBits = substructure.getBitFingerprint(allene);
    Assert.assertNotNull(alleneBits);
    for (int bit = 0; bit < 3; bit++) {
        Assert.assertFalse(alleneBits.get(bit));
    }
}
/**
 * Fingerprints "c1ccccc1CCc1ccccc1" and "c1ccccc1CC" with
 * {@code PubchemFingerprinter} after typing, hydrogen addition, conversion
 * to explicit hydrogens and legacy aromaticity perception. Expects a declared
 * size of 881 and expects {@code isSubset(first, second)} to be false.
 */
@Test
public void testFingerprint() throws Exception {
    IFingerprinter pubchem = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(DefaultChemObjectBuilder.getInstance());

    IAtomContainer[] mols = {
        parser.parseSmiles("c1ccccc1CCc1ccccc1"),
        parser.parseSmiles("c1ccccc1CC")
    };

    // Each preparation step is applied to both molecules before the next
    // step starts.
    for (IAtomContainer mol : mols) {
        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
    }
    for (IAtomContainer mol : mols) {
        hydrogenAdder.addImplicitHydrogens(mol);
    }
    for (IAtomContainer mol : mols) {
        AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol);
    }
    for (IAtomContainer mol : mols) {
        Aromaticity.cdkLegacy().apply(mol);
    }

    BitSet firstBits = pubchem.getBitFingerprint(mols[0]).asBitSet();
    BitSet secondBits = pubchem.getBitFingerprint(mols[1]).asBitSet();

    Assert.assertEquals(881, pubchem.getSize());
    Assert.assertFalse("c1ccccc1CC was detected as a subset of c1ccccc1CCc1ccccc1",
                       FingerprinterTool.isSubset(firstBits, secondBits));
}
/**
 * Compares the {@code PubchemFingerprinter} output for "c1ccccc1" with a
 * reference fingerprint decoded from its encoded string form.
 *
 * @throws CDKException declared for parsing, preparation and fingerprinting
 */
@Test
public void testBenzene() throws CDKException {
    IAtomContainer benzene = parser.parseSmiles("c1ccccc1");

    // Preparation: atom typing, hydrogens made explicit, legacy aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(benzene);
    CDKHydrogenAdder.getInstance(benzene.getBuilder()).addImplicitHydrogens(benzene);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(benzene);
    Aromaticity.cdkLegacy().apply(benzene);

    IFingerprinter pubchem = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet actual = pubchem.getBitFingerprint(benzene).asBitSet();
    BitSet expected = PubchemFingerprinter.decode(
            "AAADcYBgAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGAAAAAAACACAEAAwAIAAAACAACBCAAACAAAgAAAIiAAAAIgIICKAERCAIAAggAAIiAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==");

    Assert.assertEquals(expected, actual);
}
/**
 * Uses the default {@code SubstructureFingerprinter} (expected size 307) on
 * "c1ccccc1CCC" and checks that bits 273, 0 and 1 are set while bit 100 is
 * not.
 */
@Test
public void testFunctionalGroupsBinary() throws Exception {
    IFingerprinter substructure = new SubstructureFingerprinter();
    Assert.assertEquals(307, substructure.getSize());

    SmilesParser smilesParser = new SmilesParser(DefaultChemObjectBuilder.getInstance());
    IAtomContainer propylbenzene = smilesParser.parseSmiles("c1ccccc1CCC");

    IBitFingerprint bits = substructure.getBitFingerprint(propylbenzene);
    Assert.assertNotNull(bits);

    Assert.assertTrue(bits.get(273));
    Assert.assertTrue(bits.get(0));
    Assert.assertTrue(bits.get(1));
    Assert.assertFalse(bits.get(100));
}
/**
 * Test case for Pubchem CID 25181289: the computed fingerprint must equal
 * the reference fingerprint decoded from its encoded string form.
 *
 * @throws CDKException declared for parsing, preparation and fingerprinting
 * @cdk.inchi InChI=1S/C14H10Cl3N3O3/c1-6(7-2-4-8(21)5-3-7)19-20-11-9(15)12(14(22)23)18-13(17)10(11)16/h2-5,19,21H,1H2,(H,18,20)(H,22,23)
 */
@Test
public void testCID25181289() throws CDKException {
    IAtomContainer compound = parser.parseSmiles("C=C(C1=CC=C(C=C1)O)NNC2=C(C(=NC(=C2Cl)Cl)C(=O)O)Cl");

    // Preparation: atom typing, hydrogens made explicit, legacy aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(compound);
    CDKHydrogenAdder.getInstance(compound.getBuilder()).addImplicitHydrogens(compound);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(compound);
    Aromaticity.cdkLegacy().apply(compound);

    IFingerprinter pubchem = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet actual = pubchem.getBitFingerprint(compound).asBitSet();
    BitSet expected = PubchemFingerprinter.decode(
            "AAADccBzMAAGAAAAAAAAAAAAAAAAAAAAAAA8QAAAAAAAAAABwAAAHgIYCAAADA6BniAwzpJqEgCoAyTyTASChCAnJiIYumGmTtgKJnLD1/PEdQhkwBHY3Qe82AAOIAAAAAAAAABAAAAAAAAAAAAAAAAAAA==");

    Assert.assertEquals(expected, actual);
}
/**
 * Test case for Pubchem CID 25181308: the computed fingerprint must equal
 * the reference fingerprint decoded from its encoded string form.
 *
 * <p>NOTE(review): the method name ends in "2518130" while the CID stated
 * here is 25181308 - presumably a dropped digit in the name; confirm.
 *
 * @throws CDKException declared for parsing, preparation and fingerprinting
 * @cdk.inchi InChI=1S/C13H24O10S/c1-20-12-8(18)6(16)10(4(2-14)21-12)23-13-9(19)7(17)11(24)5(3-15)22-13/h4-19,24H,2-3H2,1H3/t4-,5-,6-,7-,8-,9-,10-,11-,12-,13+/m1/s1
 */
@Test
public void testCID2518130() throws CDKException {
    IAtomContainer compound = parser.parseSmiles("COC1C(C(C(C(O1)CO)OC2C(C(C(C(O2)CO)S)O)O)O)O");

    // Preparation: atom typing, hydrogens made explicit, legacy aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(compound);
    CDKHydrogenAdder.getInstance(compound.getBuilder()).addImplicitHydrogens(compound);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(compound);
    Aromaticity.cdkLegacy().apply(compound);

    IFingerprinter pubchem = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet actual = pubchem.getBitFingerprint(compound).asBitSet();
    BitSet expected = PubchemFingerprinter.decode(
            "AAADceBwPABAAAAAAAAAAAAAAAAAAAAAAAAkSAAAAAAAAAAAAAAAGgQACAAACBS0wAOCCAAABgQAAAAAAAAAAAAAAAAAAAAAAAAREAIAAAAiQAAFAAAHAAHAYAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==");

    Assert.assertEquals(expected, actual);
}
/**
 * Test case for Pubchem CID 5934166: the computed fingerprint must equal the
 * reference fingerprint decoded from its encoded string form.
 *
 * @throws CDKException declared for parsing, preparation and fingerprinting
 * @cdk.inchi InChI=1S/C32H26N/c1-5-13-26(14-6-1)21-22-31-23-30(28-17-9-3-10-18-28)24-32(29-19-11-4-12-20-29)33(31)25-27-15-7-2-8-16-27/h1-24H,25H2/q+1/b22-21+
 */
@Test
public void testCID5934166() throws CDKException {
    IAtomContainer compound = parser
            .parseSmiles("C1=CC=C(C=C1)C[N+]2=C(C=C(C=C2C=CC3=CC=CC=C3)C4=CC=CC=C4)C5=CC=CC=C5");

    // Preparation: atom typing, hydrogens made explicit, legacy aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(compound);
    CDKHydrogenAdder.getInstance(compound.getBuilder()).addImplicitHydrogens(compound);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(compound);
    Aromaticity.cdkLegacy().apply(compound);

    IFingerprinter pubchem = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet actual = pubchem.getBitFingerprint(compound).asBitSet();
    BitSet expected = PubchemFingerprinter.decode(
            "AAADceB+AAAAAAAAAAAAAAAAAAAAAAAAAAA8YMGCAAAAAAAB1AAAHAAAAAAADAjBHgQwgJMMEACgAyRiRACCgCAhAiAI2CA4ZJgIIOLAkZGEIAhggADIyAcQgMAOgAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==");

    Assert.assertEquals(expected, actual);
}