/**
 * Calculates Tanimoto distance for two count fingerprints using method 1.
 *
 * The feature/count type fingerprints may be of different length.
 * Uses Tanimoto method from {@cdk.cite Steffen09}.
 *
 * <p>The value returned is {@code xy / (x + y - xy)}, where {@code x} and {@code y} are the
 * sums of the squared counts of {@code fp1} and {@code fp2}, and {@code xy} is the sum of the
 * count products over every pair of bins whose hashes are equal. All products are evaluated
 * in 64-bit arithmetic. When neither fingerprint has a populated bin the expression is
 * {@code 0/0} and the result is {@code NaN}.
 *
 * @param fp1 count fingerprint 1
 * @param fp2 count fingerprint 2
 * @return a Tanimoto distance
 */
public static double method1(ICountFingerprint fp1, ICountFingerprint fp2) {
    long xy = 0, x = 0, y = 0;
    for (int i = 0; i < fp1.numOfPopulatedbins(); i++) {
        int hash = fp1.getHash(i);
        // Widen to long BEFORE multiplying: an int * int product silently overflows
        // once a count exceeds 46340, which would corrupt the accumulated sums.
        long count1 = fp1.getCount(i);
        for (int j = 0; j < fp2.numOfPopulatedbins(); j++) {
            if (hash == fp2.getHash(j)) {
                xy += count1 * fp2.getCount(j);
            }
        }
        x += count1 * count1;
    }
    for (int j = 0; j < fp2.numOfPopulatedbins(); j++) {
        long count2 = fp2.getCount(j);
        y += count2 * count2;
    }
    return ((double) xy / (x + y - xy));
}
/**
 * Verifies that {@code FingerprinterTool.makeCountFingerprint} turns a feature-to-count map
 * into a count fingerprint with one populated bin per feature, where each bin is looked up
 * by the feature string's {@code hashCode()} and reports the mapped count.
 */
@Test
public void makeCountFingerprint() {
    Map<String,Integer> featureCounts = new HashMap<String,Integer>();
    featureCounts.put("CCO", 1);
    featureCounts.put("CC", 2);
    featureCounts.put("C", 2);

    ICountFingerprint countFp = FingerprinterTool.makeCountFingerprint(featureCounts);

    // Hash keys under which the three features are expected to be stored.
    int hashOfCco = "CCO".hashCode();
    int hashOfCc = "CC".hashCode();
    int hashOfC = "C".hashCode();

    assertThat(countFp.numOfPopulatedbins(), is(3));
    assertThat(countFp.getCountForHash(hashOfCco), is(1));
    assertThat(countFp.getCountForHash(hashOfCc), is(2));
    assertThat(countFp.getCountForHash(hashOfC), is(2));
}
}
/**
 * Checks that the atom-pairs count fingerprint of the SMILES {@code cccccccccc}
 * has exactly 9 populated bins.
 */
@Test
public void testGetCountFingerprint() throws Exception {
    IFingerprinter atomPairsPrinter = new AtomPairs2DFingerprinter();
    IAtomContainer container = parser.parseSmiles("cccccccccc");
    int populatedBins = atomPairsPrinter.getCountFingerprint(container).numOfPopulatedbins();
    Assert.assertEquals(9, populatedBins);
}
/**
 * Checks the count fingerprint produced by a {@code LingoFingerprinter(4)} for the SMILES
 * {@code Oc1ccccc1}: the substring {@code cccc} is expected twice, and {@code Oc0c},
 * {@code c0cc}, {@code 0ccc} and {@code ccc0} once each. Bins are addressed by the
 * substring's {@code hashCode()}.
 */
@Test
@Override
public void testGetCountFingerprint() throws Exception {
    LingoFingerprinter lingoPrinter = new LingoFingerprinter(4);
    IAtomContainer phenol =
            new SmilesParser(DefaultChemObjectBuilder.getInstance()).parseSmiles("Oc1ccccc1");
    ICountFingerprint lingoCounts = lingoPrinter.getCountFingerprint(phenol);

    // The only substring expected to occur more than once.
    assertThat(lingoCounts.getCountForHash("cccc".hashCode()), is(2));

    // Substrings expected to occur exactly once.
    assertThat(lingoCounts.getCountForHash("Oc0c".hashCode()), is(1));
    assertThat(lingoCounts.getCountForHash("c0cc".hashCode()), is(1));
    assertThat(lingoCounts.getCountForHash("0ccc".hashCode()), is(1));
    assertThat(lingoCounts.getCountForHash("ccc0".hashCode()), is(1));
}
/**
 * Checks that count fingerprints switched to bit-fingerprint behaviour yield the same
 * Tanimoto value (tolerance 0.001) as genuine bit fingerprints, and that the two count-based
 * Tanimoto methods agree with each other. The molecules compared are 1,2,3-triazole and
 * imidazole from {@code TestMoleculeFactory}.
 */
@Test
public void testComparingBitFingerprintAndCountBehavingAsBit() throws Exception {
    IAtomContainer triazole = TestMoleculeFactory.make123Triazole();
    IAtomContainer imidazole = TestMoleculeFactory.makeImidazole();
    SignatureFingerprinter signaturePrinter = new SignatureFingerprinter(1);

    ICountFingerprint triazoleCounts = signaturePrinter.getCountFingerprint(triazole);
    ICountFingerprint imidazoleCounts = signaturePrinter.getCountFingerprint(imidazole);
    triazoleCounts.setBehaveAsBitFingerprint(true);
    imidazoleCounts.setBehaveAsBitFingerprint(true);

    double fromBits = Tanimoto.calculate(
            signaturePrinter.getBitFingerprint(triazole),
            signaturePrinter.getBitFingerprint(imidazole));
    double fromMethod1 = Tanimoto.method1(triazoleCounts, imidazoleCounts);
    double fromMethod2 = Tanimoto.method2(triazoleCounts, imidazoleCounts);

    Assert.assertEquals(fromMethod1, fromMethod2, 0.001);
    Assert.assertEquals(fromBits, fromMethod1, 0.001);
}
}
/**
 * Checks that a {@code SignatureFingerprinter(0)} returns a non-null count fingerprint with
 * a non-zero {@code size()} for the SMILES {@code O(NC)CC}.
 */
@Test
@Override
public void testGetCountFingerprint() throws Exception {
    SignatureFingerprinter fingerprinter = new SignatureFingerprinter(0);
    SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
    IAtomContainer mol = sp.parseSmiles("O(NC)CC");
    ICountFingerprint countFp = fingerprinter.getCountFingerprint(mol);
    Assert.assertNotNull(countFp);
    // Compare by value. The previous assertNotSame(0, size()) compared the identity of two
    // autoboxed objects, which does not express "size is not zero".
    Assert.assertTrue("count fingerprint should report a non-zero size", countFp.size() != 0);
}
}
// First fingerprint: expected count per hash (0 means the bin must be absent).
// Arguments follow JUnit's (expected, actual) order so failure messages read correctly.
cfp = printer.getCountFingerprint(mol);
Assert.assertEquals(2, cfp.getCountForHash(46));
Assert.assertEquals(1, cfp.getCountForHash(27));
Assert.assertEquals(2, cfp.getCountForHash(59));
Assert.assertEquals(1, cfp.getCountForHash(49));
Assert.assertEquals(1, cfp.getCountForHash(111));
Assert.assertEquals(3, cfp.getCountForHash(129));
Assert.assertEquals(2, cfp.getCountForHash(115));
Assert.assertEquals(3, cfp.getCountForHash(120));
Assert.assertEquals(3, cfp.getCountForHash(41));
Assert.assertEquals(0, cfp.getCountForHash(93));
Assert.assertEquals(0, cfp.getCountForHash(91));
Assert.assertEquals(0, cfp.getCountForHash(24));

// Second fingerprint.
// NOTE(review): hash 93 is expected to be 0 above and 3 here, so mol is presumably
// reassigned before this point by code not visible in this excerpt; confirm.
cfp = printer.getCountFingerprint(mol);
Assert.assertEquals(3, cfp.getCountForHash(15));
Assert.assertEquals(3, cfp.getCountForHash(135));
Assert.assertEquals(4, cfp.getCountForHash(139));
Assert.assertEquals(3, cfp.getCountForHash(93));
Assert.assertEquals(6, cfp.getCountForHash(73));
Assert.assertEquals(0, cfp.getCountForHash(91));
// Keep going while at least one of the two fingerprints still has unread populated bins
// (i indexes fp1, j indexes fp2; both are declared and advanced outside this excerpt).
while (i < fp1.numOfPopulatedbins() || j < fp2.numOfPopulatedbins()) {
    // For each fingerprint, read the hash and count at its current index.
    // A null value marks a fingerprint whose index has run past its last populated bin.
    Integer hash1 = i < fp1.numOfPopulatedbins() ? fp1.getHash(i) : null;
    Integer hash2 = j < fp2.numOfPopulatedbins() ? fp2.getHash(j) : null;
    Integer count1 = i < fp1.numOfPopulatedbins() ? fp1.getCount(i) : null;
    Integer count2 = j < fp2.numOfPopulatedbins() ? fp2.getCount(j) : null;
// Expected counts per feature hash; the trailing comment names the feature each hash stands for.
// Arguments follow JUnit's (expected, actual) order so failure messages read correctly.
// NOTE(review): the same hashes are asserted with different expected values further down,
// so cfp is presumably recomputed between groups by code not visible in this excerpt; confirm.
Assert.assertEquals(2, cfp.getCountForHash(128)); // 6-ring
Assert.assertEquals(2, cfp.getCountForHash(111)); // aromaticity
Assert.assertEquals(0, cfp.getCountForHash(7)); // 7-ring
Assert.assertEquals(0, cfp.getCountForHash(82)); // 5-ring
Assert.assertEquals(2, cfp.getCountForHash(128)); // 6-ring
Assert.assertEquals(0, cfp.getCountForHash(111)); // aromaticity
Assert.assertEquals(0, cfp.getCountForHash(7)); // 7-ring
Assert.assertEquals(0, cfp.getCountForHash(82)); // 5-ring
Assert.assertEquals(1, cfp.getCountForHash(128)); // 6-ring
Assert.assertEquals(1, cfp.getCountForHash(111)); // aromaticity
Assert.assertEquals(1, cfp.getCountForHash(10)); // 3-ring
Assert.assertEquals(1, cfp.getCountForHash(1)); // 4-ring
Assert.assertEquals(0, cfp.getCountForHash(7)); // 7-ring
Assert.assertEquals(0, cfp.getCountForHash(82)); // 5-ring
Assert.assertEquals(1, cfp.getCountForHash(128)); // 6-ring
Assert.assertEquals(1, cfp.getCountForHash(111)); // aromaticity
Assert.assertEquals(5, cfp.getCountForHash(10)); // 3-ring
Assert.assertEquals(1, cfp.getCountForHash(1)); // 4-ring
Assert.assertEquals(0, cfp.getCountForHash(7)); // 7-ring
Assert.assertEquals(0, cfp.getCountForHash(82)); // 5-ring
/**
 * Adds the populated bins of {@code fp} to this fingerprint.
 *
 * <p>Counts for hashes present in both fingerprints are summed; hashes present in only one
 * are carried over unchanged. Afterwards {@code hitHashes} holds the combined hashes in
 * ascending order and {@code numOfHits} holds the matching counts at the same positions.
 *
 * @param fp the count fingerprint whose bins are added to this one
 */
@Override
public void merge(ICountFingerprint fp) {
    // Seed the tally with this fingerprint's current bins.
    Map<Integer, Integer> totals = new HashMap<>();
    for (int idx = 0; idx < hitHashes.length; idx++) {
        totals.put(hitHashes[idx], numOfHits[idx]);
    }

    // Add the other fingerprint's counts, treating a missing hash as a count of zero.
    for (int bin = 0; bin < fp.numOfPopulatedbins(); bin++) {
        Integer previous = totals.get(fp.getHash(bin));
        int base = (previous == null) ? 0 : previous;
        totals.put(fp.getHash(bin), base + fp.getCount(bin));
    }

    // Rebuild the parallel arrays with hashes in ascending order.
    List<Integer> sortedHashes = new ArrayList<>(totals.keySet());
    Collections.sort(sortedHashes);
    int[] mergedHashes = new int[sortedHashes.size()];
    int[] mergedCounts = new int[sortedHashes.size()];
    for (int pos = 0; pos < mergedHashes.length; pos++) {
        Integer hash = sortedHashes.get(pos);
        mergedHashes[pos] = hash;
        mergedCounts[pos] = totals.get(hash);
    }
    hitHashes = mergedHashes;
    numOfHits = mergedCounts;
}
/**
 * Builds a bit fingerprint of {@code getSize()} bits from the circular count fingerprint of
 * the given container.
 *
 * <p>Each populated bin's hash is translated to a bit position through {@code HASH_SET}.
 * The bit is set only when that position is non-negative and the bin's count is positive.
 *
 * @param container the structure to fingerprint
 * @return the resulting bit fingerprint
 * @throws CDKException declared by the signature; presumably raised by the underlying
 *                      circular fingerprinter (confirm)
 */
@Override
public IBitFingerprint getBitFingerprint(IAtomContainer container) throws CDKException {
    final BitSetFingerprint bits = new BitSetFingerprint(getSize());
    final ICountFingerprint counts = circularFingerprinter.getCountFingerprint(container);
    final int binCount = counts.numOfPopulatedbins();
    for (int bin = 0; bin < binCount; bin++) {
        final int position = HASH_SET.get(counts.getHash(bin));
        if (position < 0) {
            continue; // this hash has no usable bit position
        }
        if (counts.getCount(bin) > 0) {
            bits.set(position, true);
        }
    }
    return bits;
}
/**
 * Compares the circular count fingerprint of {@code trivialMol} against a fixed answer key of
 * (hash, count) pairs.
 *
 * <p>The test throws a {@code CDKException} when the number of populated bins differs from
 * the number of expected pairs, when a produced hash is missing from the key, or when a
 * produced hash carries a different count than the key states.
 */
@Test
public void testGetCountFingerprint() throws Exception {
    assert (trivialMol != null);
    CircularFingerprinter circular = new CircularFingerprinter();
    ICountFingerprint produced = circular.getCountFingerprint(trivialMol);

    // Flat list of expected pairs: even slots hold the hash, odd slots the count.
    final int[] expectedPairs = {-414937772, 1, -1027418143, 1, 1627608083, 1, -868007456, 1, -1006701866, 1,
            -1059145289, 1, -801752141, 1, 790592664, 1, -289109509, 1, -1650154758, 1, 1286833445, 1};
    final int expectedBins = expectedPairs.length / 2;

    if (produced.numOfPopulatedbins() != expectedBins) {
        throw new CDKException("Hash values do not match.");
    }

    for (int bin = 0; bin < produced.numOfPopulatedbins(); bin++) {
        int gotHash = produced.getHash(bin);
        int gotCount = produced.getCount(bin);
        boolean matched = false;
        for (int slot = 0; slot < expectedBins * 2; slot += 2) {
            if (expectedPairs[slot] != gotHash) {
                continue;
            }
            matched = true;
            int wantCount = expectedPairs[slot + 1];
            if (gotCount != wantCount) {
                throw new CDKException("For hash " + gotHash + " got count " + gotCount + " but wanted " + wantCount);
            }
        }
        if (!matched) {
            throw new CDKException("Hash values do not match.");
        }
    }
}