/** * For a given molecule, determines its fingerprints and uses them to calculate a Bayesian prediction. Note that this * value is unscaled, and so it only has relative meaning within the confines of the model, i.e. higher is more likely to * be active. * * @param mol molecular structure which cannot be blank or null * @return predictor value */ public double predict(IAtomContainer mol) throws CDKException { if (mol == null || mol.getAtomCount() == 0) throw new CDKException("Molecule cannot be blank or null."); CircularFingerprinter circ = new CircularFingerprinter(classType); circ.setPerceiveStereo(optPerceiveStereo); circ.calculate(mol); // gather all of the (folded) fingerprints (eliminating duplicates) final int AND_BITS = folding - 1; // e.g. 1024/0x400 -> 1023/0x3FF: chop off higher order bits Set<Integer> hashset = new HashSet<Integer>(); for (int n = circ.getFPCount() - 1; n >= 0; n--) { int code = circ.getFP(n).hashCode; if (folding > 0) code &= AND_BITS; hashset.add(code); } // sums the corresponding contributor for each hash code generated from the molecule; note that if the // molecule generates hash codes not originally in the model, they are discarded (i.e. 0 contribution) double val = 0; for (int h : hashset) { Double c = contribs.get(h); if (c != null) val += c; } return val; }
/**
 * Returns the set bits of the bit fingerprint computed for the molecule by a new CircularFingerprinter that is
 * constructed with the arguments (6, 1024) and has stereo perception switched on.
 *
 * @param mol molecule to fingerprint
 * @return result of getSetbits() on the computed bit fingerprint
 * @throws CDKException propagated from the fingerprint calculation
 */
private synchronized int[] getCircularFP(IAtomContainer mol) throws CDKException {
    final CircularFingerprinter fpr = new CircularFingerprinter(6, 1024);
    fpr.setPerceiveStereo(true);
    return fpr.getBitFingerprint(mol).getSetbits();
}
/**
 * Size of the fingerprint, as reported by a freshly created ECFP4 circular fingerprinter.
 *
 * @return the value of getSize() for a CircularFingerprinter built with CLASS_ECFP4
 */
public static int getFingerprinterSize() {
    final CircularFingerprinter ecfp4 = new CircularFingerprinter(CLASS_ECFP4);
    return ecfp4.getSize();
}
/**
 * Gathers every feature currently held by the circular fingerprinter whose hash code is a key of HASH_SET, and
 * returns them ordered by ascending hash code. This method is marked as deprecated.
 *
 * @return the matching features, sorted by hash code
 */
@Deprecated
public CircularFingerprinter.FP[] getRelevantFingerprintDetails() {
    final ArrayList<CircularFingerprinter.FP> relevant = new ArrayList<>();
    final int total = circularFingerprinter.getFPCount();
    for (int i = 0; i < total; i++) {
        final CircularFingerprinter.FP candidate = circularFingerprinter.getFP(i);
        if (!HASH_SET.containsKey(candidate.hashCode)) {
            continue;
        }
        relevant.add(candidate);
    }

    Collections.sort(relevant, new Comparator<CircularFingerprinter.FP>() {
        @Override
        public int compare(CircularFingerprinter.FP left, CircularFingerprinter.FP right) {
            return Integer.compare(left.hashCode, right.hashCode);
        }
    });

    final CircularFingerprinter.FP[] result = new CircularFingerprinter.FP[relevant.size()];
    return relevant.toArray(result);
}
/**
 * Checks that the bit fingerprint of the trivial molecule, computed with a default-constructed fingerprinter,
 * has exactly the expected bits set.
 */
@Test
public void testGetBitFingerprint() throws Exception {
    assert (trivialMol != null);

    CircularFingerprinter circ = new CircularFingerprinter();
    final BitSet actual = circ.getBitFingerprint(trivialMol).asBitSet();

    // the complete set of bit positions the fingerprint must contain
    final int[] REQUIRE_BITS = {19, 152, 293, 340, 439, 480, 507, 726, 762, 947, 993};
    final BitSet expected = new BitSet();
    for (int position : REQUIRE_BITS) {
        expected.set(position);
    }

    if (expected.equals(actual)) {
        return;
    }
    throw new CDKException("Got " + actual + ", wanted " + expected);
}
// Read the third molecule from the reader into a freshly built container.
IAtomContainer mol3 = mdlr.read(bldr.newAtomContainer());
CircularFingerprinter fpr = new CircularFingerprinter();
// With stereo perception enabled: mol1 and mol2 must give the same fingerprint, while mol3 must differ from mol2.
fpr.setPerceiveStereo(true);
Assert.assertThat(fpr.getFingerprint(mol1), is(fpr.getFingerprint(mol2)));
Assert.assertThat(fpr.getFingerprint(mol2), is(not(fpr.getFingerprint(mol3))));
// With stereo perception disabled: all three molecules are expected to give the same fingerprint.
fpr.setPerceiveStereo(false);
Assert.assertThat(fpr.getFingerprint(mol1), is(fpr.getFingerprint(mol2)));
Assert.assertThat(fpr.getFingerprint(mol2), is(fpr.getFingerprint(mol3)));
int a1 = mol.indexOf(bond.getBegin()), a2 = mol.indexOf(bond.getEnd()); if (amask[a1] && amask[a2]) { atomAdj[a1] = appendInteger(atomAdj[a1], a2); bondAdj[a1] = appendInteger(bondAdj[a1], n); atomAdj[a2] = appendInteger(atomAdj[a2], a1); bondAdj[a2] = appendInteger(bondAdj[a2], n); if (bond.getOrder() == IBond.Order.SINGLE) bondOrder[n] = 1; markRingBlocks(); if (ringBlock[n] > 0) { path[0] = n; recursiveRingFind(path, 1, rsz, ringBlock[n], rings); detectStrictAromaticity(); tetra[n] = rubricTetrahedral(n); } else { rubricTetrahedralsCdk();
/**
 * Checks that the count fingerprint of the trivial molecule contains exactly the expected hash codes, each with
 * the expected count.
 */
@Test
public void testGetCountFingerprint() throws Exception {
    assert (trivialMol != null);

    CircularFingerprinter circ = new CircularFingerprinter();
    ICountFingerprint counts = circ.getCountFingerprint(trivialMol);

    // flattened list of (hash, count) pairs
    final int[] ANSWER_KEY = {-414937772, 1, -1027418143, 1, 1627608083, 1, -868007456, 1, -1006701866, 1,
            -1059145289, 1, -801752141, 1, 790592664, 1, -289109509, 1, -1650154758, 1, 1286833445, 1};
    final int expectedBins = ANSWER_KEY.length >> 1;

    // a differing number of populated bins is already a mismatch; otherwise look up every bin in the answer key
    boolean mismatch = counts.numOfPopulatedbins() != expectedBins;
    for (int bin = 0; !mismatch && bin < counts.numOfPopulatedbins(); bin++) {
        final int gotHash = counts.getHash(bin);
        final int gotCount = counts.getCount(bin);

        boolean known = false;
        for (int pair = 0; pair < expectedBins; pair++) {
            final int keyPos = pair * 2;
            if (ANSWER_KEY[keyPos] != gotHash) {
                continue;
            }
            known = true;
            final int wantCount = ANSWER_KEY[keyPos + 1];
            if (gotCount != wantCount) {
                throw new CDKException("For hash " + gotHash + " got count " + gotCount + " but wanted "
                        + wantCount);
            }
        }

        // an unknown hash ends the scan via the loop condition
        mismatch = !known;
    }

    if (mismatch) {
        throw new CDKException("Hash values do not match.");
    }
}
/**
 * Checks the version description reported by an ECFP4 fingerprinter against the expected text, which embeds the
 * current CDK version.
 */
@Test
public void testVersion() {
    CircularFingerprinter fpr = new CircularFingerprinter(CircularFingerprinter.CLASS_ECFP4);
    final String prefix = "CDK-CircularFingerprinter/";
    final String suffix = " classType=ECFP4 perceiveStereochemistry=false";
    final String expected = prefix + CDK.getVersion() + suffix;
    final String actual = fpr.getVersionDescription();
    Assert.assertThat(actual, CoreMatchers.is(expected));
}
/**
 * Calculates the circular fingerprint for the given {@link IAtomContainer}, and <b>folds</b> the result into a
 * single bitset (see getSize()).
 *
 * @param mol IAtomContainer for which the fingerprint should be calculated.
 * @return the folded fingerprint
 * @throws CDKException propagated from the fingerprint calculation
 */
@Override
public IBitFingerprint getBitFingerprint(IAtomContainer mol) throws CDKException {
    calculate(mol);

    final BitSet folded = new BitSet(length);
    final int count = fplist.size();
    for (int idx = 0; idx < count; idx++) {
        // read the signed 32-bit hash as an unsigned value, so that the remainder below is never negative
        final long unsignedHash = fplist.get(idx).hashCode & 0xFFFFFFFFL;
        final int position = (int) (unsignedHash % length);
        folded.set(position);
    }
    return new BitSetFingerprint(folded);
}
/**
 * Creates a generator whose fingerprinter is a CircularFingerprinter constructed with CLASS_ECFP4.
 */
public FingerprintGenerator() {
    this.fingerprinter = new CircularFingerprinter(CLASS_ECFP4);
}
/**
 * Builds a bit fingerprint of size getSize() from the count fingerprint of the container: for every populated bin,
 * the hash is looked up in HASH_SET and, when the resulting index is non-negative and the bin count is positive,
 * the bit at that index is set.
 *
 * @param container molecule to fingerprint
 * @return the resulting bit fingerprint
 * @throws CDKException propagated from the count fingerprint calculation
 */
@Override
public IBitFingerprint getBitFingerprint(IAtomContainer container) throws CDKException {
    final BitSetFingerprint result = new BitSetFingerprint(getSize());
    final ICountFingerprint counts = circularFingerprinter.getCountFingerprint(container);

    final int bins = counts.numOfPopulatedbins();
    for (int bin = 0; bin < bins; bin++) {
        final int position = HASH_SET.get(counts.getHash(bin));
        // skip bins with a negative index or a count that is not positive
        if (position < 0 || counts.getCount(bin) <= 0) {
            continue;
        }
        result.set(position, true);
    }
    return result;
}
/**
 * Computes the fingerprint of a molecule as a BitSet. When the molecule has no 2D coordinates and is connected,
 * coordinates are generated first and the molecule returned by the structure diagram generator is fingerprinted
 * instead; a disconnected molecule is fingerprinted as given, after logging a debug message.
 *
 * @param mol molecule to fingerprint
 * @return the bit fingerprint as a BitSet
 * @throws CDKException propagated from layout or fingerprint calculation
 */
@Override
public synchronized BitSet getFingerprint(IAtomContainer mol) throws CDKException {
    IAtomContainer target = mol;

    if (!has2DCoordinates(target)) {
        StructureDiagramGenerator layout = new StructureDiagramGenerator();
        layout.setMolecule(target, true);
        if (!isConnected(target)) {
            LOGGER.debug("Disconnected components needs to be layout separately");
        } else {
            layout.generateCoordinates();
            target = layout.getMolecule();
        }
    }

    final IBitFingerprint fingerprint = fingerprinter.getBitFingerprint(target);
    return fingerprint.asBitSet();
}
private boolean determineDonor(int aidx) { // must have a hydrogen atom, either implicit or explicit if (hcount[aidx] == 0) return false; IAtom atom = mol.getAtom(aidx); final String el = atom.getSymbol(); if (el.equals("N") || el.equals("O")) { // tetrazoles do not donate if (tetrazole[aidx]) return false; // see if any of the neighbours is an oxide of some sort; this is grounds for disqualification, with the exception // of amides, which are consider nonacidic for (int n = 0; n < atomAdj[aidx].length; n++) if (isOxide[atomAdj[aidx][n]]) { if (!mol.getAtom(atomAdj[aidx][n]).getSymbol().equals("C") || !el.equals("N")) return false; } return true; } else if (el.equals("S")) { // any kind of adjacent double bond disqualifies -SH for (int n = 0; n < atomAdj[aidx].length; n++) if (hasDouble[atomAdj[aidx][n]]) return false; return true; } else if (el.equals("C")) { // terminal alkynes qualify for (int n = 0; n < bondAdj[aidx].length; n++) if (bondOrderBioType(bondAdj[aidx][n]) == 3) return true; return false; } return false; }
adj = appendInteger(adj, -1); xp[3] = 0; yp[3] = 0;
/**
 * Fingerprints a container holding a single H atom created via atom("H", +1, 0) with an FCFP2 fingerprinter and
 * expects the resulting bit fingerprint to have no bits set.
 */
@Test
public void protonsDontCauseNPE() throws Exception {
    IAtomContainer proton = new AtomContainer(1, 0, 0, 0);
    proton.addAtom(atom("H", +1, 0));

    CircularFingerprinter circ = new CircularFingerprinter(CircularFingerprinter.CLASS_FCFP2);
    final int bitsSet = circ.getBitFingerprint(proton).cardinality();

    assertThat(bitsSet, is(0));
}
/**
 * Returns an array with one slot per HASH_SET entry. Each feature currently held by the circular fingerprinter
 * whose hash code is a key of HASH_SET is stored at the index HASH_SET maps that hash code to; slots with no
 * matching feature stay null.
 *
 * @return the features arranged by their HASH_SET index
 */
public CircularFingerprinter.FP[] getFingerprintDetails() {
    final CircularFingerprinter.FP[] slots = new CircularFingerprinter.FP[HASH_SET.size()];

    final int total = circularFingerprinter.getFPCount();
    for (int i = 0; i < total; i++) {
        final CircularFingerprinter.FP feature = circularFingerprinter.getFP(i);
        if (!HASH_SET.containsKey(feature.hashCode)) {
            continue;
        }
        slots[HASH_SET.get(feature.hashCode)] = feature;
    }
    return slots;
}
calculate(mol);
private boolean determineNegative(int aidx) { IAtom atom = mol.getAtom(aidx); // consider formal ionic charge first final int chg = atom.getFormalCharge(); if (chg > 0) return false; if (chg < 0) { for (int n = 0; n < atomAdj[aidx].length; n++) if (mol.getAtom(atomAdj[aidx][n]).getFormalCharge() > 0) return false; return true; } final String el = atom.getSymbol(); // tetrazole nitrogens get negative charges if (tetrazole[aidx] && el.equals("N")) return true; // centres with an oxide and an -OH group qualify as negative if (isOxide[aidx] && (el.equals("C") || el.equals("S") || el.equals("P"))) { for (int n = 0; n < atomAdj[aidx].length; n++) if (bondOrderBioType(bondAdj[aidx][n]) == 1) { final int a = atomAdj[aidx][n]; if (mol.getAtom(a).getSymbol().equals("O") && hcount[a] > 0) return true; } } return false; }
if (mol == null || mol.getAtomCount() == 0) throw new CDKException("Molecule cannot be blank or null."); CircularFingerprinter circ = new CircularFingerprinter(classType); circ.setPerceiveStereo(optPerceiveStereo); circ.calculate(mol); for (int n = circ.getFPCount() - 1; n >= 0; n--) { int code = circ.getFP(n).hashCode; if (folding > 0) code &= AND_BITS; hashset.add(code);