From 840e210b3ae7b63f10036f4f50f83e0b24efc559 Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Fri, 28 Apr 2023 18:07:38 +0200 Subject: [PATCH] fix: CanonizerUtil with NOSTEREO yielded to a wrong result (#165) --- __tests__/canonizer.test.js | 62 +++++++ openchemlib | 2 +- scripts/openchemlib/classes.js | 5 - .../research/gwt/chemlib/com/Test.java | 35 ++++ .../com/actelion/research/chem/Canonizer.java | 135 +------------- .../actelion/research/chem/CanonizerUtil.java | 2 +- .../research/chem/ExtendedMolecule.java | 169 +++++++++++++++++- .../chem/conf/gen/ConformerGenerator.java | 36 ++-- .../chem/conf/gen/RigidFragmentProvider.java | 12 ++ .../chem/conf/so/ConformationRule.java | 2 +- .../conf/so/ConformationSelfOrganizer.java | 15 +- 11 files changed, 315 insertions(+), 160 deletions(-) create mode 100644 __tests__/canonizer.test.js create mode 100644 src/com/actelion/research/gwt/chemlib/com/Test.java diff --git a/__tests__/canonizer.test.js b/__tests__/canonizer.test.js new file mode 100644 index 00000000..c7045d41 --- /dev/null +++ b/__tests__/canonizer.test.js @@ -0,0 +1,62 @@ +'use strict'; + +const OCL = require('../core'); + +test('canonizer chiral', () => { + const molecule = OCL.Molecule.fromSmiles('C[C@H](Cl)CC'); + const idCode = molecule.getIDCode(); + + expect(idCode).toBe('gJPHADILuTb@'); + expect(OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.NORMAL)).toBe( + idCode, + ); + + expect( + OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.NOSTEREO), + ).toBe('gJPHADILuP@'); + + expect( + OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.BACKBONE), + ).toBe('gJPHADILuP@'); + + expect( + OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.TAUTOMER), + ).toBe('gJPHADILuTb@'); + + expect( + OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.NOSTEREO_TAUTOMER), + ).toBe('gJPHADILuP@'); + + molecule.stripStereoInformation(); + expect(molecule.getIDCode()).toBe('gJPHADILuP@'); +}); + +test('canonizer tautomer', () => { + const molecule = OCL.Molecule.fromSmiles('CC=C(O)CC'); + + const idCode = molecule.getIDCode(); + + expect(idCode).toBe('gGQ@@drsT@@'); + expect(OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.NORMAL)).toBe( + idCode, + ); + + expect( + OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.NOSTEREO), + ).toBe('gGQ@@drsT@@'); + + expect( + OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.BACKBONE), + ).toBe('gGQ@@druT@@'); + + expect( + OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.TAUTOMER), + ).toBe('gGQ@@druTAkabXVOtfBicxX~F@'); + + expect( + OCL.CanonizerUtil.getIDCode(molecule, OCL.CanonizerUtil.NOSTEREO_TAUTOMER), + ).toBe('gGQ@@druTAkabXVOtfBicxX~F@'); + + molecule.stripStereoInformation(); + expect(molecule.getIDCode()).toBe('gGQ@@drsT@@'); +}); diff --git a/openchemlib b/openchemlib index 1534a1b5..f7678684 160000 --- a/openchemlib +++ b/openchemlib @@ -1 +1 @@ -Subproject commit 1534a1b5d69ca091a0b6d7351e7dbe6b1d919c6b +Subproject commit f767868487998719be4dd09d52df499cd3d60721 diff --git a/scripts/openchemlib/classes.js b/scripts/openchemlib/classes.js index f19705c8..e8f8049a 100644 --- a/scripts/openchemlib/classes.js +++ b/scripts/openchemlib/classes.js @@ -515,11 +515,6 @@ function changeBaseConformer(code) { 'mFrequency[bondIndex] = rotatableBond[bondIndex].getDefaultFrequencies().clone();', 'short[] frequencies = rotatableBond[bondIndex].getDefaultFrequencies();\n mFrequency[bondIndex] = Arrays.copyOf(frequencies, frequencies.length);', ); - code = replaceChecked( - code, - '\n mTorsion[bondIndex] = rotatableBond[bondIndex].getDefaultTorsions().clone();', - '', - ); return code; } diff --git a/src/com/actelion/research/gwt/chemlib/com/Test.java b/src/com/actelion/research/gwt/chemlib/com/Test.java new file mode 100644 index 00000000..e54c203b --- /dev/null +++ b/src/com/actelion/research/gwt/chemlib/com/Test.java @@ -0,0 +1,35 @@ +// to run this code: bash buildOpenChemLib && java -classpath build com.Test + +package com; + +import java.io.IOException; +import java.util.TreeMap; + +import com.actelion.research.chem.Canonizer; +import com.actelion.research.chem.CanonizerUtil; +import com.actelion.research.chem.SmilesParser; +import com.actelion.research.chem.StereoMolecule; +import com.actelion.research.chem.contrib.HydrogenHandler; +import com.actelion.research.chem.contrib.DiastereotopicAtomID; +import com.actelion.research.chem.io.SDFileParser; +import java.io.File; + + +public class Test { + + public static void main(String[] args) throws Exception { + + SmilesParser sp = new SmilesParser(); + StereoMolecule mol = new StereoMolecule(); + sp.parse(mol, "CC"); + + System.out.println(mol.getIDCode()); + mol.stripStereoInformation(); + System.out.println(mol.getIDCode()); + System.out.println(CanonizerUtil.getIDCode(mol, CanonizerUtil.IDCODE_TYPE.NORMAL, false)); + System.out.println(CanonizerUtil.getIDCode(mol, CanonizerUtil.IDCODE_TYPE.NOSTEREO, false)); + System.out + .println(CanonizerUtil.getIDCode(mol, CanonizerUtil.IDCODE_TYPE.NOSTEREO_TAUTOMER, false)); + + } +} diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java index 84c675e5..74083fd8 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java @@ -284,141 +284,10 @@ private void canFindNitrogenQualifyingForParity() { continue; } - if (mMol.getAtomRingBondCount(atom) != 3) - continue; - - // For any neutral nitrogen with three ring bonds - // we find the smallest ring. If this is not larger than 7 members - // then we find that connBond, which does not belong to that ring. - // The we find the bridge size (atom count) to where it touches the smallest ring. - // We also find the path length from the touch point on the smallest ring back - // to the nitrogen atom. - int smallRingSize = mMol.getAtomRingSize(atom); - if (smallRingSize > 7) - continue; - - RingCollection ringSet = mMol.getRingSet(); - int smallRingNo = 0; - while (smallRingNo= RingCollection.MAX_SMALL_RING_COUNT - && smallRingNo == ringSet.getSize()) - continue; // very rare case, but found with wrongly highly bridged CSD entry JORFAZ - - int firstBridgeAtom = -1; - int firstBridgeBond = -1; - for (int i=0; i<3; i++) { - int connBond = mMol.getConnBond(atom, i); - if (!ringSet.isBondMember(smallRingNo, connBond)) { - firstBridgeAtom = mMol.getConnAtom(atom, i); - firstBridgeBond = connBond; - break; - } - } - - boolean[] neglectBond = new boolean[mMol.getBonds()]; - neglectBond[firstBridgeBond] = true; - int[] pathAtom = new int[11]; - int pathLength = mMol.getPath(pathAtom, firstBridgeAtom, atom, 10, neglectBond); - if (pathLength == -1) - continue; - - int bridgeAtomCount = 1; - while (!ringSet.isAtomMember(smallRingNo, pathAtom[bridgeAtomCount])) - bridgeAtomCount++; - - int bondCountToBridgeHead = pathLength - bridgeAtomCount; - - int bridgeHead = pathAtom[bridgeAtomCount]; - - if (smallRingSize == 6 && bondCountToBridgeHead == 2 && bridgeAtomCount == 3) { - if (mMol.getAtomRingBondCount(pathAtom[1]) >= 3) { - boolean isAdamantane = false; - int[] ringAtom = ringSet.getRingAtoms(smallRingNo); - for (int i=0; i<6; i++) { - if (atom == ringAtom[i]) { - int potentialOtherBridgeHeadIndex = ringSet.validateMemberIndex(smallRingNo, - (bridgeHead == ringAtom[ringSet.validateMemberIndex(smallRingNo, i+2)]) ? i-2 : i+2); - int potentialOtherBridgeHead = ringAtom[potentialOtherBridgeHeadIndex]; - if (mMol.getAtomRingBondCount(potentialOtherBridgeHead) >= 3 - && mMol.getPathLength(pathAtom[1], potentialOtherBridgeHead, 2, null) == 2) - isAdamantane = true; - break; - } - } - if (isAdamantane) { - mNitrogenQualifiesForParity[atom] = true; - continue; - } - } - } - - boolean bridgeHeadIsFlat = (mMol.getAtomPi(bridgeHead) == 1 - || mMol.isAromaticAtom(bridgeHead) - || mMol.isFlatNitrogen(bridgeHead)); - boolean bridgeHeadMayInvert = !bridgeHeadIsFlat - && mMol.getAtomicNo(bridgeHead) == 7 - && mMol.getAtomCharge(bridgeHead) != 1; - - if (bondCountToBridgeHead == 1) { - if (!bridgeHeadIsFlat && !bridgeHeadMayInvert && smallRingSize <= 4 && bridgeAtomCount <= 3) - mNitrogenQualifiesForParity[atom] = true; + if (mMol.isPyramidalBridgeHead(atom)) { + mNitrogenQualifiesForParity[atom] = true; continue; } - - switch (smallRingSize) { - // case 3 is fully handled - case 4: // must be bondCountToBridgeHead == 2 - if (!bridgeHeadIsFlat && !bridgeHeadMayInvert) { - if (bridgeAtomCount <= 4) - mNitrogenQualifiesForParity[atom] = true; - } - break; - case 5: // must be bondCountToBridgeHead == 2 - if (bridgeHeadMayInvert) { - if (bridgeAtomCount <= 3) - mNitrogenQualifiesForParity[atom] = true; - } - else if (!bridgeHeadIsFlat) { - if (bridgeAtomCount <= 4) - mNitrogenQualifiesForParity[atom] = true; - } - break; - case 6: - if (bondCountToBridgeHead == 2) { - if (bridgeHeadIsFlat) { - if (bridgeAtomCount <= 4) - mNitrogenQualifiesForParity[atom] = true; - } - else if (!bridgeHeadMayInvert) { - if (bridgeAtomCount <= 3) - mNitrogenQualifiesForParity[atom] = true; - } - } - else if (bondCountToBridgeHead == 3) { - if (bridgeHeadIsFlat) { - if (bridgeAtomCount <= 6) - mNitrogenQualifiesForParity[atom] = true; - } - else { - if (bridgeAtomCount <= 4) - mNitrogenQualifiesForParity[atom] = true; - } - } - break; - case 7: - if (bondCountToBridgeHead == 3) { - if (bridgeAtomCount <= 3) - mNitrogenQualifiesForParity[atom] = true; - } - break; - } } } } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/CanonizerUtil.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/CanonizerUtil.java index d4d0e05a..8a072264 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/CanonizerUtil.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/CanonizerUtil.java @@ -1 +1 @@ -/* * Copyright (c) 1997 - 2016 * Actelion Pharmaceuticals Ltd. * Gewerbestrasse 16 * CH-4123 Allschwil, Switzerland * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * 3. Neither the name of the the copyright holder nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ package com.actelion.research.chem; public class CanonizerUtil { public enum IDCODE_TYPE { NORMAL, NOSTEREO, BACKBONE, TAUTOMER, NOSTEREO_TAUTOMER } /** * Generates an IDCODE for a defined and simplified state of a given molecule. * Optionally the simplification and idcode generation are done for the largest * unconnected fragment rather than considering all molecule fragments. * Allowed simplification types are:
* - NORMAL (with complete stereo information)
* - NOSTEREO (without stereo information)
* - TAUTOMER (generic tautomer: tautomer regions marked as [cBondQFSingle | cBondQFDouble] plus atom label encoding count of pi electrons, deuterium and tritium)
* - NOSTEREO_TAUTOMER (combines NOSTEREO and TAUTOMER)
* - BACKBONE (without stereo information; all bonds are changed to single bonds)
* * @param mol source molecule to generate the IDCODE * @param type type of IDCODE requested * @param largestFragmentOnly * @return */ public static String getIDCode(StereoMolecule mol, IDCODE_TYPE type, boolean largestFragmentOnly) { switch (type) { case NORMAL: return getIDCode(mol, largestFragmentOnly); case NOSTEREO: return getIDCodeNoStereo(mol, largestFragmentOnly); case BACKBONE: return getIDCodeBackBone(mol, largestFragmentOnly); case TAUTOMER: return getIDCodeTautomer(mol, largestFragmentOnly); case NOSTEREO_TAUTOMER: return getIDCodeNoStereoTautomer(mol, largestFragmentOnly); default: break; } return null; } public static long getHash(StereoMolecule m, IDCODE_TYPE type, boolean largestFragmentOnly) { String code = getIDCode(m, type, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } public static long getNoStereoHash(StereoMolecule m, boolean largestFragmentOnly) { String code = getIDCodeNoStereo(m, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } public static long getTautomerHash(StereoMolecule m, boolean largestFragmentOnly) { String code = getIDCodeTautomer(m, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } public static long getNoStereoTautomerHash(StereoMolecule m, boolean largestFragmentOnly) { String code = getIDCodeNoStereoTautomer(m, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } public static long getBackboneHash(StereoMolecule m, boolean largestFragmentOnly) { String code = getIDCodeBackBone(m, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } private static String getIDCode(StereoMolecule mol, boolean largestFragmentOnly) { try { if (!largestFragmentOnly) return new Canonizer(mol).getIDCode(); mol = mol.getCompactCopy(); mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); return new Canonizer(mol).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCode() " + e); return null; } } private static String getIDCodeNoStereo(StereoMolecule mol, boolean largestFragmentOnly) { try { mol = mol.getCompactCopy(); if (largestFragmentOnly) { mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); } mol.stripStereoInformation(); return new SimpleCanonizer(mol).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCodeNoStereo() " + e); return null; } } private static String getIDCodeTautomer(StereoMolecule mol, boolean largestFragmentOnly) { try { if (largestFragmentOnly) { mol = mol.getCompactCopy(); mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); } StereoMolecule genericTautomer = new TautomerHelper(mol).createGenericTautomer(); return new Canonizer(genericTautomer, Canonizer.ENCODE_ATOM_CUSTOM_LABELS).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCodeTautomer() " + e); return null; } } private static String getIDCodeNoStereoTautomer(StereoMolecule mol, boolean largestFragmentOnly) { try { mol = mol.getCompactCopy(); if (largestFragmentOnly) { mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); } mol.stripStereoInformation(); StereoMolecule genericTautomer = new TautomerHelper(mol).createGenericTautomer(); return new Canonizer(genericTautomer, Canonizer.ENCODE_ATOM_CUSTOM_LABELS).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCodeNoStereoTautomer() " + e); return null; } } private static String getIDCodeBackBone(StereoMolecule mol, boolean largestFragmentOnly) { try { mol = mol.getCompactCopy(); if (largestFragmentOnly) { mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); } mol.stripStereoInformation(); int b = mol.getAllBonds(); for (int i = 0; i < b; i++) mol.setBondType(i, Molecule.cBondTypeSingle); return new SimpleCanonizer(mol).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCodeBackBone() " + e); return null; } } /** * 64 bit hash, derived from numerical recipes * Will move this class later to dd_core. */ public static class StrongHasher { private static final long[] byteTable; private static final long HSTART = 0xBB40E64DA205B064L; private static final long HMULT = 7664345821815920749L; static { byteTable = new long[256]; long h = 0x544B2FBACAAF1684L; for (int i = 0; i < 256; i++) { for (int j = 0; j < 31; j++) { h = (h >>> 7) ^ h; h = (h << 11) ^ h; h = (h >>> 10) ^ h; } byteTable[i] = h; } } public static long hash(String cs) { if (cs == null) return 1L; long h = HSTART; final long hmult = HMULT; final long[] ht = byteTable; for (int i = cs.length()-1; i >= 0; i--) { char ch = cs.charAt(i); h = (h * hmult) ^ ht[ch & 0xff]; h = (h * hmult) ^ ht[(ch >>> 8) & 0xff]; } return h; } } } \ No newline at end of file +/* * Copyright (c) 1997 - 2016 * Actelion Pharmaceuticals Ltd. * Gewerbestrasse 16 * CH-4123 Allschwil, Switzerland * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * 3. Neither the name of the the copyright holder nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ package com.actelion.research.chem; public class CanonizerUtil { public enum IDCODE_TYPE { NORMAL, NOSTEREO, BACKBONE, TAUTOMER, NOSTEREO_TAUTOMER } /** * Generates an IDCODE for a defined and simplified state of a given molecule. * Optionally the simplification and idcode generation are done for the largest * unconnected fragment rather than considering all molecule fragments. * Allowed simplification types are:
* - NORMAL (with complete stereo information)
* - NOSTEREO (without stereo information)
* - TAUTOMER (generic tautomer: tautomer regions marked as [cBondQFSingle | cBondQFDouble] plus atom label encoding count of pi electrons, deuterium and tritium)
* - NOSTEREO_TAUTOMER (combines NOSTEREO and TAUTOMER)
* - BACKBONE (without stereo information; all bonds are changed to single bonds)
* * @param mol source molecule to generate the IDCODE * @param type type of IDCODE requested * @param largestFragmentOnly * @return */ public static String getIDCode(StereoMolecule mol, IDCODE_TYPE type, boolean largestFragmentOnly) { switch (type) { case NORMAL: return getIDCode(mol, largestFragmentOnly); case NOSTEREO: return getIDCodeNoStereo(mol, largestFragmentOnly); case BACKBONE: return getIDCodeBackBone(mol, largestFragmentOnly); case TAUTOMER: return getIDCodeTautomer(mol, largestFragmentOnly); case NOSTEREO_TAUTOMER: return getIDCodeNoStereoTautomer(mol, largestFragmentOnly); default: break; } return null; } public static long getHash(StereoMolecule m, IDCODE_TYPE type, boolean largestFragmentOnly) { String code = getIDCode(m, type, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } public static long getNoStereoHash(StereoMolecule m, boolean largestFragmentOnly) { String code = getIDCodeNoStereo(m, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } public static long getTautomerHash(StereoMolecule m, boolean largestFragmentOnly) { String code = getIDCodeTautomer(m, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } public static long getNoStereoTautomerHash(StereoMolecule m, boolean largestFragmentOnly) { String code = getIDCodeNoStereoTautomer(m, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } public static long getBackboneHash(StereoMolecule m, boolean largestFragmentOnly) { String code = getIDCodeBackBone(m, largestFragmentOnly); return code != null ? StrongHasher.hash(code) : 0; } private static String getIDCode(StereoMolecule mol, boolean largestFragmentOnly) { try { if (!largestFragmentOnly) return new Canonizer(mol).getIDCode(); mol = mol.getCompactCopy(); mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); return new Canonizer(mol).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCode() " + e); return null; } } private static String getIDCodeNoStereo(StereoMolecule mol, boolean largestFragmentOnly) { try { mol = mol.getCompactCopy(); if (largestFragmentOnly) { mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); } mol.stripStereoInformation(); return new Canonizer(mol).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCodeNoStereo() " + e); return null; } } private static String getIDCodeTautomer(StereoMolecule mol, boolean largestFragmentOnly) { try { if (largestFragmentOnly) { mol = mol.getCompactCopy(); mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); } StereoMolecule genericTautomer = new TautomerHelper(mol).createGenericTautomer(); return new Canonizer(genericTautomer, Canonizer.ENCODE_ATOM_CUSTOM_LABELS).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCodeTautomer() " + e); return null; } } private static String getIDCodeNoStereoTautomer(StereoMolecule mol, boolean largestFragmentOnly) { try { mol = mol.getCompactCopy(); if (largestFragmentOnly) { mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); } mol.stripStereoInformation(); StereoMolecule genericTautomer = new TautomerHelper(mol).createGenericTautomer(); return new Canonizer(genericTautomer, Canonizer.ENCODE_ATOM_CUSTOM_LABELS).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCodeNoStereoTautomer() " + e); return null; } } private static String getIDCodeBackBone(StereoMolecule mol, boolean largestFragmentOnly) { try { mol = mol.getCompactCopy(); if (largestFragmentOnly) { mol.stripSmallFragments(true); MoleculeNeutralizer.neutralizeChargedMolecule(mol); } mol.stripStereoInformation(); int b = mol.getAllBonds(); for (int i = 0; i < b; i++) mol.setBondType(i, Molecule.cBondTypeSingle); return new Canonizer(mol).getIDCode(); } catch (Throwable e) { System.err.println("WARN: getIDCodeBackBone() " + e); return null; } } /** * 64 bit hash, derived from numerical recipes * Will move this class later to dd_core. */ public static class StrongHasher { private static final long[] byteTable; private static final long HSTART = 0xBB40E64DA205B064L; private static final long HMULT = 7664345821815920749L; static { byteTable = new long[256]; long h = 0x544B2FBACAAF1684L; for (int i = 0; i < 256; i++) { for (int j = 0; j < 31; j++) { h = (h >>> 7) ^ h; h = (h << 11) ^ h; h = (h >>> 10) ^ h; } byteTable[i] = h; } } public static long hash(String cs) { if (cs == null) return 1L; long h = HSTART; final long hmult = HMULT; final long[] ht = byteTable; for (int i = cs.length()-1; i >= 0; i--) { char ch = cs.charAt(i); h = (h * hmult) ^ ht[ch & 0xff]; h = (h * hmult) ^ ht[(ch >>> 8) & 0xff]; } return h; } } } \ No newline at end of file diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/ExtendedMolecule.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/ExtendedMolecule.java index 815f3162..75cb2ca3 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/ExtendedMolecule.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/ExtendedMolecule.java @@ -2525,14 +2525,23 @@ public boolean isCentralAlleneAtom(int atom) { * @return */ public boolean isFlatNitrogen(int atom) { - if (mAtomicNo[atom] != 7) + return isFlatNitrogen(atom, true); + } + + private boolean isFlatNitrogen(int atom, boolean checkForPyramidalBridgeHead) { + if (mAtomicNo[atom] != 7 || mConnAtoms[atom] == 4) return false; if (isAromaticAtom(atom) || mPi[atom] != 0 || (mAtomQueryFeatures[atom] & cAtomQFFlatNitrogen) != 0) return true; if (mAtomCharge[atom] == 1) return false; + + for (int i=0; i + * For this, after checking for at least 3 ring bonds, we find the smallest ring that atom is member of. + * If this ring is not larger than 7 members, then we check for all neighbours (1 or 2) + * that do not belong to the smallest ring:
+ * - We find the bridge size (atom count) to where it touches the smallest ring.
+ * - We also find the path length from the touch point on the smallest ring back to atom.
+ * - Using heuristics we decide with this information, whether the ring system prevents a flat geometry of atom. + * @param atom + * @return true, if the attached ring system prevents a flat geometry of atom + */ + public boolean isPyramidalBridgeHead(int atom) { + if (isAromaticAtom(atom) + || mPi[atom] != 0 + || (mAtomQueryFeatures[atom] & cAtomQFFlatNitrogen) != 0 + || getAtomRingBondCount(atom) < 3) + return false; + + int smallRingSize = getAtomRingSize(atom); + if (smallRingSize > 7) + return false; + + int smallRingNo = 0; + while (smallRingNo= RingCollection.MAX_SMALL_RING_COUNT + && smallRingNo == mRingSet.getSize()) + return false; // very rare case, but found with wrongly highly bridged CSD entry JORFAZ + + for (int i=0; i= 3) { + int[] ringAtom = mRingSet.getRingAtoms(ringNo); + for (int i=0; i<6; i++) { + if (atom == ringAtom[i]) { + int potentialOtherBridgeHeadIndex = mRingSet.validateMemberIndex(ringNo, + (bridgeHead == ringAtom[mRingSet.validateMemberIndex(ringNo, i+2)]) ? i-2 : i+2); + int potentialOtherBridgeHead = ringAtom[potentialOtherBridgeHeadIndex]; + if (getAtomRingBondCount(potentialOtherBridgeHead) >= 3 + && getPathLength(pathAtom[1], potentialOtherBridgeHead, 2, null) == 2) + return true; // adamantane like + break; + } + } + } + } + + boolean bridgeHeadIsFlat = (getAtomPi(bridgeHead) == 1 + || isAromaticAtom(bridgeHead) + || isFlatNitrogen(bridgeHead, false)); + boolean bridgeHeadMayInvert = !bridgeHeadIsFlat + && getAtomicNo(bridgeHead) == 7 + && getAtomCharge(bridgeHead) != 1; + + if (bondCountToBridgeHead == 1 + && !bridgeHeadIsFlat + && !bridgeHeadMayInvert + && smallestRingSize <= 4 + && bridgeAtomCount <= 3) + return true; + + switch (smallestRingSize) { + // case 3 is fully handled + case 4: // must be bondCountToBridgeHead == 2 + if (!bridgeHeadIsFlat + && !bridgeHeadMayInvert + && bridgeAtomCount <= 4) + return true; + break; + case 5: // must be bondCountToBridgeHead == 2 + if (bridgeHeadMayInvert) { + if (bridgeAtomCount <= 3) + return true; + } + else if (!bridgeHeadIsFlat) { + if (bridgeAtomCount <= 4) + return true; + } + break; + case 6: + if (bondCountToBridgeHead == 2) { + if (bridgeHeadIsFlat) { + if (bridgeAtomCount <= 4) + return true; + } + else if (!bridgeHeadMayInvert) { + if (bridgeAtomCount <= 3) + return true; + } + } + else if (bondCountToBridgeHead == 3) { + if (bridgeHeadIsFlat) { + if (bridgeAtomCount <= 6) + return true; + } + else { + if (bridgeAtomCount <= 4) + return true; + } + } + break; + case 7: + if (bondCountToBridgeHead == 3) { + if (bridgeAtomCount <= 3) + return true; + } + break; + } + return false; + } + /** * Checks whether bond is an axial chirality bond of the BINAP type. * Condition: non-aromatic, non-small-ring (<= 7 members) single bond diff --git a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/ConformerGenerator.java b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/ConformerGenerator.java index 16e1f4c3..f2f0be65 100644 --- a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/ConformerGenerator.java +++ b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/ConformerGenerator.java @@ -290,11 +290,12 @@ public ConformerSetDiagnostics getDiagnostics() { /** * If the conformer generation must be stopped from outside, for instance because of user - * intervention or because of a defined timeout, the provide a ThreadMaster with this method. + * intervention or because of a defined timeout, then provide a ThreadMaster with this method. * @param tm */ public void setThreadMaster(ThreadMaster tm) { mThreadMaster = tm; + mRigidFragmentProvider.setThreadMaster(tm); } /** @@ -338,6 +339,7 @@ public Conformer getOneConformer(StereoMolecule mol) { ConformationSelfOrganizer sampler = new ConformationSelfOrganizer(mol, true); Conformer conformer = sampler.generateOneConformer(mRandomSeed); separateDisconnectedFragments(conformer); + conformer.setName("SO#1"); return conformer; } } @@ -508,7 +510,6 @@ public Conformer getNextConformer(TorsionSet[] torsionSetHolder) { SelfOrganizedConformer conformer = mSelfOrganizer.getNextConformer(); if (conformer != null) { separateDisconnectedFragments(conformer); - mAllConformerCount++; mReturnedConformerCount++; conformer.setName("SO#"+(++mAllConformerCount)); return conformer; @@ -552,7 +553,8 @@ public Conformer getNextConformer(TorsionSet[] torsionSetHolder) { if (mTorsionSet != null || mReturnedConformerCount != 0) continue; - if (mUseSelfOrganizerIfAllFails) { + if (mTorsionSet == null && mUseSelfOrganizerIfAllFails) { + // We couldn't create torsion strategy based conformers: switch to self organizer! mSelfOrganizer = new ConformationSelfOrganizer(mMolecule, true); mSelfOrganizer.setThreadMaster(mThreadMaster); mSelfOrganizer.initializeConformers(mRandomSeed, -1); @@ -571,17 +573,29 @@ public Conformer getNextConformer(TorsionSet[] torsionSetHolder) { mIsFinished = true; // we are finished with conformers } - separateDisconnectedFragments(mTorsionSet.getConformer()); - mTorsionSet.setUsed(); - mReturnedConformerCount++; + if (mTorsionSet != null) { + separateDisconnectedFragments(mTorsionSet.getConformer()); + mTorsionSet.setUsed(); + mReturnedConformerCount++; - if(torsionSetHolder != null) - torsionSetHolder[0] = new TorsionSet(mTorsionSet); + if(torsionSetHolder != null) + torsionSetHolder[0] = new TorsionSet(mTorsionSet); - if (mIsDiagnosticsMode) - mDiagnostics.get(mTorsionSet).setSuccess(true); + if (mIsDiagnosticsMode) + mDiagnostics.get(mTorsionSet).setSuccess(true); - return mTorsionSet.getConformer(); + return mTorsionSet.getConformer(); + } +// This poses the danger to produce highly strained unrealistic conformers, e.g. for impossible compounds... +// else if (mReturnedConformerCount == 0) { +// // We couldn't create torsion strategy based conformers: try creating one conformer with self organizer! +// ConformationSelfOrganizer sampler = new ConformationSelfOrganizer(mMolecule, true); +// Conformer conformer = sampler.generateOneConformer(mRandomSeed); +// separateDisconnectedFragments(conformer); +// mReturnedConformerCount++; +// conformer.setName("SO#1"); +// return conformer; +// } } return null; diff --git a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/RigidFragmentProvider.java b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/RigidFragmentProvider.java index eb159f2b..6920683e 100644 --- a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/RigidFragmentProvider.java +++ b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/RigidFragmentProvider.java @@ -28,6 +28,7 @@ package org.openmolecules.chem.conf.gen; +import com.actelion.research.calc.ThreadMaster; import com.actelion.research.chem.Canonizer; import com.actelion.research.chem.Coordinates; import com.actelion.research.chem.Molecule; @@ -88,6 +89,7 @@ public class RigidFragmentProvider { private long mRandomSeed; private boolean mOptimizeFragments; private RigidFragmentCache mCache; + private ThreadMaster mThreadMaster; public RigidFragmentProvider(long randomSeed, RigidFragmentCache cache, boolean optimizeRigidFragments) { mRandomSeed = randomSeed; @@ -97,6 +99,15 @@ public RigidFragmentProvider(long randomSeed, RigidFragmentCache cache, boolean forceFieldInitialize(); } + /** + * If the conformer generation must be stopped from outside, for instance because of user + * intervention or because of a defined timeout, then provide a ThreadMaster with this method. + * @param tm + */ + public void setThreadMaster(ThreadMaster tm) { + mThreadMaster = tm; + } + public void setCache(RigidFragmentCache cache) { mCache = cache; } @@ -271,6 +282,7 @@ public RigidFragment createFragment(StereoMolecule mol, int[] fragmentNo, int fr } ConformationSelfOrganizer selfOrganizer = new ConformationSelfOrganizer(fragment, true); + selfOrganizer.setThreadMaster(mThreadMaster); selfOrganizer.initializeConformers(mRandomSeed, MAX_CONFORMERS); // Generate multiple low constraint conformers diff --git a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/ConformationRule.java b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/ConformationRule.java index ce66568a..2c34cd2c 100644 --- a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/ConformationRule.java +++ b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/ConformationRule.java @@ -231,7 +231,7 @@ protected static void rotateAtom(Conformer conformer, int atom, Coordinates p, C * @param atom * @param cog receives the center of gravity * @param n receives normal vector of plane nearest to all atoms - * @param coords receives original atom coordinates translated towards center of gravity [atom count][3] + * @param coords receives original atom coordinates minus the center of gravity [atom count][3] */ public static void calculateNearestPlane(Conformer conformer, int[] atom, Coordinates cog, Coordinates n, double[][] coords) { for (int i=0; i 2) { + for (int j=0; j