From 70edd49cc5b7d45ac0e7060ad81f3281fdc4cef4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Zasso?= Date: Wed, 10 Apr 2024 15:11:45 +0200 Subject: [PATCH] feat: update OCL to v2024.4.0 (#197) --- __tests__/SmilesParser.js | 3 +- __tests__/molfileAndAtomMapNo.test.js | 30 +- openchemlib | 2 +- scripts/openchemlib/classes.js | 22 +- .../research/chem/AbstractDepictor.java | 10 +- .../com/actelion/research/chem/Canonizer.java | 69 +- .../research/chem/ExtendedMolecule.java | 71 +- .../chem/ExtendedMoleculeFunctions.java | 32 +- .../com/actelion/research/chem/Molecule.java | 48 +- .../research/chem/MoleculeStandardizer.java | 3 +- .../actelion/research/chem/SmilesParser.java | 657 ++++++++++++------ .../research/chem/io/CompoundFileHelper.java | 6 +- .../chem/io/CompoundTableConstants.java | 1 + .../research/chem/mcs/BondVector2IdCode.java | 90 +-- .../chem/reaction/ReactionEncoder.java | 21 +- .../editor/AtomQueryFeatureDialogBuilder.java | 40 +- .../layout/TableLayoutConstraints.java | 8 +- .../conf/gen/ConformerSetDiagnostics.java | 39 +- .../conf/so/ConformationSelfOrganizer.java | 3 +- .../chem/conf/so/TetrahedralStereoRule.java | 29 +- .../gwt/jre/java/io/BufferedWriter.java | 4 + .../research/gwt/jre/java/lang/Character.java | 30 +- .../research/gwt/minimal/JSSmilesParser.java | 17 +- types.d.ts | 9 +- 24 files changed, 814 insertions(+), 430 deletions(-) diff --git a/__tests__/SmilesParser.js b/__tests__/SmilesParser.js index c8f08075..3be26ee0 100644 --- a/__tests__/SmilesParser.js +++ b/__tests__/SmilesParser.js @@ -65,11 +65,10 @@ it('should optionally skip coordinate templates', () => { it('should optionally make hydrogens explicit', () => { const smiles = '[CH4]'; const molecule = new Molecule(0, 0); - const parserWithoutExplicitH = new SmilesParser({ smartsMode: 'smarts' }); + const parserWithoutExplicitH = new SmilesParser(); parserWithoutExplicitH.parseMolecule(smiles, { molecule }); expect(molecule.getAllAtoms()).toBe(1); const parserWithExplicitH = new SmilesParser({ - smartsMode: 'smarts', makeHydrogenExplicit: true, }); parserWithExplicitH.parseMolecule(smiles, { molecule }); diff --git a/__tests__/molfileAndAtomMapNo.test.js b/__tests__/molfileAndAtomMapNo.test.js index 5a7a1512..7ab8e43d 100644 --- a/__tests__/molfileAndAtomMapNo.test.js +++ b/__tests__/molfileAndAtomMapNo.test.js @@ -4,24 +4,24 @@ const { readFileSync } = require('fs'); const { Molecule } = require('../minimal'); - - test('molfile with atomMapNo', () => { - const molfile = readFileSync( - `${__dirname}/data/atomMapNo.mol`, - 'utf8', - ); + const molfile = readFileSync(`${__dirname}/data/atomMapNo.mol`, 'utf8'); const molecule = Molecule.fromMolfile(molfile); const newMolfile = molecule.toMolfile(); - const atomMapNo = newMolfile.split(/\r?\n/).filter((line) => line.match(/ [OCH] /)) - .map(line => line.replace(/.* ([OCH]) .*(.) {2}0 {2}0$/, '$1 $2')); - expect(atomMapNo).toStrictEqual(['O 5', 'C 1', 'C 3', 'C 4', 'H 2']) + const atomMapNo = newMolfile + .split(/\r?\n/) + .filter((line) => line.match(/ [OCH] /)) + // eslint-disable-next-line prefer-named-capture-group + .map((line) => line.replace(/.* ([OCH]) .*(.) {2}0 {2}0$/, '$1 $2')); + expect(atomMapNo).toStrictEqual(['O 5', 'C 1', 'C 3', 'C 4', 'H 2']); const svg = molecule.toSVG(300, 200); - const mapNos = svg.split(/\r?\n/).filter((line) => line.includes('data-atom-map')).map(line => line.replace(/.*atom-map-no="(.).*/, '$1')); - expect(mapNos).toStrictEqual(["5", "1", "3", "4", "2"]) - - - -}) \ No newline at end of file + const mapNos = svg + .split(/\r?\n/) + .filter((line) => line.includes('data-atom-map')) + .map((line) => + line.replace(/.*atom-map-no="(?.).*/, '$'), + ); + expect(mapNos).toStrictEqual(['5', '1', '3', '4', '2']); +}); diff --git a/openchemlib b/openchemlib index 8b4a0c3c..1c3edb12 160000 --- a/openchemlib +++ b/openchemlib @@ -1 +1 @@ -Subproject commit 8b4a0c3c77606bc0817300fa8c5bc608a9ebfdec +Subproject commit 1c3edb1264f14575430c6c57504c9ee3cb644809 diff --git a/scripts/openchemlib/classes.js b/scripts/openchemlib/classes.js index b51dac70..4bd13053 100644 --- a/scripts/openchemlib/classes.js +++ b/scripts/openchemlib/classes.js @@ -28,6 +28,10 @@ const changed = [ '@org/openmolecules/chem/conf/gen/ConformerGenerator', changeConformerGenerator, ], + [ + '@org/openmolecules/chem/conf/so/ConformationSelfOrganizer', + changeConformationSelfOrganizer, + ], [ '@org/openmolecules/chem/conf/so/SelfOrganizedConformer', changeSelfOrganizedConformer, @@ -499,7 +503,10 @@ function changeBondLengthSet(code) { } function changeConformerSetDiagnostics(code) { - code = code.replace(methodRegExp('writeEliminationRuleFile'), ''); + code = code.replaceAll( + /BufferedWriter writer = new BufferedWriter.*/g, + 'BufferedWriter writer = new BufferedWriter();', + ); return code; } @@ -659,3 +666,16 @@ function fixCompoundFileHelper(code) { code = code.replaceAll('file.getName()', '""'); return code; } + +function changeConformationSelfOrganizer(code) { + code = code.replace('import java.io.FileOutputStream;\n', ''); + code = code.replace( + 'import java.io.OutputStreamWriter;\nimport java.nio.charset.StandardCharsets;\n', + '', + ); + code = code.replace( + /mDWWriter = new BufferedWriter.*/, + 'mDWWriter = new BufferedWriter();', + ); + return code; +} diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/AbstractDepictor.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/AbstractDepictor.java index e64bfeb5..868b932f 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/AbstractDepictor.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/AbstractDepictor.java @@ -1727,6 +1727,8 @@ else if (hydrogens == Molecule.cAtomQFNot3Hydrogen) isoStr = append(isoStr, "h<3"); else if (hydrogens == Molecule.cAtomQFNot2Hydrogen+Molecule.cAtomQFNot3Hydrogen) isoStr = append(isoStr, "h<2"); + else if (hydrogens == Molecule.cAtomQFNot0Hydrogen+Molecule.cAtomQFNot3Hydrogen) + isoStr = append(isoStr, "h1-2"); } if ((queryFeatures & Molecule.cAtomQFCharge) != 0) { long charge = (queryFeatures & Molecule.cAtomQFCharge); @@ -1766,6 +1768,12 @@ else if (neighbours == Molecule.cAtomQFNot0Neighbours+Molecule.cAtomQFNot1Neighb isoStr = append(isoStr, "n>2"); else if (neighbours == (Molecule.cAtomQFNeighbours & ~Molecule.cAtomQFNot4Neighbours)) isoStr = append(isoStr, "n>3"); + else if (neighbours == (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot3Neighbours | Molecule.cAtomQFNot4Neighbours)) + isoStr = append(isoStr, "n1-2"); + else if (neighbours == (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot4Neighbours)) + isoStr = append(isoStr, "n1-3"); + else if (neighbours == (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot1Neighbour | Molecule.cAtomQFNot4Neighbours)) + isoStr = append(isoStr, "n2-3"); } if ((queryFeatures & Molecule.cAtomQFENeighbours) != 0) { long eNegNeighbours = (queryFeatures & Molecule.cAtomQFENeighbours); @@ -1791,7 +1799,7 @@ else if (eNegNeighbours == (Molecule.cAtomQFNot0ENeighbours | Molecule.cAtomQFNo isoStr = append(isoStr, "e>2"); else if (eNegNeighbours == (Molecule.cAtomQFENeighbours & ~Molecule.cAtomQFNot4ENeighbours)) isoStr = append(isoStr, "e>3"); - else if (eNegNeighbours == (Molecule.cAtomQFNot0ENeighbours | Molecule.cAtomQFNot3ENeighbours | Molecule.cAtomQFNot3ENeighbours)) + else if (eNegNeighbours == (Molecule.cAtomQFNot0ENeighbours | Molecule.cAtomQFNot3ENeighbours | Molecule.cAtomQFNot4ENeighbours)) isoStr = append(isoStr, "e1-2"); else if (eNegNeighbours == (Molecule.cAtomQFNot0ENeighbours | Molecule.cAtomQFNot4ENeighbours)) isoStr = append(isoStr, "e1-3"); diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java index 386b9775..0838b226 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/Canonizer.java @@ -50,8 +50,8 @@ public class Canonizer { // The following options CONSIDER_DIASTEREOTOPICITY, CONSIDER_ENANTIOTOPICITY // and CONSIDER_STEREOHETEROTOPICITY have no influence on the idcode, - // i.e. the idcode is the same whether or not one of these options is - // used. However, if you require e.g. a pro-E atom always to appear + // i.e. the idcode is the same whether one of these options is used. + // However, if you require e.g. a pro-E atom always to appear // before the pro-Z, e.g. because you can distinguish them from the // encoded coordinates, then you need to use one of these options. // Of course, pro-R or pro-S can only be assigned, if one of the bonds @@ -928,7 +928,7 @@ private void canRecursivelyFindCIPParities() { while ((mNoOfRanks < mMol.getAtoms()) && paritiesFound) { for (int atom=0; atom=atom || atomMap.length>=mConnAtom[atom][i]) -// System.out.println("mConnAtom.length:"+mConnAtom.length+" atom:"+atom+" atomMap.length:"+atomMap.length+" i:"+i+" mConnAtom[atom][i]:"+mConnAtom[atom][i]+" mAtoms:"+mAtoms+" mAllAtoms:"+mAllAtoms); - if (atomMap.length>mConnAtom[atom][i] - && atomMap[mConnAtom[atom][i]] != -1) + if (bondMap[mConnBond[atom][i]] != -1) remainingNeighbours++; else if (mConnBondOrder[atom][i] == 1 - && isStereoBond(mConnBond[atom][i]) - && mBondAtom[0][mConnBond[atom][i]] == atom) { + && isStereoBond(mConnBond[atom][i]) + && mBondAtom[0][mConnBond[atom][i]] == atom) { lostStereoBond = mConnBond[atom][i]; lostAtom = mConnAtom[atom][i]; + } } - } - if (lostStereoBond != -1 - && remainingNeighbours >= 3) { + if (lostStereoBond != -1 && remainingNeighbours >= 3) { double angle = getBondAngle(atom, lostAtom); double minAngleDif = 10.0; int minConnBond = -1; for (int i=0; imConnAtom[atom][i] - && atomMap[mConnAtom[atom][i]] != -1) { + && (!isStereoBond(mConnBond[atom][i]) || mBondAtom[0][mConnBond[atom][i]] == atom) + && bondMap[mConnBond[atom][i]] != -1) { double angleDif = Math.abs(getAngleDif(angle, getBondAngle(atom, mConnAtom[atom][i]))); if (minAngleDif > angleDif) { minAngleDif = angleDif; minConnBond = mConnBond[atom][i]; + } } } - } if (minConnBond != -1) { int destBond = bondMap[minConnBond]; destMol.setBondType(destBond, mBondType[minConnBond] == cBondTypeUp ? cBondTypeDown : cBondTypeUp); @@ -629,7 +621,7 @@ public int getNonHydrogenNeighbourCount(int atom) { public int getExcludedNeighbourCount(int atom) { int count = 0; for (int i=0; imAllAtoms) + return null; + + Coordinates c = new Coordinates(); + for (int atom:atomIndex) + c.add(mCoordinates[atom]); + return c.scale(1.0 / atomIndex.length); + } + + public double getAtomX(int atom) { return mCoordinates[atom].x; } @@ -2604,10 +2626,26 @@ public double getBondLength(int bond) { * Returns the formal bond order. Delocalized rings have alternating single and double * bonds, which are returned as such. Bonds that are explicitly marked as being delocalized * are returned as 1. Dative bonds are returned as 0. + * In fragments with at least one bond type set as bond query feature, the smallest + * non-zero order of allowed bonds is returned. * @param bond * @return formal bond order 0 (dative bonds), 1, 2, 3, 4, or 5 */ public int getBondOrder(int bond) { + if (mIsFragment && (mBondQueryFeatures[bond] & cBondQFBondTypes) != 0) { + if ((mBondQueryFeatures[bond] & (cBondQFSingle | cBondQFDelocalized)) != 0) + return 1; + if ((mBondQueryFeatures[bond] & cBondQFDouble) != 0) + return 2; + if ((mBondQueryFeatures[bond] & cBondQFTriple) != 0) + return 3; + if ((mBondQueryFeatures[bond] & cBondQFQuadruple) != 0) + return 4; + if ((mBondQueryFeatures[bond] & cBondQFQuintuple) != 0) + return 5; + if ((mBondQueryFeatures[bond] & cBondQFMetalLigand) != 0) + return 0; + } switch (mBondType[bond] & cBondTypeMaskSimple) { case cBondTypeSingle: case cBondTypeDelocalized: return 1; @@ -2796,11 +2834,11 @@ public int getMaxBonds() { * @param v */ public void setMaxBonds(int v) { - mBondAtom[0] = (int[])copyOf(mBondAtom[0], v); - mBondAtom[1] = (int[])copyOf(mBondAtom[1], v); - mBondType = (int[])copyOf(mBondType, v); - mBondFlags = (int[])copyOf(mBondFlags, v); - mBondQueryFeatures = (int[])copyOf(mBondQueryFeatures, v); + mBondAtom[0] = copyOf(mBondAtom[0], v); + mBondAtom[1] = copyOf(mBondAtom[1], v); + mBondType = copyOf(mBondType, v); + mBondFlags = copyOf(mBondFlags, v); + mBondQueryFeatures = copyOf(mBondQueryFeatures, v); mMaxBonds = v; } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/MoleculeStandardizer.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/MoleculeStandardizer.java index aacda27b..519ef4f2 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/MoleculeStandardizer.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/MoleculeStandardizer.java @@ -36,7 +36,7 @@ public static StereoMolecule getStandardized(String idcode, String coordinates, /** * Standardises a molecule and fixes some structural errors. - * Typically this is done before canonicalization. + * Typically, this is done before canonicalization. * It includes the following changes:
* - different forms of functional groups (e.g. nitro) are normalized to a preferred one
* - charged acidic or basic atoms are (de-)protonated to remove charges and neutralize the molecule, if possible.
@@ -77,7 +77,6 @@ public static void standardize(StereoMolecule mol, int mode) throws Exception { * sulfonic acid, phosphoric acid, phenolic oxygen. Means: negative charges are removed. * Adds Na+ or Cl- for final charge equilibration. * @param mol - * @return true if an atom was added. */ private static void repairAndUnify(StereoMolecule mol) { mol.ensureHelperArrays(Molecule.cHelperRings); diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SmilesParser.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SmilesParser.java index 8441fcfc..c0d1e2cd 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SmilesParser.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/SmilesParser.java @@ -54,6 +54,8 @@ public class SmilesParser { public static final int MODE_SKIP_COORDINATE_TEMPLATES = 4; public static final int MODE_MAKE_HYDROGEN_EXPLICIT = 8; public static final int MODE_NO_CACTUS_SYNTAX = 16; // if not set, then some CACTVS SMARTS extensions will be recognized and translated as close as possible + public static final int MODE_SINGLE_DOT_SEPARATOR = 32; // CONSIDER single dots '.' (rather than '..') as moelcule separator when parsing reactions + public static final int MODE_CREATE_SMARTS_WARNING = 64; private static final int INITIAL_CONNECTIONS = 16; private static final int MAX_CONNECTIONS = 100; // largest allowed one in SMILES is 99 @@ -68,10 +70,12 @@ public class SmilesParser { private StereoMolecule mMol; private boolean[] mIsAromaticBond; - private int mAromaticAtoms,mAromaticBonds,mSmartsMode,mCoordinateMode; + private int mAromaticAtoms,mAromaticBonds,mCoordinateMode; + private final int mSmartsMode,mMode; private long mRandomSeed; - private boolean mCreateSmartsWarnings,mMakeHydrogenExplicit,mAllowCactvs; + private final boolean mCreateSmartsWarnings,mMakeHydrogenExplicit,mAllowCactvs,mSingleDotSeparator; private StringBuilder mSmartsWarningBuffer; + private boolean mSmartsFeatureFound; /** * Creates a new SmilesParser that doesn't allow SMARTS features to be present in @@ -79,7 +83,7 @@ public class SmilesParser { * molecules is never set. */ public SmilesParser() { - this(SMARTS_MODE_IS_SMILES, false); + this(SMARTS_MODE_IS_SMILES); } /** @@ -88,15 +92,17 @@ public SmilesParser() { * an exception. If smartsMode is SMARTS_MODE_IS_SMARTS, then the input string is considered * a SMARTS, e.g. 'CC' is taken as fragment of two non-aromatic carbon atoms connected by a * single bond and without any implicit hydrogen atoms. If smartsMode is SMARTS_MODE_IS_GUESS, - * then - * molecules is never set. + * then the molecule is considered a substructure if any SMARTS features are discovered. + * Depending on whether SMARTS features are found, created molecules have the fragment flag set + * or not set. * @param mode one of SMARTS_MODE... and optionally other mode flags - * @param createSmartsWarnings if true, then getSmartsWarning() may be used after parsing a SMILES or SMARTS */ - public SmilesParser(int mode, boolean createSmartsWarnings) { + public SmilesParser(int mode) { + mMode = mode & ~SMARTS_MODE_MASK; mSmartsMode = mode & SMARTS_MODE_MASK; mAllowCactvs = (mode & MODE_NO_CACTUS_SYNTAX) == 0; - mCreateSmartsWarnings = createSmartsWarnings; + mSingleDotSeparator = (mode & MODE_SINGLE_DOT_SEPARATOR) != 0; + mCreateSmartsWarnings = (mode & MODE_CREATE_SMARTS_WARNING) != 0; mMakeHydrogenExplicit = ((mode & MODE_MAKE_HYDROGEN_EXPLICIT) != 0); mCoordinateMode = CoordinateInventor.MODE_DEFAULT; if ((mode & MODE_SKIP_COORDINATE_TEMPLATES) != 0) @@ -140,6 +146,10 @@ public StereoMolecule parseMolecule(byte[] smiles) { } public static boolean isReactionSmiles(byte[] smiles) { + return isReactionSmiles(smiles, null); + } + + public static boolean isReactionSmiles(byte[] smiles, int[] catalystCountHolder) { int count = 0; int index = -1; @@ -152,6 +162,16 @@ public static boolean isReactionSmiles(byte[] smiles) { break; count++; + + if (catalystCountHolder != null && count == 1) { + catalystCountHolder[0] = 0; + if (index+1 start) { - StereoMolecule reactant = new StereoMolecule(); - parse(reactant, smiles, start, i); - rxn.addReactant(reactant); - } - start = i + 2; - } - } - StereoMolecule reactants = new StereoMolecule(); - parse(reactants, smiles, start, index1); - rxn.addReactant(reactants); - - if (index2 - index1 > 1) { - start = index1+1; - for (int i=start; i start) { - StereoMolecule catalyst = new StereoMolecule(); - parse(catalyst, smiles, start, i); - rxn.addCatalyst(catalyst); + int part = 0; + int index = 0; + int closingGroupBracketIndex = -1; + while (index < smiles.length) { + while (index start) { - StereoMolecule product = new StereoMolecule(); - parse(product, smiles, start, i); - rxn.addProduct(product); - } - start = i + 2; + int end = index; + while (end') { + index++; + part++; } } - StereoMolecule products = new StereoMolecule(); - parse(products, smiles, start, smiles.length); - rxn.addProduct(products); return rxn; } @@ -270,6 +302,7 @@ public void parse(StereoMolecule mol, byte[] smiles, int position, int endIndex, mSmartsWarningBuffer.setLength(0); mAromaticAtoms = 0; + mSmartsFeatureFound = false; boolean allowSmarts = (mSmartsMode != SMARTS_MODE_IS_SMILES); TreeMap parityMap = null; @@ -288,13 +321,15 @@ public void parse(StereoMolecule mol, byte[] smiles, int position, int endIndex, int fromAtom = -1; boolean squareBracketOpen = false; boolean isDoubleDigit = false; - boolean smartsFeatureFound = false; + boolean hasLeadingBracket = false; int bracketLevel = 0; int bondType = Molecule.cBondTypeSingle; int bondQueryFeatures = 0; SortedList atomList = new SortedList<>(); SmilesRange range = new SmilesRange(smiles); AtomInfo atomInfo = new AtomInfo(); + ArrayList recursiveGroupList = new ArrayList<>(); + int[] skipCount = new int[1]; while (smiles[position] <= 32) position++; @@ -307,7 +342,7 @@ public void parse(StereoMolecule mol, byte[] smiles, int position, int endIndex, || theChar == '*' || theChar == '?' || (theChar == '!' && allowSmarts && squareBracketOpen) - || (theChar == '#' && allowSmarts && squareBracketOpen)) { // TODO not-lists + || (theChar == '#' && allowSmarts && squareBracketOpen)) { int atomicNo = -1; int charge = 0; int mapNo = 0; @@ -315,7 +350,7 @@ public void parse(StereoMolecule mol, byte[] smiles, int position, int endIndex, int explicitHydrogens = HYDROGEN_ANY; boolean parityFound = false; boolean isClockwise = false; - int atomQueryFeatures = 0; // translated from obvious SMARTS features + long atomQueryFeatures = 0; // translated from obvious SMARTS features if (squareBracketOpen) { if (theChar == '*') { atomicNo = 6; @@ -325,47 +360,61 @@ else if (theChar == '?') { atomicNo = 0; } else { - boolean isNot = (theChar == '!'); - if (isNot) { - smartsFeatureFound = true; + boolean isNotList = (theChar == '!'); + if (isNotList) { + mSmartsFeatureFound = true; atomQueryFeatures |= Molecule.cAtomQFAny; position++; } - // Handle this before checking for atom symbols. R (ring count) takes precendence to R1 - R16 (substituent pseudo label) + // Handle this before checking for atom symbols, because R (ring count) takes precedence to R1 - R16 (substituent pseudo label) if (smiles[position-1] == 'R' && allowSmarts && (Character.isDigit(smiles[position]) || (mAllowCactvs && smiles[position] == '{'))) { atomicNo = 6; atomQueryFeatures |= Molecule.cAtomQFAny; position--; - if (isNot) + if (isNotList) position--; } else { - getGetInBracketAtomInfo(smiles, position-1, endIndex, atomInfo); + if (!parseAtomInBrackets(smiles, position-1, endIndex, atomInfo)) + throw new Exception("SmilesParser: Unexpected character in atom definition:'"+((char)smiles[position-1])+"' position:"+(position-1)); + atomicNo = atomInfo.atomicNo; position += atomInfo.labelLength - 1; - explicitHydrogens = HYDROGEN_IMPLICIT_ZERO; + if (mSmartsMode != SMARTS_MODE_IS_SMARTS) + explicitHydrogens = HYDROGEN_IMPLICIT_ZERO; // in case we have SMILES; neglected, if we process a SMARTS, which we may learn later when hitting a query feature - // If we have a comma after the first atom label, then we need to parse a list. + // If we have a comma after the first atom label, then we need to parse a (positive) atom list. // In this case we also have to set aromaticity query features from upper and lower case symbols. - if (allowSmarts && (smiles[position] == ',' || isNot)) { + if (allowSmarts && (smiles[position] == ',' || isNotList)) { boolean mayBeAromatic = atomInfo.mayBeAromatic; boolean mayBeAliphatic = atomInfo.mayBeAliphatic; int start = position - atomInfo.labelLength; while (start < endIndex) { - getGetInBracketAtomInfo(smiles, start, endIndex, atomInfo); - atomList.add(atomInfo.atomicNo); - mayBeAromatic |= atomInfo.mayBeAromatic; - mayBeAliphatic |= atomInfo.mayBeAliphatic; + if (!parseAtomInBrackets(smiles, start, endIndex, atomInfo)) { + if (!isNotList) + throw new Exception("SmilesParser: Unexpected character in atom list:'"+((char)smiles[start])+"'. Position:"+start); + // a not-list may be followed by ';' and another atom condition, while a positive list must not end with ',' + break; + } + + if (atomInfo.atomicNo == 1) { + if (!isNotList) // in not-lists we are allowed to remove hydrogens! + throw new Exception("SmilesParser: Hydrogen is not supported in positive atom lists:'"+new String(Arrays.copyOfRange(smiles, start, endIndex))+"'. Position:"+start); + } + else { + atomList.add(atomInfo.atomicNo); + mayBeAromatic |= atomInfo.mayBeAromatic; + mayBeAliphatic |= atomInfo.mayBeAliphatic; + } start += atomInfo.labelLength; - if (smiles[start] != ',') + if (smiles[start] != (isNotList ? ';' : ',')) // positive list: ',' e.g. "N,O"; negative lists: ';' e.g. "!#7;!#8" + break; + if (isNotList && smiles[start+1] != '!') break; start++; - if (smiles[start] == '!') { - if (!isNot) - throw new Exception("SmilesParser: inconsistent '!' in atom list."); + if (smiles[start] == '!') start++; - } } if (atomList.size() > 1) { @@ -402,7 +451,7 @@ else if (!mayBeAromatic) } if (smiles[position] == '[') - throw new Exception("SmilesParser: nested square brackets found"); + throw new Exception("SmilesParser: nested square brackets found. Position:"+position); if (smiles[position] == ']') { position++; @@ -410,34 +459,10 @@ else if (!mayBeAromatic) continue; } - if (smiles[position] == '+') { - charge = 1; - position++; - while (smiles[position] == '+') { - charge++; - position++; - } - if (charge == 1 && Character.isDigit(smiles[position])) { - charge = smiles[position] - '0'; - position++; - } - // explicit charge=0 is usually meant as query feature - if (charge == 0) - atomQueryFeatures |= Molecule.cAtomQFNotChargeNeg | Molecule.cAtomQFNotChargePos; - continue; - } + charge = parseCharge(smiles, position, skipCount); + if (skipCount[0] != 0) { + position += skipCount[0]; - if (smiles[position] == '-') { - charge = -1; - position++; - while (smiles[position] == '-') { - charge--; - position++; - } - if (charge == -1 && Character.isDigit(smiles[position])) { - charge = '0' - smiles[position]; - position++; - } // explicit charge=0 is usually meant as query feature if (charge == 0) atomQueryFeatures |= Molecule.cAtomQFNotChargeNeg | Molecule.cAtomQFNotChargePos; @@ -450,9 +475,8 @@ else if (!mayBeAromatic) if (smiles[position] == 'H') { position++; - position += range.parse(smiles, position, 1, 1); - explicitHydrogens = range.min; - int flags = 0; + position += range.parse(position, 1, 1); + long flags = 0; if (range.min <= 0 && range.max >= 0) flags |= Molecule.cAtomQFNot0Hydrogen; if (range.min <= 1 && range.max >= 1) @@ -480,7 +504,7 @@ else if (!mayBeAromatic) if (smiles[position] == 'D') { // non-H-neighbours position++; - position += range.parse(smiles, position, 1, 1); + position += range.parse(position, 1, 1); long flags = 0; if (range.min <= 0 && range.max >= 0) flags |= Molecule.cAtomQFNot0Neighbours; @@ -494,16 +518,21 @@ else if (!mayBeAromatic) flags |= Molecule.cAtomQFNot4Neighbours; if (flags != 0) { - if (!isNot) + if (isNot) + atomQueryFeatures |= flags; + else if ((atomQueryFeatures & Molecule.cAtomQFNeighbours) != 0) + atomQueryFeatures &= ~flags; + else { flags = flags ^ Molecule.cAtomQFNeighbours; - atomQueryFeatures |= flags; + atomQueryFeatures |= flags; + } } continue; } if (smiles[position] == 'z' && mAllowCactvs) { // electro-negative neighbour count (CACTVS extension) position++; - position += range.parse(smiles, position, 1, 4); + position += range.parse(position, 1, 4); long flags = 0; if (range.min <= 0 && range.max >= 0) flags |= Molecule.cAtomQFNot0ENeighbours; @@ -517,21 +546,38 @@ else if (!mayBeAromatic) flags |= Molecule.cAtomQFNot4ENeighbours; if (flags != 0) { - if (!isNot) + if (isNot) + atomQueryFeatures |= flags; + else if ((atomQueryFeatures & Molecule.cAtomQFENeighbours) != 0) + atomQueryFeatures &= ~flags; + else { flags = flags ^ Molecule.cAtomQFENeighbours; - atomQueryFeatures |= flags; + atomQueryFeatures |= flags; + } } continue; } if (smiles[position] == 'X') { // neighbour count including implicit hydrogens position++; - position += range.parse(smiles, position, 1, 1); + position += range.parse(position, 1, 1); byte[] valences = Molecule.cAtomValence[atomicNo]; if (valences == null) continue; int valence = valences[0]; + + // if we have a locally defined charge, we update the valance properly + int localCharge = parseCharge(smiles, position, skipCount); + if (skipCount[0] != 0) { + if (Molecule.isAtomicNoElectronegative(atomicNo)) + valence += localCharge; + else if (atomicNo == 6) + valence -= Math.abs(localCharge); + else + valence -= localCharge; + } + long flags = 0; // we convert into pi-electron count using standard valence if (valence-range.min <= 0 && valence-range.max >= 0) @@ -542,9 +588,14 @@ else if (!mayBeAromatic) flags |= Molecule.cAtomQFNot2PiElectrons; if (flags != 0) { - if (!isNot) + if (isNot) + atomQueryFeatures |= flags; + else if ((atomQueryFeatures & Molecule.cAtomQFPiElectrons) != 0) + atomQueryFeatures &= ~flags; + else { flags = flags ^ Molecule.cAtomQFPiElectrons; - atomQueryFeatures |= flags; + atomQueryFeatures |= flags; + } } continue; } @@ -557,7 +608,7 @@ else if (!mayBeAromatic) if (smiles[position] == 'R') { position++; - position += range.parse(smiles, position, 1, 3); + position += range.parse(position, 1, 3); long flags = 0; if (range.min <= 0 && range.max >= 0) flags |= Molecule.cAtomQFNotChain; @@ -571,16 +622,21 @@ else if (!mayBeAromatic) smartsWarning((isNot?"!R":"R")+range.max); if (flags != 0) { - if (!isNot) + if (isNot) + atomQueryFeatures |= flags; + else if ((atomQueryFeatures & Molecule.cAtomQFRingState) != 0) + atomQueryFeatures &= ~flags; + else { flags = flags ^ Molecule.cAtomQFRingState; - atomQueryFeatures |= flags; + atomQueryFeatures |= flags; + } } continue; } if (smiles[position] == 'r') { position++; - position += range.parse(smiles, position, 1, 1); + position += range.parse(position, 1, 1); if (range.isDefault) { if (isNot) atomQueryFeatures |= Molecule.cBondQFRingState & ~Molecule.cAtomQFNotChain; @@ -603,7 +659,7 @@ else if (!range.isRange()) if (smiles[position] == 'v') { position++; - position += range.parse(smiles, position, 1, 1); + position += range.parse(position, 1, 1); int valence = range.min; @@ -617,13 +673,27 @@ else if (!range.isRange()) continue; } + if (smiles[position] == '$') { // recursive SMARTS +// if (!isNot) +// throw new Exception("SmilesParser: non-negated recursive SMARTS relating to preceding atom are not supported yet. Position:"+position); + + position += parseRecursiveGroup(smiles, position, recursiveGroupList); + continue; + } + if (allowSmarts && (smiles[position] == ';' || smiles[position] == '&')) { // we interpret high and low precendence AND the same way - smartsFeatureFound = true; + mSmartsFeatureFound = true; position++; continue; } - throw new Exception("SmilesParser: unexpected character inside brackets: '"+(char)smiles[position]+"'"); + if (allowSmarts && (smiles[position] == ',' && isRepeatedAllowedORFeature(smiles, position, skipCount))) { // we allow OR-logic for some query options if they have the same type + mSmartsFeatureFound = true; + position += skipCount[0] + 1; + continue; + } + + throw new Exception("SmilesParser: unexpected character inside brackets: '"+(char)smiles[position]+"', position:"+position); } } else if (theChar == '*') { @@ -637,7 +707,7 @@ else if ((theChar == 'A' || theChar == 'a') && allowSmarts) { atomicNo = 6; atomQueryFeatures |= Molecule.cAtomQFAny; atomQueryFeatures |= theChar == 'A' ? Molecule.cAtomQFNotAromatic : Molecule.cAtomQFAromatic; - smartsFeatureFound = true; + mSmartsFeatureFound = true; } else { switch (Character.toUpperCase(theChar)) { @@ -683,14 +753,14 @@ else if ((theChar == 'A' || theChar == 'a') && allowSmarts) { /////////////////////////////////////////////////////////////////////////////// if (atomicNo == -1 && theChar != '?') - throw new Exception("SmilesParser: unknown element label found"); + throw new Exception("SmilesParser: unknown element label found. Position:"+(position-1)); int atom = mMol.addAtom(atomicNo); // this may be a hydrogen, if defined as [H] mMol.setAtomCharge(atom, charge); mMol.setAtomMapNo(atom, mapNo, false); mMol.setAtomAbnormalValence(atom, abnormalValence); if (atomQueryFeatures != 0) { - smartsFeatureFound = true; + mSmartsFeatureFound = true; if ((atomQueryFeatures & Molecule.cAtomQFAromatic) != 0) { atomQueryFeatures &= ~Molecule.cAtomQFAromatic; mMol.setAtomMarker(atom, true); @@ -702,7 +772,7 @@ else if ((theChar == 'A' || theChar == 'a') && allowSmarts) { mMol.setAtomQueryFeature(atom, atomQueryFeatures, true); } if (atomList.size() != 0) { - smartsFeatureFound = true; + mSmartsFeatureFound = true; int[] list = new int[atomList.size()]; for (int i=0; i') + else if ((theChar == '-' && smiles[position] == '>') || (theChar == '<' && smiles[position] == '-')) { excludedBonds |= Molecule.cBondTypeMetalLigand; position++; @@ -798,7 +889,7 @@ else if (theChar == '$') else if (theChar == ':') excludedBonds |= Molecule.cBondQFDelocalized; else - throw new Exception("SmilesParser: bond symbol '"+theChar+"' not allowed after '!'."); + throw new Exception("SmilesParser: bond symbol '"+theChar+"' not allowed after '!'. Position:"+(position-1)); } else { if (theChar == '@') @@ -812,7 +903,7 @@ else if (theChar == '$') else if (theChar == ':') bondType = Molecule.cBondTypeDelocalized; else if (theChar == '~') - bondQueryFeatures |= Molecule.cBondQFBondTypes; + bondQueryFeatures |= Molecule.cBondQFSingle | Molecule.cBondQFDouble | Molecule.cBondQFTriple | Molecule.cBondQFDelocalized; else if (theChar == '/') { if (readStereoFeatures) bondType = Molecule.cBondTypeUp; // encode slash temporarily in bondType @@ -895,7 +986,7 @@ else if ((theChar == '-' && smiles[position] == '>') } if (number >= ringClosureAtom.length) { if (number >=MAX_CONNECTIONS) - throw new Exception("SmilesParser: ringClosureAtom number out of range"); + throw new Exception("SmilesParser: ringClosureAtom number out of range: "+number); int oldSize = ringClosureAtom.length; int newSize = ringClosureAtom.length; @@ -939,7 +1030,7 @@ else if (bondType == Molecule.cBondTypeDown) if (ringClosureBondQueryFeatures[number] != 0) bondQueryFeatures = ringClosureBondQueryFeatures[number]; if (bondQueryFeatures != 0) { - smartsFeatureFound = true; + mSmartsFeatureFound = true; mMol.setBondQueryFeature(bond, ringClosureBondQueryFeatures[number], true); } ringClosureAtom[number] = -1; // for number re-usage @@ -951,12 +1042,15 @@ else if (bondType == Molecule.cBondTypeDown) } if (theChar == '+') { - throw new Exception("SmilesParser: '+' found outside brackets"); + throw new Exception("SmilesParser: '+' found outside brackets. Position:"+(position-1)); } if (theChar == '(') { - if (baseAtom[bracketLevel] == -1) - throw new Exception("Smiles with leading parenthesis are not supported"); + if (baseAtom[bracketLevel] == -1) { + // Leading '(' are superfluous and not good style, but we allow and ignore them including their closing counterparts + hasLeadingBracket = true; + continue; + } bracketLevel++; if (baseAtom.length == bracketLevel) baseAtom = Arrays.copyOf(baseAtom, baseAtom.length + BRACKET_LEVELS); @@ -965,6 +1059,13 @@ else if (bondType == Molecule.cBondTypeDown) } if (theChar == ')') { + if (bracketLevel == 0) { + if (!hasLeadingBracket) + throw new Exception("SmilesParser: Closing ')' without opening counterpart. Position:"+(position-1)); + baseAtom[0] = -1; + hasLeadingBracket = false; // we allow for a new leading '(', e.g. after '.' + continue; + } bracketLevel--; continue; } @@ -975,7 +1076,7 @@ else if (bondType == Molecule.cBondTypeDown) } if (theChar == ']') { - throw new Exception("SmilesParser: closing bracket at unexpected position"); + throw new Exception("SmilesParser: closing bracket at unexpected position:"+(position-1)); } if (theChar == '%') { @@ -992,7 +1093,7 @@ else if (bondType == Molecule.cBondTypeDown) continue; }*/ - throw new Exception("SmilesParser: unexpected character outside brackets: '"+theChar+"'"); + throw new Exception("SmilesParser: unexpected character outside brackets: '"+theChar+"' position:"+(position-1)); } // Check for unsatisfied open bonds @@ -1000,7 +1101,7 @@ else if (bondType == Molecule.cBondTypeDown) throw new Exception("SmilesParser: dangling open bond"); for (int rca:ringClosureAtom) if (rca != -1) - throw new Exception("SmilesParser: dangling ring closure"); + throw new Exception("SmilesParser: dangling ring closure."); int[] handleHydrogenAtomMap = mMol.getHandleHydrogenMap(); @@ -1012,23 +1113,28 @@ else if (bondType == Molecule.cBondTypeDown) if (mMol.getAtomCustomLabel(atom) != null) { // if we have the exact number of hydrogens int explicitHydrogen = mMol.getAtomCustomLabelBytes(atom)[0]; - if (smartsFeatureFound || mSmartsMode == SMARTS_MODE_IS_SMARTS) { - if (mMakeHydrogenExplicit) { - for (int i=0; i 0 && Character.isLetter(smiles[index1-1])) { + index1--; + skipCount[0]++; + } + + int index2 = commaPosition + 1; + while (Character.isLetter(smiles[index1])) { + if (smiles.length <= index2 || smiles[index1] != smiles[index2]) + return false; + index1++; + index2++; + } + return true; + } + + private boolean parseAtomInBrackets(byte[] smiles, int position, int endIndex, AtomInfo info) throws Exception { info.mayBeAromatic = true; info.mayBeAliphatic = true; if (smiles[position] == '#') { position++; + mSmartsFeatureFound = true; info.atomicNo = 0; info.labelLength = 1; while (position < endIndex @@ -1147,20 +1332,25 @@ private void getGetInBracketAtomInfo(byte[] smiles, int position, int endIndex, position++; } if (info.atomicNo == 0 || info.atomicNo >= Molecule.cAtomLabel.length) - throw new Exception("SmilesParser: Atomic number out of range."); + throw new Exception("SmilesParser: Atomic number out of range. position:"+(position-1)); + return true; } - else if (smiles[position] >= 'A' && smiles[position] <= 'Z') { + + if (smiles[position] >= 'A' && smiles[position] <= 'Z') { info.labelLength = (smiles[position+1] >= 'a' && smiles[position+1] <= 'z') ? 2 : 1; info.atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position, info.labelLength, StandardCharsets.UTF_8)); info.mayBeAromatic = false; + return true; } - else if (smiles[position] >= 'a' && smiles[position] <= 'z') { + + if (smiles[position] >= 'a' && smiles[position] <= 'z') { info.labelLength = (smiles[position+1] >= 'a' && smiles[position+1] <= 'z') ? 2 : 1; info.atomicNo = Molecule.getAtomicNoFromLabel(new String(smiles, position, info.labelLength, StandardCharsets.UTF_8)); info.mayBeAliphatic = false; + return true; } - else - throw new Exception("SmilesParser: Unexpected character within brackets:'"+((char)smiles[position])+"'"); + + return false; } private int bondSymbolToQueryFeature(char symbol) { @@ -1182,7 +1372,35 @@ private void smartsWarning(String feature) { } } - private void locateAromaticDoubleBonds(boolean allowSmartsFeatures) throws Exception { + private int parseRecursiveGroup(byte[] smiles, int dollarIndex, ArrayList groupList) throws Exception { + if (smiles[dollarIndex+1] != '(') + throw new Exception("SmilesParser: '$' for recursive SMARTS must be followed by '('. position:"+dollarIndex); + + int openBrackets = 1; + int endIndex = dollarIndex+2; + while (endIndex < smiles.length && openBrackets > 0) { + if (smiles[endIndex] == '(') + openBrackets++; + else if (smiles[endIndex] == ')') + openBrackets--; + endIndex++; + } + + if (openBrackets > 0) + throw new Exception("SmilesParser: Missing closing ')' for recursive SMARTS. '('-position:"+(dollarIndex+1)); + + StereoMolecule group = new StereoMolecule(16, 16); + new SmilesParser(mMode | mSmartsMode).parse(group, smiles, dollarIndex+2, endIndex-1); + groupList.add(group); + + if (smiles[dollarIndex-1] == '!') + for (int atom=0; atom=0) System.arraycopy(mIsAromaticBond, 0, isAromaticBond, 0, mMol.getBonds()); // Some Smiles contain 'aromatic' rings with atoms not being compatible // with a PI-bond. These include: tertiary non-charged nitrogen, [nH], @@ -1273,22 +1489,22 @@ private void locateAromaticDoubleBonds(boolean allowSmartsFeatures) throws Excep for (int ring=0; ring 1 && Character.isLetterOrDigit(smiles[firstLetter-1])) + firstLetter--; + while (smiles[pos] == ',') { + boolean lettersMatch = true; + int letterCount = position-firstLetter; + for (int i=0; i val) + min = val; + else if (max < val) + max = val; + } + return pos - position; } @@ -1951,7 +2196,7 @@ public int parse(byte[] smiles, int position, int defaultMin, int defaultMax) { max = defaultMax; isDefault = true; return 0; - } + } public boolean isSingle() { return max == min; @@ -1966,9 +2211,9 @@ public String toString() { } private int parseInt() { - int num = smi[pos++] - '0'; - if (Character.isDigit(smi[pos])) - num = 10 * num + (smi[pos++] - '0'); + int num = smiles[pos++] - '0'; + if (Character.isDigit(smiles[pos])) + num = 10 * num + (smiles[pos++] - '0'); return num; } } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundFileHelper.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundFileHelper.java index a32b1b83..46cbff17 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundFileHelper.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundFileHelper.java @@ -38,10 +38,8 @@ import com.actelion.research.chem.StereoMolecule; import com.actelion.research.chem.reaction.Reaction; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; +import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; public abstract class CompoundFileHelper { diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundTableConstants.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundTableConstants.java index 45af4174..2f8f11de 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundTableConstants.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/io/CompoundTableConstants.java @@ -159,6 +159,7 @@ public interface CompoundTableConstants { String cColumnPropertyLookupFilterRemoveMinus = "lookupFilterRemoveMinus"; String cColumnPropertyLookupEncode = "lookupEncode"; String cColumnPropertyLookupDetailURL = "lookupDetailURL"; + String cColumnPropertyCategorySpecificLookup = "catSpecificLookup"; String cColumnPropertyLaunchCount = "launchCount"; String cColumnPropertyLaunchName = "launchName"; String cColumnPropertyLaunchCommand = "launchCommand"; diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/BondVector2IdCode.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/BondVector2IdCode.java index b0d6b24b..6787ab86 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/BondVector2IdCode.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/mcs/BondVector2IdCode.java @@ -46,24 +46,17 @@ public class BondVector2IdCode { private StereoMolecule mol; - private List liRingSets; public BondVector2IdCode(StereoMolecule mol) { this.mol = mol; liRingSets = new ArrayList(); - RingCollection rc = mol.getRingSet(); - int rings = rc.getSize(); - for (int i = 0; i < rings; i++) { - int [] arrIndexBnd = rc.getRingBonds(i); - liRingSets.add(arrIndexBnd); - } } @@ -75,18 +68,13 @@ public BondVector2IdCode(StereoMolecule mol) { public boolean containsFragmentOpenRing(IBitArray fragDefByBonds){ boolean openRing = false; - for(int [] arrIndexBnd : liRingSets){ - int ccOverlap=0; - for (int i = 0; i < arrIndexBnd.length; i++) { - if(fragDefByBonds.isBitSet(arrIndexBnd[i])){ ccOverlap++; } } - if((ccOverlap > 0) && ccOverlap < arrIndexBnd.length) { openRing = true; break; @@ -94,79 +82,50 @@ public boolean containsFragmentOpenRing(IBitArray fragDefByBonds){ } return openRing; - } - public String getFragmentIdCode(IBitArray fragDefByBonds){ StereoMolecule frag = convert(fragDefByBonds, false); - Canonizer can = new Canonizer(frag); - String idcode = can.getIDCode(); - return idcode; } - public Fragment getFragment(IBitArray fragDefByBonds){ StereoMolecule frag = convert(fragDefByBonds, false); - Canonizer can = new Canonizer(frag); - String idcode = can.getIDCode(); - Fragment fragment = new Fragment(idcode); - fragment.setMol(frag); - fragment.setSize(frag.getBonds()); - return fragment; } public Fragment getFragment(IBitArray fragDefByBonds, boolean addWildcards){ StereoMolecule frag = convert(fragDefByBonds, addWildcards); - Canonizer can = new Canonizer(frag); - String idcode = can.getIDCode(); - Fragment fragment = new Fragment(idcode); - fragment.setMol(frag); - fragment.setSize(frag.getBonds()); - return fragment; } - - private StereoMolecule convert(IBitArray fragDefByBonds, boolean addWildcards){ int bonds = mol.getBonds(); - int atoms = mol.getAtoms(); - boolean [] arrBonds = new boolean [bonds]; - - boolean [] arrAtoms = new boolean [atoms]; - + boolean [] arrAtoms = new boolean [atoms]; int bondsFragment = 0; for (int i = 0; i < bonds; i++) { if(fragDefByBonds.isBitSet(i)){ - - arrBonds[i] = true; - - bondsFragment++; - + arrBonds[i] = true; + bondsFragment++; arrAtoms[mol.getBondAtom(0, i)] = true; - arrAtoms[mol.getBondAtom(1, i)] = true; - } } @@ -176,99 +135,62 @@ private StereoMolecule convert(IBitArray fragDefByBonds, boolean addWildcards){ atomsFrag++; } } - - + StereoMolecule fragSubBonds = new StereoMolecule(atomsFrag, bondsFragment); - - int [] indexAtoms = mol.copyMoleculeByBonds(fragSubBonds, arrBonds, true, null); - - + // Add ring and aromatic info. // Added 07.07.2014 // int indexAtomNew = 0; for (int i = 0; i < indexAtoms.length; i++) { - - if(indexAtoms[i]>-1) { - if((mol.getAtomQueryFeatures(indexAtoms[i]) & Molecule.cAtomQFNotChain) > 0){ - fragSubBonds.setAtomQueryFeature(indexAtomNew, Molecule.cAtomQFNotChain, true); - - } - + } if((mol.getAtomQueryFeatures(indexAtoms[i]) & Molecule.cAtomQFAromatic) > 0){ - fragSubBonds.setAtomQueryFeature(indexAtomNew, Molecule.cAtomQFAromatic, true); - } - indexAtomNew++; - } - } if(addWildcards) { - boolean [] arrAtomCopied2Fragment = new boolean [mol.getAtoms()]; - for (int i = 0; i < indexAtoms.length; i++) { - if(indexAtoms[i] > -1) arrAtomCopied2Fragment[i] = true; } for (int i = 0; i < indexAtoms.length; i++) { - if(indexAtoms[i] > -1) { int atIndexOld = i; - int nConnected = mol.getConnAtoms(atIndexOld); - for (int j = 0; j < nConnected; j++) { - int indexAtConn = mol.getConnAtom(atIndexOld, j); - if(!arrAtomCopied2Fragment[indexAtConn]){ - int atWildCard = fragSubBonds.addAtom(0); - int atIndexNew = indexAtoms[i]; - fragSubBonds.addBond(atIndexNew, atWildCard, Molecule.cBondTypeSingle); - fragSubBonds.setAtomQueryFeature(atWildCard, Molecule.cAtomQFAny, true); - } } } } } - fragSubBonds.ensureHelperArrays(Molecule.cHelperRings); - return fragSubBonds; } public String getFragmentIdCodeCarbonSkeleton(IBitArray fragDefByBonds){ - StereoMolecule frag = convert(fragDefByBonds, false); - for (int i = 0; i < frag.getAtoms(); i++) { frag.setAtomicNo(i, 6); } - Canonizer can = new Canonizer(frag); - String idcode = can.getIDCode(); - return idcode; - } - } diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/reaction/ReactionEncoder.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/reaction/ReactionEncoder.java index e9568ebd..6dc6ce58 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/reaction/ReactionEncoder.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/chem/reaction/ReactionEncoder.java @@ -67,14 +67,17 @@ private ReactionEncoder() {} /** - * Creates a String containing a unique reaction code by + * Creates a String containing a canonical reaction code by * creating idcodes of every reactant and product and - * concatenating them in lexical order. + * concatenating them in lexically sorted order. This creates + * a canonical reaction code. The drawback is, however, that + * the original order of reactants and products may be changed. * If mapping information is available this will be encoded - * in a 2nd string. Otherwise this will be an empty string. + * in a 2nd string. Otherwise, this will be an empty string. * Coordinates, if available, will be encoded in a 3rd string. * If there are drawing objects assigned to this reaction * then these are encoded in a 4th string. + * If the reaction contains catalysts, they are encoded as 5th string. * * @return String[5] with reaction code, mapping, coordinates, drawing objects, catalysts */ @@ -83,11 +86,11 @@ public static String[] encode(Reaction reaction, boolean keepAbsoluteCoordinates } /** - * Creates a non-unique String containing a reaction code by - * creating idcodes of every reactant and product and - * concatenating them in original order. + * Creates a canonical or non-canonical String containing a reaction + * code by creating idcodes of every reactant and product and + * concatenating them in original or canonical order. * If mapping information is available this will be encoded - * in a 2nd string. Otherwise this will be null. + * in a 2nd string. Otherwise, this will be null. * Coordinates, if available, will be encoded in a 3rd string. * If there are drawing objects assigned to this reaction * then these are encoded in a 4th string. @@ -95,10 +98,10 @@ public static String[] encode(Reaction reaction, boolean keepAbsoluteCoordinates * * @param reaction * @param keepAbsoluteCoordinates - * @param sortByIDCode + * @param sortByIDCode whether to sort reactant and product idcodes to produce a canonical reaction code * @return String[5] with reaction code, mapping, coordinates, drawing objects, catalysts */ - private static String[] encode(Reaction reaction, boolean keepAbsoluteCoordinates, boolean sortByIDCode) { + public static String[] encode(Reaction reaction, boolean keepAbsoluteCoordinates, boolean sortByIDCode) { if (reaction == null || reaction.getReactants() == 0 || reaction.getProducts() == 0) { diff --git a/src/com/actelion/research/gwt/chemlib/com/actelion/research/gui/editor/AtomQueryFeatureDialogBuilder.java b/src/com/actelion/research/gwt/chemlib/com/actelion/research/gui/editor/AtomQueryFeatureDialogBuilder.java index af1b8d50..7d749bd3 100644 --- a/src/com/actelion/research/gwt/chemlib/com/actelion/research/gui/editor/AtomQueryFeatureDialogBuilder.java +++ b/src/com/actelion/research/gwt/chemlib/com/actelion/research/gui/editor/AtomQueryFeatureDialogBuilder.java @@ -230,6 +230,9 @@ private void build(ExtendedMolecule mol, int atom, boolean includeReactionHints) mChoiceNeighbours.addItem("at least 2"); mChoiceNeighbours.addItem("at least 3"); mChoiceNeighbours.addItem("at least 4"); + mChoiceNeighbours.addItem("1 or 2"); + mChoiceNeighbours.addItem("1,2, or 3"); + mChoiceNeighbours.addItem("2 or 3"); mDialog.add(mDialog.createLabel("Non-H neighbours:"), 1,15); mDialog.add(mChoiceNeighbours, 3,15); @@ -246,9 +249,9 @@ private void build(ExtendedMolecule mol, int atom, boolean includeReactionHints) mChoiceENeighbours.addItem("at least 2"); mChoiceENeighbours.addItem("at least 3"); mChoiceENeighbours.addItem("at least 4"); - mChoiceENeighbours.addItem("from 1 to 2"); - mChoiceENeighbours.addItem("from 1 to 3"); - mChoiceENeighbours.addItem("from 2 to 3"); + mChoiceENeighbours.addItem("1 or 2"); + mChoiceENeighbours.addItem("1,2, or 3"); + mChoiceENeighbours.addItem("2 or 3"); mDialog.add(mDialog.createLabel("Electroneg. neighbours:"), 1,17); mDialog.add(mChoiceENeighbours, 3,17); @@ -262,6 +265,7 @@ private void build(ExtendedMolecule mol, int atom, boolean includeReactionHints) mChoiceHydrogen.addItem("at least 3"); mChoiceHydrogen.addItem("less than 2"); mChoiceHydrogen.addItem("less than 3"); + mChoiceHydrogen.addItem("1 or 2"); mDialog.add(mDialog.createLabel("Hydrogen count:"), 1,19); mDialog.add(mChoiceHydrogen, 3,19); @@ -430,6 +434,12 @@ else if (neighbourFeatures == (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQF mChoiceNeighbours.setSelectedIndex(8); else if (neighbourFeatures == (Molecule.cAtomQFNeighbours & ~Molecule.cAtomQFNot4Neighbours)) mChoiceNeighbours.setSelectedIndex(9); + else if (neighbourFeatures == (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot3Neighbours | Molecule.cAtomQFNot4Neighbours)) + mChoiceNeighbours.setSelectedIndex(10); + else if (neighbourFeatures == (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot4Neighbours)) + mChoiceNeighbours.setSelectedIndex(11); + else if (neighbourFeatures == (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot1Neighbour | Molecule.cAtomQFNot4Neighbours)) + mChoiceNeighbours.setSelectedIndex(12); else mChoiceNeighbours.setSelectedIndex(0); @@ -492,6 +502,8 @@ else if (hydrogenFeatures == (Molecule.cAtomQFNot2Hydrogen | Molecule.cAtomQFNot mChoiceHydrogen.setSelectedIndex(7); else if (hydrogenFeatures == (Molecule.cAtomQFNot3Hydrogen)) mChoiceHydrogen.setSelectedIndex(8); + else if (hydrogenFeatures == (Molecule.cAtomQFNot0Hydrogen | Molecule.cAtomQFNot3Hydrogen)) + mChoiceHydrogen.setSelectedIndex(9); else mChoiceHydrogen.setSelectedIndex(0); @@ -690,6 +702,24 @@ else if (mMol.getConnAtoms(atom) < 2) else if (mMol.getConnAtoms(atom) < 3) queryFeatures |= (Molecule.cAtomQFNeighbours & ~Molecule.cAtomQFNot4Neighbours); break; + case 10: // between 1 and 2 non-H neighbours + if (mMol.getConnAtoms(atom) == 0) + queryFeatures |= (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot3Neighbours | Molecule.cAtomQFNot4Neighbours); + else + queryFeatures |= (Molecule.cAtomQFNot3Neighbours | Molecule.cAtomQFNot4Neighbours); + break; + case 11: // between 1 and 3 non-H neighbours + if (mMol.getConnAtoms(atom) == 0) + queryFeatures |= (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot4Neighbours); + else + queryFeatures |= Molecule.cAtomQFNot4Neighbours; + break; + case 12: // between 2 and 3 non-H neighbours + if (mMol.getConnAtoms(atom) <= 1) + queryFeatures |= (Molecule.cAtomQFNot0Neighbours | Molecule.cAtomQFNot1ENeighbour | Molecule.cAtomQFNot4Neighbours); + else if (mMol.getConnAtoms(atom) <= 3) + queryFeatures |= Molecule.cAtomQFNot4Neighbours; + break; } int eNeighbours = mMol.getAtomElectronegativeNeighbours(atom); @@ -787,6 +817,10 @@ else if (mMol.getConnAtoms(atom) < 3) case 8: // less than 3 hydrogens queryFeatures |= (Molecule.cAtomQFNot3Hydrogen); break; + case 9: // between 1 and 2 hydrogens + queryFeatures |= (Molecule.cAtomQFNot0Hydrogen + | Molecule.cAtomQFNot3Hydrogen); + break; } switch (mChoicePi.getSelectedIndex()) { diff --git a/src/com/actelion/research/gwt/chemlib/info/clearthought/layout/TableLayoutConstraints.java b/src/com/actelion/research/gwt/chemlib/info/clearthought/layout/TableLayoutConstraints.java index 981eec12..f399cebc 100644 --- a/src/com/actelion/research/gwt/chemlib/info/clearthought/layout/TableLayoutConstraints.java +++ b/src/com/actelion/research/gwt/chemlib/info/clearthought/layout/TableLayoutConstraints.java @@ -271,12 +271,12 @@ public TableLayoutConstraints (String constraints) // Get the first column (assume component is in only one column) String tokenA = st.nextToken(); - col1 = new Integer(tokenA).intValue(); + col1 = Integer.parseInt(tokenA); col2 = col1; // Get the first row (assume component is in only one row) String tokenB = st.nextToken(); - row1 = new Integer(tokenB).intValue(); + row1 = Integer.parseInt(tokenB); row2 = row1; // Get next two tokens @@ -286,8 +286,8 @@ public TableLayoutConstraints (String constraints) try { // Attempt to use tokens A and B as col2 and row2 - col2 = new Integer(tokenA).intValue(); - row2 = new Integer(tokenB).intValue(); + col2 = Integer.parseInt(tokenA); + row2 = Integer.parseInt(tokenB); // Get next two tokens tokenA = st.nextToken(); diff --git a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/ConformerSetDiagnostics.java b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/ConformerSetDiagnostics.java index 91876311..17430635 100644 --- a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/ConformerSetDiagnostics.java +++ b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/gen/ConformerSetDiagnostics.java @@ -5,9 +5,8 @@ import org.openmolecules.chem.conf.so.ConformationRule; import org.openmolecules.chem.conf.so.SelfOrganizedConformer; -import java.io.BufferedWriter; -import java.io.FileWriter; -import java.io.IOException; +import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.TreeMap; @@ -65,11 +64,35 @@ public String getExitReason() { return mExitReason; } - + public void writeEliminationRuleFile(String path) { + try { + BufferedWriter writer = new BufferedWriter(); + writeDataWarriorHeader(writer, true); + writer.write("Structure\tcoords\tconformer\telim_rules"); + writer.newLine(); + int conformer = 0; + for (ConformerDiagnostics cd:mDiagnosticsMap.values()) { + writer.write(cd.getIDCode()); + writer.write("\t"); + writer.write(cd.getCoords()); + writer.write("\t"); + writer.write(conformer++); + writer.write("\t"); + for (String rule:cd.getEliminationRules()) { + writer.write(rule); + writer.write(""); + } + writer.write("\n"); + } + writeDataWarriorFooter(writer); + writer.close(); + } + catch (IOException ioe) {} + } public void writePermutationSpace(String fileName) { try { - BufferedWriter writer = new BufferedWriter(new FileWriter(fileName)); + BufferedWriter writer = new BufferedWriter(); writer.write("rigid fragments"); for (int i=1; i<=mRotatableBond.length; i++) writer.write("\ttorsion "+i); @@ -102,7 +125,7 @@ public void writePermutationSpace(String fileName) { public void writeAllConformersFile(String fileName) { try { - BufferedWriter writer = new BufferedWriter(new FileWriter(fileName)); + BufferedWriter writer = new BufferedWriter(); writeDataWarriorHeader(writer, true); writer.write("Structure\tcoords\tname\tsuccess\tlikelihood\tcollision"); for (int i=1; i<=mRigidFragment.length; i++) @@ -139,7 +162,7 @@ public void writeAllConformersFile(String fileName) { public void writeRigidFragmentFile(String path) { try { - BufferedWriter writer = new BufferedWriter(new FileWriter(path)); + BufferedWriter writer = new BufferedWriter(); writeDataWarriorHeader(writer, true); writer.write("Structure\tcoords\tfragment\tconformer\tlikelyhood\trule strain\tatom strain"); writer.newLine(); @@ -171,7 +194,7 @@ public void writeRigidFragmentFile(String path) { public void writeRotatableBondsFile(String path) { try { - BufferedWriter writer = new BufferedWriter(new FileWriter(path)); + BufferedWriter writer = new BufferedWriter(); writeDataWarriorHeader(writer, false); writer.write("Structure\ttorsion-ID\tfragments\ttorsions\tfrequenies\trelevance"); writer.newLine(); diff --git a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/ConformationSelfOrganizer.java b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/ConformationSelfOrganizer.java index e2c4543f..cb64fb1c 100644 --- a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/ConformationSelfOrganizer.java +++ b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/ConformationSelfOrganizer.java @@ -35,7 +35,6 @@ import com.actelion.research.util.DoubleFormat; import java.io.BufferedWriter; -import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Random; @@ -413,7 +412,7 @@ public static void writeDWFileStart() { mLastDWConformer = null; try { - mDWWriter = new BufferedWriter(new FileWriter(DATAWARRIOR_DEBUG_FILE)); + mDWWriter = new BufferedWriter(); mDWWriter.write(""); mDWWriter.newLine(); mDWWriter.write(""); diff --git a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/TetrahedralStereoRule.java b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/TetrahedralStereoRule.java index eaab6430..b8c5f235 100644 --- a/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/TetrahedralStereoRule.java +++ b/src/com/actelion/research/gwt/chemlib/org/openmolecules/chem/conf/so/TetrahedralStereoRule.java @@ -30,6 +30,7 @@ import com.actelion.research.chem.Coordinates; import com.actelion.research.chem.Molecule; +import com.actelion.research.chem.MolfileCreator; import com.actelion.research.chem.StereoMolecule; import com.actelion.research.chem.conf.Conformer; @@ -187,7 +188,6 @@ private void calculateRotatableAtoms(StereoMolecule mol, int[] neighbourAtom, in int stereoCenter = neighbourAtom[4]; int neighbours = (neighbourAtom[3] == -1) ? 3 : 4; - int[] fragmentNo = new int[mol.getAllAtoms()]; boolean[] neglectBond = new boolean[mol.getAllBonds()]; for (int i=0; i=3 neighbour-bond-fragment and to proportionally rotate the closer part of it and not just one atom int remainingNeighboursToRotate = neighbours - 2 - (rotatableAtomCount == 0 ? 0 : 1); for (int i=0; i=mRotatableNeighbour.length) { + System.out.println("##### Out of bounds exception:"+new MolfileCreator(mol).getMolfile()); + System.out.print("neighbours:"+neighbours+" "); + System.out.print("neighbourAtom:"); for (int k:neighbourAtom) System.out.print(" " + k); System.out.println(); + System.out.print("neighbourBond:"); for (int k : neighbourBond) System.out.print(" " + k); System.out.println(); + System.out.print("fragmentNo:"); for (int k : fragmentNo) System.out.print(" " + k); System.out.println(); + System.out.print("isNeighbourFragment:"); for (boolean k : isNeighbourFragment) System.out.print(" " + k); System.out.println(); + System.out.print("isRotatableAtom:"); for (boolean b : isRotatableAtom) System.out.print(" " + b); System.out.println(); +} mRotatableNeighbour[rotatableIndex++] = neighbourAtom[i]; +} else mStaticNeighbour[staticIndex++] = neighbourAtom[i]; } @@ -324,7 +338,8 @@ private void calculateRotatableAtoms(StereoMolecule mol, int[] neighbourAtom, in private Coordinates calculateRotationAxis(Conformer conformer) { Coordinates axis = new Coordinates(); - for (int atom:mStaticNeighbour) + int[] neighbour = (mStaticNeighbour.length == 2) ? mStaticNeighbour : mRotatableNeighbour; + for (int atom:neighbour) axis.add(conformer.getCoordinates(atom)); axis.scale(0.5); @@ -343,8 +358,8 @@ public String toString() { StringBuilder sb = new StringBuilder("stereo rule:"); super.addAtomList(sb); sb.append(" rotatable:"); - for (int rn:mRotatableNeighbour) - sb.append(rn+" "); + for (int ra:mRotatableAtom) + sb.append(ra+" "); return sb.toString(); } } diff --git a/src/com/actelion/research/gwt/jre/java/io/BufferedWriter.java b/src/com/actelion/research/gwt/jre/java/io/BufferedWriter.java index 6b57c388..86cec9a0 100644 --- a/src/com/actelion/research/gwt/jre/java/io/BufferedWriter.java +++ b/src/com/actelion/research/gwt/jre/java/io/BufferedWriter.java @@ -1,6 +1,7 @@ package java.io; public class BufferedWriter { + public BufferedWriter() {} public BufferedWriter(FileWriter fileWriter) { } @@ -17,6 +18,9 @@ public void write(String string) throws IOException { public void write(short value) { } + public void write(int value) { + } + public void flush() { } diff --git a/src/com/actelion/research/gwt/jre/java/lang/Character.java b/src/com/actelion/research/gwt/jre/java/lang/Character.java index 714ff4a2..e6386bba 100644 --- a/src/com/actelion/research/gwt/jre/java/lang/Character.java +++ b/src/com/actelion/research/gwt/jre/java/lang/Character.java @@ -1,12 +1,12 @@ /* * Copyright 2008 Google Inc. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the @@ -233,7 +233,7 @@ public static boolean isDigit(int c) { } public static native boolean isDigit(char c) /*-{ - return (null != String.fromCharCode(c).match(/\d/)); + return /\d/.test(String.fromCharCode(c)); }-*/; public static boolean isHighSurrogate(char ch) { @@ -244,14 +244,28 @@ public static boolean isHighSurrogate(char ch) { * TODO: correct Unicode handling. */ public static native boolean isLetter(char c) /*-{ - return (null != String.fromCharCode(c).match(/[A-Z]/i)); + return /[A-Z]/i.test(String.fromCharCode(c)); + }-*/; + + /* + * TODO: correct Unicode handling. + */ + public static native boolean isLetter(byte c) /*-{ + return /[A-Z]/i.test(String.fromCharCode(c)); }-*/; /* * TODO: correct Unicode handling. */ public static native boolean isLetterOrDigit(char c) /*-{ - return (null != String.fromCharCode(c).match(/[A-Z\d]/i)); + return /[A-Z\d]/i.test(String.fromCharCode(c)); + }-*/; + + /* + * TODO: correct Unicode handling. + */ + public static native boolean isLetterOrDigit(byte c) /*-{ + return /[A-Z\d]/i.test(String.fromCharCode(c)); }-*/; /* @@ -422,7 +436,7 @@ static char forDigit(int digit) { /** * Computes the high surrogate character of the UTF16 representation of a * non-BMP code point. See {@link getLowSurrogate}. - * + * * @param codePoint requested codePoint, required to be >= * MIN_SUPPLEMENTARY_CODE_POINT * @return high surrogate character @@ -434,7 +448,7 @@ static char getHighSurrogate(int codePoint) { /** * Computes the low surrogate character of the UTF16 representation of a non-BMP * code point. See {@link getHighSurrogate}. - * + * * @param codePoint requested codePoint, required to be >= * MIN_SUPPLEMENTARY_CODE_POINT * @return low surrogate character diff --git a/src/com/actelion/research/gwt/minimal/JSSmilesParser.java b/src/com/actelion/research/gwt/minimal/JSSmilesParser.java index 7ec6e4bf..515f0091 100644 --- a/src/com/actelion/research/gwt/minimal/JSSmilesParser.java +++ b/src/com/actelion/research/gwt/minimal/JSSmilesParser.java @@ -17,16 +17,17 @@ private native void init(JavaScriptObject options) options = options || {}; var smartsMode = options.smartsMode || 'smiles'; - var createSmartsWarnings = options.createSmartsWarnings || false; var skipCoordinateTemplates = options.skipCoordinateTemplates || false; var makeHydrogenExplicit = options.makeHydrogenExplicit || false; var noCactvs = options.noCactvs || false; - this.@com.actelion.research.gwt.minimal.JSSmilesParser::init(Ljava/lang/String;ZZZZ)(smartsMode, createSmartsWarnings, skipCoordinateTemplates, makeHydrogenExplicit, noCactvs); + var singleDotSeparator = options.singleDotSeparator || false; + var createSmartsWarnings = options.createSmartsWarnings || false; + this.@com.actelion.research.gwt.minimal.JSSmilesParser::init(Ljava/lang/String;ZZZZZ)(smartsMode, skipCoordinateTemplates, makeHydrogenExplicit, noCactvs, singleDotSeparator, createSmartsWarnings); }-*/; - private void init(String smartsMode, boolean createSmartsWarnings, + private void init(String smartsMode, boolean skipCoordinateTemplates, boolean makeHydrogenExplicit, - boolean noCactvs) { + boolean noCactvs, boolean singleDotSeparator, boolean createSmartsWarnings) { int mode = SmilesParser.SMARTS_MODE_IS_SMILES; switch (smartsMode) { case "smarts": @@ -45,7 +46,13 @@ private void init(String smartsMode, boolean createSmartsWarnings, if (noCactvs) { mode |= SmilesParser.MODE_NO_CACTUS_SYNTAX; } - oclParser = new SmilesParser(mode, createSmartsWarnings); + if (singleDotSeparator) { + mode |= SmilesParser.MODE_SINGLE_DOT_SEPARATOR; + } + if (createSmartsWarnings) { + mode |= SmilesParser.MODE_CREATE_SMARTS_WARNING; + } + oclParser = new SmilesParser(mode); } public void setRandomSeed(int seed) { diff --git a/types.d.ts b/types.d.ts index b8414cd6..b0998879 100644 --- a/types.d.ts +++ b/types.d.ts @@ -2592,8 +2592,6 @@ export interface ISmilesParserOptions { */ smartsMode?: 'smiles' | 'smarts' | 'guess'; - createSmartsWarnings?: boolean; - skipCoordinateTemplates?: boolean; makeHydrogenExplicit?: boolean; @@ -2602,6 +2600,13 @@ export interface ISmilesParserOptions { * Disable parsing of CACTVS syntax. */ noCactvs?: boolean; + + /** + * Consider single dots '.' (rather than '..') as molecule separator when parsing reactions. + */ + singleDotSeparator?: boolean; + + createSmartsWarnings?: boolean; } export interface ISmilesParserParseMoleculeOptions {