Skip to content

Commit

Permalink
add option to symmetrize conjugated terminal groups when RMS pruning …
Browse files Browse the repository at this point in the history
…conformers (#7270)

* expose symmetrizeTerminalAtoms() to the public API

* support symmetrizing terminal groups in RMS pruning in confgen

* add that to the python wrapper

* add backwards compatibility note

* allow JSON config of the new option
  • Loading branch information
greglandrum committed Mar 19, 2024
1 parent 9bb5e48 commit 81af0db
Show file tree
Hide file tree
Showing 10 changed files with 74 additions and 13 deletions.
6 changes: 3 additions & 3 deletions Code/GraphMol/DistGeomHelpers/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

rdkit_library(DistGeomHelpers BoundsMatrixBuilder.cpp Embedder.cpp EmbedderUtils.cpp
LINK_LIBRARIES ForceFieldHelpers SubstructMatch GraphMol DistGeometry Alignment
LINK_LIBRARIES MolAlign ForceFieldHelpers SubstructMatch GraphMol DistGeometry Alignment
)
target_compile_definitions(DistGeomHelpers PRIVATE RDKIT_DISTGEOMHELPERS_BUILD)

Expand All @@ -9,10 +9,10 @@ rdkit_headers(BoundsMatrixBuilder.h

rdkit_test(testDistGeomHelpers testDgeomHelpers.cpp
LINK_LIBRARIES
DistGeomHelpers MolAlign MolTransforms FileParsers SmilesParse )
DistGeomHelpers MolTransforms FileParsers SmilesParse )

rdkit_catch_test(distGeomHelpersCatch catch_tests.cpp
LINK_LIBRARIES DistGeomHelpers MolAlign MolTransforms FileParsers SmilesParse )
LINK_LIBRARIES DistGeomHelpers MolTransforms FileParsers SmilesParse )


if(RDK_BUILD_PYTHON_WRAPPERS)
Expand Down
14 changes: 12 additions & 2 deletions Code/GraphMol/DistGeomHelpers/Embedder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <GraphMol/MolOps.h>
#include <GraphMol/ForceFieldHelpers/CrystalFF/TorsionPreferences.h>
#include <GraphMol/Substruct/SubstructMatch.h>
#include <GraphMol/MolAlign/AlignMolecules.h>
#include <boost/dynamic_bitset.hpp>
#include <iomanip>
#include <RDGeneral/RDThreads.h>
Expand Down Expand Up @@ -1356,16 +1357,24 @@ std::vector<std::vector<unsigned int>> getMolSelfMatches(
MolOps::RemoveHsParameters ps;
bool sanitize = false;
MolOps::removeHs(tmol, ps, sanitize);

std::unique_ptr<RWMol> prbMolSymm;
if (params.symmetrizeConjugatedTerminalGroupsForPruning) {
prbMolSymm.reset(new RWMol(tmol));
MolAlign::details::symmetrizeTerminalAtoms(*prbMolSymm);
}
const auto &prbMolForMatch = prbMolSymm ? *prbMolSymm : tmol;

SubstructMatchParameters sssps;
sssps.maxMatches = 1;
// provides the atom indices in the molecule corresponding
// to the indices in the H-stripped version
auto strippedMatch = SubstructMatch(mol, tmol, sssps);
auto strippedMatch = SubstructMatch(mol, prbMolForMatch, sssps);
CHECK_INVARIANT(strippedMatch.size() == 1, "expected match not found");

sssps.maxMatches = 1000;
sssps.uniquify = false;
auto heavyAtomMatches = SubstructMatch(tmol, tmol, sssps);
auto heavyAtomMatches = SubstructMatch(tmol, prbMolForMatch, sssps);
for (const auto &match : heavyAtomMatches) {
res.emplace_back(0);
res.back().reserve(match.size());
Expand Down Expand Up @@ -1546,6 +1555,7 @@ void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs,
#endif
}
auto selfMatches = detail::getMolSelfMatches(mol, params);

for (unsigned int ci = 0; ci < confs.size(); ++ci) {
auto &conf = confs[ci];
if (confsOk[ci]) {
Expand Down
1 change: 1 addition & 0 deletions Code/GraphMol/DistGeomHelpers/Embedder.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ struct RDKIT_DISTGEOMHELPERS_EXPORT EmbedParameters {
bool trackFailures{false};
std::vector<unsigned int> failures;
bool enableSequentialRandomSeeds{false};
bool symmetrizeConjugatedTerminalGroupsForPruning{true};

EmbedParameters() : boundsMat(nullptr), CPCI(nullptr), callback(nullptr) {}
EmbedParameters(
Expand Down
1 change: 1 addition & 0 deletions Code/GraphMol/DistGeomHelpers/EmbedderUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ void updateEmbedParametersFromJSON(EmbedParameters &params,
PT_OPT_GET(forceTransAmides);
PT_OPT_GET(useSymmetryForPruning);
PT_OPT_GET(enableSequentialRandomSeeds);
PT_OPT_GET(symmetrizeConjugatedTerminalGroupsForPruning);

std::map<int, RDGeom::Point3D> *cmap = nullptr;
const auto coordMap = pt.get_child_optional("coordMap");
Expand Down
7 changes: 6 additions & 1 deletion Code/GraphMol/DistGeomHelpers/Wrap/rdDistGeom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,12 @@ BOOST_PYTHON_MODULE(rdDistGeom) {
"enableSequentialRandomSeeds",
&PyEmbedParameters::enableSequentialRandomSeeds,
"handle random number seeds so that conformer generation can be restarted")
.def("SetCoordMap", &PyEmbedParameters::setCoordMap, python::args("self"), "sets the coordmap to be used");
.def_readwrite(
"symmetrizeConjugatedTerminalGroupsForPruning",
&PyEmbedParameters::symmetrizeConjugatedTerminalGroupsForPruning,
"symmetrize terminal conjugated groups for RMSD pruning")
.def("SetCoordMap", &PyEmbedParameters::setCoordMap, python::args("self"),
"sets the coordmap to be used");

docString =
"Use distance geometry to obtain multiple sets of \n\
Expand Down
11 changes: 11 additions & 0 deletions Code/GraphMol/DistGeomHelpers/Wrap/testDistGeom.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,17 @@ def testCoordMap(self):
angle = v1.AngleTo(v2)
self.assertAlmostEqual(angle, math.pi / 2.0, delta=0.15)

def testSymmetrizeTerminal(self):
mol = Chem.AddHs(Chem.MolFromSmiles("FCC(=O)O"))
ps = rdDistGeom.ETKDGv3()
ps.randomSeed = 0xc0ffee
ps.pruneRmsThresh = 0.5
cids = rdDistGeom.EmbedMultipleConfs(mol, 50, ps)
self.assertEqual(len(cids), 1)
ps.symmetrizeConjugatedTerminalGroupsForPruning = False
cids = rdDistGeom.EmbedMultipleConfs(mol, 50, ps)
self.assertGreater(len(cids), 1)


if __name__ == '__main__':
unittest.main()
23 changes: 23 additions & 0 deletions Code/GraphMol/DistGeomHelpers/catch_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -962,4 +962,27 @@ TEST_CASE("Github #7181: ET terms applied to constrained atoms") {
auto rmsd = MolAlign::alignMol(*mol, *templ, cid, -1, &imatch);
CHECK(rmsd < 0.2);
}
}

TEST_CASE("terminal groups in pruning") {
SECTION("basics") {
std::vector<std::string> smiles = {"FCC(=O)O", "FCC(=O)[O-]",
"FCC(=N)[NH-]", "FCS(=O)(=O)O",
"FCP(=O)(O)O"};
for (const auto &smi : smiles) {
auto mol = v2::SmilesParse::MolFromSmiles(smi);
REQUIRE(mol);
MolOps::addHs(*mol);
DGeomHelpers::EmbedParameters ps = DGeomHelpers::ETKDGv3;
ps.randomSeed = 0xc0ffee;
ps.pruneRmsThresh = 0.5;

auto cids = DGeomHelpers::EmbedMultipleConfs(*mol, 50, ps);
CHECK(cids.size() == 1);

ps.symmetrizeConjugatedTerminalGroupsForPruning = false;
cids = DGeomHelpers::EmbedMultipleConfs(*mol, 50, ps);
CHECK(cids.size() >= 2);
}
}
}
7 changes: 4 additions & 3 deletions Code/GraphMol/MolAlign/AlignMolecules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
namespace RDKit {
namespace MolAlign {

namespace {
namespace details {
void symmetrizeTerminalAtoms(RWMol &mol) {
// clang-format off
static const std::string qsmarts =
Expand All @@ -50,7 +50,8 @@ void symmetrizeTerminalAtoms(RWMol &mol) {
mol.replaceBond(obond->getIdx(), &qb);
}
}

} // namespace details
namespace {
double alignConfsOnAtomMap(const Conformer &prbCnf, const Conformer &refCnf,
const MatchVectType &atomMap,
RDGeom::Transform3D &trans,
Expand All @@ -77,7 +78,7 @@ void getAllMatchesPrbRef(const ROMol &prbMol, const ROMol &refMol,
std::unique_ptr<RWMol> prbMolSymm;
if (symmetrizeConjugatedTerminalGroups) {
prbMolSymm.reset(new RWMol(prbMol));
symmetrizeTerminalAtoms(*prbMolSymm);
details::symmetrizeTerminalAtoms(*prbMolSymm);
}
const auto &prbMolForMatch = prbMolSymm ? *prbMolSymm : prbMol;
SubstructMatch(refMol, prbMolForMatch, matches, uniquify, recursionPossible,
Expand Down
16 changes: 12 additions & 4 deletions Code/GraphMol/MolAlign/AlignMolecules.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ typedef std::vector<std::pair<int, int>> MatchVectType;

class Conformer;
class ROMol;
class RWMol;
namespace MolAlign {
class RDKIT_MOLALIGN_EXPORT MolAlignException : public std::exception {
public:
Expand Down Expand Up @@ -177,7 +178,8 @@ RDKIT_MOLALIGN_EXPORT double getBestRMS(
int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
const RDNumeric::DoubleVector *weights = nullptr, int numThreads = 1);

//! Returns the symmetric distance matrix between the conformers of a molecule.
//! Returns the symmetric distance matrix between the conformers of a
//! molecule.
/// getBestRMS() is used to calculate the inter-conformer distances
/*!
This function will attempt to align all permutations of matching atom
Expand Down Expand Up @@ -228,9 +230,9 @@ RDKIT_MOLALIGN_EXPORT std::vector<double> getAllConformerBestRMS(
\param maxMatches (optional) if map is empty, this will be the max number of
matches found in a SubstructMatch().
\param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
terminal functional groups (like nitro or carboxylate) will
be considered symmetrically
\param weights (optional) weights for each pair of atoms.
terminal functional groups (like nitro or carboxylate)
will be considered symmetrically \param weights (optional) weights for
each pair of atoms.
<b>Returns</b>
Best RMSD value found
Expand Down Expand Up @@ -292,6 +294,12 @@ RDKIT_MOLALIGN_EXPORT void alignMolConformers(
const std::vector<unsigned int> *confIds = nullptr,
const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
unsigned int maxIters = 50, std::vector<double> *RMSlist = nullptr);

namespace details {
//! Converts terminal atoms in groups like nitro or carboxylate to be symmetry
/// equivalent
RDKIT_MOLALIGN_EXPORT void symmetrizeTerminalAtoms(RWMol &mol);
} // namespace details
} // namespace MolAlign
} // namespace RDKit
#endif
1 change: 1 addition & 0 deletions ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ GitHub)
- Metal atoms (really any atom which has a default valence of -1) now have their radical electron count set to zero if they form any bonds. Metal atoms/ions without bonds will continue to be assigned a radical count of either 1 or 0 if they do/do not have an odd number of valence electrons. It is not possible in a cheminformatics system to generally answer what the spin state of a metal atom should be, so we are taking a simple and easily explainable approach. If you know the spin state of your species, you can directly provide that information by calling SetNumRadicalElectrons().
- Chirality will now be perceived for three-coordinate atoms with a T-shaped coordination environment and the wedge in the stem of the T. If we are perceiving tetrahedral stereo, it's possible to interpret this unambiguously.
- Bug fixes in the v2 tautomer hash algorithm will change the output for some molecules. Look at PR #7200 for more details: https://github.com/rdkit/rdkit/pull/7200
- RMS pruning during conformer generation now symmetrizes conjugated terminal groups by default. This can be disabled with the parameter "symmetrizeConjugatedTerminalGroupsForPruning"

## New Features and Enhancements:

Expand Down

0 comments on commit 81af0db

Please sign in to comment.