Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RGD highlights as in blog post #7322

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Code/GraphMol/Depictor/RDDepictor.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ struct RDKIT_DEPICTOR_EXPORT ConstrainedDepictionParams {
bool forceRDKit = false;
//! if true, terminal dummy atoms in the reference are ignored
/// if they match an implicit hydrogen in the molecule or if they are
/// attached top a query atom; defaults to false
/// attached to a query atom; defaults to false
bool allowRGroups = false;
//! if false (default), a part of the molecule is hard-constrained
/// to have the same coordinates as the reference, and the rest of
Expand Down
41 changes: 21 additions & 20 deletions Code/GraphMol/RGroupDecomposition/RGroupCore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ RWMOL_SPTR RCore::extractCoreFromMolMatch(
std::vector<Bond *> newBonds;
std::map<Atom *, int> dummyAtomMap;
std::map<const Atom *, Atom *> molAtomMap;
for (auto &pair : match) {
for (const auto &pair : match) {
const auto queryAtom = core->getAtomWithIdx(pair.first);
auto const targetAtom = extractedCore->getAtomWithIdx(pair.second);
const auto targetAtom = extractedCore->getAtomWithIdx(pair.second);
if (int rLabel; queryAtom->getPropIfPresent(RLABEL, rLabel)) {
targetAtom->setProp(RLABEL, rLabel);
}
Expand Down Expand Up @@ -166,16 +166,16 @@ RWMOL_SPTR RCore::extractCoreFromMolMatch(
}
newBonds.push_back(connectingBond);

// Check to see if we are breaking a stereo bond definition, by removing one of the stereo atoms
// If so, set to the new atom
for (auto bond: extractedCore->atomBonds(targetAtom)) {
// Check to see if we are breaking a stereo bond definition, by
// removing one of the stereo atoms. If so, set to the new atom
for (auto bond : extractedCore->atomBonds(targetAtom)) {
if (bond->getIdx() == connectingBond->getIdx()) {
continue;
}

if (bond->getStereo() > Bond::STEREOANY) {
auto &stereoAtoms = bond->getStereoAtoms();
for (int& stereoAtom : stereoAtoms) {
for (int &stereoAtom : stereoAtoms) {
if (stereoAtom == static_cast<int>(targetNeighborIndex)) {
stereoAtom = static_cast<int>(newDummyIdx);
}
Expand Down Expand Up @@ -496,8 +496,7 @@ std::vector<MatchVectType> RCore::matchTerminalUserRGroups(
// Find what target atoms will bond to these dummies
std::vector<std::vector<int>> neighborDummyLists;
for (auto dummyIndex : dummyIndexes) {
const int neighborIdx =
terminalRGroupAtomToNeighbor.at(dummyIndex);
const int neighborIdx = terminalRGroupAtomToNeighbor.at(dummyIndex);
const auto coreBond = core->getBondBetweenAtoms(dummyIndex, neighborIdx);
// find the atom in the target mapped to the neighbor in the core
const int targetIdx = matchMap[neighborIdx];
Expand All @@ -506,10 +505,9 @@ std::vector<MatchVectType> RCore::matchTerminalUserRGroups(
// now look for neighbors of that target atom that are not mapped to a
// core atom- the dummy atom can potentially be mapped to each of those
for (const auto &nbrIdx :
boost::make_iterator_range(target.getAtomNeighbors(targetAtom))) {
boost::make_iterator_range(target.getAtomNeighbors(targetAtom))) {
if (!mappedTargetIdx.test(nbrIdx)) {
const auto targetBond =
target.getBondBetweenAtoms(targetIdx, nbrIdx);
const auto targetBond = target.getBondBetweenAtoms(targetIdx, nbrIdx);
// check for bond compatibility
if (bondCompat(coreBond, targetBond, sssParams)) {
available.push_back(nbrIdx);
Expand Down Expand Up @@ -757,8 +755,9 @@ bool RCore::checkAllBondsToRGroupPresent(
if (coreAtomIndices.empty()) {
continue;
}
CHECK_INVARIANT(coreAtomIndices.size() == 1,
"Target atom neighboring R group should have exactly one match in core");
CHECK_INVARIANT(
coreAtomIndices.size() == 1,
"Target atom neighboring R group should have exactly one match in core");
auto coreAtomIdx = coreAtomIndices.front();
const auto coreAtom = core->getAtomWithIdx(coreAtomIdx);
// don't need to match a non terminal user R group
Expand Down Expand Up @@ -790,7 +789,8 @@ bool RCore::checkAllBondsToRGroupPresent(
std::set<int> bondIndices;
for (const auto coreNeighborIdx : coreNeighborIndices) {
for (const auto coreRGroupIdx : coreRGroupIndices) {
const auto bond = core->getBondBetweenAtoms(coreNeighborIdx, coreRGroupIdx);
const auto bond =
core->getBondBetweenAtoms(coreNeighborIdx, coreRGroupIdx);
if (bond) {
bondIndices.insert(bond->getIdx());
}
Expand All @@ -807,17 +807,18 @@ int RCore::matchingIndexToCoreIndex(int matchingIndex) const {
return atom->getProp<int>(RLABEL_CORE_INDEX);
}

// Create tautomer query for the matching mol on demand and cache for performance
// If the tautomer query cannot be created (because we can't kekulize the query)
// then nullptr will be returned and we revert to non-tautomer match
// Create tautomer query for the matching mol on demand and cache for
// performance If the tautomer query cannot be created (because we can't
// kekulize the query) then nullptr will be returned and we revert to
// non-tautomer match
std::shared_ptr<TautomerQuery> RCore::getMatchingTautomerQuery() {
if (!checkedForTautomerQuery) {
try {
// Enumerate tautomers from a sanitized copy of the matching molecule
RWMol copy(*matchingMol);
// If the core has had rgroup labels removed when creating the matching mol
// then we need to update properties. Should a full sanitization be done?
// MolOps::sanitizeMol(*copy);
// If the core has had rgroup labels removed when creating the matching
// mol then we need to update properties. Should a full sanitization be
// done? MolOps::sanitizeMol(*copy);
copy.updatePropertyCache(false);
std::shared_ptr<TautomerQuery> tautomerQuery(
TautomerQuery::fromMol(copy));
Expand Down
83 changes: 61 additions & 22 deletions Code/GraphMol/RGroupDecomposition/RGroupData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,62 @@

namespace RDKit {

void RGroupData::mergeIntoCombinedMol(const ROMOL_SPTR &mol) {
CHECK_INVARIANT(mol, "mol must not be null");
if (!combinedMol) {
combinedMol = RWMOL_SPTR(new RWMol(*mol));
} else {
combinedMol.reset(static_cast<RWMol *>(combineMols(*combinedMol, *mol)));
single_fragment = false;
}
smiles = getSmiles();
combinedMol->setProp(common_properties::internalRgroupSmiles, smiles);
std::vector<int> incomingAtomIndices;
std::vector<int> incomingBondIndices;
mol->getPropIfPresent(common_properties::_rgroupTargetAtoms,
incomingAtomIndices);
mol->getPropIfPresent(common_properties::_rgroupTargetBonds,
incomingBondIndices);
std::vector<int> existingAtomIndices;
std::vector<int> existingBondIndices;
combinedMol->getPropIfPresent(common_properties::_rgroupTargetAtoms,
existingAtomIndices);
combinedMol->getPropIfPresent(common_properties::_rgroupTargetBonds,
existingBondIndices);
if (!incomingAtomIndices.empty()) {
existingAtomIndices.insert(
existingAtomIndices.end(),
std::make_move_iterator(incomingAtomIndices.begin()),
std::make_move_iterator(incomingAtomIndices.end()));
}
if (!incomingBondIndices.empty()) {
existingBondIndices.insert(
existingBondIndices.end(),
std::make_move_iterator(existingBondIndices.begin()),
std::make_move_iterator(existingBondIndices.end()));
}
}

std::string RGroupData::getRGroupLabel(int rlabel) {
static const std::string RPREFIX = "R";
return RPREFIX + std::to_string(rlabel);
}

const std::string &RGroupData::getCoreLabel() {
static const std::string CORE = "Core";
return CORE;
}

const std::string &RGroupData::getMolLabel() {
static const std::string MOL = "Mol";
return MOL;
}

void RGroupData::add(const ROMOL_SPTR &newMol,
const std::vector<int> &rlabel_attachments) {
// some fragments can be added multiple times if they are cyclic
if (std::any_of(mols.begin(), mols.end(), [&newMol](const auto &mol) {
return newMol == mol;
})) {
if (std::any_of(mols.begin(), mols.end(),
[&newMol](const auto &mol) { return newMol == mol; })) {
return;
}

Expand Down Expand Up @@ -52,30 +102,18 @@ void RGroupData::add(const ROMOL_SPTR &newMol,
// MCS alignment is not used (NoAlign flag)
smilesVect.push_back(std::regex_replace(MolToSmiles(*newMol, true),
remove_isotopes_regex, "*"));
if (!combinedMol.get()) {
combinedMol = boost::shared_ptr<RWMol>(new RWMol(*mols[0].get()));
} else {
ROMol *m = combineMols(*combinedMol.get(), *newMol.get());
single_fragment = false;
m->updateProps(*combinedMol.get());
combinedMol.reset(new RWMol(*m));
delete m;
}
smiles = getSmiles();
combinedMol->setProp(common_properties::internalRgroupSmiles, smiles);
mergeIntoCombinedMol(newMol);
computeIsHydrogen();
is_linker = single_fragment && attachments.size() > 1;
}

std::map<int, int> RGroupData::getNumBondsToRlabels() const {
std::map<int, int> rlabelsUsedCount;

for (ROMol::AtomIterator atIt = combinedMol->beginAtoms();
atIt != combinedMol->endAtoms(); ++atIt) {
Atom *atom = *atIt;
for (const auto atom : combinedMol->atoms()) {
int rlabel;
if (atom->getPropIfPresent<int>(RLABEL, rlabel)) {
rlabelsUsedCount[rlabel] += 1;
++rlabelsUsedCount[rlabel];
}
}
return rlabelsUsedCount;
Expand All @@ -94,15 +132,16 @@ std::string RGroupData::toString() const {
}

void RGroupData::computeIsHydrogen() { // is the rgroup all Hs
is_hydrogen = std::all_of(mols.begin(), mols.end(), [this](const auto &mol) {
return isMolHydrogen(*mol);
is_hydrogen = std::all_of(mols.begin(), mols.end(), [](const auto &mol) {
return RGroupData::isMolHydrogen(*mol);
});
}

bool RGroupData::isMolHydrogen(const ROMol &mol) const {
bool RGroupData::isMolHydrogen(const ROMol &mol) {
auto atoms = mol.atoms();
return std::all_of(atoms.begin(), atoms.end(), [](const auto &atom) {
return (atom->getAtomicNum() == 1 || (atom->getAtomicNum() == 0 && atom->hasProp(SIDECHAIN_RLABELS)));
return (atom->getAtomicNum() == 1 ||
(atom->getAtomicNum() == 0 && atom->hasProp(SIDECHAIN_RLABELS)));
});
}

Expand Down
14 changes: 10 additions & 4 deletions Code/GraphMol/RGroupDecomposition/RGroupData.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ namespace RDKit {
//! A single rgroup attached to a given core.
struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupData {
RWMOL_SPTR combinedMol;
std::vector<ROMOL_SPTR> mols; // All the mols in the rgroup
std::vector<std::string> smilesVect; // used for rgroup equivalence
std::vector<ROMOL_SPTR> mols; // All the mols in the rgroup
std::vector<std::string> smilesVect; // used for rgroup equivalence
std::string
smiles; // smiles for all the mols in the rgroup (with attachments)
std::set<int> attachments; // core attachment points
Expand All @@ -43,15 +43,21 @@ struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupData {
std::map<int, int> getNumBondsToRlabels() const;

std::string toString() const;
static std::string getRGroupLabel(int rlabel);
static const std::string &getCoreLabel();
static const std::string &getMolLabel();
static bool isMolHydrogen(const ROMol &mol);

private:
void computeIsHydrogen();

bool isMolHydrogen(const ROMol &mol) const;

//! compute the canonical smiles for the attachments (bug: removes dupes since
//! we are using a set...)
std::string getSmiles() const;
//! merge mol into combinedMol, including atom and bond highlights if present
void mergeIntoCombinedMol(const ROMOL_SPTR &mol);
std::map<int, std::vector<int>> rlabelAtomIndicesMap;
std::map<int, std::vector<int>> rlabelBondIndicesMap;
};
} // namespace RDKit

Expand Down