Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No coords atropisomers - fix smiles output of atrop wedges after reordering #7418

Merged
merged 17 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
330 changes: 292 additions & 38 deletions Code/GraphMol/Atropisomers.cpp

Large diffs are not rendered by default.

107 changes: 107 additions & 0 deletions Code/GraphMol/CIPLabeler/catch_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/MarvinParse/MarvinParser.h>
#include <GraphMol/test_fixtures.h>
#include <GraphMol/Substruct/SubstructMatch.h>

#include "CIPLabeler.h"
#include "Digraph.h"
Expand Down Expand Up @@ -840,3 +841,109 @@ TEST_CASE("upsertTest", "[basic]") {
CHECK(smilesOut != "");
}
}

TEST_CASE("atropisomers", "[basic]") {
SECTION("atropisomers1") {
UseLegacyStereoPerceptionFixture useLegacy(false);

std::string rdbase = getenv("RDBASE");

std::vector<std::string> files = {
"BMS-986142_atrop5.sdf", "BMS-986142_atrop1.sdf",
"BMS-986142_atrop2.sdf", "JDQ443_atrop1.sdf",
"JDQ443_atrop2.sdf", "Mrtx1719_atrop1.sdf",
"Mrtx1719_atrop2.sdf", "RP-6306_atrop1.sdf",
"RP-6306_atrop2.sdf", "Sotorasib_atrop1.sdf",
"Sotorasib_atrop2.sdf", "ZM374979_atrop1.sdf",
"ZM374979_atrop2.sdf"};

for (auto file : files) {
std::cout << "File: " << file << std::endl;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't include output like this in the tests, it makes the running the tests really noisy and doesn't add anything. If there's an informative message that you want to output when a test fails, you can do that with catch2's INFO() macro.

I know that I approved several older PRs that also have a large amount of output, but I plan to do a PR to clean that up in the not-too-distant future.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You fixed that one example, but there are a number of other outputs to std::cout in that file and in the other tests added for this PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I got them all - probably one I didn't put in.

auto fName =
rdbase + "/Code/GraphMol/FileParsers/test_data/atropisomers/" + file;

auto molsdf = MolFileToMol(fName, true, true, true);
REQUIRE(molsdf);
CIPLabeler::assignCIPLabels(*molsdf, 100000);

std::map<std::pair<unsigned int, unsigned int>, std::string> CIPVals;
for (auto bond : molsdf->bonds()) {
auto a1 = bond->getBeginAtomIdx();
auto a2 = bond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}

if (bond->hasProp(common_properties::_CIPCode)) {
CIPVals[std::make_pair(a1, a2)] =
bond->getProp<std::string>(common_properties::_CIPCode);
} else {
CIPVals[std::make_pair(a1, a2)] = "N/A";
}
}

molsdf->clearConformers();

SmilesWriteParams wp;
wp.canonical = false;
wp.doKekule = false;
unsigned int flags =
RDKit::SmilesWrite::CXSmilesFields::CX_MOLFILE_VALUES |
RDKit::SmilesWrite::CXSmilesFields::CX_ATOM_PROPS |
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_CFG |
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER;
SmilesParserParams pp;
pp.allowCXSMILES = true;
pp.sanitize = true;

auto smi = MolToCXSmiles(*molsdf, wp, flags);
auto newMol = SmilesToMol(smi, pp);
CIPLabeler::assignCIPLabels(*newMol, 100000);

std::map<std::pair<unsigned int, unsigned int>, std::string> newCIPVals;
for (auto bond : newMol->bonds()) {
auto a1 = bond->getBeginAtomIdx();
auto a2 = bond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}
if (bond->hasProp(common_properties::_CIPCode)) {
newCIPVals[std::make_pair(a1, a2)] =
bond->getProp<std::string>(common_properties::_CIPCode);
} else {
newCIPVals[std::make_pair(a1, a2)] = "N/A";
}
}

RDKit::SubstructMatchParameters params;

auto match = RDKit::SubstructMatch(*molsdf, *newMol, params);

for (auto thisBond : newMol->bonds()) {
unsigned int a1 = thisBond->getBeginAtomIdx();
unsigned int a2 = thisBond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}

unsigned int a1match = match[0][a1].second;
unsigned int a2match = match[0][a2].second;

if (a1match > a2match) {
std::swap(a1match, a2match);
}
if (CIPVals[std::make_pair(a1match, a2match)] !=
newCIPVals[std::make_pair(a1, a2)]) {
std::cout << "Bond " << thisBond->getBeginAtomIdx() << " - "
<< thisBond->getEndAtomIdx() << " has changed from "
<< CIPVals[std::make_pair(a1match, a2match)] << " to "
<< newCIPVals[std::make_pair(a1, a2)] << std::endl;
}
CHECK(CIPVals[std::make_pair(a1match, a2match)] ==
newCIPVals[std::make_pair(a1, a2)]);
}

std::cout << "Done" << std::endl;
}
}
}
8 changes: 6 additions & 2 deletions Code/GraphMol/Chirality.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,12 @@ RDKIT_GRAPHMOL_EXPORT void setStereoForBond(ROMol &mol, Bond *bond,
/// returns a map from bond idx -> controlling atom idx
RDKIT_GRAPHMOL_EXPORT
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
const ROMol &mol, const BondWedgingParameters *params = nullptr,
const Conformer *conf = nullptr);
const ROMol &mol, const BondWedgingParameters *params = nullptr);

RDKIT_GRAPHMOL_EXPORT
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
const ROMol &mol, const BondWedgingParameters *params,
const Conformer *conf);

RDKIT_GRAPHMOL_EXPORT void wedgeMolBonds(
ROMol &mol, const Conformer *conf = nullptr,
Expand Down
2 changes: 2 additions & 0 deletions Code/GraphMol/FileParsers/CDXMLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,8 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLDataStream(

Atropisomers::detectAtropisomerChirality(
*res, &res->getConformer(confidx));
} else { // no Conformer
Atropisomers::detectAtropisomerChirality(*res, nullptr);
}

// now that atom stereochem has been perceived, the wedging
Expand Down
2 changes: 1 addition & 1 deletion Code/GraphMol/FileParsers/MolFileStereochem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ void invertMolBlockWedgingInfo(ROMol &mol) {
}

void markUnspecifiedStereoAsUnknown(ROMol &mol, int confId) {
auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol);
const auto conf = mol.getConformer(confId);
auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol, nullptr, &conf);
for (auto b : mol.bonds()) {
if (b->getBondType() == Bond::DOUBLE) {
int dirCode;
Expand Down
26 changes: 13 additions & 13 deletions Code/GraphMol/FileParsers/file_parsers_catch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <fstream>
#include <string>
#include <sstream>
#include <string_view>
// #include <string_view>
#include <streambuf>

#include "RDGeneral/test.h"
Expand Down Expand Up @@ -6010,10 +6010,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto bondBlockStart = mae.find("m_bond[6]");
REQUIRE(bondBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string_view bondBlock(&mae[bondBlockStart]);
std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string bondBlock(&mae[bondBlockStart]);

// Check mol properties
CHECK(ctBlock.find("s_m_title") != std::string::npos);
Expand Down Expand Up @@ -6142,10 +6142,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto bondBlockStart = mae.find("m_bond[6]");
REQUIRE(bondBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string_view bondBlock(&mae[bondBlockStart]);
std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string bondBlock(&mae[bondBlockStart]);

// Check mol properties
CHECK(ctBlock.find("s_m_title") != std::string::npos);
Expand Down Expand Up @@ -6227,7 +6227,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
// Maeparser does not parse bond properties, so don't check them.
}
SECTION("Check reverse roundtrip") {
constexpr std::string_view maeBlock = R"MAE(f_m_ct {
std::string maeBlock = R"MAE(f_m_ct {
s_m_title
:::
""
Expand Down Expand Up @@ -6317,7 +6317,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto ctBlockStart = mae.find("f_m_ct");
REQUIRE(ctBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart]);
std::string ctBlock(&mae[ctBlockStart]);

CHECK(ctBlock == MaeWriter::getText(*mol));
}
Expand Down Expand Up @@ -6585,8 +6585,8 @@ TEST_CASE(
auto bondBlockStart = mae.find("m_bond[2]");
REQUIRE(bondBlockStart != std::string::npos);

const std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
const std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);

CHECK(atomBlock.find(" -2 ") != std::string::npos);
}
Expand Down
9 changes: 6 additions & 3 deletions Code/GraphMol/MarvinParse/MarvinParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,9 +611,12 @@ class MarvinCMLReader {
MolOps::assignChiralTypesFrom3D(*mol, conf3d->getId(), true);
}

if (conf || conf3d) {
Atropisomers::detectAtropisomerChirality(
*mol, conf != nullptr ? conf : conf3d);
if (conf) {
Atropisomers::detectAtropisomerChirality(*mol, conf);
} else if (conf3d) {
Atropisomers::detectAtropisomerChirality(*mol, conf3d);
} else {
Atropisomers::detectAtropisomerChirality(*mol, nullptr);
}

ClearSingleBondDirFlags(*mol);
Expand Down
108 changes: 93 additions & 15 deletions Code/GraphMol/SmilesParse/CXSmilesOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,9 @@ bool parse_wedged_bonds(Iterator &first, Iterator last, RDKit::RWMol &mol,
if (atom->getIdx() != bond->getEndAtomIdx()) {
BOOST_LOG(rdWarningLog)
<< "atom " << atomIdx << " is not associated with bond "
<< bondIdx << " in w block" << std::endl;
<< bondIdx << "(" << bond->getBeginAtomIdx() + startAtomIdx << "-"
<< bond->getEndAtomIdx() + startAtomIdx << ")"
<< " in w block" << std::endl;
return false;
}
auto eidx = bond->getBeginAtomIdx();
Expand Down Expand Up @@ -2038,23 +2040,97 @@ std::string get_bond_config_block(
bd = Bond::BondDir::NONE;
}

if (atropisomerOnly) {
// on of the bonds on the beging atom of this bond must be an atropisomer
if (atropisomerOnly && bd == Bond::BondDir::NONE) {
continue;
}

if (bd == Bond::BondDir::NONE) {
continue;
}
bool foundAtropisomer = false;
// see if this one is an atropisomer

bool isAnAtropisomer = false;

const Atom *firstAtom = bond->getBeginAtom();
const Atom *firstAtom = bond->getBeginAtom();
if (bd == Bond::BondDir::BEGINDASH || bd == Bond::BondDir::BEGINWEDGE) {
for (auto bondNbr : mol.atomBonds(firstAtom)) {
if (bondNbr->getIdx() == bond->getIdx()) {
continue; // a bond is not its own neighbor
}
if (bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCW ||
bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCCW) {
foundAtropisomer = true;
isAnAtropisomer = true;

// if it is for an atropisomer and there are no coords, check to see
// if the wedge needs to be flipped based on the smiles reordering
if (!coordsIncluded && isAnAtropisomer) {
Atropisomers::AtropAtomAndBondVec atomAndBondVecs[2];
if (!Atropisomers::getAtropisomerAtomsAndBonds(
bondNbr, atomAndBondVecs, mol)) {
throw RDKit::SmilesParseException(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be a SmilesParseException since we aren't parsing SMILES at this point.
This should either be a ValueError if it's something which could happen but shouldn't or handled using the RDKit's CHECK_INVARIANT() if this only happens in the case of an RDKit bug.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed to value error

"Internal error - should not occur");
// should not happend
} else {
unsigned int swaps = 0;

unsigned int firstReorderedIdx =
std::find(atomOrder.begin(), atomOrder.end(),
bondNbr->getBeginAtom()->getIdx()) -
atomOrder.begin();
unsigned int secondReorderedIdx =
std::find(atomOrder.begin(), atomOrder.end(),
bondNbr->getEndAtom()->getIdx()) -
atomOrder.begin();
if (firstReorderedIdx > secondReorderedIdx) {
++swaps;
}

for (unsigned int bondAtomIndex = 0; bondAtomIndex < 2;
++bondAtomIndex) {
if (atomAndBondVecs[bondAtomIndex].first == firstAtom)
continue; // swapped atoms on the side where the wedge bond
// is does NOT change the wedge bond
if (atomAndBondVecs[bondAtomIndex].second.size() == 2) {
unsigned int firstOtherAtomIdx =
atomAndBondVecs[bondAtomIndex]
.second[0]
->getOtherAtom(atomAndBondVecs[bondAtomIndex].first)
->getIdx();
unsigned int secondOtherAtomIdx =
atomAndBondVecs[bondAtomIndex]
.second[1]
->getOtherAtom(atomAndBondVecs[bondAtomIndex].first)
->getIdx();

unsigned int firstReorderedAtomIdx =
std::find(atomOrder.begin(), atomOrder.end(),
firstOtherAtomIdx) -
atomOrder.begin();
unsigned int secondReorderedAtomIdx =
std::find(atomOrder.begin(), atomOrder.end(),
secondOtherAtomIdx) -
atomOrder.begin();

if (firstReorderedAtomIdx > secondReorderedAtomIdx) {
++swaps;
}
}
}
if (swaps % 2) {
bd = (bd == Bond::BondDir::BEGINWEDGE)
? Bond::BondDir::BEGINDASH
: Bond::BondDir::BEGINWEDGE;
}
}
}

break;
}
}
if (!foundAtropisomer) {
}

if (atropisomerOnly) {
// one of the bonds on the beginning atom of this bond must be an
// atropisomer

if (!isAnAtropisomer) {
continue;
}
} else { // atropisomeronly is FALSE - check for a wedging caused by
Expand Down Expand Up @@ -2109,8 +2185,9 @@ std::string get_bond_config_block(
std::string wType = "";
if (bd == Bond::BondDir::UNKNOWN) {
wType = "w";
} else if (coordsIncluded) {
} else if (coordsIncluded || isAnAtropisomer) {
// we only do wedgeUp and wedgeDown if coordinates are being output
// or its an atropisomer
if (bd == Bond::BondDir::BEGINWEDGE) {
wType = "wU";
} else if (bd == Bond::BondDir::BEGINDASH) {
Expand Down Expand Up @@ -2354,12 +2431,13 @@ std::string getCXExtensions(const ROMol &mol, std::uint32_t flags) {
// do the CX_BOND_ATROPISOMER only if CX_BOND_CFG s not done. CX_BOND_CFG
// includes the atropisomer wedging
else if (flags & SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER) {
if (conf) {
Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds);
}

bool includeCoords = flags & SmilesWrite::CXSmilesFields::CX_COORDS &&
mol.getNumConformers();
if (includeCoords) {
Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds);
} else {
Atropisomers::wedgeBondsFromAtropisomers(mol, nullptr, wedgeBonds);
}
const auto cfgblock = get_bond_config_block(
mol, atomOrder, bondOrder, includeCoords, wedgeBonds, true);
appendToCXExtension(cfgblock, res);
Expand Down
9 changes: 7 additions & 2 deletions Code/GraphMol/SmilesParse/SmilesParse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,8 +458,13 @@ std::unique_ptr<RWMol> MolFromSmiles(const std::string &smiles,
res->updatePropertyCache(false);
MolOps::assignChiralTypesFrom3D(*res, conf3d->getId(), true);
}
if (conf || conf3d) {
Atropisomers::detectAtropisomerChirality(*res, conf ? conf : conf3d);

if (conf) {
Atropisomers::detectAtropisomerChirality(*res, conf);
} else if (conf3d) {
Atropisomers::detectAtropisomerChirality(*res, conf3d);
} else {
Atropisomers::detectAtropisomerChirality(*res, nullptr);
}

if (res && (params.sanitize || params.removeHs)) {
Expand Down