Skip to content

Commit

Permalink
No coords atropisomers - fix smiles output of atrop wedges after reor…
Browse files Browse the repository at this point in the history
…dering (#7418)

* removed string_view in favor of string for catch test

* add parsing and generation of atropisomers when coords not present

* changed string_view to string in catch test

* more docs

* reformulation of the docs

* make an error message a little bit more useful

* small optimization
clang-format

* add `BondWedgingParameters` to new function

* changes for CIP test errors

* Updated internal doc to match what it does

* changes per PR review

* removed cout statements in tests

---------

Co-authored-by: Greg Landrum <greg.landrum@gmail.com>
  • Loading branch information
tadhurst-cdd and greglandrum committed May 7, 2024
1 parent f48c874 commit a2b149a
Show file tree
Hide file tree
Showing 20 changed files with 700 additions and 101 deletions.
361 changes: 319 additions & 42 deletions Code/GraphMol/Atropisomers.cpp

Large diffs are not rendered by default.

97 changes: 97 additions & 0 deletions Code/GraphMol/CIPLabeler/catch_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/MarvinParse/MarvinParser.h>
#include <GraphMol/test_fixtures.h>
#include <GraphMol/Substruct/SubstructMatch.h>

#include "CIPLabeler.h"
#include "Digraph.h"
Expand Down Expand Up @@ -840,3 +841,99 @@ TEST_CASE("upsertTest", "[basic]") {
CHECK(smilesOut != "");
}
}

TEST_CASE("atropisomers", "[basic]") {
SECTION("atropisomers1") {
UseLegacyStereoPerceptionFixture useLegacy(false);

std::string rdbase = getenv("RDBASE");

std::vector<std::string> files = {
"BMS-986142_atrop5.sdf", "BMS-986142_atrop1.sdf",
"BMS-986142_atrop2.sdf", "JDQ443_atrop1.sdf",
"JDQ443_atrop2.sdf", "Mrtx1719_atrop1.sdf",
"Mrtx1719_atrop2.sdf", "RP-6306_atrop1.sdf",
"RP-6306_atrop2.sdf", "Sotorasib_atrop1.sdf",
"Sotorasib_atrop2.sdf", "ZM374979_atrop1.sdf",
"ZM374979_atrop2.sdf"};

for (auto file : files) {
auto fName =
rdbase + "/Code/GraphMol/FileParsers/test_data/atropisomers/" + file;

auto molsdf = MolFileToMol(fName, true, true, true);
REQUIRE(molsdf);
CIPLabeler::assignCIPLabels(*molsdf, 100000);

std::map<std::pair<unsigned int, unsigned int>, std::string> CIPVals;
for (auto bond : molsdf->bonds()) {
auto a1 = bond->getBeginAtomIdx();
auto a2 = bond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}

if (bond->hasProp(common_properties::_CIPCode)) {
CIPVals[std::make_pair(a1, a2)] =
bond->getProp<std::string>(common_properties::_CIPCode);
} else {
CIPVals[std::make_pair(a1, a2)] = "N/A";
}
}

molsdf->clearConformers();

SmilesWriteParams wp;
wp.canonical = false;
wp.doKekule = false;
unsigned int flags =
RDKit::SmilesWrite::CXSmilesFields::CX_MOLFILE_VALUES |
RDKit::SmilesWrite::CXSmilesFields::CX_ATOM_PROPS |
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_CFG |
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER;
SmilesParserParams pp;
pp.allowCXSMILES = true;
pp.sanitize = true;

auto smi = MolToCXSmiles(*molsdf, wp, flags);
auto newMol = SmilesToMol(smi, pp);
CIPLabeler::assignCIPLabels(*newMol, 100000);

std::map<std::pair<unsigned int, unsigned int>, std::string> newCIPVals;
for (auto bond : newMol->bonds()) {
auto a1 = bond->getBeginAtomIdx();
auto a2 = bond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}
if (bond->hasProp(common_properties::_CIPCode)) {
newCIPVals[std::make_pair(a1, a2)] =
bond->getProp<std::string>(common_properties::_CIPCode);
} else {
newCIPVals[std::make_pair(a1, a2)] = "N/A";
}
}

RDKit::SubstructMatchParameters params;

auto match = RDKit::SubstructMatch(*molsdf, *newMol, params);

for (auto thisBond : newMol->bonds()) {
unsigned int a1 = thisBond->getBeginAtomIdx();
unsigned int a2 = thisBond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}

unsigned int a1match = match[0][a1].second;
unsigned int a2match = match[0][a2].second;

if (a1match > a2match) {
std::swap(a1match, a2match);
}
CHECK(CIPVals[std::make_pair(a1match, a2match)] ==
newCIPVals[std::make_pair(a1, a2)]);
}
}
}
}
8 changes: 6 additions & 2 deletions Code/GraphMol/Chirality.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,12 @@ RDKIT_GRAPHMOL_EXPORT void setStereoForBond(ROMol &mol, Bond *bond,
/// returns a map from bond idx -> controlling atom idx
RDKIT_GRAPHMOL_EXPORT
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
const ROMol &mol, const BondWedgingParameters *params = nullptr,
const Conformer *conf = nullptr);
const ROMol &mol, const BondWedgingParameters *params = nullptr);

RDKIT_GRAPHMOL_EXPORT
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
const ROMol &mol, const BondWedgingParameters *params,
const Conformer *conf);

RDKIT_GRAPHMOL_EXPORT void wedgeMolBonds(
ROMol &mol, const Conformer *conf = nullptr,
Expand Down
2 changes: 2 additions & 0 deletions Code/GraphMol/FileParsers/CDXMLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,8 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLDataStream(

Atropisomers::detectAtropisomerChirality(
*res, &res->getConformer(confidx));
} else { // no Conformer
Atropisomers::detectAtropisomerChirality(*res, nullptr);
}

// now that atom stereochem has been perceived, the wedging
Expand Down
2 changes: 1 addition & 1 deletion Code/GraphMol/FileParsers/MolFileStereochem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ void invertMolBlockWedgingInfo(ROMol &mol) {
}

void markUnspecifiedStereoAsUnknown(ROMol &mol, int confId) {
auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol);
const auto conf = mol.getConformer(confId);
auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol, nullptr, &conf);
for (auto b : mol.bonds()) {
if (b->getBondType() == Bond::DOUBLE) {
int dirCode;
Expand Down
30 changes: 15 additions & 15 deletions Code/GraphMol/FileParsers/file_parsers_catch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <fstream>
#include <string>
#include <sstream>
#include <string_view>
// #include <string_view>
#include <streambuf>

#include "RDGeneral/test.h"
Expand Down Expand Up @@ -1618,7 +1618,6 @@ H 0.635000 0.635000 0.635000
mol->addConformer(conf);

const std::string xyzblock = MolToXYZBlock(*mol, 0, 15);
std::cout << xyzblock << std::endl;
std::string xyzblock_expected = R"XYZ(7
CHEMBL506259
O 0.402012650000000 -0.132994360000000 1.000000170000000
Expand Down Expand Up @@ -5074,7 +5073,8 @@ M END
Chirality::reapplyMolBlockWedging(*m);
CHECK(m->getBondWithIdx(2)->getBondDir() == Bond::BondDir::NONE);
}
SECTION("Reapply the original wedging, regardless the bond type of wedged bonds") {
SECTION(
"Reapply the original wedging, regardless the bond type of wedged bonds") {
auto m = R"CTAB(
Mrv2311 04232413302D
Expand Down Expand Up @@ -6041,10 +6041,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto bondBlockStart = mae.find("m_bond[6]");
REQUIRE(bondBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string_view bondBlock(&mae[bondBlockStart]);
std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string bondBlock(&mae[bondBlockStart]);

// Check mol properties
CHECK(ctBlock.find("s_m_title") != std::string::npos);
Expand Down Expand Up @@ -6173,10 +6173,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto bondBlockStart = mae.find("m_bond[6]");
REQUIRE(bondBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string_view bondBlock(&mae[bondBlockStart]);
std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string bondBlock(&mae[bondBlockStart]);

// Check mol properties
CHECK(ctBlock.find("s_m_title") != std::string::npos);
Expand Down Expand Up @@ -6258,7 +6258,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
// Maeparser does not parse bond properties, so don't check them.
}
SECTION("Check reverse roundtrip") {
constexpr std::string_view maeBlock = R"MAE(f_m_ct {
std::string maeBlock = R"MAE(f_m_ct {
s_m_title
:::
""
Expand Down Expand Up @@ -6348,7 +6348,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto ctBlockStart = mae.find("f_m_ct");
REQUIRE(ctBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart]);
std::string ctBlock(&mae[ctBlockStart]);

CHECK(ctBlock == MaeWriter::getText(*mol));
}
Expand Down Expand Up @@ -6616,8 +6616,8 @@ TEST_CASE(
auto bondBlockStart = mae.find("m_bond[2]");
REQUIRE(bondBlockStart != std::string::npos);

const std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
const std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);

CHECK(atomBlock.find(" -2 ") != std::string::npos);
}
Expand Down
9 changes: 6 additions & 3 deletions Code/GraphMol/MarvinParse/MarvinParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,9 +611,12 @@ class MarvinCMLReader {
MolOps::assignChiralTypesFrom3D(*mol, conf3d->getId(), true);
}

if (conf || conf3d) {
Atropisomers::detectAtropisomerChirality(
*mol, conf != nullptr ? conf : conf3d);
if (conf) {
Atropisomers::detectAtropisomerChirality(*mol, conf);
} else if (conf3d) {
Atropisomers::detectAtropisomerChirality(*mol, conf3d);
} else {
Atropisomers::detectAtropisomerChirality(*mol, nullptr);
}

ClearSingleBondDirFlags(*mol);
Expand Down
107 changes: 92 additions & 15 deletions Code/GraphMol/SmilesParse/CXSmilesOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,9 @@ bool parse_wedged_bonds(Iterator &first, Iterator last, RDKit::RWMol &mol,
if (atom->getIdx() != bond->getEndAtomIdx()) {
BOOST_LOG(rdWarningLog)
<< "atom " << atomIdx << " is not associated with bond "
<< bondIdx << " in w block" << std::endl;
<< bondIdx << "(" << bond->getBeginAtomIdx() + startAtomIdx << "-"
<< bond->getEndAtomIdx() + startAtomIdx << ")"
<< " in w block" << std::endl;
return false;
}
auto eidx = bond->getBeginAtomIdx();
Expand Down Expand Up @@ -2038,23 +2040,96 @@ std::string get_bond_config_block(
bd = Bond::BondDir::NONE;
}

if (atropisomerOnly) {
// on of the bonds on the beging atom of this bond must be an atropisomer
if (atropisomerOnly && bd == Bond::BondDir::NONE) {
continue;
}

if (bd == Bond::BondDir::NONE) {
continue;
}
bool foundAtropisomer = false;
// see if this one is an atropisomer

bool isAnAtropisomer = false;

const Atom *firstAtom = bond->getBeginAtom();
const Atom *firstAtom = bond->getBeginAtom();
if (bd == Bond::BondDir::BEGINDASH || bd == Bond::BondDir::BEGINWEDGE) {
for (auto bondNbr : mol.atomBonds(firstAtom)) {
if (bondNbr->getIdx() == bond->getIdx()) {
continue; // a bond is not its own neighbor
}
if (bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCW ||
bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCCW) {
foundAtropisomer = true;
isAnAtropisomer = true;

// if it is for an atropisomer and there are no coords, check to see
// if the wedge needs to be flipped based on the smiles reordering
if (!coordsIncluded && isAnAtropisomer) {
Atropisomers::AtropAtomAndBondVec atomAndBondVecs[2];
if (!Atropisomers::getAtropisomerAtomsAndBonds(
bondNbr, atomAndBondVecs, mol)) {
throw ValueErrorException("Internal error - should not occur");
// should not happend
} else {
unsigned int swaps = 0;

unsigned int firstReorderedIdx =
std::find(atomOrder.begin(), atomOrder.end(),
bondNbr->getBeginAtom()->getIdx()) -
atomOrder.begin();
unsigned int secondReorderedIdx =
std::find(atomOrder.begin(), atomOrder.end(),
bondNbr->getEndAtom()->getIdx()) -
atomOrder.begin();
if (firstReorderedIdx > secondReorderedIdx) {
++swaps;
}

for (unsigned int bondAtomIndex = 0; bondAtomIndex < 2;
++bondAtomIndex) {
if (atomAndBondVecs[bondAtomIndex].first == firstAtom)
continue; // swapped atoms on the side where the wedge bond
// is does NOT change the wedge bond
if (atomAndBondVecs[bondAtomIndex].second.size() == 2) {
unsigned int firstOtherAtomIdx =
atomAndBondVecs[bondAtomIndex]
.second[0]
->getOtherAtom(atomAndBondVecs[bondAtomIndex].first)
->getIdx();
unsigned int secondOtherAtomIdx =
atomAndBondVecs[bondAtomIndex]
.second[1]
->getOtherAtom(atomAndBondVecs[bondAtomIndex].first)
->getIdx();

unsigned int firstReorderedAtomIdx =
std::find(atomOrder.begin(), atomOrder.end(),
firstOtherAtomIdx) -
atomOrder.begin();
unsigned int secondReorderedAtomIdx =
std::find(atomOrder.begin(), atomOrder.end(),
secondOtherAtomIdx) -
atomOrder.begin();

if (firstReorderedAtomIdx > secondReorderedAtomIdx) {
++swaps;
}
}
}
if (swaps % 2) {
bd = (bd == Bond::BondDir::BEGINWEDGE)
? Bond::BondDir::BEGINDASH
: Bond::BondDir::BEGINWEDGE;
}
}
}

break;
}
}
if (!foundAtropisomer) {
}

if (atropisomerOnly) {
// one of the bonds on the beginning atom of this bond must be an
// atropisomer

if (!isAnAtropisomer) {
continue;
}
} else { // atropisomeronly is FALSE - check for a wedging caused by
Expand Down Expand Up @@ -2109,8 +2184,9 @@ std::string get_bond_config_block(
std::string wType = "";
if (bd == Bond::BondDir::UNKNOWN) {
wType = "w";
} else if (coordsIncluded) {
} else if (coordsIncluded || isAnAtropisomer) {
// we only do wedgeUp and wedgeDown if coordinates are being output
// or its an atropisomer
if (bd == Bond::BondDir::BEGINWEDGE) {
wType = "wU";
} else if (bd == Bond::BondDir::BEGINDASH) {
Expand Down Expand Up @@ -2354,12 +2430,13 @@ std::string getCXExtensions(const ROMol &mol, std::uint32_t flags) {
// do the CX_BOND_ATROPISOMER only if CX_BOND_CFG s not done. CX_BOND_CFG
// includes the atropisomer wedging
else if (flags & SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER) {
if (conf) {
Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds);
}

bool includeCoords = flags & SmilesWrite::CXSmilesFields::CX_COORDS &&
mol.getNumConformers();
if (includeCoords) {
Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds);
} else {
Atropisomers::wedgeBondsFromAtropisomers(mol, nullptr, wedgeBonds);
}
const auto cfgblock = get_bond_config_block(
mol, atomOrder, bondOrder, includeCoords, wedgeBonds, true);
appendToCXExtension(cfgblock, res);
Expand Down

0 comments on commit a2b149a

Please sign in to comment.