Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No coords atropisomers - fix smiles output of atrop wedges after reordering #7418

Merged
merged 17 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
361 changes: 319 additions & 42 deletions Code/GraphMol/Atropisomers.cpp

Large diffs are not rendered by default.

97 changes: 97 additions & 0 deletions Code/GraphMol/CIPLabeler/catch_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/MarvinParse/MarvinParser.h>
#include <GraphMol/test_fixtures.h>
#include <GraphMol/Substruct/SubstructMatch.h>

#include "CIPLabeler.h"
#include "Digraph.h"
Expand Down Expand Up @@ -840,3 +841,99 @@ TEST_CASE("upsertTest", "[basic]") {
CHECK(smilesOut != "");
}
}

TEST_CASE("atropisomers", "[basic]") {
SECTION("atropisomers1") {
UseLegacyStereoPerceptionFixture useLegacy(false);

std::string rdbase = getenv("RDBASE");

std::vector<std::string> files = {
"BMS-986142_atrop5.sdf", "BMS-986142_atrop1.sdf",
"BMS-986142_atrop2.sdf", "JDQ443_atrop1.sdf",
"JDQ443_atrop2.sdf", "Mrtx1719_atrop1.sdf",
"Mrtx1719_atrop2.sdf", "RP-6306_atrop1.sdf",
"RP-6306_atrop2.sdf", "Sotorasib_atrop1.sdf",
"Sotorasib_atrop2.sdf", "ZM374979_atrop1.sdf",
"ZM374979_atrop2.sdf"};

for (auto file : files) {
auto fName =
rdbase + "/Code/GraphMol/FileParsers/test_data/atropisomers/" + file;

auto molsdf = MolFileToMol(fName, true, true, true);
REQUIRE(molsdf);
CIPLabeler::assignCIPLabels(*molsdf, 100000);

std::map<std::pair<unsigned int, unsigned int>, std::string> CIPVals;
for (auto bond : molsdf->bonds()) {
auto a1 = bond->getBeginAtomIdx();
auto a2 = bond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}

if (bond->hasProp(common_properties::_CIPCode)) {
CIPVals[std::make_pair(a1, a2)] =
bond->getProp<std::string>(common_properties::_CIPCode);
} else {
CIPVals[std::make_pair(a1, a2)] = "N/A";
}
}

molsdf->clearConformers();

SmilesWriteParams wp;
wp.canonical = false;
wp.doKekule = false;
unsigned int flags =
RDKit::SmilesWrite::CXSmilesFields::CX_MOLFILE_VALUES |
RDKit::SmilesWrite::CXSmilesFields::CX_ATOM_PROPS |
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_CFG |
RDKit::SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER;
SmilesParserParams pp;
pp.allowCXSMILES = true;
pp.sanitize = true;

auto smi = MolToCXSmiles(*molsdf, wp, flags);
auto newMol = SmilesToMol(smi, pp);
CIPLabeler::assignCIPLabels(*newMol, 100000);

std::map<std::pair<unsigned int, unsigned int>, std::string> newCIPVals;
for (auto bond : newMol->bonds()) {
auto a1 = bond->getBeginAtomIdx();
auto a2 = bond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}
if (bond->hasProp(common_properties::_CIPCode)) {
newCIPVals[std::make_pair(a1, a2)] =
bond->getProp<std::string>(common_properties::_CIPCode);
} else {
newCIPVals[std::make_pair(a1, a2)] = "N/A";
}
}

RDKit::SubstructMatchParameters params;

auto match = RDKit::SubstructMatch(*molsdf, *newMol, params);

for (auto thisBond : newMol->bonds()) {
unsigned int a1 = thisBond->getBeginAtomIdx();
unsigned int a2 = thisBond->getEndAtomIdx();
if (a1 > a2) {
std::swap(a1, a2);
}

unsigned int a1match = match[0][a1].second;
unsigned int a2match = match[0][a2].second;

if (a1match > a2match) {
std::swap(a1match, a2match);
}
CHECK(CIPVals[std::make_pair(a1match, a2match)] ==
newCIPVals[std::make_pair(a1, a2)]);
}
}
}
}
8 changes: 6 additions & 2 deletions Code/GraphMol/Chirality.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,12 @@ RDKIT_GRAPHMOL_EXPORT void setStereoForBond(ROMol &mol, Bond *bond,
/// returns a map from bond idx -> controlling atom idx
RDKIT_GRAPHMOL_EXPORT
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
const ROMol &mol, const BondWedgingParameters *params = nullptr,
const Conformer *conf = nullptr);
const ROMol &mol, const BondWedgingParameters *params = nullptr);

RDKIT_GRAPHMOL_EXPORT
std::map<int, std::unique_ptr<Chirality::WedgeInfoBase>> pickBondsToWedge(
const ROMol &mol, const BondWedgingParameters *params,
const Conformer *conf);

RDKIT_GRAPHMOL_EXPORT void wedgeMolBonds(
ROMol &mol, const Conformer *conf = nullptr,
Expand Down
2 changes: 2 additions & 0 deletions Code/GraphMol/FileParsers/CDXMLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,8 @@ std::vector<std::unique_ptr<RWMol>> MolsFromCDXMLDataStream(

Atropisomers::detectAtropisomerChirality(
*res, &res->getConformer(confidx));
} else { // no Conformer
Atropisomers::detectAtropisomerChirality(*res, nullptr);
}

// now that atom stereochem has been perceived, the wedging
Expand Down
2 changes: 1 addition & 1 deletion Code/GraphMol/FileParsers/MolFileStereochem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ void invertMolBlockWedgingInfo(ROMol &mol) {
}

void markUnspecifiedStereoAsUnknown(ROMol &mol, int confId) {
auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol);
const auto conf = mol.getConformer(confId);
auto wedgeBonds = RDKit::Chirality::pickBondsToWedge(mol, nullptr, &conf);
for (auto b : mol.bonds()) {
if (b->getBondType() == Bond::DOUBLE) {
int dirCode;
Expand Down
30 changes: 15 additions & 15 deletions Code/GraphMol/FileParsers/file_parsers_catch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <fstream>
#include <string>
#include <sstream>
#include <string_view>
// #include <string_view>
#include <streambuf>

#include "RDGeneral/test.h"
Expand Down Expand Up @@ -1618,7 +1618,6 @@ H 0.635000 0.635000 0.635000
mol->addConformer(conf);

const std::string xyzblock = MolToXYZBlock(*mol, 0, 15);
std::cout << xyzblock << std::endl;
std::string xyzblock_expected = R"XYZ(7
CHEMBL506259
O 0.402012650000000 -0.132994360000000 1.000000170000000
Expand Down Expand Up @@ -5074,7 +5073,8 @@ M END
Chirality::reapplyMolBlockWedging(*m);
CHECK(m->getBondWithIdx(2)->getBondDir() == Bond::BondDir::NONE);
}
SECTION("Reapply the original wedging, regardless the bond type of wedged bonds") {
SECTION(
"Reapply the original wedging, regardless the bond type of wedged bonds") {
auto m = R"CTAB(
Mrv2311 04232413302D

Expand Down Expand Up @@ -6041,10 +6041,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto bondBlockStart = mae.find("m_bond[6]");
REQUIRE(bondBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string_view bondBlock(&mae[bondBlockStart]);
std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string bondBlock(&mae[bondBlockStart]);

// Check mol properties
CHECK(ctBlock.find("s_m_title") != std::string::npos);
Expand Down Expand Up @@ -6173,10 +6173,10 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto bondBlockStart = mae.find("m_bond[6]");
REQUIRE(bondBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string_view bondBlock(&mae[bondBlockStart]);
std::string ctBlock(&mae[ctBlockStart], atomBlockStart - ctBlockStart);
std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
std::string bondBlock(&mae[bondBlockStart]);

// Check mol properties
CHECK(ctBlock.find("s_m_title") != std::string::npos);
Expand Down Expand Up @@ -6258,7 +6258,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
// Maeparser does not parse bond properties, so don't check them.
}
SECTION("Check reverse roundtrip") {
constexpr std::string_view maeBlock = R"MAE(f_m_ct {
std::string maeBlock = R"MAE(f_m_ct {
s_m_title
:::
""
Expand Down Expand Up @@ -6348,7 +6348,7 @@ TEST_CASE("MaeWriter basic testing", "[mae][MaeWriter][writer]") {
auto ctBlockStart = mae.find("f_m_ct");
REQUIRE(ctBlockStart != std::string::npos);

std::string_view ctBlock(&mae[ctBlockStart]);
std::string ctBlock(&mae[ctBlockStart]);

CHECK(ctBlock == MaeWriter::getText(*mol));
}
Expand Down Expand Up @@ -6616,8 +6616,8 @@ TEST_CASE(
auto bondBlockStart = mae.find("m_bond[2]");
REQUIRE(bondBlockStart != std::string::npos);

const std::string_view atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);
const std::string atomBlock(&mae[atomBlockStart],
bondBlockStart - atomBlockStart);

CHECK(atomBlock.find(" -2 ") != std::string::npos);
}
Expand Down
9 changes: 6 additions & 3 deletions Code/GraphMol/MarvinParse/MarvinParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,9 +611,12 @@ class MarvinCMLReader {
MolOps::assignChiralTypesFrom3D(*mol, conf3d->getId(), true);
}

if (conf || conf3d) {
Atropisomers::detectAtropisomerChirality(
*mol, conf != nullptr ? conf : conf3d);
if (conf) {
Atropisomers::detectAtropisomerChirality(*mol, conf);
} else if (conf3d) {
Atropisomers::detectAtropisomerChirality(*mol, conf3d);
} else {
Atropisomers::detectAtropisomerChirality(*mol, nullptr);
}

ClearSingleBondDirFlags(*mol);
Expand Down
107 changes: 92 additions & 15 deletions Code/GraphMol/SmilesParse/CXSmilesOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,9 @@ bool parse_wedged_bonds(Iterator &first, Iterator last, RDKit::RWMol &mol,
if (atom->getIdx() != bond->getEndAtomIdx()) {
BOOST_LOG(rdWarningLog)
<< "atom " << atomIdx << " is not associated with bond "
<< bondIdx << " in w block" << std::endl;
<< bondIdx << "(" << bond->getBeginAtomIdx() + startAtomIdx << "-"
<< bond->getEndAtomIdx() + startAtomIdx << ")"
<< " in w block" << std::endl;
return false;
}
auto eidx = bond->getBeginAtomIdx();
Expand Down Expand Up @@ -2038,23 +2040,96 @@ std::string get_bond_config_block(
bd = Bond::BondDir::NONE;
}

if (atropisomerOnly) {
// on of the bonds on the beging atom of this bond must be an atropisomer
if (atropisomerOnly && bd == Bond::BondDir::NONE) {
continue;
}

if (bd == Bond::BondDir::NONE) {
continue;
}
bool foundAtropisomer = false;
// see if this one is an atropisomer

bool isAnAtropisomer = false;

const Atom *firstAtom = bond->getBeginAtom();
const Atom *firstAtom = bond->getBeginAtom();
if (bd == Bond::BondDir::BEGINDASH || bd == Bond::BondDir::BEGINWEDGE) {
for (auto bondNbr : mol.atomBonds(firstAtom)) {
if (bondNbr->getIdx() == bond->getIdx()) {
continue; // a bond is not its own neighbor
}
if (bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCW ||
bondNbr->getStereo() == Bond::BondStereo::STEREOATROPCCW) {
foundAtropisomer = true;
isAnAtropisomer = true;

// if it is for an atropisomer and there are no coords, check to see
// if the wedge needs to be flipped based on the smiles reordering
if (!coordsIncluded && isAnAtropisomer) {
Atropisomers::AtropAtomAndBondVec atomAndBondVecs[2];
if (!Atropisomers::getAtropisomerAtomsAndBonds(
bondNbr, atomAndBondVecs, mol)) {
throw ValueErrorException("Internal error - should not occur");
// should not happend
} else {
unsigned int swaps = 0;

unsigned int firstReorderedIdx =
std::find(atomOrder.begin(), atomOrder.end(),
bondNbr->getBeginAtom()->getIdx()) -
atomOrder.begin();
unsigned int secondReorderedIdx =
std::find(atomOrder.begin(), atomOrder.end(),
bondNbr->getEndAtom()->getIdx()) -
atomOrder.begin();
if (firstReorderedIdx > secondReorderedIdx) {
++swaps;
}

for (unsigned int bondAtomIndex = 0; bondAtomIndex < 2;
++bondAtomIndex) {
if (atomAndBondVecs[bondAtomIndex].first == firstAtom)
continue; // swapped atoms on the side where the wedge bond
// is does NOT change the wedge bond
if (atomAndBondVecs[bondAtomIndex].second.size() == 2) {
unsigned int firstOtherAtomIdx =
atomAndBondVecs[bondAtomIndex]
.second[0]
->getOtherAtom(atomAndBondVecs[bondAtomIndex].first)
->getIdx();
unsigned int secondOtherAtomIdx =
atomAndBondVecs[bondAtomIndex]
.second[1]
->getOtherAtom(atomAndBondVecs[bondAtomIndex].first)
->getIdx();

unsigned int firstReorderedAtomIdx =
std::find(atomOrder.begin(), atomOrder.end(),
firstOtherAtomIdx) -
atomOrder.begin();
unsigned int secondReorderedAtomIdx =
std::find(atomOrder.begin(), atomOrder.end(),
secondOtherAtomIdx) -
atomOrder.begin();

if (firstReorderedAtomIdx > secondReorderedAtomIdx) {
++swaps;
}
}
}
if (swaps % 2) {
bd = (bd == Bond::BondDir::BEGINWEDGE)
? Bond::BondDir::BEGINDASH
: Bond::BondDir::BEGINWEDGE;
}
}
}

break;
}
}
if (!foundAtropisomer) {
}

if (atropisomerOnly) {
// one of the bonds on the beginning atom of this bond must be an
// atropisomer

if (!isAnAtropisomer) {
continue;
}
} else { // atropisomeronly is FALSE - check for a wedging caused by
Expand Down Expand Up @@ -2109,8 +2184,9 @@ std::string get_bond_config_block(
std::string wType = "";
if (bd == Bond::BondDir::UNKNOWN) {
wType = "w";
} else if (coordsIncluded) {
} else if (coordsIncluded || isAnAtropisomer) {
// we only do wedgeUp and wedgeDown if coordinates are being output
// or its an atropisomer
if (bd == Bond::BondDir::BEGINWEDGE) {
wType = "wU";
} else if (bd == Bond::BondDir::BEGINDASH) {
Expand Down Expand Up @@ -2354,12 +2430,13 @@ std::string getCXExtensions(const ROMol &mol, std::uint32_t flags) {
// do the CX_BOND_ATROPISOMER only if CX_BOND_CFG s not done. CX_BOND_CFG
// includes the atropisomer wedging
else if (flags & SmilesWrite::CXSmilesFields::CX_BOND_ATROPISOMER) {
if (conf) {
Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds);
}

bool includeCoords = flags & SmilesWrite::CXSmilesFields::CX_COORDS &&
mol.getNumConformers();
if (includeCoords) {
Atropisomers::wedgeBondsFromAtropisomers(mol, conf, wedgeBonds);
} else {
Atropisomers::wedgeBondsFromAtropisomers(mol, nullptr, wedgeBonds);
}
const auto cfgblock = get_bond_config_block(
mol, atomOrder, bondOrder, includeCoords, wedgeBonds, true);
appendToCXExtension(cfgblock, res);
Expand Down