Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow disabling output of dative bonds to SMILES #7384

Merged
merged 10 commits into from
May 25, 2024
29 changes: 22 additions & 7 deletions Code/GraphMol/ChemReactions/ReactionParser.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright (c) 2007-2022, Novartis Institutes for BioMedical Research Inc.
// Copyright (c) 2007-2024, Novartis Institutes for BioMedical Research Inc.
// and other RDKit contributors
//
// All rights reserved.
Expand Down Expand Up @@ -46,6 +46,7 @@
#include <RDGeneral/BadFileException.h>
#include <RDGeneral/FileParseException.h>
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>

namespace RDKit {
class ROMol;
Expand Down Expand Up @@ -128,11 +129,25 @@ inline ChemicalReaction *RxnSmartsToChemicalReaction(
} // namespace v1
//! returns the reaction SMARTS for a reaction
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToRxnSmarts(
const ChemicalReaction &rxn);
const ChemicalReaction &rxn, const SmilesWriteParams &params);
//! \overload
inline std::string ChemicalReactionToRxnSmarts(const ChemicalReaction &rxn) {
SmilesWriteParams params;
params.canonical = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to take a while to go through; can you give me the nickel tour? Why do we need canonical=False, for instance?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it's SMARTS, and we can't do sensible canonicalization with SMARTS.

return ChemicalReactionToRxnSmarts(rxn, params);
}

//! returns the reaction SMILES for a reaction
RDKIT_CHEMREACTIONS_EXPORT std::string ChemicalReactionToRxnSmiles(
const ChemicalReaction &rxn, bool canonical = true);
const ChemicalReaction &rxn,
const SmilesWriteParams &params = SmilesWriteParams());
//! \overload
inline std::string ChemicalReactionToRxnSmiles(const ChemicalReaction &rxn,
bool canonical) {
SmilesWriteParams params;
params.canonical = canonical;
return ChemicalReactionToRxnSmiles(rxn, params);
}
//! @}

//---------------------------------------------------------------------------
Expand Down Expand Up @@ -256,8 +271,8 @@ namespace ReactionParser {
//! \brief constructs a ChemicalReaction from the metadata in a PNG stream
/*!

Looks through the metadata in the PNG to find the first tag that matches one of
the tags in \c RDKit::PNGData. A reaction is constructed from this chunk.
Looks through the metadata in the PNG to find the first tag that matches one
of the tags in \c RDKit::PNGData. A reaction is constructed from this chunk.

Throws a \c FileParseException if no suitable tag is found.

Expand Down Expand Up @@ -288,8 +303,8 @@ inline namespace v1 {
//! \brief constructs a ChemicalReaction from the metadata in a PNG stream
/*!

Looks through the metadata in the PNG to find the first tag that matches one of
the tags in \c RDKit::PNGData. A reaction is constructed from this chunk.
Looks through the metadata in the PNG to find the first tag that matches one
of the tags in \c RDKit::PNGData. A reaction is constructed from this chunk.

Throws a \c FileParseException if no suitable tag is found.

Expand Down
41 changes: 21 additions & 20 deletions Code/GraphMol/ChemReactions/ReactionWriter.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright (c) 2010-2022, Novartis Institutes for BioMedical Research Inc.
// Copyright (c) 2010-2024, Novartis Institutes for BioMedical Research Inc.
// and other RDKit contributors
//
// All rights reserved.
Expand Down Expand Up @@ -38,7 +38,6 @@
#include <GraphMol/FileParsers/FileParsers.h>
#include <GraphMol/FileParsers/FileParserUtils.h>
#include <GraphMol/SmilesParse/SmartsWrite.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/Chirality.h>
#include <GraphMol/FileParsers/MolFileStereochem.h>
Expand All @@ -55,12 +54,13 @@ void setRXNRoleOfAllMoleculeAtoms(RDKit::ROMol &mol, int role) {
}
}

std::string molToString(RDKit::ROMol &mol, bool toSmiles) {
std::string molToString(RDKit::ROMol &mol, bool toSmiles,
const RDKit::SmilesWriteParams &params) {
std::string res = "";
if (toSmiles) {
res = MolToSmiles(mol, true);
res = MolToSmiles(mol, params);
} else {
res = MolToSmarts(mol, true);
res = MolToSmarts(mol, params);
}
std::vector<int> mapping;
if (RDKit::MolOps::getMolFrags(mol, mapping) > 1) {
Expand All @@ -71,15 +71,15 @@ std::string molToString(RDKit::ROMol &mol, bool toSmiles) {

std::string chemicalReactionTemplatesToString(
const RDKit::ChemicalReaction &rxn, RDKit::ReactionMoleculeType type,
bool toSmiles, bool canonical) {
bool toSmiles, const RDKit::SmilesWriteParams &params) {
std::string res = "";
std::vector<std::string> vfragsmi;
auto begin = getStartIterator(rxn, type);
auto end = getEndIterator(rxn, type);
for (; begin != end; ++begin) {
vfragsmi.push_back(molToString(**begin, toSmiles));
vfragsmi.push_back(molToString(**begin, toSmiles, params));
}
if (canonical) {
if (params.canonical) {
std::sort(vfragsmi.begin(), vfragsmi.end());
}
for (unsigned i = 0; i < vfragsmi.size(); ++i) {
Expand All @@ -91,17 +91,17 @@ std::string chemicalReactionTemplatesToString(
return res;
}

std::string chemicalReactionToRxnToString(const RDKit::ChemicalReaction &rxn,
bool toSmiles, bool canonical) {
std::string chemicalReactionToRxnToString(
const RDKit::ChemicalReaction &rxn, bool toSmiles,
const RDKit::SmilesWriteParams &params) {
std::string res = "";
res += chemicalReactionTemplatesToString(rxn, RDKit::Reactant, toSmiles,
canonical);
res += ">";
res +=
chemicalReactionTemplatesToString(rxn, RDKit::Agent, toSmiles, canonical);
chemicalReactionTemplatesToString(rxn, RDKit::Reactant, toSmiles, params);
res += ">";
res += chemicalReactionTemplatesToString(rxn, RDKit::Agent, toSmiles, params);
res += ">";
res += chemicalReactionTemplatesToString(rxn, RDKit::Product, toSmiles,
canonical);
res +=
chemicalReactionTemplatesToString(rxn, RDKit::Product, toSmiles, params);
return res;
}

Expand All @@ -121,14 +121,15 @@ void write_template(std::ostringstream &res, RDKit::ROMol &tpl) {
namespace RDKit {

//! returns the reaction SMARTS for a reaction
std::string ChemicalReactionToRxnSmarts(const ChemicalReaction &rxn) {
return chemicalReactionToRxnToString(rxn, false, false);
std::string ChemicalReactionToRxnSmarts(const ChemicalReaction &rxn,
const SmilesWriteParams &params) {
return chemicalReactionToRxnToString(rxn, false, params);
};

//! returns the reaction SMILES for a reaction
std::string ChemicalReactionToRxnSmiles(const ChemicalReaction &rxn,
bool canonical) {
return chemicalReactionToRxnToString(rxn, true, canonical);
const SmilesWriteParams &params) {
return chemicalReactionToRxnToString(rxn, true, params);
};

//! returns an RXN block for a reaction
Expand Down
20 changes: 18 additions & 2 deletions Code/GraphMol/ChemReactions/Wrap/rdChemReactions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -884,12 +884,28 @@ Sample Usage:
see the documentation for rdkit.Chem.MolFromSmiles for an explanation\n\
of the replacements argument.",
python::return_value_policy<python::manage_new_object>());
python::def("ReactionToSmarts", RDKit::ChemicalReactionToRxnSmarts,
python::def("ReactionToSmarts",
(std::string(*)(const RDKit::ChemicalReaction &))
RDKit::ChemicalReactionToRxnSmarts,
(python::arg("reaction")),
"construct a reaction SMARTS string for a ChemicalReaction");
python::def("ReactionToSmiles", RDKit::ChemicalReactionToRxnSmiles,
python::def("ReactionToSmiles",
(std::string(*)(const RDKit::ChemicalReaction &,
bool))RDKit::ChemicalReactionToRxnSmiles,
(python::arg("reaction"), python::arg("canonical") = true),
"construct a reaction SMILES string for a ChemicalReaction");
python::def("ReactionToSmarts",
(std::string(*)(const RDKit::ChemicalReaction &,
const RDKit::SmilesWriteParams &))
RDKit::ChemicalReactionToRxnSmarts,
(python::arg("reaction"), python::arg("params")),
"construct a reaction SMARTS string for a ChemicalReaction");
python::def("ReactionToSmiles",
(std::string(*)(const RDKit::ChemicalReaction &,
const RDKit::SmilesWriteParams &))
RDKit::ChemicalReactionToRxnSmiles,
(python::arg("reaction"), python::arg("params")),
"construct a reaction SMILES string for a ChemicalReaction");

python::def(
"ReactionFromRxnFile", RDKit::RxnFileToChemicalReaction,
Expand Down
18 changes: 18 additions & 0 deletions Code/GraphMol/ChemReactions/Wrap/testReactionWrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,24 @@ def testSanitizeRxnAsMols(self):
self.assertFalse(rxn.GetProductTemplate(0).GetBondWithIdx(0).GetIsAromatic())
self.assertEqual(rxn.GetAgentTemplate(0).GetAtomWithIdx(1).GetFormalCharge(), 1)

def testSmilesWriteParams(self):
  # Reaction SMILES/SMARTS output is checked twice: once with default
  # SmilesWriteParams (the "->" bonds are written) and once with
  # includeDativeBonds switched off (the same bonds are written without "->").
  reaction = AllChem.ReactionFromSmarts(
    "[C:1]-[C:2].[NH3:3]->[Fe:4]-[NH2:5]>>[C:1]=[C:2].[NH3:3]->[Fe:4]-[NH2:5]")
  self.assertIsNotNone(reaction)

  writeParams = AllChem.SmilesWriteParams()

  # default parameters
  smilesWithDative = "[CH3:1][CH3:2].[NH3:3]->[Fe:4][NH2:5]>>[CH2:1]=[CH2:2].[NH3:3]->[Fe:4][NH2:5]"
  smartsWithDative = "[C:1]-[C:2].[N&H3:3]->[#26:4]-[N&H2:5]>>[C:1]=[C:2].[N&H3:3]->[#26:4]-[N&H2:5]"
  self.assertEqual(AllChem.ReactionToSmiles(reaction, writeParams), smilesWithDative)
  self.assertEqual(AllChem.ReactionToSmarts(reaction, writeParams), smartsWithDative)

  # dative bonds disabled
  writeParams.includeDativeBonds = False
  smilesNoDative = "[CH3:1][CH3:2].[NH3:3][Fe:4][NH2:5]>>[CH2:1]=[CH2:2].[NH3:3][Fe:4][NH2:5]"
  smartsNoDative = "[C:1]-[C:2].[N&H3:3]-[#26:4]-[N&H2:5]>>[C:1]=[C:2].[N&H3:3]-[#26:4]-[N&H2:5]"
  self.assertEqual(AllChem.ReactionToSmiles(reaction, writeParams), smilesNoDative)
  self.assertEqual(AllChem.ReactionToSmarts(reaction, writeParams), smartsNoDative)


if __name__ == '__main__':
unittest.main(verbosity=True)
19 changes: 18 additions & 1 deletion Code/GraphMol/ChemReactions/catch_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright (c) 2018-2021 Greg Landrum and other RDKit contributors
// Copyright (c) 2018-2024 Greg Landrum and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
Expand Down Expand Up @@ -1796,4 +1796,21 @@ TEST_CASE("sanitizeRxnAsMols") {
CHECK_THROWS_AS(RxnOps::sanitizeRxnAsMols(*rxn), MolSanitizeException);
}
}
}

TEST_CASE("Github #7372: SMILES output option to disable dative bonds") {
  SECTION("basics") {
    // Both sides of the reaction contain an [NH3]->[Fe] bond.
    auto reaction =
        "[C:1]-[C:2].[NH3:3]->[Fe:4]-[NH2:5]>>[C:1]=[C:2].[NH3:3]->[Fe:4]-[NH2:5]"_rxnsmarts;
    REQUIRE(reaction);

    // Without explicit parameters the "->" bonds are written out.
    CHECK(ChemicalReactionToRxnSmiles(*reaction) ==
          "[CH3:1][CH3:2].[NH3:3]->[Fe:4][NH2:5]>>[CH2:1]=[CH2:2].[NH3:3]->[Fe:4][NH2:5]");
    CHECK(ChemicalReactionToRxnSmarts(*reaction) ==
          "[C:1]-[C:2].[N&H3:3]->[#26:4]-[N&H2:5]>>[C:1]=[C:2].[N&H3:3]->[#26:4]-[N&H2:5]");

    // With includeDativeBonds turned off the same bonds appear without "->".
    SmilesWriteParams noDativeParams;
    noDativeParams.includeDativeBonds = false;
    CHECK(ChemicalReactionToRxnSmiles(*reaction, noDativeParams) ==
          "[CH3:1][CH3:2].[NH3:3][Fe:4][NH2:5]>>[CH2:1]=[CH2:2].[NH3:3][Fe:4][NH2:5]");
    CHECK(ChemicalReactionToRxnSmarts(*reaction, noDativeParams) ==
          "[C:1]-[C:2].[N&H3:3]-[#26:4]-[N&H2:5]>>[C:1]=[C:2].[N&H3:3]-[#26:4]-[N&H2:5]");
  }
}
28 changes: 28 additions & 0 deletions Code/GraphMol/SmilesParse/CXSmilesOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2214,6 +2214,29 @@ std::string get_bond_config_block(
return res;
}

// Builds the "C:" block listing the dative bonds of \c mol.
//
// \c bondOrder is walked front to back. Every bond whose type is
// Bond::BondType::DATIVE adds one entry "<a>.<b>", where <a> is the position
// of the bond's begin atom within \c atomOrder and <b> is the position of the
// bond within \c bondOrder. Entries are comma separated and the block is
// prefixed with "C:".
//
// Returns an empty string when no bond in \c bondOrder is dative.
std::string get_coordbonds_block(const ROMol &mol,
                                 const std::vector<unsigned int> &atomOrder,
                                 const std::vector<unsigned int> &bondOrder) {
  std::string block;
  for (unsigned int bondPos = 0; bondPos < bondOrder.size(); ++bondPos) {
    const auto bond = mol.getBondWithIdx(bondOrder[bondPos]);
    if (bond->getBondType() == Bond::BondType::DATIVE) {
      // Position of the begin atom in the output atom ordering. If the index
      // is absent from atomOrder this evaluates to atomOrder.size().
      const auto beginAtomPos =
          std::find(atomOrder.begin(), atomOrder.end(),
                    bond->getBeginAtomIdx()) -
          atomOrder.begin();
      // The first entry opens the block; later entries are comma separated.
      block += block.empty() ? "C:" : ",";
      block += boost::str(boost::format("%d.%d") % beginAtomPos % bondPos);
    }
  }
  return block;
}

std::string get_ringbond_cistrans_block(
const ROMol &mol, const std::vector<unsigned int> &atomOrder,
const std::vector<unsigned int> &bondOrder) {
Expand Down Expand Up @@ -2442,6 +2465,11 @@ std::string getCXExtensions(const ROMol &mol, std::uint32_t flags) {
appendToCXExtension(cfgblock, res);
}

if (flags & SmilesWrite::CXSmilesFields::CX_COORDINATE_BONDS) {
const auto block = get_coordbonds_block(mol, atomOrder, bondOrder);
appendToCXExtension(block, res);
}

if (flags & SmilesWrite::CXSmilesFields::CX_LINKNODES) {
const auto linknodeblock = get_linknodes_block(mol, atomOrder);
appendToCXExtension(linknodeblock, res);
Expand Down