Skip to content

Commit

Permalink
[v2 API] MRV parsers (#7110)
Browse files Browse the repository at this point in the history
* add v2 API for Mrv parsers

needs testing

* add some tests for the v2 and v1 APIs

clean up some warnings in VC++ on Windows

* typo
  • Loading branch information
greglandrum committed Feb 2, 2024
1 parent aa4602f commit 634fd14
Show file tree
Hide file tree
Showing 3 changed files with 237 additions and 142 deletions.
132 changes: 55 additions & 77 deletions Code/GraphMol/MarvinParse/MarvinParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@

namespace RDKit {

namespace v2 {
namespace MarvinParser {

/*
Imports the Marvin-specific dialect of CML (Chemical Markup Language) and
converts it to data structures that are compatible with Molfile, RXNfile, and
Expand All @@ -53,21 +56,21 @@ class MarvinCMLReader {

~MarvinCMLReader(){};

RWMol *parseMolecule(boost::property_tree::ptree molTree,
std::unique_ptr<RWMol> parseMolecule(boost::property_tree::ptree molTree,
bool sanitize = false, bool removeHs = false) {
boost::property_tree::ptree molSection;

try {
molSection = molTree.get_child("cml.MDocument.MChemicalStruct.molecule");
} catch (const std::exception &e) {
} catch (const std::exception &) {
try {
molSection = molTree.get_child("cml.MDocument");
return new RWMol();
} catch (const std::exception &e) {
return std::make_unique<RWMol>();
} catch (const std::exception &) {
try {
molSection = molTree.get_child("cml");
return new RWMol();
} catch (const std::exception &e) {
return std::make_unique<RWMol>();
} catch (const std::exception &) {
throw FileParseException("Expected \"molecule\" in MRV file");
}
}
Expand All @@ -81,10 +84,10 @@ class MarvinCMLReader {
return parseMolecule(marvinMol.get(), sanitize, removeHs);
}

ChemicalReaction *parseReaction(boost::property_tree::ptree rxnTree,
boost::property_tree::ptree documentTree,
bool sanitize = false,
bool removeHs = false) {
std::unique_ptr<ChemicalReaction> parseReaction(
boost::property_tree::ptree rxnTree,
boost::property_tree::ptree documentTree, bool sanitize = false,
bool removeHs = false) {
std::unique_ptr<ChemicalReaction> rxn{new ChemicalReaction()};
rxnTree = rxnTree.get_child("cml.MDocument.MChemicalStruct.reaction");
std::unique_ptr<MarvinReaction> marvinReaction{
Expand Down Expand Up @@ -125,7 +128,7 @@ class MarvinCMLReader {

marvinReaction->prepSgroupsForRDKit();

return rxn.release();
return rxn;
}

Atom *molAtomFromMarvinAtom(const MarvinAtom *marvinAtom,
Expand Down Expand Up @@ -233,7 +236,7 @@ class MarvinCMLReader {
}

return res;
} catch (const std::exception &e) {
} catch (const std::exception &) {
delete res;
res = nullptr;
throw;
Expand Down Expand Up @@ -410,26 +413,25 @@ class MarvinCMLReader {
}

mol->addBond(bond, true);
} catch (const std::exception &e) {
} catch (const std::exception &) {
delete bond;
throw;
}
}

RWMol *parseMolecule(MarvinMol *marvinMol, bool sanitize = false,
bool removeHs = false) {
std::unique_ptr<RWMol> parseMolecule(MarvinMol *marvinMol,
bool sanitize = false,
bool removeHs = false) {
PRECONDITION(marvinMol, "no molecule");
std::vector<MarvinStereoGroup *> stereoGroups;
std::unique_ptr<Conformer> confPtr;
Conformer *conf = nullptr;
std::unique_ptr<Conformer> conf3dPtr;
Conformer *conf3d = nullptr;

RWMol *mol = nullptr;
std::unique_ptr<RWMol> mol{new RWMol()};

try {
mol = new RWMol();

mol->setProp("_MolFileComments", "Generated by RDKit");

// set the atoms
Expand Down Expand Up @@ -545,7 +547,7 @@ class MarvinCMLReader {
bool chiralityPossible = false;

for (auto bondPtr : marvinMol->bonds) {
molBondFromMarvinBond(bondPtr, marvinMol, mol, chiralityPossible);
molBondFromMarvinBond(bondPtr, marvinMol, mol.get(), chiralityPossible);
}

// add the stereo groups
Expand Down Expand Up @@ -573,7 +575,7 @@ class MarvinCMLReader {
for (auto &marvinSgroup : marvinMol->sgroups) {
auto sgroup = std::unique_ptr<SubstanceGroup>();

marvinSgroup->parseMoleculeSpecific(mol, sgroup, sequenceId);
marvinSgroup->parseMoleculeSpecific(mol.get(), sgroup, sequenceId);

if (sgroup->getIsValid()) {
addSubstanceGroup(*mol, *sgroup.get());
Expand Down Expand Up @@ -642,7 +644,7 @@ class MarvinCMLReader {

if (mol->hasProp(common_properties::_NeedsQueryScan)) {
mol->clearProp(common_properties::_NeedsQueryScan);
QueryOps::completeMolQueries(mol);
QueryOps::completeMolQueries(mol.get());
}

// clean up
Expand All @@ -654,9 +656,7 @@ class MarvinCMLReader {
return mol;
}

catch (const std::exception &e) {
delete mol;

catch (const std::exception &) {
for (auto &stereoGroup : stereoGroups) {
delete stereoGroup;
}
Expand Down Expand Up @@ -731,7 +731,7 @@ class MarvinCMLReader {
}

return res;
} catch (const std::exception &e) {
} catch (const std::exception &) {
delete res;

throw;
Expand All @@ -752,7 +752,7 @@ class MarvinCMLReader {
try {
childTree = rxnTree.get_child("reactantList");
foundChild = true;
} catch (const std::exception &e) {
} catch (const std::exception &) {
foundChild = false;
}

Expand All @@ -766,7 +766,7 @@ class MarvinCMLReader {
try {
childTree = rxnTree.get_child("agentList");
foundChild = true;
} catch (const std::exception &e) {
} catch (const std::exception &) {
foundChild = false;
}
if (foundChild) {
Expand All @@ -779,7 +779,7 @@ class MarvinCMLReader {
try {
childTree = rxnTree.get_child("productList");
foundChild = true;
} catch (const std::exception &e) {
} catch (const std::exception &) {
foundChild = false;
}
if (foundChild) {
Expand Down Expand Up @@ -927,7 +927,7 @@ class MarvinCMLReader {
} // end of if (parseArrowPlusesAndConditions)

return res;
} catch (const std::exception &e) {
} catch (const std::exception &) {
delete res;

throw;
Expand All @@ -947,7 +947,7 @@ bool MrvDataStreamIsReaction(std::istream &inStream) {
// see if the reaction header is present
try {
auto rxn = tree.get_child("cml.MDocument.MChemicalStruct.reaction");
} catch (const std::exception &e) {
} catch (const std::exception &) {
return false;
}

Expand Down Expand Up @@ -982,66 +982,54 @@ bool MrvFileIsReaction(const std::string &fName) {
// Read an RWMol from a stream
//
//------------------------------------------------
RWMol *MrvDataStreamToMol(std::istream *inStream, bool sanitize,
bool removeHs) {
PRECONDITION(inStream, "no stream");

std::unique_ptr<RWMol> MolFromMrvDataStream(std::istream &inStream,
const MrvParserParams &params) {
ptree tree;

// Parse the XML into the property tree.

read_xml(*inStream, tree);
read_xml(inStream, tree);

MarvinCMLReader reader;
return reader.parseMolecule(tree, sanitize, removeHs);
}
//------------------------------------------------
//
// Read a RWMol from a stream reference
//
//------------------------------------------------
RWMol *MrvDataStreamToMol(std::istream &inStream, bool sanitize,
bool removeHs) {
return MrvDataStreamToMol(&inStream, sanitize, removeHs);
return reader.parseMolecule(tree, params.sanitize, params.removeHs);
}
//------------------------------------------------
//
// Read an RWMol from a string
//
//------------------------------------------------
RWMol *MrvBlockToMol(const std::string &molmrvText, bool sanitize,
bool removeHs) {
std::unique_ptr<RWMol> MolFromMrvBlock(const std::string &molmrvText,
const MrvParserParams &params) {
std::istringstream inStream(molmrvText);
return MrvDataStreamToMol(inStream, sanitize, removeHs);
return MolFromMrvDataStream(inStream, params);
}

//------------------------------------------------
//
// Read an RWMol from a file
//
//------------------------------------------------
RWMol *MrvFileToMol(const std::string &fName, bool sanitize, bool removeHs) {
std::unique_ptr<RWMol> MolFromMrvFile(const std::string &fName,
const MrvParserParams &params) {
std::ifstream inStream(fName.c_str());
if (!inStream || (inStream.bad())) {
std::ostringstream errout;
errout << "Bad input file " << fName;
throw BadFileException(errout.str());
}
RWMol *res = nullptr;
if (!inStream.eof()) {
res = MrvDataStreamToMol(inStream, sanitize, removeHs);
if (inStream.eof()) {
return nullptr;
}
return res;
return MolFromMrvDataStream(inStream, params);
}

//------------------------------------------------
//
// Read a ChemicalReaction from a stream
//
//------------------------------------------------
ChemicalReaction *MrvDataStreamToChemicalReaction(std::istream *inStream,
bool sanitize,
bool removeHs) {
std::unique_ptr<ChemicalReaction> ReactionFromMrvDataStream(
std::istream &inStream, const MrvParserParams &params) {
PRECONDITION(inStream, "no stream");

Utils::LocaleSwitcher ls;
Expand All @@ -1050,49 +1038,39 @@ ChemicalReaction *MrvDataStreamToChemicalReaction(std::istream *inStream,

// Parse the XML into the property tree.

read_xml(*inStream, tree);
read_xml(inStream, tree);

MarvinCMLReader reader;
return reader.parseReaction(tree, tree.get_child("cml.MDocument"), sanitize,
removeHs);
return reader.parseReaction(tree, tree.get_child("cml.MDocument"),
params.sanitize, params.removeHs);
}

//------------------------------------------------
//
// Read a ChemicalReaction from a stream reference
//
//------------------------------------------------
ChemicalReaction *MrvDataStreamToChemicalReaction(std::istream &inStream,
bool sanitize,
bool removeHs) {
return MrvDataStreamToChemicalReaction(&inStream, sanitize, removeHs);
}
//------------------------------------------------
//
// Read a ChemicalReaction from a string
//
//------------------------------------------------
ChemicalReaction *MrvStringToChemicalReaction(const std::string &molmrvText,
bool sanitize, bool removeHs) {
std::unique_ptr<ChemicalReaction> ReactionFromMrvString(
const std::string &molmrvText, const MrvParserParams &params) {
std::istringstream inStream(molmrvText);
return MrvDataStreamToChemicalReaction(inStream, sanitize, removeHs);
return ReactionFromMrvDataStream(inStream, params);
}

//------------------------------------------------
//
// Read a ChemicalReaction from a file
//
//------------------------------------------------
ChemicalReaction *MrvFileToChemicalReaction(const std::string &fName,
bool sanitize, bool removeHs) {
std::unique_ptr<ChemicalReaction> ReactionFromMrvFile(
const std::string &fName, const MrvParserParams &params) {
std::ifstream inStream(fName.c_str());
if (!inStream || (inStream.bad())) {
std::ostringstream errout;
errout << "Bad input file " << fName;
throw BadFileException(errout.str());
}
ChemicalReaction *res =
MrvDataStreamToChemicalReaction(inStream, sanitize, removeHs);
return res;
return ReactionFromMrvDataStream(inStream, params);
}
} // namespace MarvinParser
} // namespace v2
} // namespace RDKit

0 comments on commit 634fd14

Please sign in to comment.