Skip to content

Commit

Permalink
New tests for special query atoms and atropisomers (#7010)
Browse files Browse the repository at this point in the history
* New tests for special query atoms and atropisomers

* fixed error, and used unique_ptrs

* Removed test that makes GraphMol depend on GenericGroups

* More to remove GraphMol dependency on GenericGroups
  • Loading branch information
tadhurst-cdd committed Jan 11, 2024
1 parent d6e8e47 commit 73b4da2
Show file tree
Hide file tree
Showing 33 changed files with 592 additions and 30 deletions.
33 changes: 33 additions & 0 deletions Code/GraphMol/CIPLabeler/Wrap/pyCIPLabelWrapTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,39 @@ def testLabelBondsList(self):
self.assertFalse(bond1.HasProp("_CIPCode"))
self.assertEqual(bond3.GetProp("_CIPCode"), "Z")

def doOneAtropIomerMandP(self, inputSmiles, expected):
  """Check the bond CIP labels assigned to a molecule parsed from CXSMILES.

  The input is parsed with CXSMILES extensions enabled, sanitization on,
  name parsing off, hydrogen removal off and explicit3dChirality off.
  After AssignCIPLabels runs, every bond that carries a "_CIPCode"
  property contributes "<bond index><code>:" to a summary string, in bond
  index order, and that summary must equal ``expected``.

  Args:
    inputSmiles: the SMILES/CXSMILES text to parse.
    expected: the expected summary string, e.g. "0m:12m:".
  """
  parserParams = Chem.SmilesParserParams()
  parserParams.allowCXSMILES = True
  parserParams.parseName = False
  parserParams.sanitize = True
  parserParams.removeHs = False
  parserParams.explicit3dChirality = False

  mol = Chem.MolFromSmiles(inputSmiles, parserParams)
  self.assertIsNotNone(mol)

  Chem.rdCIPLabeler.AssignCIPLabels(mol)

  # Walk the bonds by index so the summary is ordered by bond index.
  indexedBonds = ((idx, mol.GetBondWithIdx(idx)) for idx in range(mol.GetNumBonds()))
  observed = "".join(
    str(idx) + bond.GetProp("_CIPCode") + ":" for idx, bond in indexedBonds
    if bond.HasProp("_CIPCode"))

  self.assertEqual(observed, expected)

def testAtropIsomer(self):
mol = "FC1=C(C2=C(C)C(N3C(=O)C4=C(N(C)C3=O)C(F)=CC=C4)=CC=C2)C2=C(NC3=C2CC[C@H](C(O)(C)C)C3)C(C(=O)N)=C1 |(2.1158,0.5489,;1.4029,0.9642,;0.6554,0.5402,;0.6459,-0.2846,;1.3556,-0.7053,;2.0747,-0.3011,;1.346,-1.5302,;2.3682,-1.8618,;2.9748,-1.3027,;2.794,-0.4978,;3.7623,-1.5486,;3.9431,-2.3535,;3.3364,-2.9126,;3.5173,-3.7175,;2.549,-2.6667,;1.9423,-3.2258,;4.7594,-2.6222,;4.9309,-3.4292,;5.3958,-2.0448,;5.2074,-1.2063,;4.3851,-0.9565,;0.6268,-1.9345,;-0.0827,-1.5137,;-0.0732,-0.6889,;-0.0819,0.9813,;-0.0819,1.8063,;-0.8665,2.0612,;-1.3515,1.3938,;-0.8665,0.7264,;-1.2039,-0.0639,;-2.0578,-0.1602,;-2.5629,0.5349,;-3.3837,0.4518,;-4.2045,0.3687,;-3.4668,1.2725,;-3.3006,-0.369,;-2.2074,1.3172,;0.6555,2.2474,;0.6459,3.0723,;-0.0731,3.4765,;1.3556,3.4931,;1.403,1.8235,),wU:7.14wD:31.35|"
self.doOneAtropIomerMandP(mol, "6P:")

mol = "C1(N2C(C)=CC=C2Br)=C(C)C(C)=C(N2C(C)=CC=C2Br)C(C)=C1C |(-0.0002,1.5403,;-0.0002,3.0805,;-1.334,3.8508,;-2.6678,3.0807,;-1.334,5.391,;1.3338,5.391,;1.3338,3.8508,;2.6676,3.0807,;-1.3338,0.7702,;-2.6678,1.5403,;-1.3338,-0.7702,;-2.6678,-1.5401,;-0.0002,-1.5403,;-0.0002,-3.0805,;1.3338,-3.8508,;2.6676,-3.0805,;1.3338,-5.391,;-1.334,-5.391,;-1.334,-3.8508,;-2.6678,-3.0805,;1.3338,-0.7702,;2.6678,-1.5403,;1.3338,0.7702,;2.6678,1.5404,),wU:1.6,13.14|"
self.doOneAtropIomerMandP(mol, "0m:12m:")

mol = "N1(n2c(C)ccc2Br)C(=O)[C@H](C)[C@H](C)C1=O |(-11.1517,1.8306,;-11.1517,3.3708,;-12.4855,4.1411,;-13.8193,3.371,;-12.4855,5.6813,;-9.8177,5.6813,;-9.8177,4.1411,;-8.4839,3.371,;-12.3975,0.9252,;-13.8622,1.4011,;-11.9217,-0.5394,;-12.8269,-1.7852,;-10.3817,-0.5394,;-9.4765,-1.7852,;-9.9059,0.9252,;-8.4413,1.4011,),wU:0.8,10.11,12.13|"
self.doOneAtropIomerMandP(mol, "0p:")

if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion Code/GraphMol/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ rdkit_library(GraphMol
NontetrahedralStereo.cpp Atropisomers.cpp
WedgeBonds.cpp
SHARED
LINK_LIBRARIES RDGeometryLib RDGeneral )
LINK_LIBRARIES RDGeometryLib RDGeneral )
target_compile_definitions(GraphMol PRIVATE RDKIT_GRAPHMOL_BUILD)
if (RDK_USE_URF)
target_link_libraries(GraphMol PUBLIC ${RDK_URF_LIBS})
Expand Down
83 changes: 83 additions & 0 deletions Code/GraphMol/FileParsers/test1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5313,6 +5313,88 @@ void testGithub2000() {
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}

void testAtomQueries() {
BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
BOOST_LOG(rdInfoLog) << "Testing atom queries contains HCOUNT, RBCOUNT, UNSAT"
<< std::endl;
{
std::string pathName = getenv("RDBASE");
pathName += "/Code/GraphMol/FileParsers/test_data/";
std::unique_ptr<RWMol> testQuery(
MolFileToMol(pathName + "AtomQuery1.mol", true, false, true));
TEST_ASSERT(testQuery);

std::unique_ptr<RWMol> testMol(
SmilesToMol("[H]C([H])([H])C1=CN=C(C=C)C(C2CCCCC2)=C1C"));
TEST_ASSERT(testMol);

MatchVectType mv;
SubstructMatch(*testMol, *testQuery, mv);
TEST_ASSERT(mv.size() > 0);

testMol.reset(SmilesToMol("[H]CC1=CN=C(C=C)C(C2CCCCC2)=C1C"));
TEST_ASSERT(testMol);

SubstructMatch(*testMol, *testQuery, mv);
TEST_ASSERT(mv.size() > 0);

testMol.reset(SmilesToMol("CCC1=CN=C(C=C)C(C2CCCCC2)=C1C"));
TEST_ASSERT(testMol);

SubstructMatch(*testMol, *testQuery, mv);
TEST_ASSERT(mv.size() == 0); // search fails
}
{
std::string pathName = getenv("RDBASE");
pathName += "/Code/GraphMol/FileParsers/test_data/";
std::unique_ptr<RWMol> testQuery(
MolFileToMol(pathName + "AtomQuery2.mol", true, false, true));
TEST_ASSERT(testQuery);

std::unique_ptr<RWMol> testMol(SmilesToMol("C1(C)C(C)=CN=C(C)C=1C1CCCCC1"));
TEST_ASSERT(testMol);

MatchVectType mv;
SubstructMatch(*testMol, *testQuery, mv);
TEST_ASSERT(mv.size() > 0);

testMol.reset(SmilesToMol("C1(C)C(C)=CN=C(C)C=1C12CCC(CC1)CC2"));
TEST_ASSERT(testMol);

SubstructMatch(*testMol, *testQuery, mv);
TEST_ASSERT(mv.size() == 0); // search fails
}
{
std::string pathName = getenv("RDBASE");
pathName += "/Code/GraphMol/FileParsers/test_data/";
std::unique_ptr<RWMol> testQuery(
MolFileToMol(pathName + "AtomQuery3.mol", true, false, true));
TEST_ASSERT(testQuery);

std::unique_ptr<RWMol> testMol(
SmilesToMol("CCC1=CN=C(C=C)C(C2CCCCC2)=C1C"));
TEST_ASSERT(testMol);

MatchVectType mv;
SubstructMatch(*testMol, *testQuery, mv);
TEST_ASSERT(mv.size() > 0);

testMol.reset(SmilesToMol("CCC1=CN=C(C3=CC=CC=C3)C(C2CCCCC2)=C1C"));
TEST_ASSERT(testMol);

SubstructMatch(*testMol, *testQuery, mv);
TEST_ASSERT(mv.size() > 0);

testMol.reset(SmilesToMol("CCC1=CN=C(C3CCCCC3)C(C2CCCCC2)=C1C"));
TEST_ASSERT(testMol);

SubstructMatch(*testMol, *testQuery, mv);
TEST_ASSERT(mv.size() == 0);
}

BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}

void RunTests() {
#if 1
test1();
Expand Down Expand Up @@ -5411,6 +5493,7 @@ void RunTests() {
testWedgeBondToDoublebond();
testGithub1615();
testGithub2000();
testAtomQueries();
#endif
}

Expand Down
59 changes: 59 additions & 0 deletions Code/GraphMol/FileParsers/testAtropisomers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,64 @@ class MolAtropTest {
}
};

//! Scans an SD file and counts the records that do / do not have
//! atropisomers.
/*!
  The file is read from the test_data/atropisomers directory below RDBASE and
  split into records at lines containing "$$$$". Each record of at least 10
  characters is parsed with MolBlockToMol and passed to
  Atropisomers::doesMolHaveAtropisomers. Every record reported as having
  atropisomers is also written to "<input path>_<hit number>.sdf".

  \param fileName        name of the SD file inside test_data/atropisomers
  \param expectedHits    required number of records with atropisomers
  \param expectedMisses  required number of records without atropisomers
*/
void testLookForAtropisomersInSDdfFiles(std::string fileName,
                                        unsigned int expectedHits,
                                        unsigned int expectedMisses) {
  BOOST_LOG(rdInfoLog) << "Looking for atropisomers in " << fileName
                       << std::endl;

  // getenv() returns a null pointer when the variable is not set; building a
  // std::string from it would be undefined behavior.
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  std::string fName =
      rdbase + "/Code/GraphMol/FileParsers/test_data/atropisomers/" + fileName;

  std::ifstream in(fName);
  // A stream that failed to open never reaches end-of-file, so looping on
  // eof() alone would spin forever; fail the test up front instead.
  TEST_ASSERT(in.is_open());

  std::string line;
  unsigned int foundCount = 0;
  unsigned int notFoundCount = 0;
  // Loop on the overall stream state (not just eof) so that any read error
  // also terminates the scan.
  while (in) {
    std::string molBlock;
    while (std::getline(in, line)) {
      if (line.find("$$$$") != std::string::npos) {
        break;  // end of the current record
      }

      molBlock += line + "\n";
    }

    if (molBlock.length() < 10) {
      continue;  // too short to be a record; try for another
    }

    std::unique_ptr<RWMol> mol(MolBlockToMol(molBlock, false, false, false));
    TEST_ASSERT(mol != nullptr);

    auto hasAtropisomers = RDKit::Atropisomers::doesMolHaveAtropisomers(*mol);

    if (hasAtropisomers) {
      BOOST_LOG(rdInfoLog) << "Found atropisomers in " << fileName << std::endl;
      foundCount++;
      // the counters are unsigned, so they are printed with %u
      printf("Atropisomers- %u hits %u misses\r", foundCount, notFoundCount);
      std::flush(std::cout);
      // keep a copy of the record next to the input file
      std::ofstream out;
      out.open(fName + "_" + std::to_string(foundCount) + ".sdf");
      out << molBlock << std::endl;
    } else {
      notFoundCount++;
      // only report progress every 100 misses
      if (notFoundCount % 100 == 0) {
        printf("Atropisomers- %u hits %u misses\r", foundCount,
               notFoundCount);
        std::flush(std::cout);
      }
    }
  }
  printf("\nFinal results:\nFound atropisomers in %s - %u hits %u misses\n",
         fileName.c_str(), foundCount, notFoundCount);

  TEST_ASSERT(foundCount == expectedHits);
  TEST_ASSERT(notFoundCount == expectedMisses);
}

int main(int argc, char *argv[]) {
(void)argc;
(void)argv;
Expand All @@ -254,6 +312,7 @@ int main(int argc, char *argv[]) {
BOOST_LOG(rdInfoLog) << " ---- Running with POSIX locale ----- " << std::endl;

molAtropTest.RunTests();
testLookForAtropisomersInSDdfFiles("TestMultInSDF.sdf", 1, 4);

return 0;
}
32 changes: 32 additions & 0 deletions Code/GraphMol/FileParsers/test_data/AtomQuery1.mol
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

Mrv2308 06012316552D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 10 10 0 0 0
M V30 BEGIN ATOM
M V30 1 C -7.1459 4.3109 0 0
M V30 2 C -8.4795 3.5409 0 0
M V30 3 C -8.4795 2.0007 0 0
M V30 4 N -7.1459 1.2307 0 0
M V30 5 C -5.8122 2.0007 0 0
M V30 6 C -5.8122 3.5409 0 0
M V30 7 C -4.4785 4.3109 0 0 HCOUNT=3
M V30 8 C -7.146 5.8509 0 0
M V30 9 C -9.8132 4.3109 0 0
M V30 10 C -9.8132 1.2308 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 1 2
M V30 2 1 2 3
M V30 3 2 5 6
M V30 4 1 6 1
M V30 5 1 6 7
M V30 6 1 1 8
M V30 7 1 2 9
M V30 8 1 3 10
M V30 9 2 3 4
M V30 10 1 4 5
M V30 END BOND
M V30 END CTAB
M END
32 changes: 32 additions & 0 deletions Code/GraphMol/FileParsers/test_data/AtomQuery2.mol
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

Mrv2308 06012316552D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 10 10 0 0 0
M V30 BEGIN ATOM
M V30 1 C -7.1459 4.3109 0 0
M V30 2 C -8.4795 3.5409 0 0
M V30 3 C -8.4795 2.0007 0 0
M V30 4 N -7.1459 1.2307 0 0
M V30 5 C -5.8122 2.0007 0 0
M V30 6 C -5.8122 3.5409 0 0
M V30 7 C -4.4785 4.3109 0 0
M V30 8 C -7.146 5.8509 0 0
M V30 9 C -9.8132 4.3109 0 0 RBCNT=2
M V30 10 C -9.8132 1.2308 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 1 2
M V30 2 1 2 3
M V30 3 2 5 6
M V30 4 1 6 1
M V30 5 1 6 7
M V30 6 1 1 8
M V30 7 1 2 9
M V30 8 1 3 10
M V30 9 2 3 4
M V30 10 1 4 5
M V30 END BOND
M V30 END CTAB
M END
32 changes: 32 additions & 0 deletions Code/GraphMol/FileParsers/test_data/AtomQuery3.mol
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

Mrv2308 06012316552D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 10 10 0 0 0
M V30 BEGIN ATOM
M V30 1 C -7.1459 4.3109 0 0
M V30 2 C -8.4795 3.5409 0 0
M V30 3 C -8.4795 2.0007 0 0
M V30 4 N -7.1459 1.2307 0 0
M V30 5 C -5.8122 2.0007 0 0
M V30 6 C -5.8122 3.5409 0 0
M V30 7 C -4.4785 4.3109 0 0
M V30 8 C -7.146 5.8509 0 0
M V30 9 C -9.8132 4.3109 0 0
M V30 10 C -9.8132 1.2308 0 0 UNSAT=1
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 1 2
M V30 2 1 2 3
M V30 3 2 5 6
M V30 4 1 6 1
M V30 5 1 6 7
M V30 6 1 1 8
M V30 7 1 2 9
M V30 8 1 3 10
M V30 9 2 3 4
M V30 10 1 4 5
M V30 END BOND
M V30 END CTAB
M END
4 changes: 2 additions & 2 deletions Code/GraphMol/FileParsers/test_data/cdk2_stereo.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
c1cc(Cl)ccc1c(n[o]2)nc2CSC/3=[NH+]/CCN3,mol_14069
[C@@H]12/C=C\[C@@H](C(C(=O)O)C1C(=O)O)c3cccc(c23)[N+]([O-])=O,mol_12186,None,4.50,Scaffold_00,divscreen,0
c1cc(Cl)ccc1c(n[o]2)nc2CSC/3=[NH+]/CCN3,mol_14069
[C@@H]12/C=C\[C@@H](C(C(=O)O)C1C(=O)O)c3cccc(c23)[N+]([O-])=O,mol_12186,None,4.50,Scaffold_00,divscreen,0
2 changes: 1 addition & 1 deletion Code/GraphMol/GenericGroups/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ target_compile_definitions(GenericGroups PRIVATE RDKIT_GENERICGROUPS_BUILD)
rdkit_headers(GenericGroups.h DEST GraphMol/GenericGroups)

rdkit_catch_test(testGenericGroups generic_tests.cpp
LINK_LIBRARIES GenericGroups FileParsers SmilesParse SubstructMatch)
LINK_LIBRARIES GenericGroups FileParsers MarvinParser SmilesParse SubstructMatch)
9 changes: 4 additions & 5 deletions Code/GraphMol/GenericGroups/GenericGroups.h
Original file line number Diff line number Diff line change
Expand Up @@ -592,13 +592,12 @@ const static std::map<
{"NoCarbonRing", Matchers::NoCarbonRingAtomMatcher},
{"CXX", Matchers::NoCarbonRingAtomMatcher},
{"NoCarbonRingH", Matchers::NoCarbonRingHAtomMatcher},
{"CXH", Matchers::NoCarbonRingHAtomMatcher}
};
{"CXH", Matchers::NoCarbonRingHAtomMatcher}};

// This is an extension of adjustQueryProperties from GraphMol that allows the search of generic groups
// This is an extension of adjustQueryProperties from GraphMol that allows the
// search of generic groups
RDKIT_GENERICGROUPS_EXPORT ROMol *adjustQueryPropertiesWithGenericGroups(
const ROMol &mol,
const MolOps::AdjustQueryParameters *inParams=nullptr);
const ROMol &mol, const MolOps::AdjustQueryParameters *inParams = nullptr);

//! returns false if any of the molecule's generic atoms are not satisfied in
/// the current match
Expand Down

0 comments on commit 73b4da2

Please sign in to comment.