Skip to content

Commit

Permalink
- remove valences which are inconsistent with periodic table group me…
Browse files Browse the repository at this point in the history
…mbership Al(IIIb), Si(IVb), P(Vb), As(Vb), Sb(Vb), Bi(Vb)

- address elements with d orbitals (P, As, Sb, Si) which can accommodate bonds supported by lone pairs from halogens and chalcogens with requiring adding extra valences to periodic table
- add test to make sure that PX6-, AlX63-, AsX6-, SbX6-, BiF6- do not raise an AtomValenceException while PX6, AlX6, AsX6, SbX6, BiX6 and peracids of the same elements which cannot exist raise an AtomValenceException
- correct SmilesTests.java and rdkit/Chem/UnitTestSmiles.py where CCC[Al+3] should rather read CCC[Al+2]
- comment out incorrect entries in rdkit/Chem/test_data/NCI_aromat_regress.txt and rdkit/Chem/test_data/NCI_5K_TPSA.csv
- add -1 valence to alkali elements which were missing it (Rb, Cs, Fr)
  • Loading branch information
ptosco committed May 1, 2024
1 parent 187d2ca commit 2d2ac12
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 71 deletions.
125 changes: 80 additions & 45 deletions Code/GraphMol/Atom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,44 +312,77 @@ unsigned int Atom::getTotalValence() const {

namespace {

unsigned int calcAtomChg(const Atom &atom, int &chg, int &effectiveChg, int &effectiveValenceChgContrib, bool addDativeBondContrib) {
auto atomicNum = atom.getAtomicNum();
int dativeNbrChg = 0;
int nElectronRichNbrs = 0;
int nOuterElecs = PeriodicTable::getTable()->getNouterElecs(atomicNum);
for (const auto bnd : atom.getOwningMol().atomBonds(&atom)) {
auto nbrAtom = bnd->getOtherAtom(&atom);
// dative bonds from nbr to atom contribute neighbor formal charge to atom
bool isBondDative = (bnd->getBondType() == Bond::DATIVE ||
bnd->getBondType() == Bond::DATIVEONE);
if (addDativeBondContrib && isBondDative &&
bnd->getEndAtom() == &atom) {
dativeNbrChg += nbrAtom->getFormalCharge();
}
auto nbrAtomicNum = nbrAtom->getAtomicNum();
if (!(isBondDative && bnd->getBeginAtom() == &atom) && PeriodicTable::getTable()->getNouterElecs(nbrAtomicNum) > nOuterElecs) {
++nElectronRichNbrs;
}
}
chg = atom.getFormalCharge() + dativeNbrChg;
effectiveChg = chg;
auto atomIsEarly = isEarlyAtom(atomicNum);
if (atomIsEarly) {
effectiveChg = -chg; // <- the usual correction for early atoms
}
// special case for carbon - see GitHub #539
if (atomicNum == 6 && effectiveChg > 0) {
effectiveChg = -chg;
}
effectiveValenceChgContrib = chg;
if (nOuterElecs >= 4 && !atomIsEarly &&
(atomicNum <= 10 || chg > 0 || nElectronRichNbrs < -chg)) {
effectiveValenceChgContrib = -chg;
}
return atomicNum;
}

int calculateExplicitValence(const Atom &atom, bool strict, bool checkIt) {
// FIX: contributions of bonds to valence are being done at best
// approximately
double accum = 0;
double accum = 0.0;
int chg;
int effectiveChg;
int effectiveValenceChgContrib;
auto atomicNum = calcAtomChg(atom, chg, effectiveChg, effectiveValenceChgContrib, true);
for (const auto bnd : atom.getOwningMol().atomBonds(&atom)) {
accum += bnd->getValenceContrib(&atom);
}
accum += atom.getNumExplicitHs();

// check accum is greater than the default valence
auto atomicNum = atom.getAtomicNum();
unsigned int dv = PeriodicTable::getTable()->getDefaultValence(atomicNum);
int chr = atom.getFormalCharge();
if (isEarlyAtom(atomicNum)) {
chr *= -1; // <- the usual correction for early atoms
}
// special case for carbon - see GitHub #539
if (atomicNum == 6 && chr > 0) {
chr = -chr;
}
if (accum > (dv + chr) && isAromaticAtom(atom)) {

if (accum > (dv + effectiveChg) && isAromaticAtom(atom)) {
// this needs some explanation : if the atom is aromatic and
// accum > (dv + chr) we assume that no hydrogen can be added
// to this atom. We set x = (v + chr) such that x is the
// accum > (dv + effectiveChg) we assume that no hydrogen can be added
// to this atom. We set x = (v + effectiveChg) such that x is the
// closest possible integer to "accum" but less than
// "accum".
//
// "v" here is one of the allowed valences. For example:
// sulfur here : O=c1ccs(=O)cc1
// nitrogen here : c1cccn1C

int pval = dv + chr;
int pval = dv + effectiveChg;
const auto &valens = PeriodicTable::getTable()->getValenceList(atomicNum);
for (auto val : valens) {
if (val == -1) {
break;
}
val += chr;
val += effectiveChg;
if (val > accum) {
break;
} else {
Expand Down Expand Up @@ -383,14 +416,23 @@ int calculateExplicitValence(const Atom &atom, bool strict, bool checkIt) {
auto res = static_cast<int>(std::round(accum));

if (strict || checkIt) {
int effectiveValence;
if (PeriodicTable::getTable()->getNouterElecs(atomicNum) >= 4) {
effectiveValence = res - atom.getFormalCharge();
} else {
// for boron and co, we move to the right in the PT, so adding
// extra valences means adding negative charge
effectiveValence = res + atom.getFormalCharge();
}
// For carbon and elements to its right, extra valences can be
// accommodated by establishing covalent bonds through available
// lone electron pairs (e.g., ammonium, phosphonium, oxonium salts)
// Establishing such bonds comes at the expense of adding
// positive formal charge. Therefore, the positive formal
// charge needs to be subtracted to the atom valence to
// obtain the number of valence electrons actually engaged
// in covalent bonds, neglecting the contribution from
// lone pairs.
// However, starting from the 3rd period elements can also
// accommodate lone electrons pairs donated by atoms in
// higher groups (e.g., chalcogens and halogens),
// similarly to boron in [BF4]- and aluminum in
// [AlF6]3-, and therefore gain negative formal charge
// (e.g., hexafluorophosphate [PF6]-, hexafluorosilicate [SiF6]2-,
// hexafluoroantimonate [AsF6]-, bismuthate [BiO3]-)
int effectiveValence = res + effectiveValenceChgContrib;
const auto &valens = PeriodicTable::getTable()->getValenceList(atomicNum);

int maxValence = valens.back();
Expand Down Expand Up @@ -426,21 +468,22 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
explicitValence = calculateExplicitValence(atom, strict, checkIt);
}
// special cases
auto atomic_num = atom.getAtomicNum();
if (atomic_num == 0) {
int chg;
int effectiveChg;
int effectiveValenceChgContrib;
auto atomicNum = calcAtomChg(atom, chg, effectiveChg, effectiveValenceChgContrib, false);
if (atomicNum == 0) {
return 0;
}
for (const auto bnd : atom.getOwningMol().atomBonds(&atom)) {
if (QueryOps::hasComplexBondTypeQuery(*bnd)) {
return 0;
}
}
auto formal_charge = atom.getFormalCharge();
auto num_radical_electrons = atom.getNumRadicalElectrons();
if (explicitValence == 0 && atomic_num == 1 && num_radical_electrons == 0) {
if (formal_charge == 1 || formal_charge == -1) {
if (explicitValence == 0 && atomicNum == 1 && atom.getNumRadicalElectrons() == 0) {
if (chg == 1 || chg == -1) {
return 0;
} else if (formal_charge == 0) {
} else if (chg == 0) {
return 1;
} else {
if (strict) {
Expand All @@ -465,7 +508,7 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {

// The d-block and f-block of the periodic table (i.e. transition metals,
// lanthanoids and actinoids) have no default valence.
int dv = PeriodicTable::getTable()->getDefaultValence(atomic_num);
int dv = PeriodicTable::getTable()->getDefaultValence(atomicNum);
if (dv == -1) {
return 0;
}
Expand All @@ -481,11 +524,10 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
// exception
// finally aromatic cases are dealt with differently - these atoms are allowed
// only default valences
const auto &valens = PeriodicTable::getTable()->getValenceList(atomic_num);
const auto &valens = PeriodicTable::getTable()->getValenceList(atomicNum);

int explicitPlusRadV =
atom.getExplicitValence() + atom.getNumRadicalElectrons();
int chg = atom.getFormalCharge();

// NOTE: this is here to take care of the difference in element on
// the right side of the carbon vs left side of carbon
Expand Down Expand Up @@ -516,19 +558,12 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
//
// So assuming you read all the above stuff - you know why we are
// changing signs for "chg" here
if (isEarlyAtom(atomic_num)) {
chg *= -1;
}
// special case for carbon - see GitHub #539
if (atomic_num == 6 && chg > 0) {
chg = -chg;
}

int res = 0;
// if we have an aromatic case treat it differently
if (isAromaticAtom(atom)) {
if (explicitPlusRadV <= (static_cast<int>(dv) + chg)) {
res = dv + chg - explicitPlusRadV;
if (explicitPlusRadV <= (static_cast<int>(dv) + effectiveChg)) {
res = dv + effectiveChg - explicitPlusRadV;
} else {
// As we assume when finding the explicitPlusRadValence if we are
// aromatic we should not be adding any hydrogen and already
Expand All @@ -541,7 +576,7 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
// formal charge here vs the explicit valence function.
bool satis = false;
for (auto vi = valens.begin(); vi != valens.end() && *vi > 0; ++vi) {
if (explicitPlusRadV == ((*vi) + chg)) {
if (explicitPlusRadV == (*vi + effectiveChg)) {
satis = true;
break;
}
Expand All @@ -565,7 +600,7 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
// and be able to add hydrogens
res = -1;
for (auto vi = valens.begin(); vi != valens.end() && *vi >= 0; ++vi) {
int tot = (*vi) + chg;
int tot = *vi - effectiveValenceChgContrib;
if (explicitPlusRadV <= tot) {
res = tot - explicitPlusRadV;
break;
Expand All @@ -579,7 +614,7 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
// raise an error
std::ostringstream errout;
errout << "Explicit valence for atom # " << atom.getIdx() << " "
<< PeriodicTable::getTable()->getElementSymbol(atomic_num)
<< PeriodicTable::getTable()->getElementSymbol(atomicNum)
<< " greater than permitted";
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
Expand Down
36 changes: 18 additions & 18 deletions Code/GraphMol/atomic_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,25 @@ const double electronMass =
// list of allowed valences, -1 for anything
const std::string periodicTableAtomData =
R"DAT(0 * 0 0 0 0 0 0 0 -1
1 H 0.31 0.33 1.2 1.008 1 1 1.007825032 1
2 He 0.28 0.7 1.4 4.003 2 4 4.002603254 0
3 Li 1.28 1.23 2.2 6.941 1 7 7.01600455 1 -1
4 Be 0.96 0.9 1.9 9.012 2 9 9.0121822 2
5 B 0.84 0.82 1.8 10.812 3 11 11.0093054 3
1 H 0.31 0.33 1.2 1.008 1 1 1.007825032 1
2 He 0.28 0.7 1.4 4.003 2 4 4.002603254 0
3 Li 1.28 1.23 2.2 6.941 1 7 7.01600455 1 -1
4 Be 0.96 0.9 1.9 9.012 2 9 9.0121822 2
5 B 0.84 0.82 1.8 10.812 3 11 11.0093054 3
6 C 0.76 0.77 1.7 12.011 4 12 12 4
7 N 0.71 0.7 1.6 14.007 5 14 14.003074 3
8 O 0.66 0.66 1.55 15.999 6 16 15.99491462 2
9 F 0.57 0.611 1.5 18.998 7 19 18.99840322 1
10 Ne 0.58 0.7 1.54 20.18 8 20 19.99244018 0
11 Na 1.66 1.54 2.4 22.99 1 23 22.98976928 1 -1
11 Na 1.66 1.54 2.4 22.99 1 23 22.98976928 1 -1
12 Mg 1.41 1.36 2.2 24.305 2 24 23.9850417 2 -1
13 Al 1.21 1.18 2.1 26.982 3 27 26.98153863 3 6
14 Si 1.11 0.937 2.1 28.086 4 28 27.97692653 4 6
15 P 1.07 0.89 1.95 30.974 5 31 30.97376163 3 5 7
13 Al 1.21 1.18 2.1 26.982 3 27 26.98153863 3
14 Si 1.11 0.937 2.1 28.086 4 28 27.97692653 4
15 P 1.07 0.89 1.95 30.974 5 31 30.97376163 3 5
16 S 1.05 1.04 1.8 32.067 6 32 31.972071 2 4 6
17 Cl 1.02 0.997 1.8 35.453 7 35 34.96885268 1
18 Ar 1.06 1.74 1.88 39.948 8 40 39.96238312 0
19 K 2.03 2.03 2.8 39.098 1 39 38.96370668 1 -1
19 K 2.03 2.03 2.8 39.098 1 39 38.96370668 1 -1
20 Ca 1.76 1.74 2.4 40.078 2 40 39.96259098 2 -1
21 Sc 1.70 1.44 2.3 44.956 3 45 44.9559119 -1
22 Ti 1.60 1.32 2.15 47.867 4 48 47.9479463 -1
Expand All @@ -76,13 +76,13 @@ const std::string periodicTableAtomData =
28 Ni 1.24 1.15 2.0 58.693 10 58 57.9353429 -1
29 Cu 1.32 1.17 2.0 63.546 11 63 62.9295975 -1
30 Zn 1.22 1.25 2.1 65.39 2 64 63.9291422 -1
31 Ga 1.22 1.26 2.1 69.723 3 69 68.9255736 3
32 Ge 1.20 1.188 2.1 72.61 4 74 73.9211778 4
33 As 1.19 1.2 2.05 74.922 5 75 74.9215965 3 5 7
31 Ga 1.22 1.26 2.1 69.723 3 69 68.9255736 3
32 Ge 1.20 1.188 2.1 72.61 4 74 73.9211778 4
33 As 1.19 1.2 2.05 74.922 5 75 74.9215965 3 5
34 Se 1.20 1.17 1.9 78.96 6 80 79.9165213 2 4 6
35 Br 1.20 1.167 1.9 79.904 7 79 78.9183371 1
36 Kr 1.16 1.91 2.02 83.8 8 84 83.911507 0
37 Rb 2.20 2.16 2.9 85.468 1 85 84.91178974 1
37 Rb 2.20 2.16 2.9 85.468 1 85 84.91178974 1 -1
38 Sr 1.95 1.91 2.55 87.62 2 88 87.9056121 2 -1
39 Y 1.90 1.62 2.4 88.906 3 89 88.9058483 -1
40 Zr 1.75 1.45 2.3 91.224 4 90 89.9047044 -1
Expand All @@ -97,11 +97,11 @@ const std::string periodicTableAtomData =
49 In 1.42 1.44 2.2 114.818 3 115 114.903878 3
50 Sn 1.39 1.385 2.25 118.711 4 120 119.9021947 2 4)DAT"
R"DAT(
51 Sb 1.39 1.4 2.2 121.76 5 121 120.9038157 3 5 7
51 Sb 1.39 1.4 2.2 121.76 5 121 120.9038157 3 5
52 Te 1.38 1.378 2.1 127.6 6 130 129.9062244 2 4 6
53 I 1.39 1.387 2.1 126.904 7 127 126.904473 1 3 5
54 Xe 1.40 1.98 2.16 131.29 8 132 131.9041535 0 2 4 6
55 Cs 2.44 2.35 3.0 132.905 1 133 132.9054519 1
55 Cs 2.44 2.35 3.0 132.905 1 133 132.9054519 1 -1
56 Ba 2.15 1.98 2.7 137.328 2 138 137.9052472 2 -1
57 La 2.07 1.69 2.5 138.906 3 139 138.9063533 -1
58 Ce 2.04 1.83 2.48 140.116 4 140 139.9054387 -1
Expand Down Expand Up @@ -130,7 +130,7 @@ const std::string periodicTableAtomData =
80 Hg 1.32 1.49 2.05 200.59 2 202 201.970643 -1
81 Tl 1.45 1.48 2.2 204.383 3 205 204.9744275 3
82 Pb 1.46 1.48 2.3 207.2 4 208 207.9766521 2 4
83 Bi 1.48 1.45 2.3 208.98 5 209 208.9803987 3 5 7
83 Bi 1.48 1.45 2.3 208.98 5 209 208.9803987 3 5
84 Po 1.40 1.46 2.0 209 6 209 208.9824304 2 4 6
85 At 1.50 1.45 2.0 210 7 210 209.987148 1 3 5)DAT"
// the values for Ra and Rn are from:
Expand All @@ -139,7 +139,7 @@ const std::string periodicTableAtomData =
// https://www.ciaaw.org/radioactive-elements.htm
R"DAT(
86 Rn 1.50 2.4 2.0 222 8 222 222.0175706 0
87 Fr 2.6 2 2.0 223 1 223 223.0197359 1
87 Fr 2.6 2 2.0 223 1 223 223.0197359 1 -1
88 Ra 2.2 1.9 2.0 226 2 226 226.0254026 2 -1
89 Ac 2.15 1.88 2.0 227 3 227 227.0277521 -1
90 Th 2.06 1.79 2.4 232.038 4 232 232.0380553 -1
Expand Down
33 changes: 33 additions & 0 deletions Code/GraphMol/catch_graphmol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4112,3 +4112,36 @@ TEST_CASE("Hybridization of dative bonded atoms") {
}
}
}

TEST_CASE("Valences on Al, Si, P, As, Sb, Bi") {
SECTION("Should fail with AtomValenceException") {
std::vector<std::pair<std::string, unsigned int>> smiles = {
{"[Al](Cl)(Cl)(Cl)(Cl)(Cl)Cl", 0}, {"F[Si](F)(F)(F)(F)F", 1},
{"[P](F)(F)(F)(F)(F)F", 0}, {"P(=O)(O)(O)(O)(O)O", 0},
{"F[As](F)(F)(F)(F)F", 1}, {"O[As](=O)(O)(O)(O)O", 1},
{"[Sb](F)(F)(F)(F)(F)F", 0}, {"[Sb](=O)(O)(O)(O)(O)O", 0},
{"F[Bi](F)(F)(F)(F)F", 1}, {"O[Bi](=O)(O)(O)(O)(O)O", 1},
};
for (auto pr : smiles) {
CHECK_THROWS_AS(SmilesToMol(pr.first), AtomValenceException);
try {
ROMOL_SPTR m(SmilesToMol(pr.first));
RDUNUSED_PARAM(m);
} catch (const AtomValenceException &e) {
CHECK(e.getType() == "AtomValenceException");
CHECK(e.getAtomIdx() == pr.second);
}
}
}
SECTION("Should not fail") {
std::vector<std::string> smiles = {
"[Al-3](Cl)(Cl)(Cl)(Cl)(Cl)Cl", "F[Si-2](F)(F)(F)(F)F",
"[P-](F)(F)(F)(F)(F)F", "F[As-](F)(F)(F)(F)F",
"F[Sb-](F)(F)(F)(F)F", "[Bi-](F)(F)(F)(F)(F)F",
};
for (auto smi : smiles) {
ROMOL_SPTR m(SmilesToMol(smi));
CHECK(m);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ public void testRings2() {
// testing molecules which have been problematic
@Test
public void testProblems() {
testSpellings("[Al+3]CCC",
new String[] { "CCC[Al+3]", "C(C)(C[Al+3])"});
testSpellings("[Al+2]CCC",
new String[] { "CCC[Al+2]", "C(C)(C[Al+2])"});
testSpellings("C(=O)(Cl)CC(=O)Cl",
new String[] { "ClC(CC(Cl)=O)=O", "C(Cl)(=O)CC(=O)Cl","C(Cl)(=O)CC(Cl)=O"});
testSpellings("C(=O)(Cl)c1ccc(C(=O)Cl)cc1",
Expand Down
2 changes: 1 addition & 1 deletion rdkit/Chem/UnitTestSmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def testRings2(self):
def testProblems(self):
" testing molecules which have been problematic "
smiList = [
('[Al+3]CCC', ('CCC[Al+3]', 'C(C)(C[Al+3])')),
('[Al+2]CCC', ('CCC[Al+2]', 'C(C)(C[Al+2])')),
('C(=O)(Cl)CC(=O)Cl', ('ClC(CC(Cl)=O)=O', 'C(Cl)(=O)CC(=O)Cl', 'C(Cl)(=O)CC(Cl)=O')),
('C(=O)(Cl)c1ccc(C(=O)Cl)cc1', ('O=C(Cl)c1ccc(cc1)C(Cl)=O', 'C(Cl)(=O)C1=CC=C(C=C1)C(Cl)=O',
'ClC(=O)c1ccc(cc1)C(=O)Cl')),
Expand Down
4 changes: 2 additions & 2 deletions rdkit/Chem/test_data/NCI_5K_TPSA.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2896,7 +2896,7 @@ C1CN[Co]23(N1)(NCCN2)NCCN3,72.18
NC1=CC(=C(OC2=CC=CC=C2)C=C1)Cl,35.25
O=C1O[Fe]23(OC1=O)(OC(=O)C(=O)O2)OC(=O)C(=O)O3,157.80
NCC(O)=O,63.32
O=C1O[Al]23(OC1=O)(OC(=O)C(=O)O2)OC(=O)C(=O)O3,157.80
#O=C1O[Al]23(OC1=O)(OC(=O)C(=O)O2)OC(=O)C(=O)O3,157.80
O=C1C[N+]23CC[N+]45CC(=O)O[Ni]24(O1)(OC(=O)C3)OC(=O)C5,105.20
N[Co](N)(N)(N)([N+]([O-])=O)[N+]([O-])=O,190.36
SC#N.Cl[Co++]12(NCCN1)(NCCN2)[SH+]C#N,95.70
Expand Down Expand Up @@ -3368,7 +3368,7 @@ CCCCCCCCCCCN,26.02
C1CCC(CC1)NC2CCCCC2,12.03
CN(C)C(=S)SC(=S)N(C)C,6.48
NC1=CC=C(NC2=CC=CC=C2)C=C1,38.05
CCCCNCCCC.F[Si](F)(F)(F)(F)F,12.03
#CCCCNCCCC.F[Si](F)(F)(F)(F)F,12.03
CCCCCCCCCCCCNC(N)=N,61.90
NC(=S)NC1=CC=C(Br)C=C1,38.05
C[N+]12CCCC1C3=CC=C[N+](=C3)[Ni++]245([N+]6=CC(=CC=C6)C7CCC[N+]47C)[N+]8=CC(=CC=C8)C9CCC[N+]59C.SC#N,35.43
Expand Down

0 comments on commit 2d2ac12

Please sign in to comment.