Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Valence clean-up #7397

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
125 changes: 80 additions & 45 deletions Code/GraphMol/Atom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,44 +312,77 @@ unsigned int Atom::getTotalValence() const {

namespace {

unsigned int calcAtomChg(const Atom &atom, int &chg, int &effectiveChg, int &effectiveValenceChgContrib, bool addDativeBondContrib) {
auto atomicNum = atom.getAtomicNum();
int dativeNbrChg = 0;
int nElectronRichNbrs = 0;
int nOuterElecs = PeriodicTable::getTable()->getNouterElecs(atomicNum);
for (const auto bnd : atom.getOwningMol().atomBonds(&atom)) {
auto nbrAtom = bnd->getOtherAtom(&atom);
// dative bonds from nbr to atom contribute neighbor formal charge to atom
bool isBondDative = (bnd->getBondType() == Bond::DATIVE ||
bnd->getBondType() == Bond::DATIVEONE);
if (addDativeBondContrib && isBondDative &&
bnd->getEndAtom() == &atom) {
dativeNbrChg += nbrAtom->getFormalCharge();
}
auto nbrAtomicNum = nbrAtom->getAtomicNum();
if (!(isBondDative && bnd->getBeginAtom() == &atom) && PeriodicTable::getTable()->getNouterElecs(nbrAtomicNum) > nOuterElecs) {
++nElectronRichNbrs;
}
}
chg = atom.getFormalCharge() + dativeNbrChg;
effectiveChg = chg;
auto atomIsEarly = isEarlyAtom(atomicNum);
if (atomIsEarly) {
effectiveChg = -chg; // <- the usual correction for early atoms
}
// special case for carbon - see GitHub #539
if (atomicNum == 6 && effectiveChg > 0) {
effectiveChg = -chg;
}
effectiveValenceChgContrib = chg;
if (nOuterElecs >= 4 && !atomIsEarly &&
(atomicNum <= 10 || chg > 0 || nElectronRichNbrs < -chg)) {
effectiveValenceChgContrib = -chg;
}
return atomicNum;
}

int calculateExplicitValence(const Atom &atom, bool strict, bool checkIt) {
// FIX: contributions of bonds to valence are being done at best
// approximately
double accum = 0;
double accum = 0.0;
int chg;
int effectiveChg;
int effectiveValenceChgContrib;
auto atomicNum = calcAtomChg(atom, chg, effectiveChg, effectiveValenceChgContrib, true);
for (const auto bnd : atom.getOwningMol().atomBonds(&atom)) {
accum += bnd->getValenceContrib(&atom);
}
accum += atom.getNumExplicitHs();

// check accum is greater than the default valence
auto atomicNum = atom.getAtomicNum();
unsigned int dv = PeriodicTable::getTable()->getDefaultValence(atomicNum);
int chr = atom.getFormalCharge();
if (isEarlyAtom(atomicNum)) {
chr *= -1; // <- the usual correction for early atoms
}
// special case for carbon - see GitHub #539
if (atomicNum == 6 && chr > 0) {
chr = -chr;
}
if (accum > (dv + chr) && isAromaticAtom(atom)) {

if (accum > (dv + effectiveChg) && isAromaticAtom(atom)) {
// this needs some explanation : if the atom is aromatic and
// accum > (dv + chr) we assume that no hydrogen can be added
// to this atom. We set x = (v + chr) such that x is the
// accum > (dv + effectiveChg) we assume that no hydrogen can be added
// to this atom. We set x = (v + effectiveChg) such that x is the
// closest possible integer to "accum" but less than
// "accum".
//
// "v" here is one of the allowed valences. For example:
// sulfur here : O=c1ccs(=O)cc1
// nitrogen here : c1cccn1C

int pval = dv + chr;
int pval = dv + effectiveChg;
const auto &valens = PeriodicTable::getTable()->getValenceList(atomicNum);
for (auto val : valens) {
if (val == -1) {
break;
}
val += chr;
val += effectiveChg;
if (val > accum) {
break;
} else {
Expand Down Expand Up @@ -383,14 +416,23 @@ int calculateExplicitValence(const Atom &atom, bool strict, bool checkIt) {
auto res = static_cast<int>(std::round(accum));

if (strict || checkIt) {
int effectiveValence;
if (PeriodicTable::getTable()->getNouterElecs(atomicNum) >= 4) {
effectiveValence = res - atom.getFormalCharge();
} else {
// for boron and co, we move to the right in the PT, so adding
// extra valences means adding negative charge
effectiveValence = res + atom.getFormalCharge();
}
// For carbon and elements to its right, extra valences can be
// accommodated by establishing covalent bonds through available
// lone electron pairs (e.g., ammonium, phosphonium, oxonium salts)
// Establishing such bonds comes at the expense of adding
// positive formal charge. Therefore, the positive formal
// charge needs to be subtracted to the atom valence to
// obtain the number of valence electrons actually engaged
// in covalent bonds, neglecting the contribution from
// lone pairs.
// However, starting from the 3rd period elements can also
// accommodate lone electrons pairs donated by atoms in
// higher groups (e.g., chalcogens and halogens),
// similarly to boron in [BF4]- and aluminum in
// [AlF6]3-, and therefore gain negative formal charge
// (e.g., hexafluorophosphate [PF6]-, hexafluorosilicate [SiF6]2-,
// hexafluoroantimonate [AsF6]-, bismuthate [BiO3]-)
int effectiveValence = res + effectiveValenceChgContrib;
const auto &valens = PeriodicTable::getTable()->getValenceList(atomicNum);

int maxValence = valens.back();
Expand Down Expand Up @@ -426,21 +468,22 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
explicitValence = calculateExplicitValence(atom, strict, checkIt);
}
// special cases
auto atomic_num = atom.getAtomicNum();
if (atomic_num == 0) {
int chg;
int effectiveChg;
int effectiveValenceChgContrib;
auto atomicNum = calcAtomChg(atom, chg, effectiveChg, effectiveValenceChgContrib, false);
if (atomicNum == 0) {
return 0;
}
for (const auto bnd : atom.getOwningMol().atomBonds(&atom)) {
if (QueryOps::hasComplexBondTypeQuery(*bnd)) {
return 0;
}
}
auto formal_charge = atom.getFormalCharge();
auto num_radical_electrons = atom.getNumRadicalElectrons();
if (explicitValence == 0 && atomic_num == 1 && num_radical_electrons == 0) {
if (formal_charge == 1 || formal_charge == -1) {
if (explicitValence == 0 && atomicNum == 1 && atom.getNumRadicalElectrons() == 0) {
if (chg == 1 || chg == -1) {
return 0;
} else if (formal_charge == 0) {
} else if (chg == 0) {
return 1;
} else {
if (strict) {
Expand All @@ -465,7 +508,7 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {

// The d-block and f-block of the periodic table (i.e. transition metals,
// lanthanoids and actinoids) have no default valence.
int dv = PeriodicTable::getTable()->getDefaultValence(atomic_num);
int dv = PeriodicTable::getTable()->getDefaultValence(atomicNum);
if (dv == -1) {
return 0;
}
Expand All @@ -481,11 +524,10 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
// exception
// finally aromatic cases are dealt with differently - these atoms are allowed
// only default valences
const auto &valens = PeriodicTable::getTable()->getValenceList(atomic_num);
const auto &valens = PeriodicTable::getTable()->getValenceList(atomicNum);

int explicitPlusRadV =
atom.getExplicitValence() + atom.getNumRadicalElectrons();
int chg = atom.getFormalCharge();

// NOTE: this is here to take care of the difference in element on
// the right side of the carbon vs left side of carbon
Expand Down Expand Up @@ -516,19 +558,12 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
//
// So assuming you read all the above stuff - you know why we are
// changing signs for "chg" here
if (isEarlyAtom(atomic_num)) {
chg *= -1;
}
// special case for carbon - see GitHub #539
if (atomic_num == 6 && chg > 0) {
chg = -chg;
}

int res = 0;
// if we have an aromatic case treat it differently
if (isAromaticAtom(atom)) {
if (explicitPlusRadV <= (static_cast<int>(dv) + chg)) {
res = dv + chg - explicitPlusRadV;
if (explicitPlusRadV <= (static_cast<int>(dv) + effectiveChg)) {
res = dv + effectiveChg - explicitPlusRadV;
} else {
// As we assume when finding the explicitPlusRadValence if we are
// aromatic we should not be adding any hydrogen and already
Expand All @@ -541,7 +576,7 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
// formal charge here vs the explicit valence function.
bool satis = false;
for (auto vi = valens.begin(); vi != valens.end() && *vi > 0; ++vi) {
if (explicitPlusRadV == ((*vi) + chg)) {
if (explicitPlusRadV == (*vi + effectiveChg)) {
satis = true;
break;
}
Expand All @@ -565,7 +600,7 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
// and be able to add hydrogens
res = -1;
for (auto vi = valens.begin(); vi != valens.end() && *vi >= 0; ++vi) {
int tot = (*vi) + chg;
int tot = *vi - effectiveValenceChgContrib;
if (explicitPlusRadV <= tot) {
res = tot - explicitPlusRadV;
break;
Expand All @@ -579,7 +614,7 @@ int calculateImplicitValence(const Atom &atom, bool strict, bool checkIt) {
// raise an error
std::ostringstream errout;
errout << "Explicit valence for atom # " << atom.getIdx() << " "
<< PeriodicTable::getTable()->getElementSymbol(atomic_num)
<< PeriodicTable::getTable()->getElementSymbol(atomicNum)
<< " greater than permitted";
std::string msg = errout.str();
BOOST_LOG(rdErrorLog) << msg << std::endl;
Expand Down
36 changes: 18 additions & 18 deletions Code/GraphMol/atomic_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,25 @@ const double electronMass =
// list of allowed valences, -1 for anything
const std::string periodicTableAtomData =
R"DAT(0 * 0 0 0 0 0 0 0 -1
1 H 0.31 0.33 1.2 1.008 1 1 1.007825032 1
2 He 0.28 0.7 1.4 4.003 2 4 4.002603254 0
3 Li 1.28 1.23 2.2 6.941 1 7 7.01600455 1 -1
4 Be 0.96 0.9 1.9 9.012 2 9 9.0121822 2
5 B 0.84 0.82 1.8 10.812 3 11 11.0093054 3
1 H 0.31 0.33 1.2 1.008 1 1 1.007825032 1
2 He 0.28 0.7 1.4 4.003 2 4 4.002603254 0
3 Li 1.28 1.23 2.2 6.941 1 7 7.01600455 1 -1
4 Be 0.96 0.9 1.9 9.012 2 9 9.0121822 2
5 B 0.84 0.82 1.8 10.812 3 11 11.0093054 3
6 C 0.76 0.77 1.7 12.011 4 12 12 4
7 N 0.71 0.7 1.6 14.007 5 14 14.003074 3
8 O 0.66 0.66 1.55 15.999 6 16 15.99491462 2
9 F 0.57 0.611 1.5 18.998 7 19 18.99840322 1
10 Ne 0.58 0.7 1.54 20.18 8 20 19.99244018 0
11 Na 1.66 1.54 2.4 22.99 1 23 22.98976928 1 -1
11 Na 1.66 1.54 2.4 22.99 1 23 22.98976928 1 -1
12 Mg 1.41 1.36 2.2 24.305 2 24 23.9850417 2 -1
13 Al 1.21 1.18 2.1 26.982 3 27 26.98153863 3 6
14 Si 1.11 0.937 2.1 28.086 4 28 27.97692653 4 6
15 P 1.07 0.89 1.95 30.974 5 31 30.97376163 3 5 7
13 Al 1.21 1.18 2.1 26.982 3 27 26.98153863 3
14 Si 1.11 0.937 2.1 28.086 4 28 27.97692653 4
15 P 1.07 0.89 1.95 30.974 5 31 30.97376163 3 5
16 S 1.05 1.04 1.8 32.067 6 32 31.972071 2 4 6
17 Cl 1.02 0.997 1.8 35.453 7 35 34.96885268 1
18 Ar 1.06 1.74 1.88 39.948 8 40 39.96238312 0
19 K 2.03 2.03 2.8 39.098 1 39 38.96370668 1 -1
19 K 2.03 2.03 2.8 39.098 1 39 38.96370668 1 -1
20 Ca 1.76 1.74 2.4 40.078 2 40 39.96259098 2 -1
21 Sc 1.70 1.44 2.3 44.956 3 45 44.9559119 -1
22 Ti 1.60 1.32 2.15 47.867 4 48 47.9479463 -1
Expand All @@ -76,13 +76,13 @@ const std::string periodicTableAtomData =
28 Ni 1.24 1.15 2.0 58.693 10 58 57.9353429 -1
29 Cu 1.32 1.17 2.0 63.546 11 63 62.9295975 -1
30 Zn 1.22 1.25 2.1 65.39 2 64 63.9291422 -1
31 Ga 1.22 1.26 2.1 69.723 3 69 68.9255736 3
32 Ge 1.20 1.188 2.1 72.61 4 74 73.9211778 4
33 As 1.19 1.2 2.05 74.922 5 75 74.9215965 3 5 7
31 Ga 1.22 1.26 2.1 69.723 3 69 68.9255736 3
32 Ge 1.20 1.188 2.1 72.61 4 74 73.9211778 4
33 As 1.19 1.2 2.05 74.922 5 75 74.9215965 3 5
34 Se 1.20 1.17 1.9 78.96 6 80 79.9165213 2 4 6
35 Br 1.20 1.167 1.9 79.904 7 79 78.9183371 1
36 Kr 1.16 1.91 2.02 83.8 8 84 83.911507 0
37 Rb 2.20 2.16 2.9 85.468 1 85 84.91178974 1
37 Rb 2.20 2.16 2.9 85.468 1 85 84.91178974 1 -1
38 Sr 1.95 1.91 2.55 87.62 2 88 87.9056121 2 -1
39 Y 1.90 1.62 2.4 88.906 3 89 88.9058483 -1
40 Zr 1.75 1.45 2.3 91.224 4 90 89.9047044 -1
Expand All @@ -97,11 +97,11 @@ const std::string periodicTableAtomData =
49 In 1.42 1.44 2.2 114.818 3 115 114.903878 3
50 Sn 1.39 1.385 2.25 118.711 4 120 119.9021947 2 4)DAT"
R"DAT(
51 Sb 1.39 1.4 2.2 121.76 5 121 120.9038157 3 5 7
51 Sb 1.39 1.4 2.2 121.76 5 121 120.9038157 3 5
52 Te 1.38 1.378 2.1 127.6 6 130 129.9062244 2 4 6
53 I 1.39 1.387 2.1 126.904 7 127 126.904473 1 3 5
54 Xe 1.40 1.98 2.16 131.29 8 132 131.9041535 0 2 4 6
55 Cs 2.44 2.35 3.0 132.905 1 133 132.9054519 1
55 Cs 2.44 2.35 3.0 132.905 1 133 132.9054519 1 -1
56 Ba 2.15 1.98 2.7 137.328 2 138 137.9052472 2 -1
57 La 2.07 1.69 2.5 138.906 3 139 138.9063533 -1
58 Ce 2.04 1.83 2.48 140.116 4 140 139.9054387 -1
Expand Down Expand Up @@ -130,7 +130,7 @@ const std::string periodicTableAtomData =
80 Hg 1.32 1.49 2.05 200.59 2 202 201.970643 -1
81 Tl 1.45 1.48 2.2 204.383 3 205 204.9744275 -1
82 Pb 1.46 1.48 2.3 207.2 4 208 207.9766521 2 4
83 Bi 1.48 1.45 2.3 208.98 5 209 208.9803987 3 5 7
83 Bi 1.48 1.45 2.3 208.98 5 209 208.9803987 3 5
84 Po 1.40 1.46 2.0 209 6 209 208.9824304 2 4 6
85 At 1.50 1.45 2.0 210 7 210 209.987148 1 3 5)DAT"
// the values for Ra and Rn are from:
Expand All @@ -139,7 +139,7 @@ const std::string periodicTableAtomData =
// https://www.ciaaw.org/radioactive-elements.htm
R"DAT(
86 Rn 1.50 2.4 2.0 222 8 222 222.0175706 0
87 Fr 2.6 2 2.0 223 1 223 223.0197359 1
87 Fr 2.6 2 2.0 223 1 223 223.0197359 1 -1
88 Ra 2.2 1.9 2.0 226 2 226 226.0254026 2 -1
89 Ac 2.15 1.88 2.0 227 3 227 227.0277521 -1
90 Th 2.06 1.79 2.4 232.038 4 232 232.0380553 -1
Expand Down
33 changes: 33 additions & 0 deletions Code/GraphMol/catch_graphmol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4114,6 +4114,39 @@ TEST_CASE("Hybridization of dative bonded atoms") {
}
}

TEST_CASE("Valences on Al, Si, P, As, Sb, Bi") {
SECTION("Should fail with AtomValenceException") {
std::vector<std::pair<std::string, unsigned int>> smiles = {
{"[Al](Cl)(Cl)(Cl)(Cl)(Cl)Cl", 0}, {"F[Si](F)(F)(F)(F)F", 1},
{"[P](F)(F)(F)(F)(F)F", 0}, {"P(=O)(O)(O)(O)(O)O", 0},
{"F[As](F)(F)(F)(F)F", 1}, {"O[As](=O)(O)(O)(O)O", 1},
{"[Sb](F)(F)(F)(F)(F)F", 0}, {"[Sb](=O)(O)(O)(O)(O)O", 0},
{"F[Bi](F)(F)(F)(F)F", 1}, {"O[Bi](=O)(O)(O)(O)(O)O", 1},
};
for (auto pr : smiles) {
CHECK_THROWS_AS(SmilesToMol(pr.first), AtomValenceException);
try {
ROMOL_SPTR m(SmilesToMol(pr.first));
RDUNUSED_PARAM(m);
} catch (const AtomValenceException &e) {
CHECK(e.getType() == "AtomValenceException");
CHECK(e.getAtomIdx() == pr.second);
}
}
}
SECTION("Should not fail") {
std::vector<std::string> smiles = {
"[Al-3](Cl)(Cl)(Cl)(Cl)(Cl)Cl", "F[Si-2](F)(F)(F)(F)F",
"[P-](F)(F)(F)(F)(F)F", "F[As-](F)(F)(F)(F)F",
"F[Sb-](F)(F)(F)(F)F", "[Bi-](F)(F)(F)(F)(F)F",
};
for (auto smi : smiles) {
ROMOL_SPTR m(SmilesToMol(smi));
CHECK(m);
}
}
}

TEST_CASE(
"GitHub Issue #7375: DetectChemistryProblems fails with traceback when run on mols coming from aromatic SMARTS",
"[bug][molops]") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ public void testRings2() {
// testing molecules which have been problematic
@Test
public void testProblems() {
testSpellings("[Al+3]CCC",
new String[] { "CCC[Al+3]", "C(C)(C[Al+3])"});
testSpellings("[Al+2]CCC",
new String[] { "CCC[Al+2]", "C(C)(C[Al+2])"});
testSpellings("C(=O)(Cl)CC(=O)Cl",
new String[] { "ClC(CC(Cl)=O)=O", "C(Cl)(=O)CC(=O)Cl","C(Cl)(=O)CC(Cl)=O"});
testSpellings("C(=O)(Cl)c1ccc(C(=O)Cl)cc1",
Expand Down
2 changes: 1 addition & 1 deletion rdkit/Chem/UnitTestSmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def testRings2(self):
def testProblems(self):
" testing molecules which have been problematic "
smiList = [
('[Al+3]CCC', ('CCC[Al+3]', 'C(C)(C[Al+3])')),
('[Al+2]CCC', ('CCC[Al+2]', 'C(C)(C[Al+2])')),
('C(=O)(Cl)CC(=O)Cl', ('ClC(CC(Cl)=O)=O', 'C(Cl)(=O)CC(=O)Cl', 'C(Cl)(=O)CC(Cl)=O')),
('C(=O)(Cl)c1ccc(C(=O)Cl)cc1', ('O=C(Cl)c1ccc(cc1)C(Cl)=O', 'C(Cl)(=O)C1=CC=C(C=C1)C(Cl)=O',
'ClC(=O)c1ccc(cc1)C(=O)Cl')),
Expand Down
4 changes: 2 additions & 2 deletions rdkit/Chem/test_data/NCI_5K_TPSA.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2896,7 +2896,7 @@ C1CN[Co]23(N1)(NCCN2)NCCN3,72.18
NC1=CC(=C(OC2=CC=CC=C2)C=C1)Cl,35.25
O=C1O[Fe]23(OC1=O)(OC(=O)C(=O)O2)OC(=O)C(=O)O3,157.80
NCC(O)=O,63.32
O=C1O[Al]23(OC1=O)(OC(=O)C(=O)O2)OC(=O)C(=O)O3,157.80
#O=C1O[Al]23(OC1=O)(OC(=O)C(=O)O2)OC(=O)C(=O)O3,157.80
O=C1C[N+]23CC[N+]45CC(=O)O[Ni]24(O1)(OC(=O)C3)OC(=O)C5,105.20
N[Co](N)(N)(N)([N+]([O-])=O)[N+]([O-])=O,190.36
SC#N.Cl[Co++]12(NCCN1)(NCCN2)[SH+]C#N,95.70
Expand Down Expand Up @@ -3368,7 +3368,7 @@ CCCCCCCCCCCN,26.02
C1CCC(CC1)NC2CCCCC2,12.03
CN(C)C(=S)SC(=S)N(C)C,6.48
NC1=CC=C(NC2=CC=CC=C2)C=C1,38.05
CCCCNCCCC.F[Si](F)(F)(F)(F)F,12.03
#CCCCNCCCC.F[Si](F)(F)(F)(F)F,12.03
CCCCCCCCCCCCNC(N)=N,61.90
NC(=S)NC1=CC=C(Br)C=C1,38.05
C[N+]12CCCC1C3=CC=C[N+](=C3)[Ni++]245([N+]6=CC(=CC=C6)C7CCC[N+]47C)[N+]8=CC(=CC=C8)C9CCC[N+]59C.SC#N,35.43
Expand Down