Inconsistence in perception of rotatable bonds before and after adding hydrogens #3776
-
First of all, let me thank all of you for the great work you did with the latest 2020.09 issue of RDKit, it really improved a lot! I am using RDKit both for development and for processing of chemoinformatics libraries, and I realized that the rotatable bond descriptor is perceiving the number of rotatable bonds with some inconsistency. I stumbled upon this issue while I was inspecting some structures of FDA-approved drugs (for which this descriptor is calculated with other tools). Basically, when hydrogens are added with the I think this is related to how the rotors are defined in the Lipinski module (#211). In order to test this, I prepared the smiles string for betrixaban and a modified version of it where all the methyl groups were swapped to their trifluoromethyl version. The code below can be used to reproduce the behavior: from rdkit import Chem
testLib = ['COc1ccc(NC(=O)c2ccc(C(=N)N(C)(C))cc2)c(C(=O)Nc2ccc(Cl)cn2)c1', #betrixarban
'C(F)(F)(F)Oc1ccc(NC(=O)c2ccc(C(=N)N(C(F)(F)(F))(C(F)(F)(F)))cc2)c(C(=O)Nc2ccc(Cl)cn2)c1' #trifluoro
]
names= ['betrixaban','CF3betrixaban']
btx_lib = [Chem.MolFromSmiles(x) for x in testLib]
btx_lib[0].SetProp('_Name',names[0])
btx_lib[1].SetProp('_Name',names[1])
#From examples in the cookbook
def find_bond_groups(mol):
"""Find groups of contiguous rotatable bonds and return them sorted by decreasing size"""
rot_atom_pairs = mol.GetSubstructMatches(RotatableBondSmarts)
print("Rotor atom pairs:",rot_atom_pairs,"")
rot_bond_set = set([mol.GetBondBetweenAtoms(*ap).GetIdx() for ap in rot_atom_pairs])
rot_bond_groups = []
while (rot_bond_set):
i = rot_bond_set.pop()
connected_bond_set = set([i])
stack = [i]
while (stack):
i = stack.pop()
b = mol.GetBondWithIdx(i)
bonds = []
for a in (b.GetBeginAtom(), b.GetEndAtom()):
bonds.extend([b.GetIdx() for b in a.GetBonds() if (
(b.GetIdx() in rot_bond_set) and (not (b.GetIdx() in connected_bond_set)))])
connected_bond_set.update(bonds)
stack.extend(bonds)
rot_bond_set.difference_update(connected_bond_set)
rot_bond_groups.append(tuple(connected_bond_set))
return tuple(sorted(rot_bond_groups, reverse = True, key = lambda x: len(x)))
from rdkit.Chem.Lipinski import RotatableBondSmarts
roBondsDist = {}
#names =[]
roBoNoH=[]
roBoH = []
def test_descriptors(smiles):
try:
mol = Chem.MolFromSmiles(smiles)
except:
mol=smiles
name = mol.GetProp('_Name')
names.append(name)
molH = Chem.AddHs(mol)
print(f"\n-------- ANALYSIS FOR MOL {name} ---------\n")
print(
"------ NO HYDROGENS -------",
"\n#roBonds strict:",Chem.rdMolDescriptors.CalcNumRotatableBonds(mol,strict=True),
"\n#roBonds non-strict:",Chem.rdMolDescriptors.CalcNumRotatableBonds(mol),
"\n#LHBD noHs:",Chem.rdMolDescriptors.CalcNumLipinskiHBD(mol),
"\n#LHBA noHs:",Chem.rdMolDescriptors.CalcNumLipinskiHBA(mol),
"\nTPSA:", np.round(molds.CalcTPSA(mol), 3),
"\nLASA:",np.round(molds.CalcLabuteASA(mol), 3),
"\n------ ADD HYDROGENS -------",
"\n#roBonds strict:",Chem.rdMolDescriptors.CalcNumRotatableBonds(molH,strict=True),
"\n#roBonds non-strict:",Chem.rdMolDescriptors.CalcNumRotatableBonds(molH),
"\n#LHBD:",Chem.rdMolDescriptors.CalcNumLipinskiHBD(molH),
"\n#LHBA:",Chem.rdMolDescriptors.CalcNumLipinskiHBA(molH),
"\nTPSA:", np.round(molds.CalcTPSA(molH), 3),
"\nLASA:", np.round(molds.CalcLabuteASA(molH), 3))
#mol.SetProp('_Name','Testing')
# Find groups of contiguous rotatable bonds in mol
print("\nRotors noH")
bond_groups = find_bond_groups(mol)
print("\nRotors add Hydrogens")
bond_groups_H =find_bond_groups(molH)
print("")
# As bond groups are sorted by decreasing size, the size of the first group (if any)
# is the largest number of contiguous rotatable bonds in mol
largest_n_cont_rot_bonds = len(bond_groups[0]) if bond_groups else 0
roBoNoH.append(Chem.rdMolDescriptors.CalcNumRotatableBonds(mol))
roBoH.append(Chem.rdMolDescriptors.CalcNumRotatableBonds(molH))
displayRDKit2D(mol)
for m in btx_lib:
test_descriptors(m)
And the output for the trifluoro version of betrixaban:
As you can notice, the number of rotatable bonds is now increased by 3, and those 3 bonds are, in this case, referred to the two N-C bonds in the amidine head, and the O-C bond in the methoxy group of the scaffold respectively. I am not sure if this can be considered a bug or eventually a feature request. Best, |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
Hi @rubbs14,
This is expected behavior. The num_rotatable_bonds descriptor, like many of RDKit's other "2D" functions, is assuming that you are operating on a molecule which does not have Hs in the graph. |
Beta Was this translation helpful? Give feedback.
Hi @rubbs14,
Here's a much simpler version of what you're asking about:
This is expected behavior. The num_rotatable_bonds descriptor, like many of RDKit's other "2D" functions, is assuming that you are operating on a molecule which does not have Hs in the graph.
Unless you have a very particular reason for not doing so, I do suggest making sure you've either called RemoveHs() or have parsed the molecule without changing the default options (which remove Hs).