Skip to content

Commit

Permalink
Merge pull request #552 from NadineSchneider/CanonicalizationPaper_Au…
Browse files Browse the repository at this point in the history
…g2015

Canonicalization paper Aug2015
  • Loading branch information
greglandrum committed Aug 7, 2015
2 parents 4efed71 + bb60a3d commit 95623b7
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 32 deletions.
12 changes: 11 additions & 1 deletion Code/GraphMol/SmilesParse/SmilesWrite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,16 @@ namespace RDKit{
std::vector<unsigned int> atomOrdering;

if(canonical){
Canon::rankMolAtoms(*tmol,ranks,true,doIsomericSmiles,doIsomericSmiles);
if(tmol->hasProp("_canonicalRankingNumbers")){
for(unsigned int i=0;i<tmol->getNumAtoms();++i){
unsigned int rankNum;
tmol->getAtomWithIdx(i)->getPropIfPresent("_canonicalRankingNumber",rankNum);
ranks[i]=rankNum;
}
}
else{
Canon::rankMolAtoms(*tmol,ranks,true,doIsomericSmiles,doIsomericSmiles);
}
} else {
for(unsigned int i=0;i<tmol->getNumAtoms();++i) ranks[i]=i;
}
Expand All @@ -401,6 +410,7 @@ namespace RDKit{
}
#endif


std::vector<Canon::AtomColors> colors(nAtoms,Canon::WHITE_NODE);
std::vector<Canon::AtomColors>::iterator colorIt;
colorIt = colors.begin();
Expand Down
21 changes: 19 additions & 2 deletions Code/GraphMol/hanoitest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,22 @@ std::string smis[]={
"CCCCCCCCCCCCCCCCCCNC(=O)OC[C@H]1C[C@H]([C@@H2]OC(=O)N(Cc2cccc[n+]2CC)C(C)=O)C1.[I-]",
//CHEMBL1172371
"CC.CCCCCCCCCC(C(=O)NCCc1ccc(OP(=S)(Oc2ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc2)N(C)/N=C/c2ccc(OP3(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=NP(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=NP(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)(Oc4ccc(/C=N/N(C)P(=S)(Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)Oc5ccc(CCNC(=O)C(CCCCCCCCC)P(=O)([O-])O)cc5)cc4)=N3)cc2)cc1)P(=O)([O-])O.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO.CCCCCCCCCCCCCCCC[NH2+]OC(CO)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO",
//Examples first reviewer
"C12C3C4C1C3C1C3C4C1C23", // does not initially work
"C12C3C1C1C4C5C(C23)C5C14",
"C12C3C4C5C1C1C4C5C3C21",
"C12C3C4C1C2C1C2C4C2C31",
"C12C3C4C5C2C2C6C1C(C5C36)C42",
"C12C3C4C5C1C1C3C3C5C1C2C43",
"C12C3C4C5C6C1C1C7C3C3C5C1C1C6C3C2C7C41", // does not initially work
"C12C3C4C5C6C1C1C7C3C3C6C6C4C7C2C3C5C16",
"C12C3C4C5C6C7C8C9C1C6C1C(C37)C9C5C2C8C41",
"C12C3C4C5C6C7C1C1C8C4C4C9C2C2C5C5C1C1C3C(C7C45)C2C8C6C91", // does not initially work
"C12C3C4C5C6C7C1C1C8C4C4C7C7C3C3C8C8C2C2C5C3C4C(C1C72)C68",
"C12C3C4C5C6C7C8C1C1C9C5C5C%10C2C2C%11C%12C%13C3C3C7C%10C7C4C%11C1C3C(C5C8%12)C(C62)C7C9%13", // does not initially work
//drawn examples first reviewer
"C12C3C4C1CC5C46C7C5C1C57C6C53C1C2",
"C1C2C3C4CC5C6C1C17C8C61C5C48C3C27",
"EOS"
};

Expand Down Expand Up @@ -1358,10 +1374,11 @@ int main(){
test9();
test10();
test11();
test8();
test7();
test12();
test7();
test8();
#endif

return 0;
}

80 changes: 54 additions & 26 deletions Code/GraphMol/new_canon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,21 @@ namespace RDKit {
}
}

void compareRingAtomsConcerningNumNeighbors(Canon::canon_atom *atoms, unsigned int nAtoms) {

void compareRingAtomsConcerningNumNeighbors(Canon::canon_atom *atoms, unsigned int nAtoms, const ROMol &mol) {
RingInfo *ringInfo=mol.getRingInfo();
if(!ringInfo->isInitialized()){
ringInfo->initialize();
}
for(unsigned idx = 0; idx < nAtoms; ++idx){
const Canon::canon_atom &a = atoms[idx];
if(ringInfo->numAtomRings(a.atom->getIdx()) < 1){
continue;
}
std::deque<int> neighbors;
neighbors.push_back(idx);
unsigned currentRNIdx=0;
atoms[idx].neighborNum.reserve(1000);
atoms[idx].revistedNeighbors.assign(1000,0);
char *visited=(char *)malloc(nAtoms*sizeof(char));
memset(visited,0,nAtoms*sizeof(char));
unsigned count = 1;
Expand All @@ -78,16 +88,20 @@ namespace RDKit {
memset(lastLevelNbrs,0,nAtoms*sizeof(char));
char *currentLevelNbrs=(char *)malloc(nAtoms*sizeof(char));
memset(currentLevelNbrs,0,nAtoms*sizeof(char));
int *revisitedNeighbors=(int *)malloc(nAtoms*sizeof(int));
memset(revisitedNeighbors,0,nAtoms*sizeof(int));
while(!neighbors.empty()){
unsigned int revisitedNeighbors=0;
unsigned int numLevelNbrs=0;
nextLevelNbrs.resize(0);
while(!neighbors.empty()){
int nidx = neighbors.front();
neighbors.pop_front();
const Canon::canon_atom &atom = atoms[nidx];
if(ringInfo->numAtomRings(atom.atom->getIdx()) < 1){
continue;
}
lastLevelNbrs[nidx]=1;
visited[nidx]=1;
neighbors.pop_front();
for(unsigned int j=0; j<atom.degree; j++){
int iidx = atom.nbrIds[j];
if(!visited[iidx]){
Expand All @@ -104,7 +118,7 @@ namespace RDKit {
for(unsigned int k=0; k<natom.degree; k++){
int jidx = natom.nbrIds[k];
if(currentLevelNbrs[jidx] || lastLevelNbrs[jidx]){
revisitedNeighbors+=1;
revisitedNeighbors[jidx]+=1;
}
}
}
Expand All @@ -116,25 +130,36 @@ namespace RDKit {
}
}
memset(currentLevelNbrs,0,nAtoms*sizeof(char));
if(revisitedNeighbors < 10){
atoms[idx].revistedNeighbors = (atoms[idx].revistedNeighbors*10) + revisitedNeighbors;
std::vector<int>tmp;
tmp.reserve(30);
for(unsigned i=0; i<nAtoms; ++i){
if(revisitedNeighbors[i]>0){
tmp.push_back(revisitedNeighbors[i]);
}
}
else{
atoms[idx].revistedNeighbors = (atoms[idx].revistedNeighbors*100) + revisitedNeighbors;
std::sort(tmp.begin(),tmp.end());
tmp.push_back(-1);
for(unsigned i=0; i<tmp.size(); ++i){
if (currentRNIdx >= atoms[idx].revistedNeighbors.size()){
atoms[idx].revistedNeighbors.resize(atoms[idx].revistedNeighbors.size()+1000);
}
atoms[idx].revistedNeighbors[currentRNIdx] = tmp[i];
currentRNIdx++;
}
memset(revisitedNeighbors,0,nAtoms*sizeof(int));

atoms[idx].neighborNum.push_back(numLevelNbrs);
atoms[idx].neighborNum.push_back(-1);

if(numLevelNbrs < 10){
atoms[idx].neighborNum = (atoms[idx].neighborNum*10) +numLevelNbrs;
}
else{
atoms[idx].neighborNum = (atoms[idx].neighborNum*100) +numLevelNbrs;
}
neighbors.insert(neighbors.end(),nextLevelNbrs.begin(),nextLevelNbrs.end());
count++;
}
atoms[idx].revistedNeighbors.resize(currentRNIdx);

free(visited);
free(currentLevelNbrs);
free(lastLevelNbrs);
free(revisitedNeighbors);
}
}

Expand Down Expand Up @@ -199,28 +224,31 @@ namespace RDKit {
}
ties=false;
unsigned symRingAtoms=0;
unsigned countCls=0;
unsigned ringAtoms=0;
bool branchingRingAtom=false;
RingInfo *ringInfo=mol.getRingInfo();
if(!ringInfo->isInitialized()){
ringInfo->initialize();
}
for(unsigned i=0; i<nAts; ++i){
if(ringInfo->numAtomRings(ftor.dp_atoms[i].atom->getIdx()) > 1){
if(count[i] != 1){
symRingAtoms += 1;
if(ringInfo->numAtomRings(order[i])){
if(count[order[i]] > 2){
symRingAtoms+=count[order[i]];
}
ringAtoms++;
if(ringInfo->numAtomRings(order[i]) > 1 && count[order[i]] > 1){
branchingRingAtom = true;
}
}
if(count[i]){
countCls++;
}
else{
if(!count[i]){
ties=true;
}

}
unsigned int nAts2 = atomsInPlay ? atomsInPlay->count() : nAts;
if(useSpecial && ties && static_cast<float>(countCls)/nAts2 < 0.5 && symRingAtoms>0){
// std::cout << " " << ringAtoms << " " << symRingAtoms << std::endl;
if(useSpecial && ties && ringAtoms > 0 && static_cast<float>(symRingAtoms)/ringAtoms > 0.5 && branchingRingAtom){
SpecialSymmetryAtomCompareFunctor sftor(atoms,mol,atomsInPlay,bondsInPlay);
compareRingAtomsConcerningNumNeighbors(atoms, nAts);
compareRingAtomsConcerningNumNeighbors(atoms, nAts, mol);
ActivatePartitions(nAts,order,count,activeset,next,changed);
RefinePartitions(mol,atoms,sftor,true,order,count,activeset,next,changed,touched);
#ifdef VERBOSE_CANON
Expand Down
6 changes: 3 additions & 3 deletions Code/GraphMol/new_canon.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ namespace RDKit {
bool isRingStereoAtom;
int* nbrIds;
const std::string *p_symbol; // if provided, this is used to order atoms
unsigned int neighborNum;
unsigned int revistedNeighbors;
std::vector<int> neighborNum;
std::vector<int> revistedNeighbors;
std::vector<bondholder> bonds;

canon_atom() : atom(NULL),index(-1),degree(0),totalNumHs(0),
hasRingNbr(false), isRingStereoAtom(false), nbrIds(NULL),
p_symbol(NULL), neighborNum(0), revistedNeighbors(0) {};
p_symbol(NULL) {};
};

void updateAtomNeighborIndex(canon_atom* atoms, std::vector<bondholder> &nbrs);
Expand Down

0 comments on commit 95623b7

Please sign in to comment.