Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade hinformat.cpp InterX #2661

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
187 changes: 172 additions & 15 deletions src/formats/hinformat.cpp
Expand Up @@ -23,7 +23,28 @@ GNU General Public License for more details.

#include <cstdlib>

#include <iostream>
#include <openbabel/generic.h>

using namespace std;
char* strcat_mem(const char *str1, const char *str2) {
char* result;
int l2 = strlen(str2) ;
if (!str1) {
result = (char*) malloc(l2+1);
memcpy(result, str2, l2) ;
result[l2] = '\0' ;
return result;
}
int l1 = strlen(str1) ;
result = (char*) malloc(l1 + l2 + 1);
if(!result) return result;
memcpy(result, str1, l1) ;
memcpy(result + l1, str2, l2 );
result[l1 + l2] = '\0' ;
return result;
}

namespace OpenBabel
{

Expand All @@ -34,16 +55,28 @@ namespace OpenBabel
HINFormat()
{
OBConversion::RegisterFormat("hin",this, "chemical/x-hin");
OBConversion::RegisterOptionParam("r", this, 0, OBConversion::INOPTIONS);
OBConversion::RegisterOptionParam("T", this, 0, OBConversion::INOPTIONS); // "t"-option is already registered in fastsearchformat.cpp
OBConversion::RegisterOptionParam("c", this, 0, OBConversion::INOPTIONS);
OBConversion::RegisterOptionParam("c", this, 0, OBConversion::OUTOPTIONS);
OBConversion::RegisterOptionParam("l", this, 0, OBConversion::OUTOPTIONS);
OBConversion::RegisterOptionParam("u", this, 0, OBConversion::OUTOPTIONS);
}

const char* Description() override // required
{
return "HyperChem HIN format\n"
"No comments yet\n";
"Read Options e.g. -aR -aT\n"
" R Persive residue information (atom label, resname, chainID) automatically, ignoring info parsed from HIN. Works for PROTEIN only.\n"
" T Read atom Types from HIN (glitchy, because convertion formats usually Translate() types using predifined ttab).\n"
" By default, atom types from HIN are ignored, then auto persived at the converted format Write routine (e.g. to mol2 types)\n"
" C Parse pair-formatted comments (fmt: ;field-name value) to OBPairData type, otherwise multi-line Generic OBCommentData\n\n";
}

const char* SpecificationURL() override
{ return ""; } // optional
{ return "\nHyperChem7 Manual - Appendix-D HIN files: http://www.chemistry-software.com/pdf/Hyperchem_full_manual.pdf\nOr at\nhttps://wiki.jmol.org/index.php/File_formats/Formats/HIN"; }





const char* GetMIMEType() override
{ return "chemical/x-hin"; } // optional
Expand All @@ -55,7 +88,7 @@ namespace OpenBabel
bool WriteMolecule(OBBase* pOb, OBConversion* pConv) override;
};
//***

//Make an instance of the format class
HINFormat theHINFormat;

Expand All @@ -79,29 +112,82 @@ namespace OpenBabel
string str,str1;
double x,y,z;
OBAtom *atom;
vector<string> vs;
vector<string> vs,line;
char *comment = nullptr; // IL Modified
int len;

ifs.getline(buffer, BUFF_SIZE);
while (ifs.good() && (strstr(buffer, "mol") == nullptr || buffer[0] == ';')) //The "mol" in comment line should be ignored.
{
if ( buffer[0] == ';' )
{
if (comment) {
comment = strcat_mem(comment, "\n");
}
comment = strcat_mem(comment, buffer + 1);
}
ifs.getline(buffer, BUFF_SIZE);
if (ifs.peek() == EOF || !ifs.good())
return false;
}

tokenize(vs,buffer);

char *mol_title = nullptr;
if ( strcmp((char*)vs[0].c_str(), "mol") == 0 ) {
if (vs.size() == 3){ // Don't really know how long it'll be
mol_title = strcat_mem(mol_title, (char*)vs[2].c_str());
}
}

ifs.getline(buffer, BUFF_SIZE);
if (!ifs.good())
return false; // ended early

// We need to prevent chains perception routines from running while
// we are adding residues from the PDB file
mol.SetChainsPerceived();

mol.BeginModify();

while (ifs.good() && strstr(buffer, "endmol") == nullptr)
{
if(buffer[0]==';'){
ifs.getline(buffer, BUFF_SIZE);
continue; //The comment Line in HIN should be ignored.
}
if(buffer[0]==';'){
// Push comments to mol.SetData using OBCommentData *cd
if (comment) {
comment = strcat_mem(comment, "\n");
}
comment = strcat_mem(comment, buffer + 1);

ifs.getline(buffer, BUFF_SIZE);
continue; //The comment Line in HIN should be ignored.
}

tokenize(vs,buffer);

// HIN Format Specification is in HyperChem7 manual, AppendixD: https://wiki.jmol.org/index.php/File_formats/Formats/HIN
if (vs.size() < 3) // Don't really know how long it'll be
{
ifs.getline(buffer, BUFF_SIZE);
continue;
}
OBResidue *res = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : nullptr;
if ( strcmp((char*)vs[0].c_str(), "res") == 0 ) {
string resnum = (char *)vs[1].c_str();
string resname = (char *)vs[2].c_str();
res = mol.NewResidue();
res->SetName(resname);
res->SetNum(resnum);
char chain;
if (vs.size() >= 6) {
chain = (char)vs[5].c_str()[0];
res->SetChain(chain);
}
ifs.getline(buffer, BUFF_SIZE);
continue;
}

tokenize(vs,buffer); // Don't really know how long it'll be
if (vs.size() < 11)
if (vs.size() < 11) // Don't really know how long it'll be
{
ifs.getline(buffer, BUFF_SIZE);
continue;
Expand Down Expand Up @@ -138,6 +224,21 @@ namespace OpenBabel
}
mol.AddBond(mol.NumAtoms(), atoi((char *)vs[i].c_str()), bo);
}

atom->SetFormalCharge(atof(vs[6].c_str())); // Unlike HyperChem, InterX uses vs[6] for a formal charge not a patial charge as set above (atom->SetPartialCharge)
if (pConv->IsOption("T", OBConversion::INOPTIONS) != nullptr){
atom->SetType((char *)vs[4].c_str());; // Set Types only if auto preception is not forced by "T" option, otherwise ttab.Translate is used, like in MOL2.
}

// Add Default Residue Info if it was not defined in HIN
if (res == nullptr) {
res = mol.NewResidue();
res->SetName("UNL");
res->SetNum("1");
}
res->AddAtom(atom);
res->SetSerialNum(atom, atoi((char*)vs[1].c_str()));
res->SetAtomID(atom, (char *)vs[2].c_str());
ifs.getline(buffer, BUFF_SIZE);
}

Expand All @@ -153,12 +254,53 @@ namespace OpenBabel
while(strlen(buffer) == 0 && !ifs.eof() );
ifs.seekg(ipos);


mol.EndModify();

mol.SetTitle(title);
mol.SetTitle(title); // Use filename as Title
if (mol_title) {
mol.SetTitle(mol_title); // Use molecule name parsed from HIN
}

mol.SetPartialChargesPerceived();
if (pConv->IsOption("R", OBConversion::INOPTIONS) == nullptr){
mol.SetChainsPerceived();
}
if (pConv->IsOption("T", OBConversion::INOPTIONS) != nullptr){
mol.SetAtomTypesPerceived(); // Mark as percieved (will use types parsed from HIN), unless forced by the "T" option which will auto pecieve types.
}

// Parse comments to OBPairData type if read option -C or to Generic OBCommentData
if (comment)
{
if (pConv->IsOption("C", OBConversion::INOPTIONS) != nullptr)
{
tokenize(line, comment,"\n");
comment[0] = '\0'; // Empty the char array by defining its 1st element as end of string
for (i = 0; i < line.size(); i+=1)
{
tokenize(vs,line[i]);
if (vs.size() == 2)
{
OBPairData *pd = new OBPairData;
pd->SetAttribute(vs[0]);
pd->SetValue(vs[1]);
pd->SetOrigin(fileformatInput);
mol.SetData(pd);
}else{
if (comment) {
comment = strcat_mem(comment, "\n");
}
comment = strcat_mem(comment, (char *)line[i].c_str() );
}
}
}
// Use the generic single multi-line comment
//must add generic data after end modify - otherwise it will be blown away
OBCommentData *cd = new OBCommentData;
cd->SetData(comment);
cd->SetOrigin(fileformatInput);
mol.SetData(cd);
}
return(true);
}

Expand All @@ -182,21 +324,36 @@ namespace OpenBabel
vector<OBBond*>::iterator j;
char bond_char;

// Dump CommentData
if (mol.HasData(OBGenericDataType::CommentData))
{
OBCommentData *cd = (OBCommentData*)mol.GetData(OBGenericDataType::CommentData);
ofs << cd->GetData() << endl;
}

// make sure to escape titles in double quotes
// PR#1501694
ofs << "mol " << file_num << " \"" << mol.GetTitle() << "\"\n";

for(i = 1;i <= mol.NumAtoms(); i++)
{
atom = mol.GetAtom(i);
snprintf(buffer, BUFF_SIZE, "atom %d - %-3s ** - %8.5f %8.5f %8.5f %8.5f %d ",
OBResidue *res = atom->GetResidue();
char* atype = atom->GetType();
if (!atype){
atype = (char*)"**";
}
snprintf(buffer, BUFF_SIZE, "atom %d %s %-3s %s - %8.5f %8.5f %8.5f %8.5f %d ",
i,
(char*)res->GetAtomID(atom).c_str(),
OBElements::GetSymbol(atom->GetAtomicNum()),
atype,
atom->GetPartialCharge(),
atom->GetX(),
atom->GetY(),
atom->GetZ(),
atom->GetExplicitDegree());

ofs << buffer;
for (bond = atom->BeginBond(j); bond; bond = atom->NextBond(j))
{
Expand Down