Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bamtools sort #7

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
33 changes: 29 additions & 4 deletions src/api/BamMultiReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,16 +381,41 @@ bool BamMultiReader::SetRegion(const int& leftRefID,
return d->SetRegion(region);
}

/*! \fn void BamMultiReader::SetSortOrder(const SortOrder& order)
/*! \fn void BamMultiReader::SetSortOrder(const BamSortCriteria& sort)
\brief Sets the expected sorting order for reading across multiple BAM files.

Default is BamMultiReader::SortedByPosition.
Default is BamMultiReader::SortedByPosition and sorting ascending.

The SortOrder determines how the reader determines which alignment is "next"
from among its open readers.

\param sort Sorting object that specifies the expected sort order and direction
*/
void BamMultiReader::SetSortOrder(const BamSortCriteria& sort) {
// Pure forwarder: the criteria object is handed unchanged to the
// implementation object d (presumably the BamMultiReaderPrivate instance - confirm).
d->SetSortOrder(sort);
}

/*! \fn void BamMultiReader::SetSortOrder(const SortOrder& order, const bool& ascending)
\brief Sets the expected sorting order for reading across multiple BAM files.

Default is BamMultiReader::SortedByPosition and sorting ascending.

The SortOrder determines how the reader determines which alignment is "next"
from among its open readers.

\param order expected sort order
\param ascending sorting direction
*/
void BamMultiReader::SetSortOrder(const SortOrder& order) {
d->SetSortOrder(order);
void BamTools::BamMultiReader::SetSortOrder(const SortOrder& order, const bool& ascending)
{
string sortCol="";
switch(order){
case BamMultiReader::SortedByAlignmentScore:sortCol="AS";break;
case BamMultiReader::SortedByPosition:sortCol="POS";break;
case BamMultiReader::SortedByReadName: sortCol="QNAME";break;
case BamMultiReader::Unsorted:sortCol="";break;
default:cerr<<"BamMultiReader.cpp ERROR: The selected sort order is not known\n";
}
BamSortCriteria sort(sortCol, !ascending);
d->SetSortOrder(sort);
}
13 changes: 8 additions & 5 deletions src/api/BamMultiReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,24 @@

#include <api/api_global.h>
#include <api/BamReader.h>
#include <api/BamSortCriteria.h>
#include <map>
#include <sstream>
#include <string>
#include <utility>

namespace BamTools {

class BamSortCriteria;
namespace Internal {
class BamMultiReaderPrivate;
} // namespace Internal

class API_EXPORT BamMultiReader {

public:
enum SortOrder { SortedByPosition = 0
, SortedByReadName
enum SortOrder { SortedByPosition =0
, SortedByReadName
, SortedByAlignmentScore
, Unsorted
};

Expand Down Expand Up @@ -78,8 +80,9 @@ class API_EXPORT BamMultiReader {
bool GetNextAlignmentCore(BamAlignment& alignment);

// sets the expected sorting order for reading across multiple BAM files
void SetSortOrder(const SortOrder& order);

void SetSortOrder(const BamSortCriteria& sort);

void SetSortOrder(const SortOrder& order, const bool& ascending =true);
// ----------------------
// access auxiliary data
// ----------------------
Expand Down
119 changes: 119 additions & 0 deletions src/api/BamSortCriteria.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#include "BamSortCriteria.h"
#include <algorithm>
#include <api/internal/BamMultiReader_p.h>
#include <api/SamConstants.h>

using namespace BamTools;
const string BamSortCriteria::allowedTags[3]={"QNAME","POS","AS"};
const string BamSortCriteria::coreTags[1]={"POS"};




bool BamSortCriteria::isTagCoreAttribute() {
int length = sizeof(coreTags)/sizeof(coreTags[0]);
for(int i=0;i<length;i++){
if(sortCriteria == coreTags[i]){
return true;
}
}
return false;
}

// Builds a human-readable, ", "-separated list of every entry in allowedTags.
string BamSortCriteria::getAllowedTags(){
    const string* const tagsEnd = allowedTags + sizeof(allowedTags)/sizeof(allowedTags[0]);

    // Seed with the first tag so the separator only ever appears between entries.
    string joined(allowedTags[0]);
    for (const string* tag = allowedTags + 1; tag != tagsEnd; ++tag) {
        joined += ", " + *tag;
    }
    return joined;
}

bool BamSortCriteria::isTagValid(const string& tag) {
int length = sizeof(allowedTags)/sizeof(allowedTags[0]);
for(int i=0;i<length;i++){
if(tag.compare(allowedTags[i])==0){
return true;
}
}
return false;
}

bool BamSortCriteria::isTagValid() {
return isTagValid(sortCriteria);
}



// Creates the merger matching the current criterion and direction.
// The returned object is allocated with new; 0 is returned (after an error
// message) when the criterion is not recognised.
IBamMultiMerger* BamSortCriteria::getMerger(void ) {

    // The empty criterion selects the unsorted merger regardless of direction.
    if (sortCriteria == "") {
        return new UnsortedMultiMerger;
    }

    if (sortCriteria=="QNAME") {
        if (descending) {
            return new CommonMultiMerger<SortReaderAlignment<SortName<greater<string> > > >();
        }
        return new CommonMultiMerger<SortReaderAlignment<SortName<less<string> > > >();
    }

    if (sortCriteria=="POS") {
        if (descending) {
            return new CommonMultiMerger<SortReaderAlignment<SortPosition<greater<int32_t> > > >();
        }
        return new CommonMultiMerger<SortReaderAlignment<SortPosition<less<int32_t> > > >();
    }

    if (sortCriteria=="AS") {
        if (descending) {
            return new CommonMultiMerger<SortReaderAlignment<SortAlignmentScore<greater<int32_t> > > >();
        }
        return new CommonMultiMerger<SortReaderAlignment<SortAlignmentScore<less<int32_t> > > >();
    }

    cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl;
    return 0;
}


// Sorts the alignments in [begin, end) in place according to the current
// criterion and direction. An unrecognised criterion leaves the range
// untouched and reports an error on cerr.
void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterator end) {
    // The comparator type differs per direction, so each criterion needs
    // two explicit std::sort instantiations.
    if (sortCriteria=="QNAME") {
        if (descending) {
            std::sort(begin,end,SortName<greater<string> >());
        } else {
            std::sort(begin,end,SortName<less<string> >());
        }
        return;
    }

    if (sortCriteria=="POS") {
        if (descending) {
            std::sort(begin,end,SortPosition<greater<int32_t> >());
        } else {
            std::sort(begin,end,SortPosition<less<int32_t> >());
        }
        return;
    }

    if (sortCriteria=="AS") {
        if (descending) {
            std::sort(begin,end,SortAlignmentScore<greater<int32_t> >());
        } else {
            std::sort(begin,end,SortAlignmentScore<less<int32_t> >());
        }
        return;
    }

    cerr << "BamMultiReader ERROR: requested sort order ("<<sortCriteria<<")is unknown" << endl;
}



// Maps the current sort criterion to the matching HD:SO constant from
// SamConstants.h.
//
// The empty criterion is what getMerger() treats as "unsorted", so it is
// reported as SAM_HD_SORTORDER_UNSORTED instead of being flagged as an error.
// Any other unrecognised criterion prints an error and yields "".
string BamSortCriteria::getSamHeaderSort(){
    if(sortCriteria=="QNAME"){
        return Constants::SAM_HD_SORTORDER_QUERYNAME;
    }
    if(sortCriteria=="POS"){
        return Constants::SAM_HD_SORTORDER_COORDINATE;
    }
    if(sortCriteria=="AS"){
        return Constants::SAM_HD_SORTORDER_ALIGNMENTSCORE;
    }
    if(sortCriteria.empty()){
        return Constants::SAM_HD_SORTORDER_UNSORTED;
    }
    cerr<<"bamtools sort ERROR: Sort criteria " << sortCriteria <<" could not be found"<<endl;
    return "";
}

121 changes: 121 additions & 0 deletions src/api/BamSortCriteria.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#ifndef BAMSORTCRITERIA_H
#define BAMSORTCRITERIA_H

#include <api/BamAlignment.h>
#include <vector>
#include <api/internal/BamMultiMerger_p.h>

using namespace std;


using namespace BamTools::Internal;
namespace BamTools {

// Functor base for comparators taking two BamAlignment objects and returning bool.
typedef binary_function<BamAlignment,BamAlignment,bool> BamAlignmentBFunction;
// Functor base for comparators taking two ReaderAlignment objects and returning bool.
typedef binary_function<ReaderAlignment,ReaderAlignment,bool> ReadAlignmentBFunction;
// Iterator over a vector of BamAlignment; the range type accepted by sortBuffer().
typedef vector< BamAlignment >::iterator BamAlignmentIterator;


/**
 * Describes how alignments are ordered: a criterion name plus a direction.
 *
 * Recognised criterion names are the entries of allowedTags. The empty
 * string is additionally accepted by getMerger() as "unsorted".
 **/
class BamSortCriteria{
    private:
        string sortCriteria;               // criterion name, e.g. "QNAME", "POS", "AS" or ""
        bool descending;                   // true: descending order, false: ascending order
        static const string allowedTags[]; // names accepted by isTagValid()
        static const string coreTags[];    // names matched by isTagCoreAttribute()
    public:
        // Comma-separated list of all allowed criterion names.
        static string getAllowedTags();

        // Default: sort by "QNAME", ascending.
        BamSortCriteria():sortCriteria("QNAME"),descending(false){}

        // Stores the requested criterion and direction. An unknown criterion is
        // reported on cerr but still stored. The empty criterion is not reported:
        // it is the deliberate "unsorted" marker that getMerger() handles, and
        // warning about it would flag a legitimate request as an error.
        BamSortCriteria(string sortCriteria, bool descending):sortCriteria(sortCriteria),descending(descending){
            if(!sortCriteria.empty() && !isTagValid()){
                cerr << "BamSortCriteria ERROR: Requested sort order ("<<sortCriteria << ") is unknown. Valid tags are: "<<getAllowedTags()<<"\n";
            }
        }

        // Read-only accessors; const so they can be used on const objects.
        string getSortCriteria() const {return sortCriteria;}
        bool isDescending() const {return descending;}

        // Sorts [begin, end) in place according to criterion and direction.
        void sortBuffer(BamAlignmentIterator begin,BamAlignmentIterator end);
        // Creates the multi-reader merger matching criterion and direction.
        IBamMultiMerger* getMerger(void);
        // Returns the SAM header sort-order value for the criterion.
        string getSamHeaderSort();
        // True when the criterion is listed in coreTags.
        bool isTagCoreAttribute();
        // True when the stored criterion is listed in allowedTags.
        bool isTagValid();
        // True when crit is listed in allowedTags.
        static bool isTagValid(const string& crit);

};




/**
* Classes that extend binary_function and provide an operator() function to compare two
* BamAlignment objects for sorting. A new class of this kind has to be created if bamtools
* should sort by other criteria
**/

// POS
/**
 * Orders two alignments by reference ID first and, within the same reference,
 * by position. COMP supplies the direction (e.g. less<int32_t> or
 * greater<int32_t>).
 *
 * operator() is const, matching SortAlignmentScore, so the functor can also be
 * invoked through a const comparator object.
 *
 * NOTE(review): a RefID of -1 (presumably unmapped reads - confirm) orders
 * before every real reference under less<>; verify that this is the intended
 * placement for coordinate-sorted output.
 **/
template<typename COMP>
class SortPosition : public binary_function<BamAlignment, BamAlignment, bool >{
    public:
        bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) const {
            COMP c;
            if ( lhs.RefID != rhs.RefID ){
                return c(lhs.RefID, rhs.RefID);
            }
            // Same reference: fall back to the position within it.
            const int32_t lh = lhs.Position;
            const int32_t rh = rhs.Position;
            return c(lh,rh);
        }
};


// QNAME
/**
 * Orders two alignments by their Name member. COMP supplies the direction
 * (e.g. less<string> or greater<string>).
 *
 * operator() is const, matching SortAlignmentScore, so the functor can also be
 * invoked through a const comparator object.
 **/
template<typename COMP>
class SortName : public binary_function<BamAlignment, BamAlignment, bool> {
    public:
        bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) const {
            COMP c;
            return c(lhs.Name, rhs.Name);
        }
};

// AS Alignment Score from BFAST

/**
 * Orders two alignments by the integer value of their "AS" tag. COMP supplies
 * the direction (e.g. less<int32_t> or greater<int32_t>).
 *
 * The result of GetTag is not checked, so both scores start at 0: an
 * alignment whose tag lookup leaves the value unset then compares as score 0
 * instead of reading an indeterminate value.
 **/
template<typename COMP>
class SortAlignmentScore : public BamAlignmentBFunction{
    public:
        bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const {
            int32_t lh = 0;
            int32_t rh = 0;
            lhs.GetTag("AS",lh);
            rhs.GetTag("AS",rh);

            COMP c;
            return c(lh,rh);
        }
};
// printf("%d %d %d %d\n",lh,rh,lhs.Position,rhs.Position);
// return c(lhs.Position, rhs.Position);



//--------------------------------------------------------
/**
* The two classes are used for a descending search as they flip the conditions of the defined
* SortLessThan* classes. One is used for the Reader and the other for the BamAlignment
**/
/**
 * Adapts a BamAlignment comparator T so it can order ReaderAlignment entries:
 * the alignments pointed to by each entry's `second` member are compared
 * with a default-constructed T.
 **/
template<typename T>
class SortReaderAlignment: public binary_function<ReaderAlignment, ReaderAlignment, bool>{
    public:
        bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs) const {
            T t;
            // Bind by reference: copying both alignments on every single
            // comparison is pure overhead and does not change the result.
            const BamAlignment& l = *lhs.second;
            const BamAlignment& r = *rhs.second;
            return t(l,r);
        }
};

}

#endif // BAMSORTCRITERIA_H
2 changes: 2 additions & 0 deletions src/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set( BamToolsAPISources
BamMultiReader.cpp
BamReader.cpp
BamWriter.cpp
BamSortCriteria.cpp
SamHeader.cpp
SamReadGroup.cpp
SamReadGroupDictionary.cpp
Expand Down Expand Up @@ -66,6 +67,7 @@ ExportHeader(APIHeaders BamIndex.h ${ApiIncludeDir})
ExportHeader(APIHeaders BamMultiReader.h ${ApiIncludeDir})
ExportHeader(APIHeaders BamReader.h ${ApiIncludeDir})
ExportHeader(APIHeaders BamWriter.h ${ApiIncludeDir})
ExportHeader(APIHeaders BamSortCriteria.h ${ApiIncludeDir})
ExportHeader(APIHeaders SamConstants.h ${ApiIncludeDir})
ExportHeader(APIHeaders SamHeader.h ${ApiIncludeDir})
ExportHeader(APIHeaders SamReadGroup.h ${ApiIncludeDir})
Expand Down
1 change: 1 addition & 0 deletions src/api/SamConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ const std::string SAM_CO_BEGIN_TOKEN = "@CO";
// HD:SO values
const std::string SAM_HD_SORTORDER_COORDINATE = "coordinate";
const std::string SAM_HD_SORTORDER_QUERYNAME = "queryname";
const std::string SAM_HD_SORTORDER_ALIGNMENTSCORE = "alignmentscore";
const std::string SAM_HD_SORTORDER_UNSORTED = "unsorted";

// HD:GO values
Expand Down