您的位置:首页 > 编程语言 > C语言/C++

【生物信息学】使用genome 作为ref时,由bam格式 or pileup格式 计算 depth 的 cpp程序

2014-01-22 02:42 357 查看
主要是首次测试 map引用指针,构建多重map。
main.cpp

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>
#include <cmath>
#include <inttypes.h>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>
#include "depth.h"

using namespace std;

void usage()
{
cout << "./depth_pileup Sample.pileup <genome.fa.fai> <output> " << endl;
cout << " -d depth for count; sep by comma " << endl;
cout << " -h get help information" << endl;
cout << "Usage: "<< endl;
cout << " samtools -O -f <genome.fa> Sample.sort.bam >Sample.pileup "<< endl;
cout << " ./depth_pileup -d 0,4,9 Sample.pileup <genome.fa.fai> <output> " << endl;

exit (0);
}

int main(int argc, char *argv[])
{
int c;
vector<int> Depth;
char *p;
while ( (c=getopt(argc,argv,"d:h")) != -1 )
{
switch(c)
{
case 'd' :
p = optarg;
break;
case 'h' :
usage();break;
default : usage();
}
}
if (argc < 3 ) usage();

stringstream ss;
ss << p;
string s = ss.str();

vector<string> Indepth;
boost::split(Indepth,s,boost::is_any_of(","),boost::token_compress_on);

for ( int i=0; i<Indepth.size(); ++i){
Depth.push_back(boost::lexical_cast<int>(Indepth[i]));
}
sort(Depth.begin(),Depth.end());

string sam_file = argv[optind++];
string fai_file = argv[optind++];

map<string,int> ChrSize;
map<string,int> ChrDepth;
map<string,map<int,int> > Chr_Dep_Cnt;

load_fai(fai_file,ChrSize);

ifstream infile;
infile.open(sam_file.c_str());
if (! infile )
{
cerr << "fail to open input file " << sam_file << endl;
}
string lineStr;
int total_line=0;

while ( getline(infile,lineStr,'\n' ))
{
vector<string> lineVec;
total_line++;
boost::split(lineVec,lineStr,boost::is_any_of("\t"),boost::token_compress_on);
ChrDepth[lineVec[0]] += boost::lexical_cast<int>(lineVec[3]);
for ( int i=0; i<Depth.size(); ++i){
if (Depth[i] <= boost::lexical_cast<int>(lineVec[3]) ){
Chr_Dep_Cnt[lineVec[0]][Depth[i]] += 1;
}
else {
Chr_Dep_Cnt[lineVec[0]][Depth[i]] += 0;
}
}
}
infile.close();

cout << "#Chr\tTotal_length\tCovered-sites\tTotal-covered-bases\tmean-Depth";
for ( int i=0;i<Depth.size();++i){
cout << "\tabove_" << Depth[i] << "x";
}
cout << endl;
map<string,map<int,int> >::iterator multi_tr;
map<int,int>::iterator inter_tr;
for ( multi_tr=Chr_Dep_Cnt.begin(); multi_tr!=Chr_Dep_Cnt.end(); multi_tr++ ){
double mean = boost::lexical_cast<double>(ChrDepth[multi_tr->first]) / boost::lexical_cast<double>(total_line);

cout << multi_tr->first << "\t" << ChrSize[multi_tr->first] << "\t" << total_line << "\t" << ChrDepth[multi_tr->first] << "\t" << mean << "\t";
for ( inter_tr=multi_tr->second.begin(); inter_tr!=multi_tr->second.end();inter_tr++ ){
cout << "\t" << inter_tr->second;
}
cout << endl;
}
}

depth.h

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>
#include <cmath>
#include <inttypes.h>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>

void load_fai(std::string &matrix_file,std::map<std::string,int> &GenSize);
depth.cpp

#include "depth.h"
using namespace std;
void load_fai(string &matrix_file, map<string,int> &GenSize){
ifstream infile;
infile.open(matrix_file.c_str());
if (! infile ) {
cerr << "fail to open input file " << matrix_file << endl;
exit(0);
}
string lineStr;

while (getline(infile,lineStr,'\n')){
if (lineStr[0] == ' ' || lineStr[0] == '\n'){
continue;
}

vector<string> lineVec;
boost::split(lineVec,lineStr, boost::is_any_of(":, \t\n"), boost::token_compress_on);
GenSize[lineVec[0]] = boost::lexical_cast<int>(lineVec[1]);
}
infile.close();
}

Makefile

cc=g++
exe=depth_pileup
obj=main.o depth.o

$(exe):$(obj)
$(cc) -o $(exe) $(obj)
main.o:main.cpp depth.h
$(cc) -c main.cpp
depth.o:depth.cpp depth.h
$(cc) -c depth.cpp
clean:
rm -rf *.o $(exe)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  C++ 生物