pysam

2021-05-26  本文已影响0人  rong酱
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Directory prefix used when building the per-barcode output BAM paths.
out_dir = "/t11/b2/"
# Input BAM that the script opens for reading and splits.
unsplit_file = "/unsplit.bam"

import pysam
import sys
import os
import re


# Split `unsplit_file` into numbered BAM files, one per run of consecutive
# reads that share the same CB tag value. A new output file
# (<out_dir>/CB_<n>.bam, n starting at 1) is started whenever the CB value
# differs from the previous tagged read's; reads without a CB tag are skipped.
itr = 0              # number of output files opened so far
CB_hold = 'unset'    # CB value of the run currently being written
split_file = None    # writer for the current run; None until the first tagged read

samfile = pysam.AlignmentFile(unsplit_file, "rb")

try:
    for read in samfile.fetch(until_eof=True):
        # Query the tag directly: a substring search over str(read) would
        # also match "CB" appearing anywhere else in the read's text.
        if not read.has_tag('CB'):
            continue
        print("read:" + str(read))
        CB_itr = read.get_tag('CB')
        print("CB_itr : "+str(CB_itr))
        if CB_itr != CB_hold or itr == 0:
            # Finish the previous run's file before starting the next one.
            # It must be closed, never reopened in "wb" mode, or the reads
            # already written to it would be truncated away.
            if split_file is not None:
                split_file.close()
            CB_hold = CB_itr
            itr = itr + 1
            split_file = pysam.AlignmentFile('%s/CB_%s.bam'%(out_dir,itr), "wb", template=samfile)
        # Every tagged read goes to the current run's file, not only the
        # read that opened it.
        split_file.write(read)
finally:
    # split_file stays None when no read carried a CB tag.
    if split_file is not None:
        split_file.close()
    samfile.close()

优化

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pysam
import sys
import os
import re
import threading
# Positional command-line arguments: the input BAM path comes first, the
# output directory second.
unsplit_file, out_dir = sys.argv[1], sys.argv[2]
def run(unsplit_file,out_dir):
    """Split a BAM file into one BAM file per distinct ``CB`` tag value.

    Each read that carries a ``CB`` tag is written to
    ``<out_dir>/CB_<tag value>.bam``; reads without the tag are skipped.
    Output files are created with the input file as their header template.

    One writer is kept open per distinct tag value until the whole input has
    been read, because reopening a path in "wb" mode would discard the reads
    already written to it.
    NOTE(review): an input with a very large number of distinct CB values
    could presumably exceed the process's open-file limit -- confirm against
    the expected data before running on large inputs.

    Args:
        unsplit_file: Path of the BAM file to read.
        out_dir: Directory prefix for the per-barcode output files.

    Returns:
        None.
    """
    writers = {}  # CB tag value -> open output AlignmentFile
    samfile = pysam.AlignmentFile(unsplit_file, "rb")
    try:
        for read in samfile.fetch(until_eof=True):
            # Skip reads that have no CB tag. Checking the tag itself avoids
            # false matches from a substring search over the read's text.
            if not read.has_tag('CB'):
                continue
            CB_itr = read.get_tag('CB')
            split_file = writers.get(CB_itr)
            if split_file is None:
                # First read seen for this barcode: create its output file.
                split_file = pysam.AlignmentFile('%s/CB_%s.bam'%(out_dir,CB_itr), "wb", template=samfile)
                writers[CB_itr] = split_file
            split_file.write(read)
    finally:
        # Close every writer so buffered records are flushed to disk, then
        # release the input handle.
        for split_file in writers.values():
            split_file.close()
        samfile.close()
    return
if __name__ == '__main__':
    # Run the split exactly once, in the main thread. Writing
    # threading.Thread(target=run(...)) would call run() immediately and hand
    # its None return value to the thread, so the threads would do nothing.
    # The output paths depend only on out_dir and the CB values, so repeated
    # runs would just rewrite the same files.
    run(unsplit_file,out_dir)

个人对生物信息学的理解:其基础应该是统计学 + 计算机科学。

上一篇 下一篇

猜你喜欢

热点阅读