DNase-seq

samtools depth 处理bed文件

2019-05-13  本文已影响0人  线断木偶人

samtools depth 得到文件

awk '{if($3 >60 ) print }' depth.txt > 50.txt
head -20 50.txt
image.png

写个脚本处理下

import re
import sys
import linecache

file=sys.argv[1]

with open(file,'r') as f:
    #lines = f.readlines()
    #last_line = lines[-1]
    for line in f:
        line = line.strip()
        line=line.split('\t')

        #if chr is None and POST is None:
        # 判断chr 是否存在
        if 'chr' not in dir():
            chr = line[0]
            POST = line[1]

        if line[0] == chr:
            if line[1] == POST:
                first = line[1]
                print (line)

            elif int(line[1]) == int(POST) + 1:
                chr = line[0]
                POST = line[1]
                end = line
                #continue
            #elif
            else :
                print ('%s\n%s' %(end,line))
                #print (end)
                #print (line)
                POST = line[1]
        else:
            print (end)
            chr = line[0]
            POST = line[1]
            print (line)

f = open(file,'r')
linecount=len(f.readlines())
#print (linecount)
print (linecache.getline(file,linecount).strip().split('\t'))
f.close()

执行下脚本,生成out文件

python deal.py 50.txt > out

处理下out文件,把改删除的删掉,最后执行

cat out | paste - - | awk '{print $1,$2,$5}' OFS="\t" >final.bed

或者这样写

import re
import sys
from itertools import groupby

file=sys.argv[1]
dict={}

def addtodict2(thedict, key_a, key_b, val):
    if key_a in thedict:
        thedict[key_a].update({key_b: val})
    else:
        thedict.update({key_a:{key_b: val}})


with open(file,'r') as f:
    for line in f:
        line = line.strip()
        line=line.split('\t')

        if 'chr' not in dir():
            chr = line[0]
        #list.append(int(line[1]))
        addtodict2(dict, line[0], line[1], 1)

#print (dict.items())
for key1 in dict:
    list = key1
    list = []
    for key2 in dict[key1]:
        list.append(int(key2))

    list.sort()
    fun = lambda x: x[1] - x[0]
    for k, g in groupby(enumerate(list), fun):
        #print ([v for i, v in g])
        zzz = [v for i, v in g]
        if len(zzz) > 3:
            print ('%s\t%s\t%s' %(key1,zzz[0],zzz[-1]))
上一篇 下一篇

猜你喜欢

热点阅读