samtools depth 处理bed文件
2019-05-13 本文已影响0人
线断木偶人
samtools depth 得到文件
awk '{if($3 >60 ) print }' depth.txt > 50.txt
head -20 50.txt

写个脚本处理下
import re
import sys
import linecache
file=sys.argv[1]
with open(file,'r') as f:
#lines = f.readlines()
#last_line = lines[-1]
for line in f:
line = line.strip()
line=line.split('\t')
#if chr is None and POST is None:
# 判断chr 是否存在
if 'chr' not in dir():
chr = line[0]
POST = line[1]
if line[0] == chr:
if line[1] == POST:
first = line[1]
print (line)
elif int(line[1]) == int(POST) + 1:
chr = line[0]
POST = line[1]
end = line
#continue
#elif
else :
print ('%s\n%s' %(end,line))
#print (end)
#print (line)
POST = line[1]
else:
print (end)
chr = line[0]
POST = line[1]
print (line)
f = open(file,'r')
linecount=len(f.readlines())
#print (linecount)
print (linecache.getline(file,linecount).strip().split('\t'))
f.close()
执行下脚本,生成out文件
python deal.py 50.txt > out
处理下out文件,把改删除的删掉,最后执行
cat out | paste - - | awk '{print $1,$2,$5}' OFS="\t" >final.bed
或者这样写
import re
import sys
from itertools import groupby
file=sys.argv[1]
dict={}
def addtodict2(thedict, key_a, key_b, val):
if key_a in thedict:
thedict[key_a].update({key_b: val})
else:
thedict.update({key_a:{key_b: val}})
with open(file,'r') as f:
for line in f:
line = line.strip()
line=line.split('\t')
if 'chr' not in dir():
chr = line[0]
#list.append(int(line[1]))
addtodict2(dict, line[0], line[1], 1)
#print (dict.items())
for key1 in dict:
list = key1
list = []
for key2 in dict[key1]:
list.append(int(key2))
list.sort()
fun = lambda x: x[1] - x[0]
for k, g in groupby(enumerate(list), fun):
#print ([v for i, v in g])
zzz = [v for i, v in g]
if len(zzz) > 3:
print ('%s\t%s\t%s' %(key1,zzz[0],zzz[-1]))