计算bed文件overlap
2021-03-21 本文已影响0人
rong酱
# -*- coding: utf-8 -*-
#!/usr/bin/env python
abeddict = {}
with open("a.bed","r") as aliness:
alines = aliness.readlines()
for aline in alines:
splitaline = str(aline).strip().split("\t")
chranum = splitaline[0]
astartpos = splitaline[1]
endbpos = splitaline[2]
if chranum in abeddict:
abeddict[chranum].append([astartpos,endbpos])
elif chranum not in abeddict:
abeddict[chranum] = []
abeddict[chranum].append([astartpos,endbpos])
chrkeyslist = abeddict.keys()
print("abeddict: "+str(abeddict))
print("chrkeyslist: "+str(chrkeyslist))
bbeddict = {}
with open("b.bed","r") as bliness:
blines = bliness.readlines()
for bline in blines:
splitbline = str(bline).strip().split("\t")
chrbnum = splitbline[0]
bstartpos = splitbline[1]
endbpos = splitbline[2]
if chrbnum in bbeddict:
bbeddict[chrbnum].append([bstartpos,endbpos])
elif chrbnum not in bbeddict:
bbeddict[chrbnum] = []
bbeddict[chrbnum].append([bstartpos,endbpos])
print("bbeddict: "+str(bbeddict))
overlapcon = open("overlap.txt","w")
overlapvalue = 0
for chrkeyslisti in chrkeyslist:
achrvalues = abeddict[chrkeyslisti]
bchrvalues = bbeddict[chrkeyslisti]
for achrvaluesi in achrvalues:
startachrvaluesi = achrvaluesi[0]
endachrvaluesi = achrvaluesi[1]
for bchrvaluesi in bchrvalues:
startbchrvaluesi = bchrvaluesi[0]
endbchrvaluesi = bchrvaluesi[1]
if int(startachrvaluesi) < int(startbchrvaluesi) and int(endachrvaluesi) < int(endbchrvaluesi):
overlapvalue = int(endachrvaluesi) - int(startbchrvaluesi)
apercent = float(overlapvalue/(int(endachrvaluesi) - int(startachrvaluesi)))
bpercent = float(overlapvalue/(int(endbchrvaluesi) - int(startbchrvaluesi)))
overlapcon.write(str(chrkeyslisti)+"\t"+str(startachrvaluesi)+"\t"+str(endachrvaluesi)+"\t"+str(startbchrvaluesi)+"\t"+str(endbchrvaluesi)+str(overlapvalue)+"\t"+str(apercent)+"\t"+str(bpercent)+"\n")
elif int(startachrvaluesi) > int(startbchrvaluesi) and int(endachrvaluesi) > int(endbchrvaluesi):
overlapvalue = int(endbchrvaluesi) - int(startachrvaluesi)
apercent = float(overlapvalue/(int(endachrvaluesi) - int(startachrvaluesi)))
bpercent = float(overlapvalue/(int(endbchrvaluesi) - int(startbchrvaluesi)))
overlapcon.write(str(chrkeyslisti)+"\t"+str(startachrvaluesi)+"\t"+str(endachrvaluesi)+"\t"+str(startbchrvaluesi)+"\t"+str(endbchrvaluesi)+str(overlapvalue)+"\t"+str(apercent)+"\t"+str(bpercent)+"\n")
elif int(startachrvaluesi) < int(startbchrvaluesi) and int(endachrvaluesi) > int(endbchrvaluesi):
overlapvalue = int(endbchrvaluesi) - int(startbchrvaluesi)
apercent = float(overlapvalue/(int(endachrvaluesi) - int(startachrvaluesi)))
bpercent = float(overlapvalue/(int(endbchrvaluesi) - int(startbchrvaluesi)))
overlapcon.write(str(chrkeyslisti)+"\t"+str(startachrvaluesi)+"\t"+str(endachrvaluesi)+"\t"+str(startbchrvaluesi)+"\t"+str(endbchrvaluesi)+str(overlapvalue)+"\t"+str(apercent)+"\t"+str(bpercent)+"\n")
elif int(startachrvaluesi) > int(startbchrvaluesi) and int(endachrvaluesi) < int(endbchrvaluesi):
overlapvalue = int(endachrvaluesi) - int(startachrvaluesi)
apercent = float(overlapvalue/(int(endachrvaluesi) - int(startachrvaluesi)))
bpercent = float(overlapvalue/(int(endbchrvaluesi) - int(startbchrvaluesi)))
overlapcon.write(str(chrkeyslisti)+"\t"+str(startachrvaluesi)+"\t"+str(endachrvaluesi)+"\t"+str(startbchrvaluesi)+"\t"+str(endbchrvaluesi)+str(overlapvalue)+"\t"+str(apercent)+"\t"+str(bpercent)+"\n")