计算bed文件overlap

2021-03-21  本文已影响0人  rong酱
# -*- coding: utf-8 -*-
#!/usr/bin/env python

abeddict = {}
with open("a.bed","r") as aliness:
    alines = aliness.readlines()
    for aline in alines:
        splitaline = str(aline).strip().split("\t")
        chranum = splitaline[0]
        astartpos = splitaline[1]
        endbpos = splitaline[2]
        if chranum in abeddict:
            abeddict[chranum].append([astartpos,endbpos])
        elif chranum not in abeddict:
            abeddict[chranum] = []
            abeddict[chranum].append([astartpos,endbpos])
chrkeyslist = abeddict.keys()
print("abeddict: "+str(abeddict))
print("chrkeyslist: "+str(chrkeyslist)) 
bbeddict = {}
with open("b.bed","r") as bliness:
    blines = bliness.readlines()
    for bline in blines:
        splitbline = str(bline).strip().split("\t")
        chrbnum = splitbline[0]
        bstartpos = splitbline[1]
        endbpos = splitbline[2]
        if chrbnum in bbeddict:
            bbeddict[chrbnum].append([bstartpos,endbpos])
        elif chrbnum not in bbeddict:
            bbeddict[chrbnum] = []
            bbeddict[chrbnum].append([bstartpos,endbpos])
print("bbeddict: "+str(bbeddict))  
overlapcon = open("overlap.txt","w") 
overlapvalue = 0
for chrkeyslisti in chrkeyslist:
    achrvalues = abeddict[chrkeyslisti]
    bchrvalues = bbeddict[chrkeyslisti]
    for achrvaluesi in achrvalues:
        startachrvaluesi = achrvaluesi[0]
        endachrvaluesi = achrvaluesi[1]
        for bchrvaluesi in bchrvalues:
            startbchrvaluesi = bchrvaluesi[0]
            endbchrvaluesi = bchrvaluesi[1]
            if int(startachrvaluesi) < int(startbchrvaluesi) and int(endachrvaluesi) < int(endbchrvaluesi):
                overlapvalue = int(endachrvaluesi) - int(startbchrvaluesi)
                apercent = float(overlapvalue/(int(endachrvaluesi) - int(startachrvaluesi)))
                bpercent = float(overlapvalue/(int(endbchrvaluesi) - int(startbchrvaluesi)))
                overlapcon.write(str(chrkeyslisti)+"\t"+str(startachrvaluesi)+"\t"+str(endachrvaluesi)+"\t"+str(startbchrvaluesi)+"\t"+str(endbchrvaluesi)+str(overlapvalue)+"\t"+str(apercent)+"\t"+str(bpercent)+"\n")
            elif int(startachrvaluesi) > int(startbchrvaluesi) and int(endachrvaluesi) > int(endbchrvaluesi):
                overlapvalue = int(endbchrvaluesi) - int(startachrvaluesi)
                apercent = float(overlapvalue/(int(endachrvaluesi) - int(startachrvaluesi)))
                bpercent = float(overlapvalue/(int(endbchrvaluesi) - int(startbchrvaluesi)))
                overlapcon.write(str(chrkeyslisti)+"\t"+str(startachrvaluesi)+"\t"+str(endachrvaluesi)+"\t"+str(startbchrvaluesi)+"\t"+str(endbchrvaluesi)+str(overlapvalue)+"\t"+str(apercent)+"\t"+str(bpercent)+"\n")
            elif int(startachrvaluesi) < int(startbchrvaluesi) and int(endachrvaluesi) > int(endbchrvaluesi):
                overlapvalue = int(endbchrvaluesi) - int(startbchrvaluesi)
                apercent = float(overlapvalue/(int(endachrvaluesi) - int(startachrvaluesi)))
                bpercent = float(overlapvalue/(int(endbchrvaluesi) - int(startbchrvaluesi)))
                overlapcon.write(str(chrkeyslisti)+"\t"+str(startachrvaluesi)+"\t"+str(endachrvaluesi)+"\t"+str(startbchrvaluesi)+"\t"+str(endbchrvaluesi)+str(overlapvalue)+"\t"+str(apercent)+"\t"+str(bpercent)+"\n")
            elif int(startachrvaluesi) > int(startbchrvaluesi) and int(endachrvaluesi) < int(endbchrvaluesi):
                overlapvalue = int(endachrvaluesi) - int(startachrvaluesi)
                apercent = float(overlapvalue/(int(endachrvaluesi) - int(startachrvaluesi)))
                bpercent = float(overlapvalue/(int(endbchrvaluesi) - int(startbchrvaluesi)))
                overlapcon.write(str(chrkeyslisti)+"\t"+str(startachrvaluesi)+"\t"+str(endachrvaluesi)+"\t"+str(startbchrvaluesi)+"\t"+str(endbchrvaluesi)+str(overlapvalue)+"\t"+str(apercent)+"\t"+str(bpercent)+"\n")                           
上一篇下一篇

猜你喜欢

热点阅读