python30

2022-04-28  本文已影响0人  rong酱
# -*- coding: utf-8 -*-

import os
import sys

def load_positions(path):
    """Load a tab-separated position table into a dict.

    Each non-blank line is split on tabs.  The first two columns
    (chromosome, position) are joined as ``"<chromosome>_<position>"`` to
    form the key; the remaining columns (possibly none) are stored as a
    list of strings.  If the same key occurs more than once, the last
    occurrence wins.

    Blank lines are skipped.  A non-blank line with fewer than two
    columns raises ``IndexError``.
    """
    positions = {}
    with open(path, 'r') as handle:
        # Stream the file line by line instead of loading it whole.
        for line in handle:
            fields = line.strip().split("\t")
            if fields == [""]:
                # Blank (or whitespace-only) line, e.g. a trailing newline.
                continue
            positions[str(fields[0]) + "_" + str(fields[1])] = fields[2:]
    return positions


if __name__ == "__main__":
    # Command line: <position table> <output file>
    in1 = sys.argv[1]
    ou1 = sys.argv[2]

    insertdic = load_positions(in1)

def annotate_positions(positions, gene_path, out_path):
    """Write each position that lies inside a gene interval, with the gene name.

    positions -- dict mapping ``"<chromosome>_<position>"`` to a list of
                 extra column values for that position.
    gene_path -- tab-separated file whose rows are
                 chromosome, start, end, gene name.
    out_path  -- file to write; one row per (gene row, position) hit:
                 chromosome, position, the extra columns, gene name,
                 all tab-separated.

    A position is a hit when the chromosome strings are equal and
    start <= position <= end (both ends inclusive).  Rows are written in
    gene-file order, and within one gene row in the iteration order of
    ``positions``.  Each hit is also echoed to stdout as two debug lines.
    Blank lines in the gene file are skipped.
    """
    # Index positions by chromosome once, so each gene row only looks at
    # the positions on its own chromosome instead of rescanning every key.
    by_chrom = {}
    for key, extras in positions.items():
        # Split on the LAST underscore only: chromosome names such as
        # "chr1_KI270706v1_random" contain underscores themselves.
        chrom, pos_text = str(key).strip().rsplit("_", 1)
        by_chrom.setdefault(chrom, []).append((pos_text, extras))

    # Context managers guarantee both files are closed even on error.
    with open(out_path, 'w') as out, open(gene_path, 'r') as genes:
        for gene_line in genes:
            gene = gene_line.strip().split("\t")
            if gene == [""]:
                continue  # blank line
            hits = by_chrom.get(str(gene[0]))
            if not hits:
                continue
            # Coordinates are parsed only for rows whose chromosome has
            # positions, so unrelated rows (e.g. a header) are not parsed.
            start = int(gene[1])
            end = int(gene[2])
            for pos_text, extras in hits:
                if start <= int(pos_text) <= end:
                    print(str([gene[0], pos_text]))
                    print(str(gene))
                    row = ([str(gene[0]), str(pos_text)]
                           + [str(value) for value in extras]
                           + [str(gene[3])])
                    out.write("\t".join(row) + "\n")


if __name__ == "__main__":
    annotate_positions(insertdic, 'pos_genev1.txt', ou1)

#os.system('cat pos_genev1_name.txt | sort -u >%s'%(ou1))
#os.system('rm pos_genev1_name.txt')
# -*- coding: utf-8 -*-

import os
import sys
import re

def write_gene_regions(gtf_path, out_path="pos_gene.txt"):
    """Extract gene intervals with their names from a GTF-style annotation.

    Lines starting with ``#`` and blank lines are ignored.  Every other
    line is split on tabs; rows whose third column is ``gene`` are kept.
    For such a row the last column is split on ``;`` and, for each piece
    containing ``gene_name``, the text between the first and the last
    double quote is taken as the gene name and one row is written to
    *out_path*: chromosome (column 1), start (column 4), end (column 5),
    gene name, tab-separated.

    A ``gene_name`` piece without a double-quoted value is skipped.
    Start and end must be integers for ``gene`` rows, otherwise
    ``ValueError`` is raised.
    """
    # Compiled once; greedy on purpose so it spans first to last quote.
    quoted = re.compile(r'"(.*)"')

    # Context managers make sure the output is flushed and closed.
    with open(out_path, 'w') as out, open(gtf_path, 'r') as gtf:
        for line in gtf:
            if line.startswith("#"):
                continue  # header / comment line
            cols = line.strip().split("\t")
            if cols == [""]:
                continue  # blank line
            if str(cols[2]) != "gene":
                continue
            chrom = str(cols[0])
            start = int(cols[3])  # start point
            end = int(cols[4])  # end point
            for attribute in str(cols[-1]).strip().split(";"):
                if "gene_name" not in attribute:
                    continue
                found = quoted.search(attribute)
                if found is None:
                    continue  # gene_name without a quoted value
                out.write(chrom + "\t" + str(start) + "\t" + str(end)
                          + "\t" + found.group(1) + "\n")


if __name__ == "__main__":
    # Command line: <annotation file>; output always goes to pos_gene.txt
    in1 = sys.argv[1]
    write_gene_regions(in1, "pos_gene.txt")
上一篇下一篇

猜你喜欢

热点阅读