处理GFF文件并重命名基因

2017-07-13  本文已影响0人  南山欧巴
import os
import re
import sys

def convert_gff(infile, outfile, start_serial=10000):
    """Extract gene records from a GFF file and assign them new gene IDs.

    A record is kept when its first column contains ``Chr`` and its third
    column contains ``gene`` (both are substring matches, so feature types
    such as ``pseudogene`` are kept as well).  For every kept record one
    tab-separated line is written to *outfile*::

        <chrom with Chr->Gm>  <start>  <end>  <strand>  <old id>  <new id>

    where ``old id`` is the text after the last ``=`` in column 9 and
    ``new id`` is ``<renamed chrom>g<serial>``.  The serial is one running
    counter shared by all chromosomes.

    Args:
        infile: path of the GFF file to read.
        outfile: path of the table to write (overwritten if it exists).
        start_serial: serial number given to the first kept gene.

    Returns:
        The total number of input lines read, including skipped ones.
    """
    records_read = 0
    serial = start_serial
    with open(infile) as gff, open(outfile, "w") as newgff:
        for raw_line in gff:
            records_read += 1
            fields = raw_line.strip().split("\t")
            # Header/comment/blank/truncated lines do not have the nine GFF
            # columns; skip them instead of failing with an IndexError
            # (e.g. a space-separated "##sequence-region Chr01 1 1000").
            if len(fields) < 9:
                continue
            if "Chr" not in fields[0] or "gene" not in fields[2]:
                continue
            old_gene = fields[8].split("=")[-1]
            new_chrom = fields[0].replace("Chr", "Gm")
            new_gene = new_chrom + "g" + str(serial)
            serial += 1
            newgff.write("\t".join(
                [new_chrom, fields[3], fields[4], fields[6],
                 old_gene, new_gene]) + "\n")
    return records_read


def main(argv=None):
    """Command-line entry point: ``python <script> infile outfile``.

    Args:
        argv: argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 2 when the two required paths are not supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        # Stop here: without both paths there is nothing to process.
        prog = argv[0] if argv else "script.py"
        sys.stderr.write("Usage: python %s infile outfile\n" % prog)
        return 2
    records_read = convert_gff(argv[1], argv[2])
    print("%d records read" % records_read)
    return 0


if __name__ == "__main__":
    sys.exit(main())
上一篇下一篇

猜你喜欢

热点阅读