# Extract FASTA sequences by ID
def get_trans(intrans, outtrans, chrom="chr19"):
    """Copy the FASTA records that belong to one chromosome into a new file.

    A record's chromosome is read from its header line: the first
    space-separated token is split on "-" and the second piece is compared
    with ``chrom`` (for example ``>tx1-chr19 gene=a``). The name written
    back is the header without its leading ">" and surrounding whitespace;
    each sequence line is written with surrounding whitespace stripped.

    Headers whose first token contains no "-" are treated as not matching,
    and lines that appear before the first header are ignored.

    The number of matching header lines is printed to stdout.

    Args:
        intrans: Path of the FASTA file to read.
        outtrans: Path of the FASTA file to write (overwritten).
        chrom: Chromosome label to keep. Defaults to "chr19".
    """
    # Only matching records are kept; insertion order is the output order.
    selected = {}
    matched_headers = 0
    # Sequence-line list of the record being read, or None while the current
    # record is not on the requested chromosome.
    current = None

    with open(intrans, "r") as infile:
        for line in infile:
            if line.startswith(">"):
                pieces = line.strip().split(" ")[0].split("-")
                if len(pieces) > 1 and pieces[1] == chrom:
                    name = line.lstrip(">").strip()
                    # A repeated header restarts the record but keeps its
                    # original position in the output.
                    current = selected[name] = []
                    matched_headers += 1
                else:
                    current = None
            elif current is not None:
                current.append(line.strip())

    print(matched_headers)

    with open(outtrans, "w") as outfile:
        for name, seq_lines in selected.items():
            outfile.write(">" + name + "\n")
            outfile.write("\n".join(seq_lines) + "\n")
# Extract GFF records by chromosome
def get_gff(ingff, outgff, chrom="chr19"):
    """Copy the lines of a tab-separated GFF file that sit on one chromosome.

    A line is kept when its first tab-separated field, taken after stripping
    surrounding whitespace from the line, equals ``chrom``. Kept lines are
    written unchanged, in input order. The number of kept lines is printed
    to stdout.

    Args:
        ingff: Path of the GFF file to read.
        outgff: Path of the GFF file to write (overwritten).
        chrom: Chromosome label to keep. Defaults to "chr19".
    """
    kept = 0
    # Stream line by line; both files are closed by the with-statement.
    with open(ingff, "r") as mygff, open(outgff, "w") as myout:
        for line in mygff:
            seqid = line.strip().split("\t", 1)[0]
            if seqid == chrom:
                myout.write(line)
                kept += 1
    print(kept)