根据id提取fasta序列

2022-07-27  本文已影响0人  纵纵纵小鸮

def get_trans(intrans, outtrans, chrom="chr19"):
    """Copy the FASTA records that belong to one chromosome into a new file.

    The chromosome of a record is taken from its header line: the first
    space-delimited token is split on "-" and the second piece is compared
    with ``chrom`` (for example ``>gene1-chr19-0001 description``).
    Matching records are written to ``outtrans`` with their full header
    text and their sequence lines, in the order in which they first appear
    in ``intrans``.  The number of matching headers is printed to stdout.

    Args:
        intrans: Path of the FASTA file to read.
        outtrans: Path of the FASTA file to write (truncated if it exists).
        chrom: Chromosome label to select.  Defaults to ``"chr19"``.

    Notes:
        * A header whose first token has no second "-"-separated field is
          treated as non-matching instead of raising ``IndexError``.
        * Lines that appear before the first header, and blank lines, are
          ignored.
        * If the same header occurs more than once, the record is written
          once, carrying the sequence of its last occurrence; the printed
          count still counts every matching header.
    """
    # Only matching records are kept; dict insertion order gives the
    # output order and makes duplicate headers collapse to one entry.
    selected = {}
    matched = 0
    # Sequence-line list of the record currently being read, or None while
    # we are before the first header or inside a non-matching record.
    current = None

    with open(intrans, "r") as myfile:
        for line in myfile:
            if line.startswith(">"):
                fields = line.strip().split(" ")[0].split("-")
                if len(fields) > 1 and fields[1] == chrom:
                    header = line.lstrip(">").strip()
                    # A repeated header restarts its sequence.
                    selected[header] = current = []
                    matched += 1
                else:
                    current = None
            elif current is not None:
                sequence = line.strip()
                if sequence:
                    current.append(sequence)

    print(matched)

    with open(outtrans, "w") as outfile:
        for header, seq_lines in selected.items():
            outfile.write(">" + header + "\n")
            if seq_lines:
                outfile.write("\n".join(seq_lines) + "\n")

根据染色体提取gff文件:

def get_gff(ingff, outgff, chrom="chr19"):
    """Copy the GFF lines whose first column equals one chromosome.

    Each line of ``ingff`` is stripped and split on tabs; when the first
    field equals ``chrom`` the original, unmodified line is written to
    ``outgff``.  Lines that do not match (including comment/directive and
    blank lines) are dropped.  The number of lines written is printed to
    stdout.

    Args:
        ingff: Path of the GFF file to read.
        outgff: Path of the GFF file to write (truncated if it exists).
        chrom: Value the first column must equal.  Defaults to ``"chr19"``.
    """
    kept = 0
    # Both files are closed by the ``with`` statement; the input is opened
    # first so a missing input does not leave an empty output file behind.
    with open(ingff, "r") as mygff, open(outgff, "w") as myout:
        # Stream line by line rather than loading the whole file.
        for line in mygff:
            if line.strip().split("\t", 1)[0] == chrom:
                myout.write(line)
                kept += 1

    print(kept)

上一篇下一篇

猜你喜欢

热点阅读