python27

2022-03-12  本文已影响0人  rong酱
需要优化：以下两个脚本（1. 将多行 FASTA 序列合并为单行；2. 按位点截取上下游侧翼序列）仍需优化。
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys

def linearize_fasta(in_path, out_path):
    """Rewrite a FASTA file so every record's sequence sits on one line.

    Each header line (starting with ">") is copied as-is and followed by
    the concatenation of all sequence lines that belong to it.

    Args:
        in_path: path of the FASTA file to read.
        out_path: path of the file to write; it is overwritten.
    """
    with open(in_path, 'r') as src, open(out_path, 'w') as dst:
        chunks = []
        seen_header = False
        for raw in src:
            line = raw.strip()
            if not line:
                # Blank lines carry no sequence and have no first character
                # to test against ">".
                continue
            if line.startswith(">"):
                # Flush the previous record before starting a new one.
                # Nothing is flushed ahead of the very first header, so the
                # output does not begin with an empty line.
                if seen_header or chunks:
                    dst.write("".join(chunks) + "\n")
                chunks = []
                seen_header = True
                dst.write(line + "\n")
            else:
                chunks.append(line)
        # The last record has no following header to trigger its flush.
        if seen_header or chunks:
            dst.write("".join(chunks) + "\n")


if __name__ == "__main__":
    in1 = sys.argv[1]
    ou1 = sys.argv[2]
    linearize_fasta(in1, ou1)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys

in1=sys.argv[1] # reference gene input; read below as a tab-separated table (column 0: FASTA path, column 1: position)
ou1=sys.argv[2] # output file

from Bio import SeqIO
from Bio.Seq import Seq

# Offset used on both sides of a position when computing flank windows.
FLANK = 150
# Position that triggers the wrap-around 3' flank for "hpv18" files.
# NOTE(review): presumably the last base of a circular HPV18 reference
# genome -- confirm against the reference actually used.
HPV18_WRAP_POSITION = 7857


def read_positions(table_path):
    """Group positions by FASTA path from a tab-separated table.

    The first line is treated as a header and skipped. In every other
    line, column 0 is a FASTA file path and column 1 a position; further
    columns are ignored.

    Args:
        table_path: path of the tab-separated table.

    Returns:
        dict mapping each FASTA path to the list of its position strings,
        in file order.

    Raises:
        ValueError: if a non-blank data line has fewer than two columns.
    """
    positions = {}
    with open(table_path, 'r') as handle:
        for line_no, row in enumerate(handle, 1):
            if line_no == 1:
                continue  # header row
            fields = row.strip().split('\t')
            if fields == ['']:
                continue  # blank line, e.g. at the end of the file
            if len(fields) < 2:
                raise ValueError(
                    "%s line %d: expected at least 2 tab-separated columns"
                    % (table_path, line_no))
            positions.setdefault(fields[0], []).append(fields[1])
    return positions


def flank_windows(sequence, position, wrap_at_end=False):
    """Return the (5', 3') flanking substrings around ``position``.

    Args:
        sequence: the full sequence as a string.
        position: integer position used directly as the slice boundary
            between the two flanks.
        wrap_at_end: when true, the 3' flank is taken from the start of
            ``sequence`` instead of from after ``position``.

    Returns:
        Tuple ``(five_prime, three_prime)``.
    """
    # Clamp at 0: a negative start would make the slice count from the
    # end of the sequence and return the wrong (usually empty) flank.
    start = max(position - 1 - FLANK, 0)
    five_prime = sequence[start:position]
    if wrap_at_end:
        three_prime = sequence[0:FLANK]
    else:
        # NOTE(review): this window is FLANK - 1 bases long while the 5'
        # window is FLANK + 1; kept as-is, confirm the intended lengths.
        three_prime = sequence[position:position - 1 + FLANK]
    return five_prime, three_prime


def flank_records(filename, posi, iseq):
    """Format the two FASTA records (5' and 3' flank) for one position.

    Args:
        filename: FASTA path; used in the record names and to detect the
            "hpv18" wrap-around case.
        posi: position as a string or integer.
        iseq: the full sequence as a string.

    Returns:
        Text of two FASTA records named ``<filename>_<position>_5p`` and
        ``<filename>_<position>_3p``.
    """
    position = int(posi)
    wraps = "hpv18" in filename and position == HPV18_WRAP_POSITION
    five_prime, three_prime = flank_windows(iseq, position, wrap_at_end=wraps)
    prefix = ">" + str(filename) + "_" + str(position)
    return (prefix + "_5p\n" + five_prime + "\n"
            + prefix + "_3p\n" + three_prime + "\n")


if __name__ == "__main__":
    dicti = read_positions(in1)
    with open(ou1, 'w') as outc:
        for filename, positions in dicti.items():
            # Every record in the FASTA file is cut at every position
            # listed for that file.
            for record in SeqIO.parse(filename, "fasta"):
                iseq = str(record.seq)
                for posi in positions:
                    outc.write(flank_records(filename, posi, iseq))
上一篇 | 下一篇

猜你喜欢

热点阅读