2022-09-13 ROSALIND_10: Consensus and Profile

2022-09-21  本文已影响0人  小孟在充电
# ----ROSALIND_10: ----
# Consensus and Profile

# Read the FASTA input once at import time; each entry is one line of the
# file with its newline characters removed.
with open("10_Consensus and Profile.txt") as f:
    DNA_strings = [raw_line.strip("\n") for raw_line in f]
def myConPro(sample_File):
    """Print the profile matrix of a FASTA collection and return a consensus.

    Args:
        sample_File: When this is a list or tuple of FASTA lines, those lines
            are analysed. Any other value falls back to the module-level
            ``DNA_strings`` list, which is what this function always read
            before the argument was honoured.

    Returns:
        The consensus string: for each position, the most frequent base.
        Ties are resolved in the order A, T, C, G. An input without any
        sequences yields ``""``.

    Raises:
        ValueError: If the sequences do not all have the same length.

    Side effects:
        Prints one line per base (A, T, C, G) followed by the list of its
        per-position counts.
    """
    if isinstance(sample_File, (list, tuple)):
        lines = sample_File
    else:
        lines = DNA_strings

    # Group sequence lines under their header. A record may wrap over any
    # number of lines, so records are delimited by header lines rather than
    # by a fixed stride.
    records = []
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">") or "Rosalind" in line:
            records.append([])
        elif records:
            records[-1].append(line)

    DNA_matrix = ["".join(parts) for parts in records]

    if len({len(seq) for seq in DNA_matrix}) > 1:
        raise ValueError("all DNA strings must have the same length")

    # Transpose: one tuple per position, holding that position's base from
    # every sequence.
    columns = list(zip(*DNA_matrix))

    base = "ATCG"
    rowcount = {b: [column.count(b) for column in columns] for b in base}
    for k, v in rowcount.items():
        print(k, v)

    # max() returns the first maximal item, so iterating the bases in
    # "ATCG" order gives the A, T, C, G tie-break.
    return "".join(max(base, key=column.count) for column in columns)
image.png
上一篇 下一篇

猜你喜欢

热点阅读