生物信息小教程收藏生信小白

Python 学习之使用十一种方法计算 DNA 序列中特定碱基数

2019-08-01  本文已影响12人  热衷组培的二货潜
# 计算碱基数目

list("ATGC")
['A', 'T', 'G', 'C']
for c in 'ATCG':
    print(c)
A
T
C
G

第一种 列表迭代

# 定义函数
def count_v1(dna, base):
    """Count how often ``base`` occurs in ``dna`` by walking a list.

    The sequence is first expanded into a list of its elements, and each
    element is then compared against ``base``.

    Returns the number of elements equal to ``base``.
    """
    letters = list(dna)  # expand the sequence into a list of characters
    total = 0  # running number of matches
    for letter in letters:
        if letter == base:
            total = total + 1
    return total

count_v1("ATGCCCATG", "C")
3

第二种 字符串迭代

# 可以将 count_v1 修改
def count_v2(dna, base):
    """Count how often ``base`` occurs in ``dna`` by iterating it directly.

    ``dna`` is looped over as-is (a string yields one character per step),
    so no intermediate list is built.

    Returns the number of elements equal to ``base``.
    """
    hits = 0
    for nucleotide in dna:
        if nucleotide == base:
            hits = hits + 1
    return hits

dna = 'ATGCGGGAGGACCC'
base = 'C'
n = count_v2(dna, base)
print('%s appears %d times in %s' % (base, n, dna))
C appears 4 times in ATGCGGGAGGACCC
print('{base} appears {n} times in {dna}'.format 
      (base = base, n = n, dna = dna))
C appears 4 times in ATGCGGGAGGACCC
def count_v2_demo(dna, base):
    """Count ``base`` in ``dna`` while printing a trace of every step.

    Prints both arguments first, then each element as it is visited. When an
    element equals ``base`` it also prints a marker line followed by the
    running count and an extra blank line.

    Returns the final count.
    """
    print('dna: ', dna)
    print('base: ', base)
    matches = 0
    for symbol in dna:
        print('c: ', symbol)
        if symbol == base:
            print('True if test')
            matches = matches + 1
            # The trailing '\n' argument adds a blank line after the count.
            print(matches, '\n')
    return matches

dna = 'ATGCGGACCTAT'
n = count_v2_demo(dna, 'C')
print('\n{base} appears {n} times in {dna}'.format 
      (base = base, n = n, dna = dna))
dna:  ATGCGGACCTAT
base:  C
c:  A
c:  T
c:  G
c:  C
True if test
1 

c:  G
c:  G
c:  A
c:  C
True if test
2 

c:  C
True if test
3 

c:  T
c:  A
c:  T

C appears 3 times in ATGCGGACCTAT

第三种 索引迭代

def count_v3(dna, base):
    """Count how often ``base`` occurs in ``dna`` using index-based access.

    Every valid position of ``dna`` is visited through ``range(len(dna))``
    and the element at that position is compared against ``base``.

    Returns the number of positions whose element equals ``base``.
    """
    tally = 0
    for pos in range(len(dna)):
        if dna[pos] == base:
            tally = tally + 1
    return tally
count_v3('ATGCGGACCTAT', 'C')
3

第四种 While 循环

# While 循环
def count_v4(dna, base):
    """Count how often ``base`` occurs in ``dna`` with a ``while`` loop.

    A position counter is advanced by hand from 0 up to ``len(dna)``; the
    element at each position is compared against ``base``.

    Returns the number of positions whose element equals ``base``.
    """
    tally = 0
    pos = 0
    while pos < len(dna):
        if dna[pos] == base:
            tally = tally + 1
        pos = pos + 1  # always advance, whether or not there was a match
    return tally

count_v4('ATGCGGACCTAT', 'C')
3

第五种 创建一个布尔列表,然后计数

# 创建一个布尔列表,然后计数
def count_v5(dna, base):
    """Count how often ``base`` occurs in ``dna`` via a list of booleans.

    One boolean is recorded per element (``True`` on a match, ``False``
    otherwise) and the list is then summed, which counts the ``True``
    entries because ``True`` adds 1 and ``False`` adds 0.
    """
    flags = []
    for letter in dna:
        if letter == base:
            flags.append(True)
        else:
            flags.append(False)
    total = sum(flags)
    return total
count_v5('ATGCGGACCTAT', 'C')        
3

第六种 使用条件表达式(内置 if)生成布尔列表

# 内置 if
def count_v6(dna, base):
    """Count how often ``base`` occurs in ``dna`` using an inline ``if``.

    For each element a conditional expression yields ``True`` or ``False``;
    the results are collected in a list and summed to get the count.
    """
    flags = []
    for letter in dna:
        is_match = True if letter == base else False
        flags.append(is_match)
    return sum(flags)

count_v6('ATGCGGACCTAT', 'C') 
3

第七种 直接把比较结果(布尔值)加入列表

# 直接统计布尔值
def count_v7(dna, base):
    """Count how often ``base`` occurs in ``dna`` by storing comparisons.

    The result of ``element == base`` is appended directly for every
    element, and the collected results are summed to get the count.
    """
    flags = []  # flags[k] holds the result of dna[k] == base
    for letter in dna:
        flags.append(letter == base)
    total = sum(flags)
    return total

count_v7('ATGCGGACCTAT', 'C') 
3

第八种 使用列表推导式生成布尔列表


def count_v8(dna, base):
    """Count how often ``base`` occurs in ``dna`` with a list comprehension.

    A list comprehension builds one comparison result per element; summing
    that list gives the number of matches.
    """
    flags = [letter == base for letter in dna]
    total = sum(flags)
    return total
count_v8('ATGCGGACCTAT', 'C') 
3

第九种 将列表推导式直接传给 sum

# 基于第八种进一步简化
def count_v9(dna, base):
    """Count how often ``base`` occurs in ``dna`` in a single expression.

    The list of per-element comparison results is built inline and handed
    straight to ``sum`` without being bound to a name first.
    """
    return sum(
        [nucleotide == base for nucleotide in dna]
    )

count_v9('ATGCGGACCTAT', 'C') 
3

第十种 使用生成器表达式(无需创建中间列表)

def count_v10(dna, base):
    """Count how often ``base`` occurs in ``dna`` with a generator expression.

    The comparison results are produced lazily and consumed by ``sum``, so
    no intermediate list is created.
    """
    comparisons = (nucleotide == base for nucleotide in dna)
    return sum(comparisons)
count_v10('ATGCGGACCTAT', 'C') 
3

第十一种 收集匹配位置的索引,再取列表长度

def count_v11(dna, base):
    """Count how often ``base`` occurs in ``dna`` by collecting match indices.

    Builds the list of every position whose element equals ``base`` and
    returns the length of that list.
    """
    positions = [pos for pos in range(len(dna)) if dna[pos] == base]
    return len(positions)
count_v11('ATGCGGACCTAT', 'C') 
3
上一篇 下一篇

猜你喜欢

热点阅读