my RNA-seq工作生活

提取FPKM值

2019-06-30  本文已影响62人  e3617f9991e1

慢慢练,记录笔记。这个脚本得到结果很慢,写得不简练。
从stringtie生成的gtf文件中提取fpkm值,并且将相同基因的不同转录本的fpkm值相加。(python)

# -*- coding:utf-8 -*-
# usage: python *.py sample.gtf

import os
import shlex
import sys
import time
# The input GTF path is the first command-line argument.
if len(sys.argv) < 2:
    sys.exit("usage: python *.py sample.gtf")
sample = sys.argv[1]

# For every line of the input that contains "FPKM", write awk field 10 and the
# third-from-last field to fpkm.txt (presumably the gene_id value and the FPKM
# value of StringTie transcript lines -- verify against your GTF layout).
# The path is interpolated shell-quoted so that the file named on the command
# line is the one actually read (previously the literal name "sample" was
# passed to grep), and so that spaces or shell metacharacters in the path are
# harmless. "--" keeps a path starting with "-" from being parsed as an option.
os.system(
    "grep -- 'FPKM' %s | awk '{print $10,$(NF-2)}' > fpkm.txt "
    % shlex.quote(sample)
)

# Strip the ';' and '"' decoration left over from the GTF attribute syntax.
# os.system() blocks until the command has exited, so fpkm.txt is complete
# as soon as this call returns.
os.system("sed -i -e 's/;//g' -e 's/\"//g' fpkm.txt ")

# Input: the "<name> <fpkm>" lines in fpkm.txt; output: the summed table.
file1 = open("fpkm.txt", "r")
file2 = open("result.txt", "w")

# One single-entry {name: fpkm_text} mapping per input line; the value is
# still a string at this point. NOTE: the name shadows the builtin `list`;
# it is kept because later code refers to it by this name.
list = [{fields[0]: fields[1]} for fields in map(str.split, file1)]

# Accumulator mapping each name to its running FPKM total.
dict2 = {}

def merge_dict(x, y):
    """Add the numeric values of *x* into the running totals held in *y*.

    Each value in *x* is converted with ``float()``. Values that raise
    ``ValueError`` on conversion are skipped without touching *y*. A key
    already present in *y* has the converted value added to it; a new key is
    inserted with the converted value. *y* is modified in place and nothing
    is returned.
    """
    for key, raw in x.items():
        try:
            amount = float(raw)
        except ValueError:
            # Non-numeric text: leave the totals untouched for this key.
            continue
        if key in y:
            y[key] += amount
        else:
            y[key] = amount
        
# Fold every per-line mapping into the per-name totals.
for entry in list:
    merge_dict(entry, dict2)

# Emit one "name<TAB>total" row per key, formatted to six decimal places.
file2.writelines(
    "%s\t%.6f\n" % (gene, round(total, 6)) for gene, total in dict2.items()
)

print("finished")
file1.close()
file2.close()
上一篇下一篇

猜你喜欢

热点阅读