Python18

2021-08-19  本文已影响0人  rong酱
# -*- coding: utf-8 -*-
#!/usr/bin/env python

import os
import sys
import argparse

def build_parser():
    """Return the command-line parser for this script.

    ``--infile``, ``--outfile`` and ``--difffile`` are required.
    ``--genename`` is optional and defaults to the file ``genename`` in the
    current working directory.
    """
    parser = argparse.ArgumentParser(description="trim data , change geneid to genename")
    parser.add_argument('--infile', help=" inputfile ", required=True)
    parser.add_argument('--outfile', help="output file", required=True)
    parser.add_argument('--difffile', help="diff gene up down file", required=True)
    parser.add_argument('--genename', default='genename',
                        help="tab-separated gene id / gene name file (default: genename)")
    return parser


def load_gene_names(path):
    """Read a tab-separated file and return a dict mapping column 0 to column 1.

    Lines with fewer than two columns (blank lines included) are skipped
    rather than aborting the run with an IndexError.
    """
    names = {}
    with open(path, "r") as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                continue
            names[fields[0]] = fields[1]
    return names


def load_diff_genes(path):
    """Return the set of first-column values found in the tab-separated file.

    A set is returned so that the per-row membership test in ``annotate``
    does not scan a list. Blank lines are ignored.
    """
    genes = set()
    with open(path, "r") as handle:
        for line in handle:
            first = line.strip().split("\t")[0]
            if first:
                genes.add(first)
    return genes


def annotate(infile, outfile, gene_names, diff_genes):
    """Write the trimmed, gene-name annotated copy of ``infile`` to ``outfile``.

    The first line of ``infile`` is the header. For every following row the
    output holds: gene id (column 0), gene name, control value (column 2),
    treat value (column 3), log2 fold change (column 4), P value (column 5),
    padj (column 6) and a regulation label.

    The label is ``*`` for genes that are not in ``diff_genes``. Genes in
    ``diff_genes`` get ``Up`` for a positive log2 fold change and ``Down`` for
    a negative one; when the value is neither (zero or NaN) the row is not
    written at all.

    Raises:
        ValueError: if ``infile`` has no header line, or if the log2 fold
            change of a gene in ``diff_genes`` is not a number.
        KeyError: if a gene id of ``infile`` is missing from ``gene_names``.
        IndexError: if the header has fewer than 6 columns or a data row has
            fewer than 7.
    """
    with open(infile, "r") as source:
        header_line = source.readline()
        if not header_line:
            raise ValueError("input file %r is empty: no header line" % infile)
        header = header_line.strip().split("\t")
        header_out = "\t".join([
            header[0], "genename", header[2], header[3],
            "log2FoldChange", header[5], "padj", "Up/Down-Regulation",
        ])

        # The context manager guarantees the output is flushed and closed,
        # even when a malformed row aborts the loop.
        with open(outfile, "w") as sink:
            sink.write(header_out + "\n")
            for line in source:
                fields = line.strip().split("\t")
                if fields == [""]:
                    # Blank line (typically at the end of the file).
                    continue
                gene_id = fields[0]
                gene_name = gene_names[gene_id]
                control_value = fields[2]
                treat_value = fields[3]
                log2fc = fields[4]
                p_value = fields[5]
                padj = fields[6]

                if gene_id in diff_genes:
                    change = float(log2fc)
                    if change > 0:
                        label = "Up"
                    elif change < 0:
                        label = "Down"
                    else:
                        # No direction can be assigned; such rows are left
                        # out of the output.
                        continue
                else:
                    label = "*"

                sink.write("\t".join([
                    gene_id, gene_name, control_value, treat_value,
                    log2fc, p_value, padj, label,
                ]) + "\n")


def main(argv=None):
    """Parse ``argv`` (``sys.argv[1:]`` when None) and run the conversion."""
    args = build_parser().parse_args(argv)
    gene_names = load_gene_names(args.genename)
    diff_genes = load_diff_genes(args.difffile)
    annotate(args.infile, args.outfile, gene_names, diff_genes)


if __name__ == "__main__":
    main()
上一篇 下一篇

猜你喜欢

热点阅读