重命名脚本

2021-07-13  本文已影响0人  花生学生信

# -*- coding: utf-8 -*-

import os
import re
import shlex
import subprocess
import sys

# help
# Text printed when the script is called with too few arguments.
USAGE = (
    "Usage: python rename.py [rename_txt] [rename_data_dir] [rename_log] [orig_data*] \n"
    "    rename_txt -- txt file that save company' name and customer's name which divided by tap \n"
    "    rename_data_dir -- where you put renamed data \n"
    "    rename_log -- file to save the `cat data cmd` \n"
    "    orig_data -- you can insert more than one directory \n"
    "    e.g. \n"
    "    python rename.py orig_data/rename.txt raw_data rename.log orig_data"
)

# A file counts as read 1 / read 2 when its base name contains "R1" / "R2"
# or ends in "1.fq.gz" / "2.fq.gz".
READ1_PATTERN = re.compile(r"R1|1\.fq\.gz$")
READ2_PATTERN = re.compile(r"R2|2\.fq\.gz$")


# read sample list
def read_rename_table(path):
    """Parse the rename table into an ``{old_name: new_name}`` dict.

    Each non-blank line must hold at least two whitespace-separated fields:
    the old sample name followed by the new one.  Extra fields are ignored.
    When an old name is listed more than once the last new name wins, and
    the dict keeps the order in which old names first appear.

    Raises:
        ValueError: a non-blank line has fewer than two fields.
    """
    rename_dic = {}
    with open(path, "r") as handle:
        for lineno, line in enumerate(handle, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (typically a trailing newline) carry no mapping.
                continue
            if len(fields) < 2:
                raise ValueError(
                    "{}:{}: expected '<old_name> <new_name>', got {!r}".format(
                        path, lineno, line.rstrip("\n")
                    )
                )
            rename_dic[fields[0]] = fields[1]
    return rename_dic


# search all file
def find_gz_files(data_dirs):
    """Return the paths of all ``*.gz`` files found under *data_dirs*.

    Every directory is walked recursively with ``os.walk``.  Only names that
    end in ``.gz`` are kept, so side files such as ``x.fq.gz.md5`` are not
    picked up.
    """
    found = []
    for data_dir in data_dirs:
        for top, _dirs, filenames in os.walk(data_dir):
            for filename in filenames:
                if filename.endswith(".gz"):
                    found.append(os.path.join(top, filename))
    return found


# match
def group_files_by_sample(sample_names, gz_files):
    """Map each sample name to the files whose base name contains it.

    The name is matched as a literal substring of the file's base name, so
    regular-expression metacharacters in a sample name have no special
    meaning.

    NOTE(review): substring matching means sample "S1" also collects files
    of a sample called "S10"; confirm the naming scheme rules this out.
    """
    return {
        sample: [path for path in gz_files if sample in os.path.basename(path)]
        for sample in sample_names
    }


def split_read_pairs(paths):
    """Split *paths* into sorted ``(read1_files, read2_files)`` lists.

    Classification looks only at the base name, so directory names cannot
    influence it.  A file whose name satisfies both patterns is placed in
    both lists; a file that satisfies neither is dropped.
    """
    read1 = sorted(p for p in paths if READ1_PATTERN.search(os.path.basename(p)))
    read2 = sorted(p for p in paths if READ2_PATTERN.search(os.path.basename(p)))
    return read1, read2


# cat file
def format_cat_command(inputs, output):
    """Return the shell command line that concatenates *inputs* into *output*.

    Every path is passed through ``shlex.quote`` so the logged command can be
    pasted into a shell even when paths contain spaces or metacharacters.
    """
    quoted_inputs = " ".join(shlex.quote(path) for path in inputs)
    return "cat {} > {}".format(quoted_inputs, shlex.quote(output))


def concatenate(inputs, output):
    """Run ``cat`` on *inputs* and write its output to the file *output*.

    The command is executed without a shell, with an argument list.

    Raises:
        subprocess.CalledProcessError: ``cat`` exited with a non-zero status.
    """
    with open(output, "wb") as out_handle:
        subprocess.check_call(["cat"] + list(inputs), stdout=out_handle)


def main(argv=None):
    """Merge and rename per-sample read files; return the process exit code.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.  Layout:
            ``[prog, rename_txt, rename_data_dir, rename_log, orig_data...]``.

    Returns:
        0 on success, 1 when too few arguments were given.
    """
    argv = sys.argv if argv is None else argv
    # At least one data directory is required: without one no sample has any
    # input file, and a ``cat`` without file operands reads standard input.
    if len(argv) < 5:
        print(USAGE)
        return 1

    rename_txt, rename_dir, rename_log = argv[1:4]
    data_dirs = argv[4:]

    rename_dic = read_rename_table(rename_txt)
    sample_dic = group_files_by_sample(rename_dic, find_gz_files(data_dirs))

    # The output files are opened inside rename_dir, so it has to exist.
    os.makedirs(rename_dir, exist_ok=True)

    # out log
    with open(rename_log, "w") as log:
        for old_name, paths in sample_dic.items():
            new_name = rename_dic[old_name]
            read1, read2 = split_read_pairs(paths)
            for read_tag, inputs in (("R1", read1), ("R2", read2)):
                if not inputs:
                    # Nothing to merge for this read; skip it instead of
                    # running ``cat`` with no file operands.
                    print(
                        "warning: no {} files found for sample {}".format(
                            read_tag, old_name
                        ),
                        file=sys.stderr,
                    )
                    continue
                output = os.path.join(
                    rename_dir, "{}.{}.fq.gz".format(new_name, read_tag)
                )
                log.write(format_cat_command(inputs, output) + "\n")
                concatenate(inputs, output)
    return 0


if __name__ == "__main__":
    sys.exit(main())

上一篇下一篇

猜你喜欢

热点阅读