Checking the data integrity on transferred files
2020-12-21 本文已影响0人
生信学习者2
日常需要传递几G或者十几G的fastq数据,在cp或者rsync完后,是需要对复制的数据进行md5码检验的。本文先获取源文件md5码再获取目标文件md5码,最后检验两者是否一致,从而判断复制是否完整。更多知识分享请到 https://zouhua.top/。
获取md5码
md5sum RawData/filename > RawData_md5sum.tsv
md5sum Rename/filename > Rename_md5sum.tsv
主程序
#!/usr/bin/python
import sys
import re
import os
import argparse as ap
def parse_argument(args):
    """Parse the command-line options of the md5 comparison script.

    Args:
        args: Full argument vector with the program name at index 0
            (``main`` passes ``sys.argv``). ``None`` falls back to
            ``sys.argv``.

    Returns:
        argparse.Namespace: carries ``file1``, ``file2`` and ``out``.
    """
    parser = ap.ArgumentParser(description='check')
    parser.add_argument('-f1', '--file1', metavar='<file1>', type=str,
                        required=True,
                        help='md5sum listing of the source files')
    parser.add_argument('-f2', '--file2', metavar='<file2>', type=str,
                        required=True,
                        help='md5sum listing of the copied files')
    parser.add_argument('-o', '--out', metavar='<out>', type=str,
                        required=True,
                        help='path of the tab-separated report to write')
    # Parse the vector that was actually handed in rather than silently
    # re-reading the global sys.argv; element 0 is the program name.
    argv = sys.argv if args is None else args
    return parser.parse_args(list(argv)[1:])
def _read_md5_table(path):
    """Load an ``md5sum`` listing into a ``{checksum: filename}`` dict."""
    table = {}
    with open(path, 'r') as handle:
        for raw in handle:
            # Split once so file names containing spaces stay intact.
            fields = raw.strip().split(None, 1)
            if len(fields) < 2:
                # Blank or malformed line: nothing to record.
                continue
            table[fields[0]] = fields[1]
    return table


def main():
    """Compare two ``md5sum`` listings and write a tab-separated report.

    Every checksum of ``--file2`` is looked up among the checksums of
    ``--file1``. A hit writes
    ``<file2 name>\\t<file1 name>\\t<checksum>\\tCorrect``; a miss writes
    ``<file2 name>\\t<checksum>\\tWrong``.
    """
    args = parse_argument(sys.argv)
    source_names = _read_md5_table(args.file1)

    # Both handles are managed by ``with`` so the report is flushed and
    # closed even if a line fails to process.
    with open(args.file2, 'r') as copied, open(args.out, 'w') as out_f:
        for raw in copied:
            fields = raw.strip().split(None, 1)
            if len(fields) < 2:
                continue
            checksum, name = fields
            if checksum in source_names:
                row = [name, source_names[checksum], checksum, "Correct"]
            else:
                # Report the checksum of *this* entry; the original code
                # wrote a stale variable left over from reading file1.
                row = [name, checksum, "Wrong"]
            out_f.write("\t".join(row) + "\n")
# Run the comparison only when executed as a script, so importing this
# module does not parse arguments or touch any files.
if __name__ == "__main__":
    main()
运行
python check_md5.py -f1 RawData_md5sum.tsv -f2 Rename_md5sum.tsv -o Checkout_md5.tsv