python 读写excel (合并单元格)
2020-12-18 本文已影响0人
树懒吃糖_
第一次使用python来处理excel,主要是为了记录下合并单元格,单元格样式
示例:从左边 变成右边,将第一列的 实现合并单元格
图片.png
工具:python3, xlrd ,xlwt
import xlrd
import sys
import xlwt
import os
#根据化疗位点,来提取突变检出
def Chemo25Sites(path):
chemosites = dict()
file = open(path)
lines = file.readlines()
rows = len(lines)
for i in range(1, rows):
aa = lines[i].strip().split('\t')
chr, pos, rsid, ref, wild_type = aa
chemosites[rsid] = wild_type+'\t'+chr+'\t'+pos+'\t'+ref
file.close()
return chemosites
def getChemoVariants(path, vcf):
#vcf : sample.vatriants_all.vcf
chemosites = Chemo25Sites(path)
outfile = open('./chemo_result_genotype.txt','w')
chemovariants = dict()
for key in chemosites.keys():
cmd = 'grep -w {key} {infile}'.format(key=key, infile=vcf)
res = os.popen(cmd).readlines()
if len(res) == 0: #该化疗位点没有检出
value = chemosites[key].split('\t')
genotype, chrom, pos, ref = value
#print('\t'.join([key, chrom, pos, ref, "*", "*", "*", "*", genotype]))
chemovariants[key] = [key, chrom, pos, ref, "*", "*", "*", "*", genotype]
outfile.write('\t'.join([key, chrom, pos, ref, "*", "*", "*", "*", genotype])+'\n')
elif len(res) == 1: #该化疗位点有检出
line = res[0].strip().split('\t')
chr, start, _, ref, alt, depth, ad, freq = line[0:8]
if float(freq) > 0.7:
if alt=="-":
genotype = "del/del"
else:
genotype = alt + alt #纯和突变
elif float(freq) < 0.2: #野生型
if len(ref)>1:
genotype = ref+'/'+ref
else:
genotype = ref+ref
else:
if alt=='-':
genotype = ref+'/del'
else:
genotype = ref+alt #杂合突变
#print('\t'.join([key, chr, start, ref, alt, depth, ad, freq, genotype]))
chemovariants[key] = [key, chr, start, ref, alt, depth, ad, freq, genotype]
outfile.write('\t'.join([key, chr, start, ref, alt, depth, ad, freq, genotype])+'\n')
elif len(res) > 1:
print("Warnning: {key} has more than one variant!!".format(key=key))
return chemovariants
## 注释的信息表
# 检索key: 药物+rsid+基因型
def chemoAnnotationInfos(annofile):
anno_dict = dict()
with open(annofile, encoding='utf-8') as file:
for line in file:
aa = line.strip().split('\t')
if len(aa) != 6:
print(line)
_, drug, rsid, genotype, _, anno_chinese = aa
key = drug+'\t'+rsid+'\t'+genotype
anno_dict[key] = anno_chinese
return anno_dict
def GetChemoAnnotation(annofile, path, vcf, excelfile):
anno_dict = chemoAnnotationInfos(annofile) #key: drug+'\t'+rsid+'\t'+genotype ; val :anno_chinese
chemovariants = getChemoVariants(path, vcf) #key: rsid ; val:[key, chr, start, ref, alt, depth, ad, freq, genotype]
#read excel.sheet
data = xlrd.open_workbook(excelfile)
table = data.sheet_by_name('sheet1')
table2 = data.sheet_by_name('sheet2')
table3 = data.sheet_by_name('sheet3')
row_numb = table.nrows
col_numb = table.ncols
row_numb2 = table2.nrows
col_numb2 = table2.ncols
row_numb3 = table3.nrows
col_numb3 = table3.ncols
new_excel = xlwt.Workbook()
sheet1 = new_excel.add_sheet('sheet1', cell_overwrite_ok=True)
sheet2 = new_excel.add_sheet('sheet2', cell_overwrite_ok=True)
sheet3 = new_excel.add_sheet('sheet3', cell_overwrite_ok=True)
pattern = xlwt.Pattern()
pattern.pattern = xlwt.Pattern.SOLID_PATTERN
pattern.pattern_fore_colour = 12
#单元格边框样式
borders = xlwt.Borders()
borders.left = 2
borders.right = 2
borders.top = 2
borders.bottom = 2
# 初始化样式1
style1 = xlwt.XFStyle()
style1.pattern = pattern
style1.borders = borders
# 初始化样式2
style2 = xlwt.XFStyle()
style2.borders = borders
for st in [sheet1, sheet2, sheet3]:
st.write(0, 0, "化疗药物", style1)
st.write(0, 1, "检测基因", style1)
st.write(0, 2, "检测位点", style1)
st.write(0, 3, "基因型", style1)
st.write(0, 4, "证据等级", style1)
st.write(0, 5, "注释", style1)
#或者 table 中的单元格值
for i in range(1, row_numb):
drug = table.cell(i, 0).value
gene = table.cell(i, 1).value
rsid = table.cell(i, 2).value
genotype = table.cell(i, 3).value
level = table.cell(i, 4).value
key = drug+'\t'+rsid+'\t'+genotype
cur_genotype = chemovariants.get(rsid, 'NA')
if cur_genotype == "NA":
cur_genotype = genotype
else:
cur_genotype = cur_genotype[-1]
annotation_chinese = anno_dict.get(key, 'NA')
sheet1.write(i, 0, drug, style2)
sheet1.write(i, 1, gene, style2)
sheet1.write(i, 2, rsid, style2)
sheet1.write(i, 3, cur_genotype, style2)
sheet1.write(i, 4, level, style2)
sheet1.write(i, 5, annotation_chinese, style2)
sheet1.write_merge(1,12,0,0, table.cell(1,0).value, style2)
sheet1.write_merge(14,16,0,0, table.cell(14,0).value, style2)
sheet1.write_merge(18,24,0,0, table.cell(18,0).value, style2)
sheet1.write_merge(25,26,0,0, table.cell(25,0).value, style2)
sheet1.write_merge(28,29,0,0, table.cell(28,0).value, style2)
sheet1.write_merge(30,31,0,0, table.cell(30,0).value, style2)
sheet1.write_merge(32,33,0,0, table.cell(32,0).value, style2)
for i in range(1, row_numb2):
drug = table2.cell(i, 0).value
gene = table2.cell(i, 1).value
rsid = table2.cell(i, 2).value
genotype = table2.cell(i, 3).value
level = table2.cell(i, 4).value
key = drug+'\t'+rsid+'\t'+genotype
cur_genotype = chemovariants.get(rsid, 'NA')
if cur_genotype == "NA":
cur_genotype = genotype
else:
cur_genotype = cur_genotype[-1]
annotation_chinese = anno_dict.get(key, 'NA')
sheet2.write(i, 0, drug, style2)
sheet2.write(i, 1, gene, style2)
sheet2.write(i, 2, rsid, style2)
sheet2.write(i, 3, cur_genotype, style2)
sheet2.write(i, 4, level, style2)
sheet2.write(i, 5, annotation_chinese, style2)
sheet2.write_merge(1,7,0,0,table.cell(1,0).value, style2)
sheet2.write_merge(8,11,0,0,table.cell(8,0).value, style2)
sheet2.write_merge(12,13,0,0,table.cell(12,0).value, style2)
sheet2.write_merge(14,17,0,0,table.cell(14,0).value, style2)
sheet2.write_merge(18,20,0,0,table.cell(18,0).value, style2)
sheet2.write_merge(22,24,0,0,table.cell(22,0).value, style2)
sheet2.write_merge(25,30,0,0,table.cell(25,0).value, style2)
sheet2.write_merge(31,33,0,0,table.cell(31,0).value, style2)
for i in range(1, row_numb3):
drug = table3.cell(i, 0).value
gene = table3.cell(i, 1).value
rsid = table3.cell(i, 2).value
genotype = table3.cell(i, 3).value
level = table3.cell(i, 4).value
key = drug+'\t'+rsid+'\t'+genotype
cur_genotype = chemovariants.get(rsid, 'NA')
if cur_genotype == "NA":
cur_genotype = genotype
else:
cur_genotype = cur_genotype[-1]
annotation_chinese = anno_dict.get(key, 'NA')
sheet3.write(i, 0, drug, style2)
sheet3.write(i, 1, gene, style2)
sheet3.write(i, 2, rsid, style2)
sheet3.write(i, 3, cur_genotype, style2)
sheet3.write(i, 4, level, style2)
sheet3.write(i, 5, annotation_chinese, style2)
sheet3.write_merge(4,6,0,0,table.cell(4,0).value, style2)
sheet3.write_merge(9,14,0,0,table.cell(9,0).value, style2)
sheet3.write_merge(16,18,0,0,table.cell(16,0).value, style2)
new_excel.save(r'./chemo_result.xls')
annofile = '~/chemo/chemo_annotation_info.txt'
path = '~/chemo/chemo_25sites_info.txt'
#vcf = './BP20201204-2.vatriants_all.txt'
excelfile = '~/chemo/tample.xls'
vcf = sys.argv[1]
GetChemoAnnotation(annofile, path, vcf, excelfile)