python 读写excel (合并单元格)

2020-12-18  本文已影响0人  树懒吃糖_

第一次使用python来处理excel,主要是为了记录下合并单元格,单元格样式
示例:从左边 变成右边,将第一列的 实现合并单元格


图片.png

工具:python3, xlrd ,xlwt

import xlrd
import sys
import xlwt
import os

#根据化疗位点,来提取突变检出
def Chemo25Sites(path):
    chemosites = dict()
    file = open(path)
    lines = file.readlines()
    rows = len(lines)
    for i in range(1, rows):
        aa = lines[i].strip().split('\t')
        chr, pos, rsid, ref, wild_type = aa
        chemosites[rsid] = wild_type+'\t'+chr+'\t'+pos+'\t'+ref
    file.close()
    return chemosites


def getChemoVariants(path, vcf):
    #vcf : sample.vatriants_all.vcf
    chemosites = Chemo25Sites(path)
    outfile = open('./chemo_result_genotype.txt','w')
    chemovariants = dict()
    for key in chemosites.keys():
        cmd = 'grep -w {key} {infile}'.format(key=key, infile=vcf)
        res = os.popen(cmd).readlines()

        if len(res) == 0:   #该化疗位点没有检出
            value = chemosites[key].split('\t')
            genotype, chrom, pos, ref = value
            #print('\t'.join([key, chrom, pos, ref, "*", "*", "*", "*", genotype]))
            chemovariants[key] = [key, chrom, pos, ref, "*", "*", "*", "*", genotype]
            outfile.write('\t'.join([key, chrom, pos, ref, "*", "*", "*", "*", genotype])+'\n')
        elif len(res) == 1:  #该化疗位点有检出
            line = res[0].strip().split('\t')
            chr, start, _, ref, alt, depth, ad, freq = line[0:8]
            if float(freq) > 0.7:
                if alt=="-":
                    genotype = "del/del"
                else:
                    genotype = alt + alt  #纯和突变
            elif float(freq) < 0.2:   #野生型
                if len(ref)>1:
                    genotype = ref+'/'+ref
                else:
                    genotype = ref+ref
            else:
                if alt=='-':
                    genotype = ref+'/del'
                else:
                    genotype = ref+alt    #杂合突变
            #print('\t'.join([key, chr, start, ref, alt, depth, ad, freq, genotype]))
            chemovariants[key] = [key, chr, start, ref, alt, depth, ad, freq, genotype]
            outfile.write('\t'.join([key, chr, start, ref, alt, depth, ad, freq, genotype])+'\n')
        elif len(res) > 1:
            print("Warnning: {key} has more than one variant!!".format(key=key))
    return chemovariants

## 注释的信息表
# 检索key: 药物+rsid+基因型
def chemoAnnotationInfos(annofile):
    anno_dict = dict()
    with open(annofile, encoding='utf-8') as file:
        for line in file:
            aa = line.strip().split('\t')
            if len(aa) != 6:
                print(line)
            _, drug, rsid, genotype, _, anno_chinese = aa
            key = drug+'\t'+rsid+'\t'+genotype
            anno_dict[key] = anno_chinese
    return anno_dict

def GetChemoAnnotation(annofile, path, vcf, excelfile):
    anno_dict = chemoAnnotationInfos(annofile)  #key:  drug+'\t'+rsid+'\t'+genotype ; val :anno_chinese
    chemovariants = getChemoVariants(path, vcf)  #key: rsid ;   val:[key, chr, start, ref, alt, depth, ad, freq, genotype]

    #read excel.sheet
    data = xlrd.open_workbook(excelfile)
    table = data.sheet_by_name('sheet1')
    table2 = data.sheet_by_name('sheet2')
    table3 = data.sheet_by_name('sheet3')
    row_numb = table.nrows
    col_numb = table.ncols

    row_numb2 = table2.nrows
    col_numb2 = table2.ncols

    row_numb3 = table3.nrows
    col_numb3 = table3.ncols

    new_excel = xlwt.Workbook()
    sheet1 = new_excel.add_sheet('sheet1', cell_overwrite_ok=True)
    sheet2 = new_excel.add_sheet('sheet2', cell_overwrite_ok=True)
    sheet3 = new_excel.add_sheet('sheet3', cell_overwrite_ok=True)

    pattern = xlwt.Pattern()
    pattern.pattern = xlwt.Pattern.SOLID_PATTERN
    pattern.pattern_fore_colour = 12
    #单元格边框样式
    borders = xlwt.Borders()
    borders.left = 2
    borders.right = 2
    borders.top = 2
    borders.bottom = 2
    # 初始化样式1
    style1 = xlwt.XFStyle()
    style1.pattern = pattern
    style1.borders = borders
    # 初始化样式2
    style2 = xlwt.XFStyle()
    style2.borders = borders


    for st in [sheet1, sheet2, sheet3]:
        st.write(0, 0, "化疗药物", style1)
        st.write(0, 1, "检测基因", style1)
        st.write(0, 2, "检测位点", style1)
        st.write(0, 3, "基因型", style1)
        st.write(0, 4, "证据等级", style1)
        st.write(0, 5, "注释", style1)

    #或者 table 中的单元格值
    for i in range(1, row_numb):
        drug = table.cell(i, 0).value
        gene = table.cell(i, 1).value
        rsid = table.cell(i, 2).value
        genotype = table.cell(i, 3).value
        level = table.cell(i, 4).value

        key = drug+'\t'+rsid+'\t'+genotype

        cur_genotype = chemovariants.get(rsid, 'NA')
        if cur_genotype == "NA":
            cur_genotype = genotype
        else:
            cur_genotype = cur_genotype[-1]
        annotation_chinese = anno_dict.get(key, 'NA')

        sheet1.write(i, 0, drug, style2)
        sheet1.write(i, 1, gene, style2)
        sheet1.write(i, 2, rsid, style2)
        sheet1.write(i, 3, cur_genotype, style2)
        sheet1.write(i, 4, level, style2)
        sheet1.write(i, 5, annotation_chinese, style2)
    sheet1.write_merge(1,12,0,0, table.cell(1,0).value, style2)
    sheet1.write_merge(14,16,0,0, table.cell(14,0).value, style2)
    sheet1.write_merge(18,24,0,0, table.cell(18,0).value, style2)
    sheet1.write_merge(25,26,0,0, table.cell(25,0).value, style2)
    sheet1.write_merge(28,29,0,0, table.cell(28,0).value, style2)
    sheet1.write_merge(30,31,0,0, table.cell(30,0).value, style2)
    sheet1.write_merge(32,33,0,0, table.cell(32,0).value, style2)

    for i in range(1, row_numb2):
        drug = table2.cell(i, 0).value
        gene = table2.cell(i, 1).value
        rsid = table2.cell(i, 2).value
        genotype = table2.cell(i, 3).value
        level = table2.cell(i, 4).value

        key = drug+'\t'+rsid+'\t'+genotype

        cur_genotype = chemovariants.get(rsid, 'NA')
        if cur_genotype == "NA":
            cur_genotype = genotype
        else:
            cur_genotype = cur_genotype[-1]
        annotation_chinese = anno_dict.get(key, 'NA')

        sheet2.write(i, 0, drug, style2)
        sheet2.write(i, 1, gene, style2)
        sheet2.write(i, 2, rsid, style2)
        sheet2.write(i, 3, cur_genotype, style2)
        sheet2.write(i, 4, level, style2)
        sheet2.write(i, 5, annotation_chinese, style2)
    sheet2.write_merge(1,7,0,0,table.cell(1,0).value, style2)
    sheet2.write_merge(8,11,0,0,table.cell(8,0).value, style2)
    sheet2.write_merge(12,13,0,0,table.cell(12,0).value, style2)
    sheet2.write_merge(14,17,0,0,table.cell(14,0).value, style2)
    sheet2.write_merge(18,20,0,0,table.cell(18,0).value, style2)
    sheet2.write_merge(22,24,0,0,table.cell(22,0).value, style2)
    sheet2.write_merge(25,30,0,0,table.cell(25,0).value, style2)
    sheet2.write_merge(31,33,0,0,table.cell(31,0).value, style2)


    for i in range(1, row_numb3):
        drug = table3.cell(i, 0).value
        gene = table3.cell(i, 1).value
        rsid = table3.cell(i, 2).value
        genotype = table3.cell(i, 3).value
        level = table3.cell(i, 4).value

        key = drug+'\t'+rsid+'\t'+genotype

        cur_genotype = chemovariants.get(rsid, 'NA')
        if cur_genotype == "NA":
            cur_genotype = genotype
        else:
            cur_genotype = cur_genotype[-1]
        annotation_chinese = anno_dict.get(key, 'NA')

        sheet3.write(i, 0, drug, style2)
        sheet3.write(i, 1, gene, style2)
        sheet3.write(i, 2, rsid, style2)
        sheet3.write(i, 3, cur_genotype, style2)
        sheet3.write(i, 4, level, style2)
        sheet3.write(i, 5, annotation_chinese, style2)
    sheet3.write_merge(4,6,0,0,table.cell(4,0).value, style2)
    sheet3.write_merge(9,14,0,0,table.cell(9,0).value, style2)
    sheet3.write_merge(16,18,0,0,table.cell(16,0).value, style2)

    new_excel.save(r'./chemo_result.xls')

annofile = '~/chemo/chemo_annotation_info.txt'
path = '~/chemo/chemo_25sites_info.txt'
#vcf = './BP20201204-2.vatriants_all.txt'
excelfile = '~/chemo/tample.xls'
vcf = sys.argv[1]
GetChemoAnnotation(annofile, path, vcf, excelfile)

上一篇下一篇

猜你喜欢

热点阅读