Python学习内容整理

openpyxl和pandas的一次小实践

2020-02-09  本文已影响0人  Lorence

工作中,从数据库中导出一份Excel表格,里面全是未经加工的数据。需要对它们进行重新的整合,然后输出为一份报告表。以前曾经使用vba写过程序处理,但最近原始数据库中的表格架构变了。改VBA代码相当于重写。于是尝试使用openpyxl和pandas库进行一轮操作。


功能需求:

原始表格中,含有工单及其对应的一系列生产数据:


image.png

要求针对不同的产品型号,工单,生成对应的表格报告。


代码的逻辑大概如下:

  1. 读取原始数据,然后把它们归类为指定的文件中去
def readOrigData(TLA):  #读取原始从Ipro下载出来的数据,然后把它们归类为指定的PWA文件中去
    originalData=getfile()
    df=pd.read_excel(str(originalData))
    data=df[['整机型号','采购单号','TLA 物料号','故障原因','故障原因描述','备注','坏机数量',\
            '坏件数量','OFD','坏点数量','检查台数','产品总数','产品序列号','零件位置']]
    d_TLA=data[(data['整机型号']==TLA)]
    for pwa in d_TLA['TLA 物料号'].drop_duplicates():
        create_PWA_Report(TLA,pwa)
        d_pwa=d_TLA[(d_TLA['TLA 物料号']==pwa)]
        outputlink="D:\\My Documents\\Wave Solder Report\\"+TLA+"\\"+ pwa[0:10]+" Wave Solder Defect.xlsx"
        book=load_workbook(outputlink)
        writer=pd.ExcelWriter(outputlink,engine='openpyxl')
        writer.book=book
        d_pwa[::].to_excel(writer,index=False,sheet_name=pwa[0:10])  
        writer.save()

上面我定义了一个函数getfile(),用于获取原始数据的路径。

def getfile():
    root=tk.Tk()
    root.withdraw()
    #Folderpath=filedialog.askdirectory() #获得选择好的文件夹路径
    Filepath=filedialog.askopenfilename() #获得选择好的文件路径
    return Filepath

而函数create_PWA_Report(TLA,pwa) 从模板中复制创建了新的文件,以PWA名字开头。

def create_PWA_Report(TLA,pwa):  #从模板中复制创建了新的文件,以PWA名字开头
    sourcefile="D:\\My Documents\\Wave Solder Report\\Wave_Solder_Report_Template.xlsx"
    path="D:\\My Documents\\Wave Solder Report\\"+TLA
    if os.path.isdir(path):
        pass
    else:
        os.mkdir(path)
    newfile="D:\\My Documents\\Wave Solder Report\\"+TLA+"\\"+ pwa[0:10]+" Wave Solder Defect.xlsx"
    if os.path.isfile(newfile):
        print("Exist file: "+pwa[0:10])
    else:
        print("Create file: "+pwa[0:10])
        shutil.copyfile(sourcefile,newfile)
  1. 针对不同的excel表,填充数据及图表
def writeExcelData(filelink):       #针对不同的excel表,填充数据及图表
    #filelink="D:\\My Documents\\Wave Solder Report\\700-014896-0000\\790-023619 Wave Solder Defect.xlsx"
    pwa=filelink.split("\\")[-1][0:10]
    all_dj_data=pd.read_excel(filelink,sheet_name=pwa)
    #if len(all_dj_data['零件位置'])==1:
    if len(all_dj_data[['零件位置']])<2:
        dj=all_dj_data[['采购单号']].iloc[0,0]
        wb=load_workbook(filelink)
        sht=wb.worksheets[0]  #Template sheet
        sht_copy=wb.copy_worksheet(sht)    #复制Template sheet
        sht.sheet_state ='hidden'         #隐藏template页
        sht_copy.title=dj                 #以DJ改名新增的sheet
        dj_sht=wb[dj]
        dj_sht.sheet_view.showGridLines = False  #隐藏表格中的网格线                        
        dj_sht["B5"]=all_dj_data.iloc[0,0]  #TLA PN
        dj_sht["G6"]=all_dj_data['产品总数'].max()  #DJ 总数
        dj_sht["U5"]=all_dj_data['OFD'].max()  #OFD
        wb.save(filelink)
    else:
        for dj in all_dj_data['采购单号'].drop_duplicates():
            print(dj)
            wb=load_workbook(filelink)
            sht=wb.worksheets[0]  #Template sheet
            sht_copy=wb.copy_worksheet(sht)    #复制Template sheet
            sht.sheet_state ='hidden'         #隐藏template页
            sht_copy.title=dj                 #以DJ改名新增的sheet
            dj_data=all_dj_data[(all_dj_data['采购单号']==dj)].sort_values(by=['坏件数量'],ascending=False)  #过滤指定dj的数据
            dj_sht=wb[dj]
            dj_sht.sheet_view.showGridLines = False  #隐藏表格中的网格线                        
            dj_sht["B5"]=dj_data.iloc[0,0]  #TLA PN
            dj_sht["G6"]=dj_data['产品总数'].max()  #DJ 总数
            dj_sht["U5"]=dj_data['OFD'].max()  #OFD
            dj_data=dj_data.dropna() #删除缺失值 
            
            maxrow=27  #对应template中的problem下一个空行
            original_ref_col=2 #对应template中的Circuit  Code/Location
            original_def_col=2 #对应template中的Defects
            original_qty_col=2 #对应template中的Failed Quantity
            for index, row in dj_data[['零件位置','故障原因','故障原因描述','备注','坏件数量']].iterrows():
                defect_description=row['零件位置'] + ": "+ row['故障原因描述'] + " ("+ row['备注']+")" + "\n"+"坏件数量: " +str(row['坏件数量'])
                #maxrow=dj_sht.max_row
                dj_sht.cell(row=19,column=original_def_col).value=row['故障原因']
                dj_sht.cell(row=20,column=original_ref_col).value=row['零件位置']
                dj_sht.cell(row=21,column=original_qty_col).value=row['坏件数量']
                dj_sht.cell(row=maxrow,column=1).value=defect_description  #把不良写入problem表中
                maxrow+=1
                original_ref_col+=1
                original_def_col+=1
                original_qty_col+=1
            #以下代码给表单添加barchart
            chart1 = BarChart()
            chart1.type = "col"
            chart1.style = 10
            chart1.title = "Wave Solder Defect"
            chart1.y_axis.title = 'Defect number'
            data = Reference(dj_sht, min_col=1, max_col=21, min_row=21, max_row=21)  #定义数据的来源
            cats = Reference(dj_sht, min_col=2, max_col=21, min_row=20, max_row=20)  #定义x轴的分类
            chart1.add_data(data=data,titles_from_data=True, from_rows=True)
            chart1.set_categories(cats)
            chart1.shape = 4
            chart1.legend.position = "t"    
            chart1.height=5   #chart的高度
            chart1.width=20   #chart的宽度
            dj_sht.add_chart(chart1, "B8")  
        
            wb.save(filelink)

这里面的难点是利用openpyxl画图表。需要提前导入相关函数。而且要区分清楚行和列的范围。

from openpyxl.chart import BarChart, Series, Reference
from openpyxl.chart.layout import Layout, ManualLayout

下面整个程序的完整代码:

import shutil
from openpyxl import load_workbook
import os
from openpyxl.chart import BarChart, Series, Reference
from openpyxl.chart.layout import Layout, ManualLayout
from openpyxl import Workbook
import tkinter as tk
from tkinter import filedialog

def getfile():
    root=tk.Tk()
    root.withdraw()
    #Folderpath=filedialog.askdirectory() #获得选择好的文件夹
    Filepath=filedialog.askopenfilename() #获得选择好的文件
    return Filepath

def create_PWA_Report(TLA,pwa):  #从模板中复制创建了新的文件,以PWA名字开头
    sourcefile="D:\\My Documents\\Wave Solder Report\\Wave_Solder_Report_Template.xlsx"
    path="D:\\My Documents\\Wave Solder Report\\"+TLA
    if os.path.isdir(path):
        pass
    else:
        os.mkdir(path)
    newfile="D:\\My Documents\\Wave Solder Report\\"+TLA+"\\"+ pwa[0:10]+" Wave Solder Defect.xlsx"
    if os.path.isfile(newfile):
        print("Exist file: "+pwa[0:10])
    else:
        print("Create file: "+pwa[0:10])
        shutil.copyfile(sourcefile,newfile)

def readOrigData(TLA):  #读取原始从Ipro下载出来的数据,然后把它们归类为指定的PWA文件中去
    originalData=getfile()
    df=pd.read_excel(str(originalData))
    data=df[['整机型号','采购单号','TLA 物料号','故障原因','故障原因描述','备注','坏机数量',\
            '坏件数量','OFD','坏点数量','检查台数','产品总数','产品序列号','零件位置']]
    d_TLA=data[(data['整机型号']==TLA)]
    for pwa in d_TLA['TLA 物料号'].drop_duplicates():
        create_PWA_Report(TLA,pwa)
        d_pwa=d_TLA[(d_TLA['TLA 物料号']==pwa)]
        outputlink="D:\\My Documents\\Wave Solder Report\\"+TLA+"\\"+ pwa[0:10]+" Wave Solder Defect.xlsx"
        book=load_workbook(outputlink)
        writer=pd.ExcelWriter(outputlink,engine='openpyxl')
        writer.book=book
        d_pwa[::].to_excel(writer,index=False,sheet_name=pwa[0:10])  
        writer.save()

def writeExcelData(filelink):       #针对不同的excel表,填充数据及图表
    #filelink="D:\\My Documents\\Wave Solder Report\\700-014896-0000\\790-023619 Wave Solder Defect.xlsx"
    pwa=filelink.split("\\")[-1][0:10]
    all_dj_data=pd.read_excel(filelink,sheet_name=pwa)
    #if len(all_dj_data['零件位置'])==1:
    if len(all_dj_data[['零件位置']])<2:
        dj=all_dj_data[['采购单号']].iloc[0,0]
        wb=load_workbook(filelink)
        sht=wb.worksheets[0]  #Template sheet
        sht_copy=wb.copy_worksheet(sht)    #复制Template sheet
        sht.sheet_state ='hidden'         #隐藏template页
        sht_copy.title=dj                 #以DJ改名新增的sheet
        dj_sht=wb[dj]
        dj_sht.sheet_view.showGridLines = False  #隐藏表格中的网格线                        
        dj_sht["B5"]=all_dj_data.iloc[0,0]  #TLA PN
        dj_sht["G6"]=all_dj_data['产品总数'].max()  #DJ 总数
        dj_sht["U5"]=all_dj_data['OFD'].max()  #OFD
        wb.save(filelink)
    else:
        for dj in all_dj_data['采购单号'].drop_duplicates():
            print(dj)
            wb=load_workbook(filelink)
            sht=wb.worksheets[0]  #Template sheet
            sht_copy=wb.copy_worksheet(sht)    #复制Template sheet
            sht.sheet_state ='hidden'         #隐藏template页
            sht_copy.title=dj                 #以DJ改名新增的sheet
            dj_data=all_dj_data[(all_dj_data['采购单号']==dj)].sort_values(by=['坏件数量'],ascending=False)  #过滤指定dj的数据
            dj_sht=wb[dj]
            dj_sht.sheet_view.showGridLines = False  #隐藏表格中的网格线                        
            dj_sht["B5"]=dj_data.iloc[0,0]  #TLA PN
            dj_sht["G6"]=dj_data['产品总数'].max()  #DJ 总数
            dj_sht["U5"]=dj_data['OFD'].max()  #OFD
            dj_data=dj_data.dropna() #删除缺失值 
            
            maxrow=27  #对应template中的problem下一个空行
            original_ref_col=2 #对应template中的Circuit  Code/Location
            original_def_col=2 #对应template中的Defects
            original_qty_col=2 #对应template中的Failed Quantity
            for index, row in dj_data[['零件位置','故障原因','故障原因描述','备注','坏件数量']].iterrows():
                defect_description=row['零件位置'] + ": "+ row['故障原因描述'] + " ("+ row['备注']+")" + "\n"+"坏件数量: " +str(row['坏件数量'])
                #maxrow=dj_sht.max_row
                dj_sht.cell(row=19,column=original_def_col).value=row['故障原因']
                dj_sht.cell(row=20,column=original_ref_col).value=row['零件位置']
                dj_sht.cell(row=21,column=original_qty_col).value=row['坏件数量']
                dj_sht.cell(row=maxrow,column=1).value=defect_description  #把不良写入problem表中
                maxrow+=1
                original_ref_col+=1
                original_def_col+=1
                original_qty_col+=1
            #以下代码给表单添加barchart
            chart1 = BarChart()
            chart1.type = "col"
            chart1.style = 10
            chart1.title = "Wave Solder Defect"
            chart1.y_axis.title = 'Defect number'
            data = Reference(dj_sht, min_col=1, max_col=21, min_row=21, max_row=21)  #定义数据的来源
            cats = Reference(dj_sht, min_col=2, max_col=21, min_row=20, max_row=20)  #定义x轴的分类
            chart1.add_data(data=data,titles_from_data=True, from_rows=True)
            chart1.set_categories(cats)
            chart1.shape = 4
            chart1.legend.position = "t"    
            chart1.height=5   #chart的高度
            chart1.width=20   #chart的宽度
            dj_sht.add_chart(chart1, "B8")  
        
            wb.save(filelink)

def searchTLAfolder(TLA):
    filelist=[]
    file_dir="D:\\My Documents\\Wave Solder Report\\"+TLA
    for files in os.walk(file_dir):
        for file in files[2]:
            filelist.append(file_dir + "\\"+file)
    return filelist

print("Hi, This tool is help to generate a wave solder defect report from Ipro data")
TLA=input("Please input a TLA PN: ")
print("Select Ipro download data")
readOrigData(TLA)
for j in searchTLAfolder(TLA):
    print(j)
    writeExcelData(j)
print("Sir, Wave Solder Build Report Is Ready")

上一篇 下一篇

猜你喜欢

热点阅读