Python与Excel……python的坑

【python】读写Excel

2019-10-10  本文已影响0人  MichalLiu

1.确定工作簿中工作表的数量、名称和每个工作表中行列的数量

#!/usr/bin/env python3
from xlrd import open_workbook
workbook = open_workbook(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx')
print('Number of worksheets:', workbook.nsheets)
for worksheet in workbook.sheets():
    print("Worksheet name:", worksheet.name, "\tRows:",\
          worksheet.nrows, "\tColumns:", worksheet.ncols)

2.读写Excel文件

#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx',\
                           sheet_name='january_2013')
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_pandas.xlsx')
data_frame.to_excel(writer, sheet_name='jan_13_output', index=False)
writer.save()

3.多条件筛选特定行

import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx', \
                           'january_2013', index_col=None)
data_frame_value_meets_condition = \
                data_frame[(data_frame['Sale Amount'].astype(float) > 1400.0) & (data_frame['Customer ID'] == 2345)]
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_rows.xlsx')
data_frame_value_meets_condition.to_excel(writer, sheet_name='jan_13_output',\
                                          index=False)
writer.save()

4.筛选出客户姓名以大写字母 J 开头的那些行

import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx',\
                           'january_2013', index_col=None)
data_frame_value_matches_pattern = data_frame[data_frame['Customer Name']\
                                              .str.startswith("J")]
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_J.xlsx')
data_frame_value_matches_pattern.to_excel(writer, sheet_name='jan_13_output',\
                                          index=False)
writer.save()

5.索引值同时选择特定的行与特定的列

import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx', \
                           'january_2013', index_col=None)
# 逗号是只选指定的列
# loc用列名称,iloc用行列索引值(数字)
data_frame_column_by_index = data_frame.iloc[:, [1, 4]]
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_value.xlsx')
data_frame_column_by_index.to_excel(writer, sheet_name='jan_13_output',\
index=False)
writer.save()

6.列标题同时选择特定的行与特定的列

#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx', \
                           'january_2013', index_col=None)
data_frame_column_by_name = data_frame.loc[:, ['Customer ID', 'Purchase Date']]
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_col.xlsx')
data_frame_column_by_name.to_excel(writer, sheet_name='jan_13_output',\
index=False)
writer.save()

7.更改excel文件的字段格式

import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\20190930QH360名单.xlsx', \
                           '工作表1', index_col=None,converters={'标签更新时间':str,'用户ID':str})
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\date.xlsx')
data_frame.to_excel(writer, sheet_name='date',\
                 index=False)
writer.save()

8.在所有工作表中筛选特定行

#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx', \
                           sheetname=None, index_col=None)
row_output = []
for worksheet_name, data in data_frame.items():
    row_output.append(data[data['Sale Amount'].astype(float) > 2000.0])
filtered_rows = pd.concat(row_output, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_all_rows.xlsx')
filtered_rows.to_excel(writer, sheet_name='sale_amount_gt2000', index=False)
writer.save()

9.在所有工作表中选取特定列合成一列

#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx',\
                           sheetname=None, index_col=None)
column_output = []
for worksheet_name, data in data_frame.items():
    column_output.append(data.loc[:, ['Customer Name', 'Sale Amount']])
selected_columns = pd.concat(column_output, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_all_cols.xlsx')
selected_columns.to_excel(writer, sheet_name='selected_columns_all_worksheets',\
index=False)
writer.save()

10.在Excel工作簿中读取一组工作表

#!/usr/bin/env python3
import pandas as pd
my_sheets = [0,1]
threshold = 1900.0
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx',\
                           sheetname=my_sheets, index_col=None)
row_list = []
for worksheet_name, data in data_frame.items():
    row_list.append(data[data['Sale Amount'].astype(float) > threshold])
filtered_rows = pd.concat(row_list, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_some_tables.xlsx')
filtered_rows.to_excel(writer, sheet_name='set_of_worksheets', index=False)
writer.save()

11.工作表计数以及每个工作表中的行列计数

#!/usr/bin/env python3
import glob
import os
#import sys
from xlrd import open_workbook
#input_directory = sys.argv[1]
workbook_counter = 0
for input_file in glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel', \
                                         '*.xls*')):
    workbook = open_workbook(input_file)
    print('Workbook: %s' % os.path.basename(input_file))
    print('Number of worksheets: %d' % workbook.nsheets)
    for worksheet in workbook.sheets():
        print('Worksheet name:', worksheet.name, '\tRows:',\
              worksheet.nrows, '\tColumns:', worksheet.ncols)
    workbook_counter += 1
print('Number of Excel workbooks: %d' % (workbook_counter))

12.从多个工作簿中连接数据

import pandas as pd
import glob
import os
all_workbooks = glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel',\
                                       '*.xls*'))
data_frames = []
for workbook in all_workbooks:
    all_worksheets = pd.read_excel(workbook, sheetname=None, index_col=None)
    for worksheet_name, data in all_worksheets.items():
        data_frames.append(data)
all_data_concatenated = pd.concat(data_frames, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel\some.xlsx')
all_data_concatenated.to_excel(writer, sheet_name='all_data_all_workbooks',\
index=False)
writer.save()

13.为每个工作簿和工作表计算总数和均值

#!/usr/bin/env python3
import pandas as pd
import glob
import os

all_workbooks = glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel',\
                                       '*.xls*'))
data_frames = []
for workbook in all_workbooks:
    all_worksheets = pd.read_excel(workbook, sheetname=None, index_col=None)
    workbook_total_sales = []
    workbook_number_of_sales = []
    worksheet_data_frames = []
    worksheets_data_frame = None
    workbook_data_frame = None
    for worksheet_name, data in all_worksheets.items():
        total_sales = pd.DataFrame([float(str(value).strip('$').replace(',','')) for value in data.loc[:, 'Sale Amount']]).sum()
        number_of_sales = len(data.loc[:, 'Sale Amount'])
        average_sales = pd.DataFrame(total_sales / number_of_sales)
        
        workbook_total_sales.append(total_sales)
        workbook_number_of_sales.append(number_of_sales)
        
        data = {'workbook': os.path.basename(workbook),\
                'worksheet': worksheet_name,\
                'worksheet_total': total_sales,\
                'worksheet_average': average_sales}

        worksheet_data_frames.append(pd.DataFrame(data, columns=['workbook', 'worksheet', 'worksheet_total', 'worksheet_average']))
    worksheets_data_frame = pd.concat(worksheet_data_frames, axis=0, ignore_index=True)

    workbook_total = pd.DataFrame(workbook_total_sales).sum()
    workbook_total_number_of_sales = pd.DataFrame(workbook_number_of_sales).sum()
    workbook_average = pd.DataFrame(workbook_total / workbook_total_number_of_sales)
    
    workbook_stats = {'workbook': os.path.basename(workbook),
                     'workbook_total': workbook_total,
                     'workbook_average': workbook_average}

    workbook_stats = pd.DataFrame(workbook_stats, columns=['workbook', 'workbook_total', 'workbook_average'])
    workbook_data_frame = pd.merge(worksheets_data_frame, workbook_stats, on='workbook', how='left')
    data_frames.append(workbook_data_frame)

all_data_concatenated = pd.concat(data_frames, axis=0, ignore_index=True)

writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel\sum.xlsx')
all_data_concatenated.to_excel(writer, sheet_name='sums_and_averages', index=False)
writer.save()
上一篇 下一篇

猜你喜欢

热点阅读