【python】读写Excel
2019-10-10 本文已影响0人
MichalLiu
1.确定工作簿中工作表的数量、名称和每个工作表中行列的数量
#!/usr/bin/env python3
from xlrd import open_workbook
workbook = open_workbook(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx')
print('Number of worksheets:', workbook.nsheets)
for worksheet in workbook.sheets():
print("Worksheet name:", worksheet.name, "\tRows:",\
worksheet.nrows, "\tColumns:", worksheet.ncols)
2.读写Excel文件
#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx',\
sheet_name='january_2013')
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_pandas.xlsx')
data_frame.to_excel(writer, sheet_name='jan_13_output', index=False)
writer.save()
3.多条件筛选特定行
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx', \
'january_2013', index_col=None)
data_frame_value_meets_condition = \
data_frame[(data_frame['Sale Amount'].astype(float) > 1400.0) & (data_frame['Customer ID'] == 2345)]
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_rows.xlsx')
data_frame_value_meets_condition.to_excel(writer, sheet_name='jan_13_output',\
index=False)
writer.save()
4.筛选出客户姓名以大写字母 J 开头的那些行
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx',\
'january_2013', index_col=None)
data_frame_value_matches_pattern = data_frame[data_frame['Customer Name']\
.str.startswith("J")]
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_J.xlsx')
data_frame_value_matches_pattern.to_excel(writer, sheet_name='jan_13_output',\
index=False)
writer.save()
5.索引值同时选择特定的行与特定的列
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx', \
'january_2013', index_col=None)
# 逗号是只选指定的列
# loc用列名称,iloc用行列索引值(数字)
data_frame_column_by_index = data_frame.iloc[:, [1, 4]]
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_value.xlsx')
data_frame_column_by_index.to_excel(writer, sheet_name='jan_13_output',\
index=False)
writer.save()
6.列标题同时选择特定的行与特定的列
#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx', \
'january_2013', index_col=None)
data_frame_column_by_name = data_frame.loc[:, ['Customer ID', 'Purchase Date']]
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_col.xlsx')
data_frame_column_by_name.to_excel(writer, sheet_name='jan_13_output',\
index=False)
writer.save()
7.更改excel文件的字段格式
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\20190930QH360名单.xlsx', \
'工作表1', index_col=None,converters={'标签更新时间':str,'用户ID':str})
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\date.xlsx')
data_frame.to_excel(writer, sheet_name='date',\
index=False)
writer.save()
8.在所有工作表中筛选特定行
#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx', \
sheetname=None, index_col=None)
row_output = []
for worksheet_name, data in data_frame.items():
row_output.append(data[data['Sale Amount'].astype(float) > 2000.0])
filtered_rows = pd.concat(row_output, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_all_rows.xlsx')
filtered_rows.to_excel(writer, sheet_name='sale_amount_gt2000', index=False)
writer.save()
9.在所有工作表中选取特定列合成一列
#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx',\
sheetname=None, index_col=None)
column_output = []
for worksheet_name, data in data_frame.items():
column_output.append(data.loc[:, ['Customer Name', 'Sale Amount']])
selected_columns = pd.concat(column_output, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_all_cols.xlsx')
selected_columns.to_excel(writer, sheet_name='selected_columns_all_worksheets',\
index=False)
writer.save()
10.在Excel工作簿中读取一组工作表
#!/usr/bin/env python3
import pandas as pd
my_sheets = [0,1]
threshold = 1900.0
data_frame = pd.read_excel(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013.xlsx',\
sheetname=my_sheets, index_col=None)
row_list = []
for worksheet_name, data in data_frame.items():
row_list.append(data[data['Sale Amount'].astype(float) > threshold])
filtered_rows = pd.concat(row_list, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\sales_2013_some_tables.xlsx')
filtered_rows.to_excel(writer, sheet_name='set_of_worksheets', index=False)
writer.save()
11.工作表计数以及每个工作表中的行列计数
#!/usr/bin/env python3
import glob
import os
#import sys
from xlrd import open_workbook
#input_directory = sys.argv[1]
workbook_counter = 0
for input_file in glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel', \
'*.xls*')):
workbook = open_workbook(input_file)
print('Workbook: %s' % os.path.basename(input_file))
print('Number of worksheets: %d' % workbook.nsheets)
for worksheet in workbook.sheets():
print('Worksheet name:', worksheet.name, '\tRows:',\
worksheet.nrows, '\tColumns:', worksheet.ncols)
workbook_counter += 1
print('Number of Excel workbooks: %d' % (workbook_counter))
12.从多个工作簿中连接数据
import pandas as pd
import glob
import os
all_workbooks = glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel',\
'*.xls*'))
data_frames = []
for workbook in all_workbooks:
all_worksheets = pd.read_excel(workbook, sheetname=None, index_col=None)
for worksheet_name, data in all_worksheets.items():
data_frames.append(data)
all_data_concatenated = pd.concat(data_frames, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel\some.xlsx')
all_data_concatenated.to_excel(writer, sheet_name='all_data_all_workbooks',\
index=False)
writer.save()
13.为每个工作簿和工作表计算总数和均值
#!/usr/bin/env python3
import pandas as pd
import glob
import os
all_workbooks = glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel',\
'*.xls*'))
data_frames = []
for workbook in all_workbooks:
all_worksheets = pd.read_excel(workbook, sheetname=None, index_col=None)
workbook_total_sales = []
workbook_number_of_sales = []
worksheet_data_frames = []
worksheets_data_frame = None
workbook_data_frame = None
for worksheet_name, data in all_worksheets.items():
total_sales = pd.DataFrame([float(str(value).strip('$').replace(',','')) for value in data.loc[:, 'Sale Amount']]).sum()
number_of_sales = len(data.loc[:, 'Sale Amount'])
average_sales = pd.DataFrame(total_sales / number_of_sales)
workbook_total_sales.append(total_sales)
workbook_number_of_sales.append(number_of_sales)
data = {'workbook': os.path.basename(workbook),\
'worksheet': worksheet_name,\
'worksheet_total': total_sales,\
'worksheet_average': average_sales}
worksheet_data_frames.append(pd.DataFrame(data, columns=['workbook', 'worksheet', 'worksheet_total', 'worksheet_average']))
worksheets_data_frame = pd.concat(worksheet_data_frames, axis=0, ignore_index=True)
workbook_total = pd.DataFrame(workbook_total_sales).sum()
workbook_total_number_of_sales = pd.DataFrame(workbook_number_of_sales).sum()
workbook_average = pd.DataFrame(workbook_total / workbook_total_number_of_sales)
workbook_stats = {'workbook': os.path.basename(workbook),
'workbook_total': workbook_total,
'workbook_average': workbook_average}
workbook_stats = pd.DataFrame(workbook_stats, columns=['workbook', 'workbook_total', 'workbook_average'])
workbook_data_frame = pd.merge(worksheets_data_frame, workbook_stats, on='workbook', how='left')
data_frames.append(workbook_data_frame)
all_data_concatenated = pd.concat(data_frames, axis=0, ignore_index=True)
writer = pd.ExcelWriter(r'C:\Users\liuzw3\Desktop\python_analysis_3.1-3.6\some_excel\sum.xlsx')
all_data_concatenated.to_excel(writer, sheet_name='sums_and_averages', index=False)
writer.save()