【python】读写csv
2019-10-10 本文已影响0人
MichalLiu
1.pandas读写csv文件
import pandas as pd
df = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data.csv')
print(df)
# 带列索引建立csv文件
df.to_csv(r'C:\Users\Dell\Desktop\output_files.csv',encoding='utf-8')
# 不带列索引建立csv文件
df_1 = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data.csv')
df_1.to_csv(r'C:\Users\Dell\Desktop\output_files.csv',index=False,encoding='utf-8')
2.对数据框去重 保存
df = df.drop_duplicates(subset=['a.user_id'], keep='last')
df.to_csv(r'C:\Users\liuzw3\Desktop\客户\x_y1.csv',index=False,encoding='utf-8')
3.使用 pandas 来分析CSV 文件,并将满足条件的行写入输出文件
import pandas as pd
data_frame = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data.csv')
data_frame['Cost'] = data_frame['Cost'].str.strip('$').str.replace(',','').astype(float)
data_frame_value_meets_condition = data_frame.loc[(data_frame['Supplier Name']\
.str.contains('Z')) | (data_frame['Cost'] > 600.0), :]
data_frame_value_meets_condition.to_csv(r'C:\Users\Dell\Desktop\output_files_1.csv',index=False,encoding='utf-8')
4.使用列标题选取列
#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data.csv')
data_frame_column_by_name = data_frame.loc[:, ['Invoice Number', 'Purchase Date']]
data_frame_column_by_name.to_csv(r'C:\Users\Dell\Desktop\output_files_4.csv',index=False,encoding='utf-8')
5.保留列标题行和数据行,除去不需要的头部和尾部
#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data_unnecessary_header_footer.csv',header=None,\
encoding="ISO-8859-1")
data_frame = data_frame.drop([0,1,2,16,17,18])
data_frame.columns = data_frame.iloc[0]
data_frame = data_frame.reindex(data_frame.index.drop(3))
data_frame.to_csv(r'C:\Users\Dell\Desktop\output_files_5.csv', index=False)
6. 添加列标题
#!/usr/bin/env python3
import pandas as pd
header_list = ['Supplier Name', 'Invoice Number',\
'Part Number', 'Cost', 'Purchase Date']
data_frame = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data_no_header_row.csv', header=None, names=header_list)
data_frame.to_csv(r'C:\Users\Dell\Desktop\output_files_6.csv', index=False)
df = pd.read_csv(r'C:\Users\Dell\Desktop\output_files_6.csv',header=None)
df = df.drop([0])
header_list = ['Supplier Name', 'Invoice Number',\
'Part Number', 'Cost', 'Purchase Date']
df=df.columns.name(header_list)
print(df)
7.文件计数与文件中的行列计数
import csv
import glob
import os
file_counter = 0
## os.path.join 连接圆括号内的两部分内容
## glob.glob 将 'sales_*' 中的星号( *)转换为实际的文件名
for input_file in glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv','sales_*')):
row_counter = 1
with open(input_file, 'r', newline='') as csv_in_file:
filereader = csv.reader(csv_in_file)
header = next(filereader, None)
for row in filereader:
row_counter += 1
print('{0!s}: \t{1:d} rows \t{2:d} columns'.format(\
os.path.basename(input_file), row_counter, len(header)))
file_counter += 1
print('Number of files: {0:d}'.format(file_counter))
8.从多个文件中连接数据
import pandas as pd
import glob
import os
all_files = glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv','sales_*'))
all_data_frames = []
for file in all_files:
data_frame = pd.read_csv(file, index_col=None)
all_data_frames.append(data_frame)
data_frame_concat = pd.concat(all_data_frames, axis=0, ignore_index=True)
data_frame_concat.to_csv(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv\all_sales.csv', index = False)
9.pandas 提供了可以用来计算行和列统计量的摘要统计函数,比如 sum 和 mean
import pandas as pd
import glob
import os
all_files = glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv','sales_*'))
all_data_frames = []
for input_file in all_files:
data_frame = pd.read_csv(input_file, index_col=None)
total_cost = pd.DataFrame([float(str(value).strip('$').replace(',','')) \
for value in data_frame.loc[:, 'Sale Amount']]).sum()
average_cost = pd.DataFrame([float(str(value).strip('$').replace(',','')) \
for value in data_frame.loc[:, 'Sale Amount']]).mean()
data = {'file_name': os.path.basename(input_file), \
'total_sales': total_cost, \
'average_sales': average_cost}
all_data_frames.append(pd.DataFrame(data, \
columns=['file_name', 'total_sales', 'average_sales']))
data_frames_concat = pd.concat(all_data_frames, axis=0, ignore_index=True)
data_frames_concat.to_csv(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv\aggregate_sales.csv', index = False)