python的坑

【python】读写csv

2019-10-10  本文已影响0人  MichalLiu

1.pandas读写csv文件

import pandas as pd
df = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data.csv')
print(df)
# 带列索引建立csv文件
df.to_csv(r'C:\Users\Dell\Desktop\output_files.csv',encoding='utf-8')
# 不带列索引建立csv文件
df_1 = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data.csv')
df_1.to_csv(r'C:\Users\Dell\Desktop\output_files.csv',index=False,encoding='utf-8')

2.对数据框去重 保存

df = df.drop_duplicates(subset=['a.user_id'], keep='last')
df.to_csv(r'C:\Users\liuzw3\Desktop\客户\x_y1.csv',index=False,encoding='utf-8')

3.使用 pandas 来分析CSV 文件,并将满足条件的行写入输出文件

import pandas as pd
data_frame = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data.csv')
data_frame['Cost'] = data_frame['Cost'].str.strip('$').str.replace(',','').astype(float)
data_frame_value_meets_condition = data_frame.loc[(data_frame['Supplier Name']\
.str.contains('Z')) | (data_frame['Cost'] > 600.0), :]
data_frame_value_meets_condition.to_csv(r'C:\Users\Dell\Desktop\output_files_1.csv',index=False,encoding='utf-8')

4.使用列标题选取列

#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data.csv')
data_frame_column_by_name = data_frame.loc[:, ['Invoice Number', 'Purchase Date']]
data_frame_column_by_name.to_csv(r'C:\Users\Dell\Desktop\output_files_4.csv',index=False,encoding='utf-8')

5.保留列标题行和数据行,除去不需要的头部和尾部

#!/usr/bin/env python3
import pandas as pd
data_frame = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data_unnecessary_header_footer.csv',header=None,\
                         encoding="ISO-8859-1")
data_frame = data_frame.drop([0,1,2,16,17,18])
data_frame.columns = data_frame.iloc[0]
data_frame = data_frame.reindex(data_frame.index.drop(3))
data_frame.to_csv(r'C:\Users\Dell\Desktop\output_files_5.csv', index=False)

6. 添加列标题

#!/usr/bin/env python3
import pandas as pd
header_list = ['Supplier Name', 'Invoice Number',\
'Part Number', 'Cost', 'Purchase Date']
data_frame = pd.read_csv(r'C:\Users\Dell\Desktop\supplier_data_no_header_row.csv', header=None, names=header_list)
data_frame.to_csv(r'C:\Users\Dell\Desktop\output_files_6.csv', index=False)

df = pd.read_csv(r'C:\Users\Dell\Desktop\output_files_6.csv',header=None)
df = df.drop([0])
header_list = ['Supplier Name', 'Invoice Number',\
'Part Number', 'Cost', 'Purchase Date']
df=df.columns.name(header_list)
print(df)

7.文件计数与文件中的行列计数

import csv
import glob
import os
file_counter = 0
## os.path.join 连接圆括号内的两部分内容
## glob.glob 将 'sales_*' 中的星号( *)转换为实际的文件名
for input_file in glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv','sales_*')):
    row_counter = 1
    with open(input_file, 'r', newline='') as csv_in_file:
        filereader = csv.reader(csv_in_file)
        header = next(filereader, None)
        for row in filereader:
            row_counter += 1
    print('{0!s}: \t{1:d} rows \t{2:d} columns'.format(\
          os.path.basename(input_file), row_counter, len(header)))
    file_counter += 1
print('Number of files: {0:d}'.format(file_counter))

8.从多个文件中连接数据

import pandas as pd
import glob
import os
all_files = glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv','sales_*'))
all_data_frames = []
for file in all_files:
    data_frame = pd.read_csv(file, index_col=None)
    all_data_frames.append(data_frame)
    data_frame_concat = pd.concat(all_data_frames, axis=0, ignore_index=True)
    data_frame_concat.to_csv(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv\all_sales.csv', index = False)

9.pandas 提供了可以用来计算行和列统计量的摘要统计函数,比如 sum 和 mean

import pandas as pd
import glob
import os
all_files = glob.glob(os.path.join(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv','sales_*'))
all_data_frames = []
for input_file in all_files:
    data_frame = pd.read_csv(input_file, index_col=None)
    total_cost = pd.DataFrame([float(str(value).strip('$').replace(',','')) \
                               for value in data_frame.loc[:, 'Sale Amount']]).sum()
    average_cost = pd.DataFrame([float(str(value).strip('$').replace(',','')) \
                               for value in data_frame.loc[:, 'Sale Amount']]).mean()
    data = {'file_name': os.path.basename(input_file), \
            'total_sales': total_cost, \
            'average_sales': average_cost}
    all_data_frames.append(pd.DataFrame(data, \
                columns=['file_name', 'total_sales', 'average_sales']))
data_frames_concat = pd.concat(all_data_frames, axis=0, ignore_index=True)
data_frames_concat.to_csv(r'C:\Users\liuzw3\Desktop\python_analysis_2.1.1\sales_csv\aggregate_sales.csv', index = False)
上一篇 下一篇

猜你喜欢

热点阅读