test_xia4:合并列头相同的多个文件

2020-06-08  本文已影响0人  夕颜00

1、文件(多个列头相同的 txt 文件,下面依次是两个文件的内容):

A1  A2  A3  B1  B2  B3
909 312 670 660 264 943
726 335 905 143 696 892
A1  A2  A3  B1  B2  B3
633 429 534 941 124 370
223 540 845 488 878 134

2、目的:合并多个文件,并标注来源

A1  A2  A3  B1  B2  B3  source
909 312 670 660 264 943  test1
726 335 905 143 696 892  test1
633 429 534 941 124 370  test2
223 540 845 488 878 134  test2

3、脚本1: 用pandas append实现

import os
import pandas as pd

# Script 1: merge every .txt table found in `path` into a single CSV and tag
# each row with the name of the file it came from.
path = "E:/Script/python/xia_test/4"
output = "E:/Script/python/xia_test/4/out.csv"

# os.listdir() returns bare file names in arbitrary order; sort them so the
# merged rows come out in a reproducible order.
file_list = sorted(i for i in os.listdir(path) if i.endswith(".txt"))

frames = []
for i in file_list:
    # Join with `path`: a bare name would be resolved against the current
    # working directory rather than the directory that was listed.
    data = pd.read_table(os.path.join(path, i))
    # Strip only the trailing extension to build the source label.
    data["source"] = os.path.splitext(i)[0]
    frames.append(data)

# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration, so concatenate once instead.  pd.concat
# raises on an empty list; fall back to an empty frame when no .txt file was
# found.
df_w = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

print(df_w)
df_w.to_csv(output, index=False)
import pandas as pd
import os

# Script 2 settings: the folder scanned for .txt tables (note the trailing
# slash) and the merged CSV, which lives in that same folder.
dir0 = "E:/资料/201804_学习/Python/练习/xia_test/4/"
sum0 = dir0 + "sum_1.csv"

def get_name(dir0):
    """Return the base names (extension removed) of the ``.txt`` files under *dir0*.

    The tree below *dir0* is walked recursively, but only bare file names are
    returned, so a caller cannot tell which sub-directory a name came from.

    Args:
        dir0: Directory to scan.

    Returns:
        A list of file names without the ``.txt`` suffix, in the order
        ``os.walk`` reports them. Empty when nothing matches.
    """
    xtt_name = []
    for _root, _dirs, files in os.walk(dir0):
        for name in files:
            # splitext removes only the final extension, so "a.b.txt" keeps
            # its stem "a.b", and a name that merely ends in the letters
            # "txt" (no dot) is not mistaken for a text file.
            stem, ext = os.path.splitext(name)
            if ext == '.txt':
                xtt_name.append(stem)
    return xtt_name

# Load every table reported by get_name() -- however many there are -- and
# tag its rows with the file's base name.  A fixed df1..df4 sequence would
# fail with IndexError for fewer than four files and ignore any extra ones.
list_name = get_name(dir0)

frames = []
for name in list_name:
    df = pd.read_table(os.path.join(dir0, name + '.txt'))
    df['source'] = name
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no .txt file.
reader = (
    pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()
)
reader.to_csv(sum0, index=False)
import os
import csv

# Script 3 settings: the folder holding the input CSV files, and the merged
# CSV, which is placed one level above that folder.
file_dir = "E:/资料/201804_学习/Python/练习/xia_test/4/file"
sum0 = os.path.dirname(file_dir) + "/sum.csv"


def file_name(file_dir):
    """Return the extension-less names of the files directly inside *file_dir*.

    Only the top level of *file_dir* is inspected; sub-directories are not
    descended into. No filtering by extension is done.

    Args:
        file_dir: Directory to list.

    Returns:
        A list of file names with their final extension removed. The list is
        empty when the directory has no files or cannot be listed.
    """
    # The first tuple produced by os.walk() describes file_dir itself.  For a
    # missing or unreadable directory os.walk() yields nothing, so supply a
    # default; otherwise the function would fall through and return None.
    _dirpath, _dirnames, filenames = next(os.walk(file_dir), (file_dir, [], []))
    # splitext drops only the last extension, so "a.b.csv" keeps "a.b".
    return [os.path.splitext(i)[0] for i in filenames]

def b(name):
    """Load ``<file_dir>/<name>.csv`` and tag every row with its source.

    Args:
        name: File name without the ``.csv`` extension; the file is looked up
            inside the module-level ``file_dir``.

    Returns:
        A list with one mapping per data row, keyed by the CSV header, each
        carrying an extra ``'source'`` key whose value is *name*.

    Raises:
        OSError: If the file cannot be opened.
    """
    path = f"{file_dir}/{name}.csv"
    # The csv module asks for files to be opened with newline="" so that line
    # breaks embedded in quoted fields are parsed correctly.
    with open(path, 'r', newline="") as file:
        res_list = []
        for row in csv.DictReader(file):
            row['source'] = name
            res_list.append(row)
    return res_list


# Write the merged table: the fixed header row first, then the tagged rows of
# each input file, in the order file_name() lists them.
header = ['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'source']

with open(sum0, 'w', newline="") as new:
    f_csv = csv.DictWriter(new, fieldnames=header)
    f_csv.writeheader()

    name_list = file_name(file_dir)
    for n in name_list:
        f_csv.writerows(b(n))
上一篇 下一篇

猜你喜欢

热点阅读