python技巧

python转换文件的格式编码脚本

2018-07-09  本文已影响0人  陆_志东

这里的例子以utf-16 转换为utf-8 为例, 你也可以转其他编码的只需要修改参数即可

# import pandas
import os


def convert_file(file_dir,new_dir,desc_type,previous_type):
    error_list = list()
    for root,dirs,files in os.walk(file_dir):
        for file in files:
            file_path = os.path.join(root,file)
            # try:
            #     df1 = pandas.read_csv(file_path,encoding=previous_type)
            #     new_path = os.path.join(new_dir,file)
            #     df1.to_csv(new_path,encoding=desc_type)
            # except Exception as e:
            #     print(e)
            #     print("file :{}  open is error and continue".format(file_path))
            #     error_list.append(file_path)
            #     continue
            try:
                with open(file_path, "rb") as f:
                    res = f.read().decode(previous_type)   # decode 是将二进制bytes编码转换为unicode,
                with open(os.path.join(new_dir,file),"w",encoding=desc_type) as f:  # encode 是将unicode编码转换为其他编码
                    f.write(res)
            except Exception as e:
                print("file :{} because error : [{}] continue".format(file,e))
                error_list.append(file)
        print(error_list)


if __name__ == '__main__':
    # 如果想要知道原始文件的格式,使用notepad++打开文件,右下角有文件的编码格式
    file_dir = "./csv_data"    # 存放原文件数据的目录
    new_dir = "./csv_new_data"   # 存放新数据的目录
    desc_type = "utf-8"    # 新文件的格式
    previous_type = "utf-16-le"   # UCS-2 Little Endian(即 utf-16)  # 新文件的原格式
    convert_file(file_dir,new_dir,desc_type,previous_type)   
上一篇下一篇

猜你喜欢

热点阅读