知识管理

Python 按章节分割小说 txt 文件

2018-10-15  本文已影响572人  AI视客
# -*- coding: utf-8 -*-
# __author__:'Administrator'
# @Time    : 2018/8/31 14:19
import os
dst = "D:\\test"   # directory under which the split files are written


# Split one txt file into per-chapter files, using "第…章" heading lines.
def SplitFile(file_path1, dst):
    """Split a UTF-8 text file into one output file per chapter.

    A line counts as a chapter heading when it contains "第" and also
    contains "章 " or "章...", or ends with "章" once the line terminator is
    removed.  The lines that follow a heading, up to the next heading, are
    appended unchanged to ``<dst>/<source base name>/<n>_<heading>.txt``,
    where ``n`` counts headings starting at 1.  The heading line itself is
    not copied, and lines that come before the first heading are skipped.

    Args:
        file_path1: Path of the source txt file.
        dst: Directory under which the per-source output directory is made.
    """
    # The file is read as bytes, so the markers are encoded once up front;
    # this behaves the same on Python 2 and Python 3.
    di = u"第".encode("utf8")
    zhang = u"章".encode("utf8")
    inline_markers = (zhang + b" ", zhang + b"...")

    with open(file_path1, 'rb') as f1:
        lines1 = f1.readlines()

    # Output directory is named after the source file (text before the first dot).
    file_dir1 = file_path1.replace("\\", '/').split("/")[-1].split(".")[0]
    path1 = os.path.join(dst, file_dir1)
    if not os.path.exists(path1):
        os.makedirs(path1)

    chapter_no = 0
    out = None        # handle of the chapter file currently being written
    out_path = None   # target of the current chapter; None until a heading is seen
    try:
        for line in lines1:
            is_heading = di in line and (
                any(marker in line for marker in inline_markers)
                or line.rstrip(b"\r\n").endswith(zhang)
            )
            if is_heading:
                try:
                    name = line.strip().decode('utf8')
                except UnicodeDecodeError as e:
                    # Keep collecting into the current chapter, as before.
                    print("skipping undecodable heading: %s" % e)
                    continue
                if out is not None:
                    out.close()
                    out = None
                chapter_no += 1
                out_path = os.path.join(path1, "%s_%s.txt" % (chapter_no, name))
            elif out_path is not None:
                try:
                    # Opened lazily so a heading without body creates no file.
                    if out is None:
                        out = open(out_path, 'ab')
                    out.write(line)
                except (IOError, OSError) as e:
                    # Best effort: report once and drop the rest of this chapter.
                    print("cannot write chapter %d: %s" % (chapter_no, e))
                    out_path = None
    finally:
        if out is not None:
            out.close()


# Split one txt file into sections, using lines that carry the next running number.
def SplitFile_by_Num(file_path2, dst):
    """Split a text file into numbered sections.

    A counter starts at 1.  A line that contains the decimal digits of the
    current counter value is taken as the heading of the next section and
    the counter is advanced.  The lines that follow, up to the next heading,
    are appended unchanged to ``<dst>/<source base name>/<n>_<heading>.txt``.
    The heading line itself is not copied, and lines that come before the
    first heading are skipped.

    Args:
        file_path2: Path of the source txt file.
        dst: Directory under which the per-source output directory is made.
    """
    with open(file_path2, 'rb') as f2:
        lines2 = f2.readlines()

    # Output directory is named after the source file (text before the first dot).
    file_dir2 = file_path2.replace("\\", '/').split("/")[-1].split(".")[0]
    path2 = os.path.join(dst, file_dir2)
    if not os.path.exists(path2):
        os.makedirs(path2)

    i = 1
    out = None        # handle of the section file currently being written
    out_path = None   # target of the current section; None until a heading is seen
    try:
        for line in lines2:
            # NOTE(review): the earlier tests for "<i>\r\n", "<i>、" and "<i> "
            # were all implied by the plain containment test, so only that
            # test remains.  It also fires on digits inside larger numbers;
            # confirm whether a stricter match is wanted.
            marker = ("%d" % i).encode("ascii")
            if marker in line:
                try:
                    name = line.strip().decode('utf8')
                except UnicodeDecodeError as e:
                    # Keep collecting into the current section, as before.
                    print("skipping undecodable heading: %s" % e)
                    continue
                if out is not None:
                    out.close()
                    out = None
                out_path = os.path.join(path2, "%s_%s.txt" % (i, name))
                i += 1
            elif out_path is not None:
                try:
                    # Opened lazily so a heading without body creates no file.
                    if out is None:
                        out = open(out_path, 'ab')
                    out.write(line)
                except (IOError, OSError) as e:
                    # Best effort: report once and drop the rest of this section.
                    print("cannot write section %d: %s" % (i - 1, e))
                    out_path = None
    finally:
        if out is not None:
            out.close()


# Collect every txt file found below a directory.
def get_all_txt(path):
    """Return the paths of all ``.txt`` files under *path*, recursively.

    Only names that end in ``.txt`` (compared case-insensitively) are
    returned, so files such as ``notes.txt.bak`` are left out.

    Args:
        path: Directory to search.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
        The list is empty when nothing matches or *path* does not exist.
    """
    filepaths = []
    for root, _dirs, files in os.walk(path):
        filepaths.extend(
            os.path.join(root, name)
            for name in files
            if name.lower().endswith('.txt')
        )
    return filepaths


if __name__ == "__main__":
    # Directory that holds the source txt files.
    file_dir = "D:\\xiaoshuo"
    file_paths = get_all_txt(unicode(file_dir, "utf8"))
    for txt_path in file_paths:
        # First attempt: split on chapter headings.
        SplitFile(txt_path, dst)
        # The split functions write into a directory named after the source file.
        base_name = txt_path.replace("\\", '/').split("/")[-1].split(".")[0]
        out_root = os.path.join(dst, base_name)
        for _root, _subdirs, produced in os.walk(out_root):
            if produced:
                continue
            # Nothing was written here, so fall back to splitting by number.
            SplitFile_by_Num(txt_path, dst)


上一篇 下一篇

猜你喜欢

热点阅读