三国演义

2021-04-15  本文已影响0人  __method__
import jieba
from matplotlib import pyplot as plt
# Configure matplotlib so that Chinese text renders correctly.
plt.rcParams.update({
    'font.sans-serif': ['KaiTi'],  # default sans-serif font
    'axes.unicode_minus': False,  # keep the minus sign '-' from showing as a box in saved figures
})
# Read the novel, count word frequencies, and chart the most frequent names
class SanGuo:
    """Count word frequencies in a novel and chart the top-n entries.

    The pipeline (see ``run``) reads the text file, segments it with
    ``jieba``, counts every word longer than one character, merges name
    aliases, removes the stop words, sorts by count and draws a pie chart
    with matplotlib.
    """

    # Alternate spellings whose counts are folded into one canonical name.
    _ALIASES = {
        '孔明': ('孔明曰',),
        '玄德': ('玄德曰', '刘备'),
        '关公': ('云长',),
    }

    def __init__(self, n=10, path='./novel/threekingdom.txt'):
        """Create an analyzer.

        Args:
            n: How many of the most frequent words to report and plot.
            path: UTF-8 text file to analyze; defaults to the location the
                script has always used.
        """
        self.n = n
        self.path = path
        self.new_word_list = []  # (word, count) pairs, most frequent first
        self.counts = {}  # word -> number of occurrences
        # Frequent words that must not appear in the ranking, including the
        # alias spellings that are merged into a canonical name.
        self.stop_words = {"将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议",
                "如何", "主公", "军士", "军马", "左右", "次日", "引兵", "大喜", "天下",
                "东吴", "于是", "今日", "不敢", "魏兵", "陛下", "都督", "人马", "不知",
                "孔明曰", "玄德曰", "刘备",'云长'}

    def read_data(self):
        """Return the whole content of ``self.path`` decoded as UTF-8."""
        with open(self.path, mode='r', encoding='utf-8') as f:
            return f.read()

    def parse(self, data):
        """Segment ``data`` with jieba and add the words to ``self.counts``.

        Single-character tokens are ignored.
        """
        for word in jieba.lcut(data):
            if len(word) > 1:
                self.counts[word] = self.counts.get(word, 0) + 1

    def sort_and_filter(self):
        """Merge aliases, drop stop words and rank the remaining words.

        The result is stored in ``self.new_word_list`` as ``(word, count)``
        pairs sorted by descending count. Words that never occurred in the
        text are tolerated, so the method neither raises ``KeyError`` on
        unusual input nor fails when it is called more than once.
        """
        for canonical, aliases in self._ALIASES.items():
            # pop() folds each alias in exactly once and treats a missing
            # alias as zero occurrences.
            extra = sum(self.counts.pop(alias, 0) for alias in aliases)
            if extra or canonical in self.counts:
                self.counts[canonical] = self.counts.get(canonical, 0) + extra
        for word in self.stop_words:
            self.counts.pop(word, None)
        self.new_word_list = sorted(
            self.counts.items(), key=lambda item: item[1], reverse=True
        )

    def _top_items(self):
        """Return at most ``self.n`` leading entries of ``self.new_word_list``."""
        # Slicing never raises when fewer than n words exist; max() keeps a
        # negative n from turning into an "all but the last" slice.
        return self.new_word_list[:max(self.n, 0)]

    def show(self):
        """Print the top-n words with their counts and show them as a pie chart."""
        top_items = self._top_items()
        print("前top{}的分析结果".format(self.n))
        for name, num in top_items:
            print(name, num)
        role_list = [name for name, _ in top_items]
        num_list = [num for _, num in top_items]
        plt.pie(num_list, labels=role_list, shadow=True, autopct='%1.1f%%')
        plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
        plt.title('三国TOP{}人物出场频次占比图'.format(self.n), fontsize=30)
        plt.show()

    def run(self):
        """Execute the whole pipeline: read, parse, rank and display."""
        data = self.read_data()
        self.parse(data)
        self.sort_and_filter()
        self.show()

if __name__ == '__main__':
    # Analyze the novel and chart the five most frequent names.
    SanGuo(5).run()
上一篇 下一篇

猜你喜欢

热点阅读