用 Python 分析 QQ 群消息并生成词云图

2018-10-02  本文已影响0人  ajycc20


import re
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from scipy.misc import imread


def seg_sentence(filename, sender=None):
    """Segment the message bodies of an exported chat log with jieba.

    The file is read as UTF-8 and scanned for records shaped like::

        YYYY-MM-DD HH:MM:SS <sender line>
        <single-line message body>
        <blank line>

    Only records followed by a blank line are matched (the pattern ends with
    a look-ahead for two consecutive newlines).

    Args:
        filename: Path of the chat-log text file.
        sender: Optional filter. When given, only messages whose sender line
            (everything after the timestamp) equals this string exactly are
            kept. ``None`` (the default) keeps every message.

    Returns:
        One string holding the space-separated tokens of every kept message,
        each message followed by a single space. Returns ``''`` when no
        message is kept.
    """
    # Raw string so ``\d`` reaches the regex engine without triggering
    # Python's invalid-escape warning; the year accepts any four digits
    # instead of being pinned to 201x.
    message_pattern = re.compile(
        r"(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) (.*)\n(.*)(?=\n\n)"
    )
    with open(filename, encoding='utf-8') as f:
        content = f.read()

    pieces = []
    for _date, _time, who, body in message_pattern.findall(content):
        # The original compared the sender text to the builtin type ``str``,
        # which is never equal, so no message was ever processed.
        if sender is not None and who != sender:
            continue
        segmented = ' '.join(jieba.cut(body, cut_all=False))
        # Skip empty and single-character results (this also covers a lone
        # tab, which the original tested for separately).
        if len(segmented) > 1:
            pieces.append(segmented)
    # Keep the original output shape: every message is followed by a space.
    return ''.join(piece + ' ' for piece in pieces)


def draw_wordcloud(filename,
                   font_path=r'C:\Windows\Fonts\simhei.ttf',
                   mask_path='test.jpg',
                   stopwords_path='stopwords.txt'):
    """Build a word cloud from the chat log at *filename*.

    Args:
        filename: Path of the chat-log file, passed to ``seg_sentence``.
        font_path: Font file used to render the words. The default is the
            SimHei font in the standard Windows font directory.
        mask_path: Image file whose pixels are used as the cloud's mask.
        stopwords_path: Text file of stop words, loaded with
            ``stopwordslist``.

    Returns:
        The ``WordCloud`` object after ``generate`` has been called on the
        segmented text. Callers can display it with ``plt.imshow`` or save
        it with its ``to_file`` method.
    """
    words = seg_sentence(filename)

    # scipy.misc.imread was removed from SciPy (1.2 and later); matplotlib,
    # which this file already imports as ``plt``, also reads an image file
    # into an array.
    color_mask = plt.imread(mask_path)

    cloud = WordCloud(
        font_path=font_path,
        background_color='white',
        mask=color_mask,
        max_words=400,
        prefer_horizontal=0.9,
        # A set gives constant-time membership tests for stop words.
        stopwords=set(stopwordslist(stopwords_path)),
        max_font_size=200,
    )
    return cloud.generate(words)



def stopwordslist(filepath):
    """Load stop words from a UTF-8 text file, one entry per line.

    Args:
        filepath: Path of the stop-word file.

    Returns:
        A list with every line of the file, in order, with leading and
        trailing whitespace stripped. Blank lines produce empty strings.
    """
    # ``with`` closes the file handle, which the original left open.
    with open(filepath, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]


if __name__ == '__main__':
    import sys

    # The original script displayed ``word_cloud`` without ever creating it,
    # which raised NameError. Build it first from the chat-log path given on
    # the command line.
    if len(sys.argv) != 2:
        sys.exit('usage: {} <chat_log.txt>'.format(sys.argv[0]))
    word_cloud = draw_wordcloud(sys.argv[1])

    plt.imshow(word_cloud)
    plt.title(u'name', fontproperties='SimHei', fontsize='large')
    plt.axis("off")
    plt.show()
上一篇 下一篇

猜你喜欢

热点阅读