python-wordcloud

2018-08-21  本文已影响0人  点点渔火

https://pypi.org/project/wordcloud/

安装

pip install wordcloud

conda install -c conda-forge wordcloud

命令行

$ wordcloud_cli --text mytext.txt --imagefile wordcloud.png

PDF文档也能处理

$ pdftotext mydocument.pdf - | wordcloud_cli --imagefile wordcloud.png

脚本

#!/usr/bin/env python2
# -*- coding: utf8 -*-

import csv
import codecs
import jieba
import numpy as np
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import random
from os import path
import sys
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so sys is reloaded to get it back, and the implicit
# str <-> unicode codec is then switched to UTF-8.
# NOTE(review): reload() is not a builtin on Python 3 - confirm this script
# is only ever run with the python2 interpreter named in the shebang.
reload(sys)
sys.setdefaultencoding('utf8')
# Directory of this script; the mask image and the output PNG are resolved
# relative to it.
d = path.dirname(__file__)


# 定义中文词云函数
def word_cloud_plot(txt):
    """Render *txt* as a word cloud masked by ``car.jpeg`` and display it.

    The mask image ``car.jpeg`` is read from the script directory ``d``.
    The rendered cloud is written to ``user.png`` in the same directory and
    then shown in a matplotlib window.  The mask path, the image size and
    the mask array shape are printed along the way.

    Args:
        txt: Text handed to ``WordCloud.generate``.
    """
    mask_path = path.join(d, "car.jpeg")
    print(mask_path)
    # Convert to an array inside the ``with`` so the pixel data is loaded
    # before the underlying image file is closed.
    with Image.open(mask_path) as fp:
        print(fp.size)
        alice_mask = np.array(fp)
    print(alice_mask.shape)

    # Width and height were chosen to control the sharpness of the output.
    # NOTE(review): the wordcloud docs state that width/height are ignored
    # when a mask is supplied - confirm whether they have any effect here.
    wordcloud = WordCloud(background_color="white", max_words=1000,
                          mask=alice_mask,
                          margin=2, width=1800, height=800)

    wordcloud.generate(txt)
    wordcloud.to_file(path.join(d, "user.png"))

    # Open a plot of the generated image.  The PNG written by ``to_file``
    # above is the persisted result, so no separate figure save is needed.
    plt.figure()
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()


def plot_title_cloud(txtlist):
    """Join *txtlist* with single spaces and plot the result as a word cloud.

    The joined text and its type are printed before it is passed to
    ``word_cloud_plot``.

    Args:
        txtlist: Iterable of strings to combine into one text.
    """
    joined = ' '.join(txtlist)
    print(joined)
    print(type(joined))
    word_cloud_plot(joined)


def read_word(file_name):
    """Expand a ``word<TAB>count`` file into a flat list of repeated words.

    Each non-blank line of the UTF-8 encoded file must hold a word and an
    integer count separated by a tab.  The word is printed and appended to
    the result ``count`` times; the total length is printed at the end.

    Args:
        file_name: Path of the UTF-8 text file to read.

    Returns:
        A list of unicode strings in which every word occurs as often as its
        count says.

    Raises:
        IndexError: If a non-blank line has no tab-separated count column.
        ValueError: If the count column is not an integer.
    """
    # Local import keeps this block self-contained; io.open decodes text the
    # same way on Python 2 and Python 3.
    import io

    temp = []
    # Read-only mode is enough (the original 'r+' needed write permission),
    # and the ``with`` closes the file even when a line fails to parse.
    with io.open(file_name, 'r', encoding='utf-8') as fo:
        for line in fo:
            stripped = line.rstrip()
            if not stripped:
                # Blank lines carry no word/count pair.
                continue
            fields = stripped.split("\t")
            word = fields[0]
            print(word)
            temp.extend([word] * int(fields[1]))
    print(len(temp))
    return temp

if __name__ == '__main__':
    # Keep only the first tab-separated column of every line, lower-cased
    # and newline-terminated.  The list is built eagerly while the file is
    # still open, and a unicode newline literal is appended: the original
    # ``'\n'.encode("utf-8")`` encoded only the newline (attribute access
    # binds tighter than ``+``), mixing unicode and byte strings.
    with codecs.open('/Users/西游记.txt', "r", 'utf-8') as fo:
        t1 = [line.rstrip().lower().split("\t")[0] + u'\n' for line in fo]
    plot_title_cloud(t1)
上一篇 下一篇

猜你喜欢

热点阅读