上海Python

京东在售华为P30评价分析

2019-06-04  本文已影响0人  徐_c90e
import requests
from lxml import etree
import json
import time
from wordcloud import WordCloud
from matplotlib import pyplot as plt


# Product page of the JD item whose reviews are collected by this script.
url = 'https://item.jd.com/100004404916.html'
# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
}

# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url, headers=headers, timeout=30)
res.encoding = 'gbk'
# Parse the decoded text directly. Re-encoding it with .encode('gbk') raises
# UnicodeEncodeError as soon as the text contains a character GBK cannot
# represent, e.g. the U+FFFD that requests substitutes for undecodable bytes.
html = etree.HTML(res.text)

# This absolute XPath is tied to one specific page layout. When it matches
# nothing, fall back to the URL so the banner can still be printed instead of
# the script dying with IndexError before any review is fetched.
name_matches = html.xpath('//*[@id="detail"]/div[2]/div[1]/div[1]/ul[3]/li[1]/@title')
productname = name_matches[0] if name_matches else url
print('***************',productname,'**************')
time.sleep(2)
# The review endpoint is requested with the product page as Referer.
headers['Referer'] = url
# Review listing endpoint; the placeholder is the zero-based page number.
comment_url_0 = 'https://sclub.jd.com/comment/productPageComments.action?&productId=100004404916&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&rid=0&fold=1'
contents = []  # cleaned review texts, in the order they were fetched
j = 0  # running number of the review across all pages
for i in range(0, 20):
    if i:
        # Pause between pages only; there is nothing to wait for after the
        # last page.
        time.sleep(10)
    comment_url = comment_url_0.format(i)
    # A timeout keeps one stalled request from blocking the whole run.
    comment_res = requests.get(comment_url, headers=headers, timeout=30)
    print(comment_res.status_code, comment_res.text)
    try:
        json_data = json.loads(comment_res.text)
    except ValueError:
        # The body was not JSON (for example an empty response); skip this
        # page rather than losing the reviews already collected.
        continue
    # Tolerate a payload that is not an object or has no/null 'comments'.
    comments = json_data.get('comments') if isinstance(json_data, dict) else None
    for comment in comments or []:
        content = comment['content'].replace('\n', '')
        nickname = comment['nickname']
        j = j+1
        print(str(j)+'、',nickname+':',content)
        contents.append(content)
# Same layout as before (every review preceded by a newline), but built with
# a single join instead of repeated string concatenation.
all_content = ''.join('\n' + text for text in contents)
print(all_content)
# Font used to render the cloud. Chinese text needs a font that contains CJK
# glyphs, otherwise the words are drawn as empty boxes.
# NOTE(review): this path only exists on Windows - adjust on other systems.
font = r'C:\Windows\Fonts\simhei.ttf'
if all_content.strip():
    wc = WordCloud(
        font_path=font,
        background_color='white',
        width=959,
        height=959,
        # Optional extras: mask=<image array> to shape the cloud,
        # stopwords=<set of words> to hide unwanted words.
    ).generate(all_content)
    # wc.to_file('ss.png') would save the image to disk instead.
    plt.imshow(wc)   # draw the cloud with matplotlib
    plt.axis('off')  # hide the axes
    plt.show()       # open the figure window
else:
    # WordCloud.generate raises ValueError when the text contains no words,
    # so skip rendering when no review text was collected.
    wc = None
    print('No review text collected; skipping the word cloud.')
Figure_1.png
上一篇下一篇

猜你喜欢

热点阅读