大数据 爬虫 Python AI SQL 生活不易 我用python python爬虫

Python使用itchat获取微信好友

2018-07-01  本文已影响4人  AwesomeTang

最近发现了一个好玩的包 itchat,它通过调用微信网页版的接口实现收发消息、获取好友信息等功能,各位可以移步 itchat 项目介绍查看详细信息。

目标:

获取好友信息

# -*- coding: utf-8 -*-
import itchat
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import jieba
import re
from scipy.misc import imread  
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
# Log in to WeChat Web; the login has to be confirmed by scanning a QR code
# with the phone.
itchat.login()
warnings.filterwarnings("ignore")
friends = itchat.get_friends(update=True)

# Build the frame in one call, one record per contact, skipping the first
# entry of the list (presumably the logged-in account itself -- confirm
# against the itchat documentation).
# Constructing from records aligns values by key, so it does not depend on
# every contact returning its fields in the same positional order, and it
# avoids growing the frame row by row.
df = pd.DataFrame([dict(friend) for friend in friends[1:]])

# Report the real number of rows: the last loop index is one short and is
# undefined when the contact list is empty.
print('获取到%d位好友信息' % len(df))
df.columns
Index([u'UserName', u'City', u'DisplayName', u'UniFriend', u'MemberList',
       u'PYQuanPin', u'RemarkPYInitial', u'Sex', u'AppAccountFlag',
       u'VerifyFlag', u'Province', u'KeyWord', u'RemarkName', u'PYInitial',
       u'IsOwner', u'ChatRoomId', u'HideInputBarFlag', u'EncryChatRoomId',
       u'AttrStatus', u'SnsFlag', u'MemberCount', u'OwnerUin', u'Alias',
       u'Signature', u'ContactFlag', u'NickName', u'RemarkPYQuanPin',
       u'HeadImgUrl', u'Uin', u'StarFriend', u'Statues'],
      dtype='object')

好友城市分布

# For these municipalities WeChat fills `City` with the district, so replace
# it with the municipality name taken from `Province`.
# `.loc` performs the assignment on `df` itself; the chained form
# `df['City'][mask] = ...` can end up writing to a temporary copy.
for municipality in (u'北京', u'上海', u'重庆'):
    df.loc[df['Province'] == municipality, 'City'] = municipality

plt.figure(figsize=(15, 12))

# Drop contacts that left the city blank, then count contacts per city and
# keep the 15 most common cities.
data_temp = df[df['City'] != '']
data_temp = data_temp.groupby(['City'])['UserName'].count().reset_index()
data_temp = data_temp.nlargest(15, 'UserName')
data_temp.columns = ['City', 'Count']

# Horizontal bar chart with the count written at the end of each bar.
sns.barplot(data=data_temp, y='City', x='Count')
for y, x in enumerate(data_temp['Count']):
    plt.text(x, y, x, fontsize=20)
plt.title(u'好友城市分布', fontsize=25)
plt.show()

性别分布

# Map the numeric codes in `Sex` to display labels.
df['Sex'] = df['Sex'].replace({0: u'性别不明', 1: u'男', 2: u'女'})
plt.figure(figsize=(15, 8))

# Left panel: bar chart of contacts per label, annotated with the counts.
plt.subplot(1, 2, 1)
data_temp = df.groupby(['Sex'])['UserName'].count().reset_index()
data_temp = data_temp.sort_values('UserName')
data_temp.columns = ['Sex', 'Count']
sns.barplot(data=data_temp, x='Sex', y='Count')
for x, y in enumerate(data_temp['Count']):
    plt.text(x - 0.05, y, y, fontsize=20)

# Right panel: pie chart of the same counts.
plt.subplot(1, 2, 2)
sex_list = [u'男', u'女', u'性别不明']
# Take the counts from the data computed above instead of hard-coding one
# account's numbers; a label with no contacts gets 0 so that the counts stay
# aligned with `sex_list`.
countlist = (
    data_temp.set_index('Sex')['Count']
    .reindex(sex_list, fill_value=0)
    .tolist()
)
explode = (0.05, 0, 0)
plt.pie(countlist, labels=sex_list, explode=explode, startangle=90, autopct='%3.1f%%')
plt.axis('equal')
plt.show()

根据好友签名绘制词云

# Load the mask image as an array. `plt.imread` is used because
# `scipy.misc.imread` no longer exists in current SciPy releases; the
# top-of-file `from scipy.misc import imread` line must also be removed for
# the script to import on those versions.
back_color = plt.imread('tencent.jpg')
wc = WordCloud(background_color='white',  # canvas colour
               max_words=1000,  # upper bound on the number of words drawn
               mask=back_color,  # draw inside this shape; width/height are ignored when a mask is given
               max_font_size=100,  # largest font size used
               font_path="/Users/tangwenpan/Downloads/simhei.ttf",  # CJK-capable font; without it Chinese renders as empty boxes
               random_state=42,  # seed for the generator's random choices (see the WordCloud docs)
               )

# Concatenate every character in the range U+4E00..U+9FA5 (Chinese
# characters) found in the signatures; everything else is discarded.
xx = u"[\u4e00-\u9fa5]"
# Compile once instead of on every iteration.
pattern = re.compile(xx)
# Missing signatures (NaN) are skipped because findall() only accepts strings.
# join() builds the result in one pass rather than by repeated concatenation.
text = u''.join(
    u''.join(pattern.findall(signature))
    for signature in df['Signature'].dropna()
)

def word_cloud(texts):
    """Segment *texts* with jieba and return the tokens joined by spaces.

    Tokens of length one are dropped, so only multi-character words are
    kept in the returned string.
    """
    # jieba.cut returns an iterator, so the tokens are filtered as they come.
    tokens = jieba.cut(texts, cut_all=False)
    return ' '.join(token for token in tokens if len(token) > 1)


# Turn the collected characters into space-separated words and lay out the
# cloud from them.
text = word_cloud(text)
wc.generate(text)

# Colour the words from the mask image.
image_colors = ImageColorGenerator(back_color)
plt.figure(figsize=(15, 15))
# Draw the recoloured cloud and hide the axes (one call is enough).
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis('off')

# Write the image to disk and confirm. The parenthesised print works on both
# Python 2 and Python 3.
wc.to_file('comment.png')
print('comment.png has been saved!')

peace~

上一篇下一篇

猜你喜欢

热点阅读