基于微信开放的个人号接口python库itchat,实现对微信好友的获取,并对省份、性别、微信签名做数据分析。
效果:
直接上代码,建三个空文本文件stopwords.txt,newdit.txt、unionWords.txt,下载字体simhei.ttf或删除字体要求的代码,就可以直接运行。
#wxfriends.py 2018-07-09 import itchat import sys import pandas as pd import matplotlib.pyplot as plt plt.rcParams['font.sans-serif']=['SimHei']#绘图时可以显示中文 plt.rcParams['axes.unicode_minus']=False#绘图时可以显示中文 import jieba import jieba.posseg as pseg from scipy.misc import imread from wordcloud import WordCloud from os import path #解决编码问题 non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd) #获取好友信息 def getFriends(): friends = itchat.get_friends(update=True)[0:] flists = [] for i in friends: fdict={} fdict['NickName']=i['NickName'].translate(non_bmp_map) if i['Sex'] == 1: fdict['Sex']='男' elif i['Sex'] == 2: fdict['Sex']='女' else: fdict['Sex']='雌雄同体' if i['Province'] == '': fdict['Province'] ='未知' else: fdict['Province']=i['Province'] fdict['City']=i['City'] fdict['Signature']=i['Signature'] flists.append(fdict) return flists #将好友信息保存成CSV def saveCSV(lists): df = pd.DataFrame(lists) try: df.to_csv("wxfriends.csv",index = True,encoding='gb18030') except Exception as ret: print(ret) return df #统计性别、省份字段 def anysys(df): df_sex = pd.DataFrame(df['Sex'].value_counts()) df_province = pd.DataFrame(df['Province'].value_counts()[:15]) df_signature = pd.DataFrame(df['Signature']) return df_sex,df_province,df_signature #绘制柱状图,并保存 def draw_chart(df_list,x_feature): try: x = list(df_list.index) ylist = df_list.values y = [] for i in ylist : for j in i: y.append(j) plt.bar(x,y,label=x_feature) plt.legend() plt.savefig(x_feature) plt.close() except: print("绘图失败") #解析取个性签名构成列表 def getSignList(signature): sig_list = [] for i in signature.values: for j in i: sig_list.append(j.translate(non_bmp_map)) return sig_list #分词处理,并根据需要填写停用词、自定义词、合并词替换 def segmentWords(txtlist): stop_words = set(line.strip() for line in open('stopwords.txt', encoding='utf-8')) newslist = [] #新增自定义词 jieba.load_userdict("newdit.txt") for subject in txtlist: if subject.isspace(): continue word_list = pseg.cut(subject) for word, flag in word_list: if not word in stop_words and flag == 'n' or flag == 'eng' and word !='span' and word !='class': newslist.append(word) #合并指定的相似词 for line in open('unionWords.txt', encoding='utf-8'): newline = line.encode('utf-8').decode('utf-8-sig') #解决\ufeff问题 unionlist = newline.split("*") for j in range(1,len(unionlist)): #wordDict[unionlist[0]] += wordDict.pop(unionlist[j],0) for index,value in enumerate(newslist): if value == unionlist[j]: newslist[index] = unionlist[0] return newslist #高频词统计 def countWords(newslist): wordDict = {} for item in newslist: wordDict[item] = wordDict.get(item,0) + 1 itemList = list(wordDict.items()) itemList.sort(key=lambda x:x[1],reverse=True) for i in range(100): word, count = itemList[i] print("{}:{}".format(word,count)) #绘制词云 def drawPlant(newslist): d = path.dirname(__file__) mask_image = imread(path.join(d, "timg.png")) content = ' '.join(newslist) wordcloud = WordCloud(font_path='simhei.ttf', background_color="white",width=1300,height=620, max_words=200).generate(content) #mask=mask_image, # Display the generated image: plt.imshow(wordcloud) plt.axis("off") wordcloud.to_file('wordcloud.jpg') plt.show() def main(): #登陆微信 itchat.auto_login() # 登陆后不需要扫码 hotReload=True flists = getFriends() fdf = saveCSV(flists) df_sex,df_province,df_signature = anysys(fdf) draw_chart(df_sex,"性别") draw_chart(df_province,"省份") wordList = segmentWords(getSignList(df_signature)) countWords(wordList) drawPlant(wordList) main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
广告合作:本站广告合作请联系QQ:858582 申请时备注:广告合作(否则不回)
免责声明:本站资源来自互联网收集,仅供用于学习和交流,请遵循相关法律法规,本站一切资源不代表本站立场,如有侵权、后门、不妥请联系本站删除!
免责声明:本站资源来自互联网收集,仅供用于学习和交流,请遵循相关法律法规,本站一切资源不代表本站立场,如有侵权、后门、不妥请联系本站删除!
暂无评论...
更新日志
2024年11月25日
2024年11月25日
- 凤飞飞《我们的主题曲》飞跃制作[正版原抓WAV+CUE]
- 刘嘉亮《亮情歌2》[WAV+CUE][1G]
- 红馆40·谭咏麟《歌者恋歌浓情30年演唱会》3CD[低速原抓WAV+CUE][1.8G]
- 刘纬武《睡眠宝宝竖琴童谣 吉卜力工作室 白噪音安抚》[320K/MP3][193.25MB]
- 【轻音乐】曼托凡尼乐团《精选辑》2CD.1998[FLAC+CUE整轨]
- 邝美云《心中有爱》1989年香港DMIJP版1MTO东芝首版[WAV+CUE]
- 群星《情叹-发烧女声DSD》天籁女声发烧碟[WAV+CUE]
- 刘纬武《睡眠宝宝竖琴童谣 吉卜力工作室 白噪音安抚》[FLAC/分轨][748.03MB]
- 理想混蛋《Origin Sessions》[320K/MP3][37.47MB]
- 公馆青少年《我其实一点都不酷》[320K/MP3][78.78MB]
- 群星《情叹-发烧男声DSD》最值得珍藏的完美男声[WAV+CUE]
- 群星《国韵飘香·贵妃醉酒HQCD黑胶王》2CD[WAV]
- 卫兰《DAUGHTER》【低速原抓WAV+CUE】
- 公馆青少年《我其实一点都不酷》[FLAC/分轨][398.22MB]
- ZWEI《迟暮的花 (Explicit)》[320K/MP3][57.16MB]