Python获取微博热搜

2020-10-08  本文已影响0人  ZYiDa

部分内容参考了网络资料,并根据自己的需求做了一些修改和补充。
爬取的微博热搜内容主要包括下面四个字段:

  • 热搜榜排序
  • 热搜关键字
  • 热搜次数
  • 热搜内容对应的地址

获取和解析热搜内容主要依赖 requests 库以及 lxml 库的 etree 模块,定时执行则使用 schedule 库。
全部内容如下:

from lxml import etree
import os,time,requests,json,schedule

# Output directory: './WeiboHotLocalSave' resolved against the current working directory.
path = os.path.abspath('./WeiboHotLocalSave/')
directory_missing = not os.path.exists(path)
if directory_missing:
  # Create the directory on first run and report where it was created.
  os.makedirs(path)
  print("当前路径为:{}".format(path))

# File inside the output directory that receives the hot-search data.
target_path = os.sep.join((path, 'WeiboHotSearchKey.txt'))

# Hot-search fetching module
def _first(values):
  """Return the first item of an XPath result list, or None when the list is empty."""
  return values[0] if values else None


def getWeiboHotSearch():
  """Fetch the Weibo real-time hot-search table and save it to ``target_path`` as JSON.

  Each table row becomes a dict with the keys ``hot_paiming`` (rank),
  ``hot_keyword`` (keyword), ``hot_search_nums`` (search count) and
  ``hot_url`` (absolute link to the topic). Every dict is printed as it is
  parsed, and the full list is written to ``target_path`` as one JSON array.

  If the HTTP request fails, a message is printed and the function returns
  without touching the existing file. Rows that lack any of the four fields
  are skipped instead of raising ``IndexError``.
  """
  # Weibo real-time hot-search page
  BASE_URL = "https://s.weibo.com/top/summary?cate=realtimehot"
  try:
    # A timeout keeps one stalled connection from blocking the caller forever.
    response = requests.get(BASE_URL, timeout=10)
    response.raise_for_status()
  except requests.RequestException as error:
    print("获取热搜失败:{}".format(error))
    return

  html = etree.HTML(response.text)
  # Locate the rows of the hot-search table
  nodes = html.xpath("//div[@class='data']/table/tbody/tr")

  all_hot_objects = []

  # The first row is skipped, as in the original code
  # (presumably a pinned entry without a rank -- TODO confirm against the page).
  for node in nodes[1:]:
    # Rank on the hot-search list
    hot_paiming = _first(node.xpath('./td[1]/text()'))
    # Hot-search keyword
    hot_keyword = _first(node.xpath('./td[2]/a/text()'))
    # Number of searches
    hot_search_nums = _first(node.xpath('./td[2]/span/text()'))
    # Relative link of the topic
    hot_href = _first(node.xpath('./td[2]/a/@href'))

    fields = (hot_paiming, hot_keyword, hot_search_nums, hot_href)
    if any(value is None for value in fields):
      # Incomplete row: skip it rather than abort the whole run.
      continue

    hot_object = {
      "hot_paiming": hot_paiming,
      "hot_keyword": hot_keyword,
      "hot_search_nums": hot_search_nums,
      "hot_url": "https://s.weibo.com" + hot_href
    }
    print(hot_object)

    all_hot_objects.append(hot_object)

  # Serialize exactly once: dumping an already-dumped string would store a
  # JSON string literal instead of a JSON array.
  with open(target_path, "w", encoding="utf-8") as file:
    json.dump(all_hot_objects, file, ensure_ascii=False)
  print("写入成功,时间:{},路径:{}".format(time.strftime("%Y%m%d"),target_path))


# Scheduler module: register the fetch job to run every 10 seconds, then poll forever.
_fetch_job = schedule.every(10).seconds
_fetch_job.do(getWeiboHotSearch)
while True:
  # Run whatever is due, then wait one second before checking again.
  schedule.run_pending()
  time.sleep(1)

控制台输出

aoteman@atm-iMac ~ % /usr/local/bin/python3 /Users/aoteman/Desktop/WeiboHot/WeiboHot.py
{'hot_paiming': '1', 'hot_keyword': '孙怡在澡堂被人认出', 'hot_search_nums': '5747043', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%AD%99%E6%80%A1%E5%9C%A8%E6%BE%A1%E5%A0%82%E8%A2%AB%E4%BA%BA%E8%AE%A4%E5%87%BA%23&Refer=top'}
{'hot_paiming': '2', 'hot_keyword': '特朗普承诺美民众免费治疗新冠', 'hot_search_nums': '2345961', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%89%B9%E6%9C%97%E6%99%AE%E6%89%BF%E8%AF%BA%E7%BE%8E%E6%B0%91%E4%BC%97%E5%85%8D%E8%B4%B9%E6%B2%BB%E7%96%97%E6%96%B0%E5%86%A0%23&Refer=top'}
{'hot_paiming': '3', 'hot_keyword': '石原里美结婚原因', 'hot_search_nums': '2323270', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%9F%B3%E5%8E%9F%E9%87%8C%E7%BE%8E%E7%BB%93%E5%A9%9A%E5%8E%9F%E5%9B%A0%23&Refer=top'}
{'hot_paiming': '4', 'hot_keyword': '央视镜头下的吴亦凡', 'hot_search_nums': '2093942', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%A4%AE%E8%A7%86%E9%95%9C%E5%A4%B4%E4%B8%8B%E7%9A%84%E5%90%B4%E4%BA%A6%E5%87%A1%23&Refer=top'}
{'hot_paiming': '5', 'hot_keyword': '日本将取消对中国旅行禁令', 'hot_search_nums': '1799724', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E6%97%A5%E6%9C%AC%E5%B0%86%E5%8F%96%E6%B6%88%E5%AF%B9%E4%B8%AD%E5%9B%BD%E6%97%85%E8%A1%8C%E7%A6%81%E4%BB%A4%23&Refer=top'}
{'hot_paiming': '6', 'hot_keyword': '原来七里香是鸡屁股', 'hot_search_nums': '1545329', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%8E%9F%E6%9D%A5%E4%B8%83%E9%87%8C%E9%A6%99%E6%98%AF%E9%B8%A1%E5%B1%81%E8%82%A1%23&Refer=top'}
{'hot_paiming': '7', 'hot_keyword': '杜卡迪致歉声明', 'hot_search_nums': '1440257', 'hot_url': 'https://s.weibo.com/weibo?q=%E6%9D%9C%E5%8D%A1%E8%BF%AA%E8%87%B4%E6%AD%89%E5%A3%B0%E6%98%8E&Refer=top'}
{'hot_paiming': '8', 'hot_keyword': '2020年假期全部结束', 'hot_search_nums': '782024', 'hot_url': 'https://s.weibo.com/weibo?q=%232020%E5%B9%B4%E5%81%87%E6%9C%9F%E5%85%A8%E9%83%A8%E7%BB%93%E6%9D%9F%23&Refer=top'}
{'hot_paiming': '9', 'hot_keyword': '特朗普宣布驻阿富汗美军圣诞前撤回', 'hot_search_nums': '644295', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%89%B9%E6%9C%97%E6%99%AE%E5%AE%A3%E5%B8%83%E9%A9%BB%E9%98%BF%E5%AF%8C%E6%B1%97%E7%BE%8E%E5%86%9B%E5%9C%A3%E8%AF%9E%E5%89%8D%E6%92%A4%E5%9B%9E%23&Refer=top'}
{'hot_paiming': '10', 'hot_keyword': '13岁猪坚强别墅中散步', 'hot_search_nums': '465735', 'hot_url': 'https://s.weibo.com/weibo?q=%2313%E5%B2%81%E7%8C%AA%E5%9D%9A%E5%BC%BA%E5%88%AB%E5%A2%85%E4%B8%AD%E6%95%A3%E6%AD%A5%23&Refer=top'}
{'hot_paiming': '11', 'hot_keyword': '王菲在草莓音乐节把手机蹦丢了', 'hot_search_nums': '451515', 'hot_url': 'https://s.weibo.com/weibo?q=%E7%8E%8B%E8%8F%B2%E5%9C%A8%E8%8D%89%E8%8E%93%E9%9F%B3%E4%B9%90%E8%8A%82%E6%8A%8A%E6%89%8B%E6%9C%BA%E8%B9%A6%E4%B8%A2%E4%BA%86&Refer=top'}
{'hot_paiming': '12', 'hot_keyword': '假期最后一天', 'hot_search_nums': '402664', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%81%87%E6%9C%9F%E6%9C%80%E5%90%8E%E4%B8%80%E5%A4%A9%23&Refer=top'}
{'hot_paiming': '13', 'hot_keyword': '国庆前7天超6亿人次出游', 'hot_search_nums': '355363', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%9B%BD%E5%BA%86%E5%89%8D7%E5%A4%A9%E8%B6%856%E4%BA%BF%E4%BA%BA%E6%AC%A1%E5%87%BA%E6%B8%B8%23&Refer=top'}
{'hot_paiming': '14', 'hot_keyword': '跪杀弗洛伊德前警官保释出狱', 'hot_search_nums': '355266', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E8%B7%AA%E6%9D%80%E5%BC%97%E6%B4%9B%E4%BC%8A%E5%BE%B7%E5%89%8D%E8%AD%A6%E5%AE%98%E4%BF%9D%E9%87%8A%E5%87%BA%E7%8B%B1%23&Refer=top'}
{'hot_paiming': '15', 'hot_keyword': '寒露', 'hot_search_nums': '351534', 'hot_url': 'https://s.weibo.com/weibo?q=%E5%AF%92%E9%9C%B2&Refer=top'}
{'hot_paiming': '16', 'hot_keyword': '王一博摔车官方仲裁', 'hot_search_nums': '351305', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%8E%8B%E4%B8%80%E5%8D%9A%E6%91%94%E8%BD%A6%E5%AE%98%E6%96%B9%E4%BB%B2%E8%A3%81%23&Refer=top'}
{'hot_paiming': '17', 'hot_keyword': '山东小伙行李被妈妈塞秋裤超重10斤', 'hot_search_nums': '351185', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%B1%B1%E4%B8%9C%E5%B0%8F%E4%BC%99%E8%A1%8C%E6%9D%8E%E8%A2%AB%E5%A6%88%E5%A6%88%E5%A1%9E%E7%A7%8B%E8%A3%A4%E8%B6%85%E9%87%8D10%E6%96%A4%23&Refer=top'}
{'hot_paiming': '18', 'hot_keyword': '湖人改在总决赛G5穿曼巴球衣', 'hot_search_nums': '351011', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E6%B9%96%E4%BA%BA%E6%94%B9%E5%9C%A8%E6%80%BB%E5%86%B3%E8%B5%9BG5%E7%A9%BF%E6%9B%BC%E5%B7%B4%E7%90%83%E8%A1%A3%23&Refer=top'}
{'hot_paiming': '19', 'hot_keyword': '男子10000米世界纪录告破', 'hot_search_nums': '350757', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%94%B7%E5%AD%9010000%E7%B1%B3%E4%B8%96%E7%95%8C%E7%BA%AA%E5%BD%95%E5%91%8A%E7%A0%B4%23&Refer=top'}
{'hot_paiming': '20', 'hot_keyword': '女生给朋友拍照时有多努力', 'hot_search_nums': '350466', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%A5%B3%E7%94%9F%E7%BB%99%E6%9C%8B%E5%8F%8B%E6%8B%8D%E7%85%A7%E6%97%B6%E6%9C%89%E5%A4%9A%E5%8A%AA%E5%8A%9B%23&Refer=top'}
{'hot_paiming': '21', 'hot_keyword': '秦时明月', 'hot_search_nums': '350146', 'hot_url': 'https://s.weibo.com/weibo?q=%E7%A7%A6%E6%97%B6%E6%98%8E%E6%9C%88&Refer=top'}
{'hot_paiming': '22', 'hot_keyword': '这就是喝奶茶时的你', 'hot_search_nums': '349922', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E8%BF%99%E5%B0%B1%E6%98%AF%E5%96%9D%E5%A5%B6%E8%8C%B6%E6%97%B6%E7%9A%84%E4%BD%A0%23&Refer=top'}
{'hot_paiming': '23', 'hot_keyword': '李佳琦双十一预告', 'hot_search_nums': '349646', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E6%9D%8E%E4%BD%B3%E7%90%A6%E5%8F%8C%E5%8D%81%E4%B8%80%E9%A2%84%E5%91%8A%23&Refer=top'}
{'hot_paiming': '24', 'hot_keyword': '瘫痪作家一根手指敲出300万字', 'hot_search_nums': '349287', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%98%AB%E7%97%AA%E4%BD%9C%E5%AE%B6%E4%B8%80%E6%A0%B9%E6%89%8B%E6%8C%87%E6%95%B2%E5%87%BA300%E4%B8%87%E5%AD%97%23&Refer=top'}
{'hot_paiming': '25', 'hot_keyword': '四川一农家乐屋顶垮塌致26伤', 'hot_search_nums': '349246', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%9B%9B%E5%B7%9D%E4%B8%80%E5%86%9C%E5%AE%B6%E4%B9%90%E5%B1%8B%E9%A1%B6%E5%9E%AE%E5%A1%8C%E8%87%B426%E4%BC%A4%23&Refer=top'}
{'hot_paiming': '26', 'hot_keyword': '这就是贵公子喝酒', 'hot_search_nums': '347460', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E8%BF%99%E5%B0%B1%E6%98%AF%E8%B4%B5%E5%85%AC%E5%AD%90%E5%96%9D%E9%85%92%23&Refer=top'}
{'hot_paiming': '27', 'hot_keyword': '张艺兴生日长文', 'hot_search_nums': '320294', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%BC%A0%E8%89%BA%E5%85%B4%E7%94%9F%E6%97%A5%E9%95%BF%E6%96%87%23&Refer=top'}
{'hot_paiming': '28', 'hot_keyword': '百岁火车站', 'hot_search_nums': '287670', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%99%BE%E5%B2%81%E7%81%AB%E8%BD%A6%E7%AB%99%23&Refer=top'}
{'hot_paiming': '29', 'hot_keyword': '天问一号将脱离地球轨道', 'hot_search_nums': '275657', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%A4%A9%E9%97%AE%E4%B8%80%E5%8F%B7%E5%B0%86%E8%84%B1%E7%A6%BB%E5%9C%B0%E7%90%83%E8%BD%A8%E9%81%93%23&Refer=top'}
{'hot_paiming': '30', 'hot_keyword': '密室逃脱玩成了健身', 'hot_search_nums': '273910', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%AF%86%E5%AE%A4%E9%80%83%E8%84%B1%E7%8E%A9%E6%88%90%E4%BA%86%E5%81%A5%E8%BA%AB%23&Refer=top'}
{'hot_paiming': '31', 'hot_keyword': '薇娅双十一爆款清单', 'hot_search_nums': '232979', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E8%96%87%E5%A8%85%E5%8F%8C%E5%8D%81%E4%B8%80%E7%88%86%E6%AC%BE%E6%B8%85%E5%8D%95%23&Refer=top'}
{'hot_paiming': '32', 'hot_keyword': '全球最大霸王龙化石拍卖', 'hot_search_nums': '222626', 'hot_url': 'https://s.weibo.com/weibo?q=%E5%85%A8%E7%90%83%E6%9C%80%E5%A4%A7%E9%9C%B8%E7%8E%8B%E9%BE%99%E5%8C%96%E7%9F%B3%E6%8B%8D%E5%8D%96&Refer=top'}
{'hot_paiming': '33', 'hot_keyword': '许佳琪婚纱造型', 'hot_search_nums': '218726', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E8%AE%B8%E4%BD%B3%E7%90%AA%E5%A9%9A%E7%BA%B1%E9%80%A0%E5%9E%8B%23&Refer=top'}
{'hot_paiming': '34', 'hot_keyword': '我国提前完成1亿人口落户目标', 'hot_search_nums': '204588', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E6%88%91%E5%9B%BD%E6%8F%90%E5%89%8D%E5%AE%8C%E6%88%901%E4%BA%BF%E4%BA%BA%E5%8F%A3%E8%90%BD%E6%88%B7%E7%9B%AE%E6%A0%87%23&Refer=top'}
{'hot_paiming': '35', 'hot_keyword': '舍不得国庆的我', 'hot_search_nums': '199031', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E8%88%8D%E4%B8%8D%E5%BE%97%E5%9B%BD%E5%BA%86%E7%9A%84%E6%88%91%23&Refer=top'}
{'hot_paiming': '36', 'hot_keyword': '当代年轻人的四大坎', 'hot_search_nums': '189382', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%BD%93%E4%BB%A3%E5%B9%B4%E8%BD%BB%E4%BA%BA%E7%9A%84%E5%9B%9B%E5%A4%A7%E5%9D%8E%23&Refer=top'}
{'hot_paiming': '37', 'hot_keyword': '双节出游时拍下的风景', 'hot_search_nums': '149645', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%8F%8C%E8%8A%82%E5%87%BA%E6%B8%B8%E6%97%B6%E6%8B%8D%E4%B8%8B%E7%9A%84%E9%A3%8E%E6%99%AF%23&Refer=top'}
{'hot_paiming': '38', 'hot_keyword': '破绽百出的魔术表演', 'hot_search_nums': '133409', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%A0%B4%E7%BB%BD%E7%99%BE%E5%87%BA%E7%9A%84%E9%AD%94%E6%9C%AF%E8%A1%A8%E6%BC%94%23&Refer=top'}
{'hot_paiming': '39', 'hot_keyword': '风犬少年的天空', 'hot_search_nums': '115935', 'hot_url': 'https://s.weibo.com/weibo?q=%E9%A3%8E%E7%8A%AC%E5%B0%91%E5%B9%B4%E7%9A%84%E5%A4%A9%E7%A9%BA&Refer=top'}
{'hot_paiming': '40', 'hot_keyword': '在寝室睡的太晚的后果', 'hot_search_nums': '109643', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%9C%A8%E5%AF%9D%E5%AE%A4%E7%9D%A1%E7%9A%84%E5%A4%AA%E6%99%9A%E7%9A%84%E5%90%8E%E6%9E%9C%23&Refer=top'}
{'hot_paiming': '41', 'hot_keyword': '天龙座流星雨', 'hot_search_nums': '102929', 'hot_url': 'https://s.weibo.com/weibo?q=%E5%A4%A9%E9%BE%99%E5%BA%A7%E6%B5%81%E6%98%9F%E9%9B%A8&Refer=top'}
{'hot_paiming': '42', 'hot_keyword': '广东汕尾300米长奇葩减速带', 'hot_search_nums': '93509', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%B9%BF%E4%B8%9C%E6%B1%95%E5%B0%BE300%E7%B1%B3%E9%95%BF%E5%A5%87%E8%91%A9%E5%87%8F%E9%80%9F%E5%B8%A6%23&Refer=top'}
{'hot_paiming': '43', 'hot_keyword': '找对象一定要找广东的', 'hot_search_nums': '91529', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E6%89%BE%E5%AF%B9%E8%B1%A1%E4%B8%80%E5%AE%9A%E8%A6%81%E6%89%BE%E5%B9%BF%E4%B8%9C%E7%9A%84%23&Refer=top'}
{'hot_paiming': '44', 'hot_keyword': '被塞满的后备箱专治乡愁', 'hot_search_nums': '90057', 'hot_url': 'https://s.weibo.com/weibo?q=%E8%A2%AB%E5%A1%9E%E6%BB%A1%E7%9A%84%E5%90%8E%E5%A4%87%E7%AE%B1%E4%B8%93%E6%B2%BB%E4%B9%A1%E6%84%81&Refer=top'}
{'hot_paiming': '45', 'hot_keyword': '王子茹裁掉张芝芝', 'hot_search_nums': '89950', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%8E%8B%E5%AD%90%E8%8C%B9%E8%A3%81%E6%8E%89%E5%BC%A0%E8%8A%9D%E8%8A%9D%23&Refer=top'}
{'hot_paiming': '46', 'hot_keyword': '美国新冠肺炎超753万例', 'hot_search_nums': '84802', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E7%BE%8E%E5%9B%BD%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E8%B6%85753%E4%B8%87%E4%BE%8B%23&Refer=top'}
{'hot_paiming': '47', 'hot_keyword': '彭昱畅露腹肌', 'hot_search_nums': '83184', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%BD%AD%E6%98%B1%E7%95%85%E9%9C%B2%E8%85%B9%E8%82%8C%23&Refer=top'}
{'hot_paiming': '48', 'hot_keyword': '尹正 你们在欢呼什么', 'hot_search_nums': '78618', 'hot_url': 'https://s.weibo.com/weibo?q=%E5%B0%B9%E6%AD%A3%20%E4%BD%A0%E4%BB%AC%E5%9C%A8%E6%AC%A2%E5%91%BC%E4%BB%80%E4%B9%88&Refer=top'}
{'hot_paiming': '49', 'hot_keyword': '俄罗斯一座军火库爆炸', 'hot_search_nums': '78381', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E4%BF%84%E7%BD%97%E6%96%AF%E4%B8%80%E5%BA%A7%E5%86%9B%E7%81%AB%E5%BA%93%E7%88%86%E7%82%B8%23&Refer=top'}
{'hot_paiming': '50', 'hot_keyword': '孙俪再演房姐', 'hot_search_nums': '74181', 'hot_url': 'https://s.weibo.com/weibo?q=%23%E5%AD%99%E4%BF%AA%E5%86%8D%E6%BC%94%E6%88%BF%E5%A7%90%23&Refer=top'}
写入成功,时间:20201008,路径:/Users/aoteman/WeiboHotLocalSave/WeiboHotSearchKey.txt
上一篇下一篇

猜你喜欢

热点阅读