B站搜索结果爬虫

2019-08-07  本文已影响0人  年画儿
import requests
import json
import pandas as pd
# Bilibili search API: http://api.bilibili.com/x/web-interface/search/type?search_type=video&highlight=1&keyword=insta360&from_source=banner_search&page=3&jsonp=jsonp&callback=__jp1


# Request fields mirroring the search API's query parameters.
# NOTE: per the original author's observation, none of these values
# appear to matter for the response.
data = dict(
    search_type="video",
    highlight="1",
    keyword="keyword",
    from_source="banner_search",
    page=10,
    jsonp="jsonp",
    callback="__jp1",
)
# Browser-like HTTP headers: a Referer pointing at the Bilibili search page
# and a desktop Chrome User-Agent string.
header = {
    "Referer": (
        "https://search.bilibili.com/all"
        "?keyword=insta360&from_source=banner_search"
    ),
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.100 Safari/537.36"
    ),
}

# Pre-build the search API URL for every result page, 1 through 50.
urls = [
    "http://api.bilibili.com/x/web-interface/search/type?search_type=video&highlight=1&keyword=insta360&page=%i" % page
    for page in range(1, 51)
]
# print(urls)

# Download every search-result page and flatten the hits into one CSV.
results = []  # named `results` so the builtin `list` is not shadowed

with requests.Session() as session:
    # Attach the browser-like Referer/User-Agent from `header` to every
    # request made through this session.
    session.headers.update(header)
    for url in urls:
        # The query string is already part of the URL, so no form body is
        # sent (a body on a GET request is not meaningful). The timeout keeps
        # a stalled connection from hanging the whole run.
        r = session.get(url, timeout=10)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        r.raise_for_status()
        # Parse the JSON reply and pull out this page's list of hits.
        content = (r.json().get("data") or {}).get("result")
        if not content:
            # No "result" entry for this page: stop paging rather than raise
            # KeyError and lose everything gathered so far.
            break
        results.extend(content)

# One row per search hit; columns come from the keys of each hit dict.
df = pd.DataFrame(results)
df.to_csv("insta360_result.csv")
上一篇下一篇

猜你喜欢

热点阅读