用Python从零开始写一个简单爬虫
2017-09-20 本文已影响0人
__Gavin__
实现步骤
配置 PyCharm
- 选择 Python 版本
- 安装以下依赖包:requests、urllib3、lxml、beautifulsoup4
config.png
编写代码
import requests
from bs4 import BeautifulSoup
# Fetch one Baidu Tieba listing page and print, for every matched post
# anchor, its title and link, followed by the text of every matched
# timestamp element.
url = "https://tieba.baidu.com/f?kw=王者荣耀&fr=home&fp=0&ie=utf-8"

# Without a timeout requests waits indefinitely on an unresponsive server;
# raise_for_status() stops the script instead of parsing an HTTP error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
wbdata = response.text

soup = BeautifulSoup(wbdata, 'lxml')
posts_titles = soup.select("div.threadlist_lz > div.threadlist_title > a.j_th_tit")
posts_times = soup.select("div.threadlist_author > span.pull-right")

# Walk the matched title anchors and print the title and link of each one.
for post in posts_titles:
    # The title is read from the anchor's "title" attribute;
    # post.get_text() would return the visible anchor text instead.
    title = post.get("title")
    link = post.get("href")
    data = {
        '标题': title,
        '链接': link,
    }
    print(data)

# Walk the matched timestamp spans. The loop variable is not called `time`
# so that it does not shadow the standard-library module of that name.
for time_tag in posts_times:
    post_time = time_tag.get_text()
    # NOTE(review): the span's "title" attribute is used as the dict key;
    # it is None when the attribute is absent - confirm against the page markup.
    title = time_tag.get("title")
    data = {
        title: post_time,
    }
    print(data)