用Python从零开始写一个简单爬虫

2017-09-20  本文已影响0人  __Gavin__

实现步骤

配置 PyCharm
编写代码
import requests
from bs4 import BeautifulSoup

# Listing page of the Tieba forum selected by the `kw` query parameter.
url = "https://tieba.baidu.com/f?kw=王者荣耀&fr=home&fp=0&ie=utf-8"

# Fetch the page. The timeout keeps the script from blocking forever on a
# stalled connection; raise_for_status() aborts on a 4xx/5xx response so an
# HTTP error page is never parsed as if it were the listing.
response = requests.get(url, timeout=10)
response.raise_for_status()
wbdata = response.text

# Parse the HTML using the lxml parser backend.
soup = BeautifulSoup(wbdata, 'lxml')

# <a class="j_th_tit"> elements that are direct children of
# div.threadlist_title, itself a direct child of div.threadlist_lz.
posts_titles = soup.select("div.threadlist_lz > div.threadlist_title > a.j_th_tit")

# <span class="pull-right"> elements directly under div.threadlist_author
# (presumably the per-post timestamp -- verify against the live markup).
posts_times = soup.select("div.threadlist_author > span.pull-right")

# Walk every matched title anchor and print one record per post.
for anchor in posts_titles:
    # The title is read from the anchor's "title" attribute and the link from
    # its "href" attribute; .get() yields None when an attribute is absent.
    print({'标题': anchor.get("title"), '链接': anchor.get("href")})

# Print one single-entry dict per matched span: the span's "title" attribute
# (None when absent) mapped to the span's text content.
for stamp in posts_times:
    print({stamp.get("title"): stamp.get_text()})
上一篇 | 下一篇

猜你喜欢

热点阅读