# China Weather Network (weather.com.cn) crawler
#
# Posted 2019-07-29 by 徐弱西 (readers reached at capture time: 0)
from bs4 import BeautifulSoup
import requests
from pyecharts import Bar
# Module-level accumulator: parse_page appends one
# {'city': ..., 'min_temp': ...} dict per parsed table row.
ALL_DATA = []

def parse_page(url, timeout=10):
    """Fetch one forecast page and append each row's city and minimum temperature to ALL_DATA.

    Args:
        url: Address of the page to download and parse.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36 '
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the whole run.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')
    # html5lib is slower than lxml but tolerates irregular HTML.
    soup = BeautifulSoup(text, 'html5lib')
    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        # Page layout differs from what we expect; nothing to extract.
        return
    for table in conMidtab.find_all('table'):
        # Skip the first two rows (presumably header rows -- confirm against the page).
        rows = table.find_all('tr')[2:]
        for index, tr in enumerate(rows):
            tds = tr.find_all('td')
            if len(tds) < 2:
                # Both tds[1] and tds[-2] below need at least two cells.
                continue
            # In the first data row of each table the city name sits in the
            # second cell (presumably after a leading province cell -- confirm).
            city_td = tds[1] if index == 0 else tds[0]
            city_strings = list(city_td.stripped_strings)
            temp_strings = list(tds[-2].stripped_strings)  # second-to-last cell: minimum temperature
            if not city_strings or not temp_strings:
                continue
            try:
                min_temp = int(temp_strings[0])
            except ValueError:
                # Non-numeric placeholder in the temperature cell; skip this
                # row rather than abort the whole scrape.
                continue
            ALL_DATA.append({'city': city_strings[0], 'min_temp': min_temp})


def main():
    """Scrape every regional page, then render a bar chart of the ten lowest minimum temperatures."""
    region_pages = [
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    ]
    for page in region_pages:
        parse_page(page)

    # Order the collected records in place, coldest first.
    ALL_DATA.sort(key=lambda record: record['min_temp'])
    coldest = ALL_DATA[:10]  # keep only the ten lowest

    # Split the records into parallel axis/value lists for the chart.
    cities = [record['city'] for record in coldest]
    temps = [record['min_temp'] for record in coldest]

    chart = Bar('最低气温柱状图')
    chart.add('', cities, temps)
    chart.render('template.html')


# Run the scraper only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()

# (Blog page navigation residue: previous / next article, recommended, trending.)