Python爬虫案例分享python

Python采集全国疫情数据,可视化展示各数据数值

2022-06-01  本文已影响0人  Python案例教学

前言

最近很多同学因为毕设和大作业的原因,想要分析疫情的数据,今天就在这里写一篇采集全国疫情数据并做可视化展示的教程。


开发环境

知识点

  1. 代码基本流程
  2. requests 发送请求
  3. re 正则表达式
  4. json 结构化数据解析
  5. pyecharts 可视化

首先要采集疫情的数据

实现代码

  1. 发送请求
  2. 获取数据
  3. 解析数据
  4. 保存数据

1. 发送请求

# Step 1: send the HTTP request for the page that carries the epidemic data.
# NOTE: `url` is not assigned anywhere in this snippet; it has to be defined
# before these lines run.
headers = {
    # Basic browser information
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36'
}
# Without a timeout requests waits indefinitely for an unresponsive server,
# so cap the wait instead of letting the script hang.
response = requests.get(url=url, headers=headers, timeout=10)
print(response)

返回<Response [200]>: 已经请求成功了

2. 获取数据

# Step 2: take the response body as text; the parsing step below searches this string.
html_data = response.text

3. 解析数据

\: 转义字符(把含有特殊含义的字符转变为普通的字符)
\[(.*)\]
\[ \]: 匹配普通的 [ ] 字符
(): 我只需要 (里面的内容)
.: 匹配任意字符一次
*: 匹配前面的字符零次或者多次

# Step 3: pull the JSON text that sits between `"component":[` and `],` out of
# the page source. A raw string is used so `\[` / `\]` reach the regex engine
# as written instead of being flagged as invalid string escape sequences.
json_str = re.findall(r'"component":\[(.*)\],', html_data)[0]
# json.loads turns the JSON text into a Python dict; values are read by key.
json_dict = json.loads(json_str)
caseList = json_dict['caseList']
for case in caseList:
    area = case['area']     # province / region name
    curConfirm = case['curConfirm']     # currently confirmed cases
    # presumably the change in currently confirmed cases — verify against the data source
    curConfirmRelative = case['curConfirmRelative']
    confirmed = case['confirmed']     # cumulative confirmed cases
    crued = case['crued']     # cured cases (the key is spelled 'crued' in the data)
    died = case['died']     # deaths
    print(area, curConfirm, curConfirmRelative, confirmed, crued, died)

4. 保存数据(表格)

# Step 4: append one record to data.csv (newline='' lets the csv module control
# line endings).
# NOTE: this has to run once per case, i.e. inside the `for case in caseList`
# loop; executed after the loop it only stores the values of the last case.
with open('data.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    # An empty file gets a header row first: the visualisation code selects
    # columns by name (df['area'], df['curConfirm'], ...), which needs a header.
    # In append mode the stream starts at the end, so position 0 means "empty".
    if f.tell() == 0:
        csv_writer.writerow(['area', 'curConfirm', 'curConfirmRelative', 'confirmed', 'crued', 'died'])
    csv_writer.writerow([area, curConfirm, curConfirmRelative, confirmed, crued, died])

可视化代码

导入数据

# Load the saved records from data.csv into a DataFrame.
df = pd.read_csv('data.csv', encoding='utf-8')
# Preview the first rows (as the last expression it is displayed when run in a notebook cell).
df.head()

各地区确诊人数

# Map of China coloured by the number of currently confirmed cases per region.
# Each entry pairs a region name with its count: [[area, curConfirm], ...]
current_pairs = [
    [region, count]
    for region, count in zip(df['area'].values.tolist(), df['curConfirm'].values.tolist())
]

china_map = Map()
china_map.add("现有确诊", current_pairs, "china")
china_map.set_global_opts(
    title_opts=opts.TitleOpts(title="各地区确诊人数"),
    # Piecewise legend with an upper bound of 200
    visualmap_opts=opts.VisualMapOpts(max_=200, is_piecewise=True),
)
china_map.render_notebook()

新型冠状病毒全国疫情地图

import pyecharts
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.datasets import register_url

# Lists kept from the original script; nothing in the visible code fills or reads them.
cofirm, currentCofirm, cured, dead = [], [], [], []

# Tab container: one dark-themed China map per metric, switchable by tab.
tab = Tab()

_MAP_TITLE = "新型冠状病毒全国疫情地图"
_RED_SCALE = ['#FFFFE0', '#FFA07A', '#CD5C5C', '#8B0000']
_GREEN_SCALE = ['#FFFFE0', 'green']


def _build_china_map(series_name, column, max_value, colors):
    """Build one dark-themed China map for a single column of ``df``.

    Args:
        series_name: Name of the series passed to ``Map.add``.
        column: Column of ``df`` whose values are paired with ``df['area']``.
        max_value: Upper bound of the continuous visual-map scale.
        colors: Colour stops used for the visual-map gradient.

    Returns:
        The configured ``Map`` chart.
    """
    data_pairs = [
        list(pair)
        for pair in zip(df['area'].values.tolist(), df[column].values.tolist())
    ]
    return (
        Map(init_opts=opts.InitOpts(theme='dark', width='1000px'))
        .add(series_name, data_pairs, "china", is_map_symbol_show=False, is_roam=False)
        .set_series_opts(label_opts=opts.LabelOpts(is_show=True))
        .set_global_opts(
            title_opts=opts.TitleOpts(title=_MAP_TITLE),
            legend_opts=opts.LegendOpts(is_show=False),
            visualmap_opts=opts.VisualMapOpts(
                is_show=True,
                max_=max_value,
                is_piecewise=False,
                # Copy so every chart owns its own colour list
                range_color=list(colors),
            ),
        )
    )


# (series name, DataFrame column, tab label, visual-map max, colour scale)
_MAP_SPECS = [
    ("累计确诊人数", 'confirmed', '累计确诊', 1000, _RED_SCALE),
    ("当前确诊人数", 'curConfirm', '当前确诊', 100, _RED_SCALE),
    ("治愈人数", 'crued', '治愈', 1000, _GREEN_SCALE),
    ("死亡人数", 'died', '死亡', 50, _RED_SCALE),
]

for _series_name, _column, _tab_label, _max_value, _colors in _MAP_SPECS:
    _map = _build_china_map(_series_name, _column, _max_value, _colors)
    tab.add(_map, _tab_label)

tab.render_notebook()

各地区确诊人数与死亡人数情况

# Bar chart comparing deaths and cured cases for the first six rows of the data.
# NOTE(review): the chart title mentions confirmed cases and deaths, while the
# plotted series are deaths and cured cases — confirm which one is intended.
first_regions = list(df['area'].values)[:6]
died_counts = df['died'].values.tolist()[:6]
cured_counts = df['crued'].values.tolist()[:6]

bar = Bar()
bar.add_xaxis(first_regions)
bar.add_yaxis("死亡", died_counts)
bar.add_yaxis("治愈", cured_counts)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="各地区确诊人数与死亡人数情况"),
    # Zoom control along the category axis
    datazoom_opts=[opts.DataZoomOpts()],
)
bar.render_notebook()
上一篇 下一篇

猜你喜欢

热点阅读