生信分析流程

R画疫情图,源自Y叔公众号

2020-01-31  本文已影响0人  欧阳松

特别感谢Y叔(微信公众号[biobabble])的教程,教程来源:https://mp.weixin.qq.com/s/62ax8LCffS9yJhGPGVBpzg


R画疫情图,数据来源https://news.qq.com/zt2020/page/feiyan.htm

此教程需要Y叔在github上的chinamap包,地址为:http://github.com/guangchuangyu/chinamap,但考虑到部分地区无法访问github.com,故我下载好上传腾讯微云:https://share.weiyun.com/5FgrHT0
下载文件为chinamap-master.zip,首先解压成chinamap-master文件夹,记住安装路径,如D盘

install.packages("D:/chinamap-master",repos=NULL,type="source")

正式开始表演

require(jsonlite)
require(chinamap) # github.com/guangchuangyu/chinamap
require(ggplot2)

url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=1580373566110'
x = readLines(url, encoding="UTF-8")
x = sub("^\\d+", "", x)
x = sub("^\\(", "", x)
x = sub("\\)$", "", x)
y = fromJSON(x)
d = fromJSON(y$data)

d$chinaTotal #总数据
$date
#[1] "01.31"
$confirm
[1] "9708"
$suspect
[1] "15238"
$dead
[1] "213"
$heal
[1] "171"
d$chinaDayList #每天的数据
#    date confirm suspect dead heal
1  01.13      41       0    1    0
2  01.14      41       0    1    0
3  01.15      41       0    2    5
4  01.16      45       0    2    8
5  01.17      62       0    2   12
6  01.18     198       0    3   17
7  01.19     275       0    4   18
8  01.20     291      54    6   25
9  01.21     440      37    9   25
10 01.22     571     393   17   25
11 01.23     830    1072   25   34
12 01.24    1287    1965   41   38
13 01.25    1975    2684   56   49
14 01.26    2744    5794   80   51
15 01.27    4515    6973  106   60
16 01.28    5974    9239  132  103
17 01.29    7711   12167  170  124
18 01.30    9692   15238  213  171

我们可以拿到各个省市的数字,比如说湖北省的:

d$areaTree[1,2][[1]][1,2][[1]][,1:2]
 #        name total.confirm total.suspect total.dead total.heal
1        武汉          2639             0        159         72
2        黄冈           573             0         12          3
3        孝感           541             0          9          2
4        襄阳           286             0          0          0
5        随州           228             0          0          0
6        荆门           227             0          5          0
7        荆州           221             0          3          1
8        鄂州           189             0          6          0
9        黄石           168            49          1          0
10       宜昌           167             0          1          0
11       咸宁           166             0          0          0
12       十堰           150             0          0          0
13       仙桃            90             0          1          0
14     恩施州            75             0          0          0
15       天门            67             0          6          0
16       潜江            12             0          1          0
17     神农架             7             0          0          2
18 地区待确认             0             0          0         30

各个省的数据提取,数据切割

df = data.frame(name = d$areaTree$children[[1]]$name,
                confirm = cut(d$areaTree$children[[1]]$total$confirm, 
                c(1,10,100,500,1000,10000),
               include.lowest = T, right=F))

整理地图数据

cn = get_map_china()
cn$province = sub("省", "", cn$province)
cn$province = sub("自治区", "", cn$province)
cn$province = sub("市", "", cn$province)
cn$province = sub("特别行政区", "", cn$province)
cn$province = sub("维吾尔", "", cn$province)
cn$province = sub("壮族", "", cn$province)
cn$province = sub("回族", "", cn$province)

数据合并

cn2 = merge(cn, df, by.x='province', by.y='name', all.x=TRUE)
cn2 = cn2[order(cn2$order),]

画图

p=ggplot() +geom_map(aes(long, lat, map_id=id, group=group, fill=confirm), 
map=cn2, data=cn2, colour='grey') 
+coord_map() +scale_fill_viridis_d() +theme_minimal() 
+xlab(NULL) + ylab(NULL) +
labs(title = '2019nCov',subtitle = paste('confirmed cases:', d$chinaTotal$confirm),
caption=paste("accessed date:", d$chinaTotal$date))
# Warning: Ignoring unknown aesthetics: x, y
p #看一下图

Rplot.png

定义颜色,如红色

cols = RColorBrewer::brewer.pal(5, 'Reds')
names(cols) = levels(df$confirm)

再次画图

p+ scale_fill_manual(values=cols, breaks=names(cols))
#Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
Rplot01.png

改变背景

ggplot() +geom_map(aes(long, lat, map_id=id, group=group, fill=confirm), map=cn2, data=cn2, colour='grey') +coord_map() +scale_fill_viridis_d() +theme_bw() +xlab(NULL) + ylab(NULL) +labs(title = '2019nCov',subtitle = paste('confirmed cases:', d$chinaTotal$confirm),caption=paste("accessed date:", d$chinaTotal$date))+ scale_fill_manual(values=cols, breaks=names(cols))
Rplot02.png

遗憾的是不能写中文,可以导出为pdf,然后自己diy


把确诊、怀疑、死亡、治愈数量加上

ggplot() +geom_map(aes(long, lat, map_id=id, group=group, fill=confirm), map=cn2, data=cn2, colour='grey') +coord_map() +scale_fill_viridis_d() +theme_bw() +xlab(NULL) + ylab(NULL) +labs(title = '2019nCov',subtitle = paste('Confirmed:',d$chinaTotal$confirm,"Suspected:",d$chinaTotal$suspect,"Dead:",d$chinaTotal$dead,"Heal:",d$chinaTotal$heal),caption=paste("update to:", d$lastUpdateTime))+ scale_fill_manual(values=cols, breaks=names(cols))

2019nCoV

中文显示

#修改df
df = data.frame(name = d$areaTree$children[[1]]$name,
                确诊人数 = cut(d$areaTree$children[[1]]$total$confirm, 
                c(1,10,100,500,1000,10000),
               include.lowest = T, right=F))
#整合修改cn2
cn2 = merge(cn, df, by.x='province', by.y='name', all.x=TRUE)
cn2 = cn2[order(cn2$order),]
#修改颜色
cols = RColorBrewer::brewer.pal(5, 'Reds')
names(cols) = levels(df$确诊人数)
#重新画图
ggplot() +geom_map(aes(long, lat, map_id=id, group=group, fill=确诊人数), 
map=cn2, data=cn2, colour='grey') +coord_map() +scale_fill_viridis_d() 
+theme_bw() +xlab(NULL) + ylab(NULL) + 
   labs(title = '2019nCov',subtitle = paste('确诊病例:',d$chinaTotal$confirm,  ";疑似病例:",d$chinaTotal$suspect,  ";死亡人数:",d$chinaTotal$dead,";治愈人数:",d$chinaTotal$heal), 
caption=paste("更新至:", d$lastUpdateTime))
+ scale_fill_manual(values=cols, breaks=names(cols))
# mac不显示中文的 添加一行代码
+ theme(text = element_text(family='Kai'))
2019新型冠状病毒性肺炎
上一篇下一篇

猜你喜欢

热点阅读