数字格式转换、老年婚姻状况图及拼图

2019-09-30 本文已影响0人冬之心

title: "数字格式转换、老年婚姻状况图及拼图"
author: "李亮"
date: "2019/7/21"
output:
  html_document: default
  pdf_document: default

# Global knitr chunk options: echo the code, suppress warnings and messages.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  echo = TRUE
)

自然数转为比例

语法

rescale(x, to = c(0, 1), from = range(x, na.rm = TRUE, finite = TRUE), ...)

参数

x：continuous vector of values to manipulate.
to： output range (numeric vector of length two)
from：input range (vector of length two). If not given, is calculated from the range of x
... ：other arguments passed on to methods

例子

library(scales)

# Sample values used by the examples in this section.
x <- c(127.31, 5787.47, 3703.58, 78.61)

# Express each value as its share of the total, on the (0, 1) scale.
rescale(x, from = c(0, sum(x)), to = c(0, 1))

# The same shares, on the (0, 100) scale.
rescale(x, from = c(0, sum(x)), to = c(0, 100))

小数点转成百分比

语法

percent(x, accuracy = NULL, scale = 100, prefix = "",
suffix = "%", big.mark = " ", decimal.mark = ".", trim = TRUE,
...)

参数

accuracy: Number to round to, NULL for automatic guess.
scale：A scaling factor: x will be multiplied by scale before formatting (useful if the underlying data is on another scale, e.g. for computing percentages or thousands).
prefix, suffix：Symbols to display before and after value.
big.mark：Character used between every 3 digits to separate thousands.
decimal.mark：The character to be used to indicate the numeric decimal point.
trim：Logical, if FALSE, values are right-justified to a common width

例子


# Format each value's share of the total as a percentage string.
# The denominator is sum(x) rather than sum(x[1:4]): the hard-coded index
# yields NA when x has fewer than four elements and ignores any extra
# elements, and sum(x) matches the rescale() examples earlier in the file.

# Whole percentages (no decimal places).
percent(rescale(x, to = c(0, 1), from = c(0, sum(x))), accuracy = 1)

# Two decimal places.
percent(rescale(x, to = c(0, 1), from = c(0, sum(x))), accuracy = 0.01)

中国老年人婚姻状况变化

library(reshape2)
library(tidyverse)
library(scales)

# Enter the raw figures: one vector of marital-status names and one vector of
# counts per year column, all in the same status order.
marriage <- c("未婚", "有配偶", "丧偶", "离婚")
y1990 <- c(127.31, 5787.47, 3703.58, 78.61)
y2000 <- c(212.17, 8616.39, 3885.58, 84.26)
y2010 <- c(313.68, 12459.03, 4747.92, 138.08)

# Wide table: one row per marital status, one column per year.
marriage2 <- data.frame(marriage, y1990, y2000, y2010)

# Long table: one row per (marital status, year) pair.
marriage3 <- melt(
  marriage2,
  id.vars = "marriage",
  variable.name = "year",
  value.name = "population"
)

# Fix the factor level order to the entry order; labels equal the levels.
marriage3$marriage <- factor(
  marriage3$marriage,
  levels = marriage,
  labels = marriage
)

# Remove the "y" prefix so the year column reads "1990", "2000", "2010".
marriage3$year <- str_replace(marriage3$year, "y", "")



# Turn each year's counts into shares of that year's total. percent() formats
# them as strings with two decimals; suffix = "" leaves out the "%" sign.
y1990p <- y1990 %>%
  rescale(to = c(0, 1), from = c(0, sum(y1990))) %>%
  percent(accuracy = 0.01, suffix = "")
y2000p <- y2000 %>%
  rescale(to = c(0, 1), from = c(0, sum(y2000))) %>%
  percent(accuracy = 0.01, suffix = "")
y2010p <- y2010 %>%
  rescale(to = c(0, 1), from = c(0, sum(y2010))) %>%
  percent(accuracy = 0.01, suffix = "")

# Wide table of the formatted shares, reshaped to long form.
marriage_P <- data.frame(marriage, y1990p, y2000p, y2010p)
marriage_P2 <- melt(
  marriage_P,
  id.vars = "marriage",
  variable.name = "year",
  value.name = "percent"
)

# Same explicit level order as used for the counts table.
marriage_P2$marriage <- factor(
  marriage_P2$marriage,
  levels = marriage,
  labels = marriage
)

# Keep characters 2 to 5 of names such as "y1990p", i.e. the four-digit year.
marriage_P2$year <- str_sub(marriage_P2$year, start = 2, end = 5)

# Append the share column to the counts table. Rows are matched by position,
# so both long tables must come from the same melt() ordering.
# NOTE(review): the appended column can arrive as a factor. cbind() on a data
# frame goes through data.frame(), which converted character vectors to
# factors by default before R 4.0. Confirm against the R version in use.
marriage4 <- cbind(marriage3, percent = marriage_P2$percent)

# A factor must go through character first: as.numeric() applied directly to
# a factor returns the internal level codes, not the printed values.
marriage4$percent <- as.numeric(as.character(marriage4$percent))

library(ggplot2)

# Dodged bar chart of the percentage shares per year, one bar per marital
# status, with each value printed 0.5 units above its bar.
ggplot(marriage4, aes(x = year, y = percent, group = marriage)) +
  geom_col(aes(fill = marriage), position = "dodge") +
  geom_text(
    aes(y = percent + 0.5, label = percent),
    vjust = 0,
    position = position_dodge(width = 0.9)
  ) +
  theme(legend.position = c(0.8, 0.8)) +
  labs(title = "比例变化图", fill = "婚姻类型", x = NULL, y = NULL)

# Line chart of population per year, one facet per marital status. Each facet
# has free scales and the legend is hidden.
ggplot(marriage4, aes(x = year, y = population, group = marriage)) +
  geom_line(aes(colour = marriage), size = 2) +
  geom_point(aes(shape = marriage), size = 2) +
  facet_wrap(. ~ marriage, scales = "free") +
  theme(legend.position = "none") +
  labs(title = "人口变化图", x = NULL, y = "人口数（万人）")

Rplot.png

000007.png

拼图

拼图包常用有三个：

gridExtra包的grid.arrange()函数
ggpubr包的ggarrange()函数
cowplot包的ggdraw()+draw_plot()函数

参见:


library(cowplot)

# Left panel: dodged bar chart of percentage shares with value labels. The
# legend sits inside the panel near the top-right corner with no background.
p1 <- ggplot(marriage4, aes(x = year, y = percent, group = marriage)) +
  geom_col(aes(fill = marriage), position = "dodge") +
  geom_text(
    aes(y = percent + 0.5, label = percent),
    vjust = 0,
    position = position_dodge(width = 0.9)
  ) +
  theme(
    legend.background = element_blank(),
    legend.position = c(0.92, 0.85)
  ) +
  labs(fill = "婚姻类型", x = NULL, y = "比例(%)")

# Right panel: population trend lines, one free-scale facet per marital
# status, legend hidden.
p2 <- ggplot(marriage4, aes(x = year, y = population, group = marriage)) +
  geom_line(aes(colour = marriage), size = 2) +
  geom_point(aes(shape = marriage), size = 2) +
  facet_wrap(. ~ marriage, scales = "free") +
  theme(legend.position = "none") +
  labs(x = NULL, y = "人口数（万人）")

# Compose the final figure: p1 in the left half and p2 in the right half, both
# raised 0.1 from the bottom to leave room for the caption. Panel titles go
# at the top and the source/credit line at the bottom right.
ggdraw() +
  draw_plot(p1, x = 0, y = 0.1, width = 0.5, height = 0.85) +
  draw_plot(p2, x = 0.5, y = 0.1, width = 0.5, height = 0.85) +
  draw_plot_label(
    c("比例图", "人口图"),
    x = c(0, 0.5),
    y = c(1, 1)
  ) +
  draw_plot_label(
    "数据来源：中国人口普查 制图：李亮",
    x = 0.63,
    y = 0.1,
    size = 8
  )

draw_plot() 中，
x, y 表示子图的起点坐标(左下角坐标)，在0-1之间，表示占母图的比例，
width, height 表示子图长宽所占比例，在0-1之间

Rplot01.png

程序改进

利用管道分组计算新值。


# Re-enter the raw figures so this section runs on its own: marital-status
# names plus one vector of counts per year column.
marriage <- c("未婚", "有配偶", "丧偶", "离婚")
y1990 <- c(127.31, 5787.47, 3703.58, 78.61)
y2000 <- c(212.17, 8616.39, 3885.58, 84.26)
y2010 <- c(313.68, 12459.03, 4747.92, 138.08)

# Wide table: one row per marital status, one column per year.
marriage2 <- data.frame(marriage, y1990, y2000, y2010)

# Long table: one row per (marital status, year) pair.
marriage3 <- melt(
  marriage2,
  id.vars = "marriage",
  variable.name = "year",
  value.name = "population"
)

# Fix the factor level order to the entry order; labels equal the levels.
marriage3$marriage <- factor(
  marriage3$marriage,
  levels = marriage,
  labels = marriage
)

# Remove the "y" prefix so the year column reads "1990", "2000", "2010".
marriage3$year <- str_replace(marriage3$year, "y", "")

# Within each year, compute every marital status's share of that year's total
# population. percent() formats the share as a string with two decimals and
# no "%" suffix, so the new column is character, not numeric.
# ungroup() drops the year grouping afterwards; otherwise marriage3 would
# stay grouped and later verbs would silently keep operating per year.
marriage3 <- marriage3 %>%
  group_by(year) %>%
  mutate(
    percent = percent(
      rescale(population, to = c(0, 1), from = c(0, sum(population))),
      accuracy = 0.01,
      suffix = ""
    )
  ) %>%
  ungroup()

marriage3

合并数据框时应注意数据类型

# Shares of each year's total, formatted with two decimals and no "%" sign.
y1990p <- percent(
  rescale(y1990, from = c(0, sum(y1990)), to = c(0, 1)),
  suffix = "",
  accuracy = 0.01
)
y2000p <- percent(
  rescale(y2000, from = c(0, sum(y2000)), to = c(0, 1)),
  suffix = "",
  accuracy = 0.01
)
y2010p <- percent(
  rescale(y2010, from = c(0, sum(y2010)), to = c(0, 1)),
  suffix = "",
  accuracy = 0.01
)

# Note: percent() returns a character vector.
# Note: when character vectors are put into a data frame they may be turned
# into factors by default (the default before R 4.0), which complicates later
# processing. Passing stringsAsFactors = FALSE keeps them as character.
marriage_P <- data.frame(
  marriage, y1990p, y2000p, y2010p,
  stringsAsFactors = FALSE
)

# Reshape to long form and convert to a tibble so printing shows column types.
marriage_P2 <- marriage_P %>%
  melt(id.vars = "marriage", variable.name = "year", value.name = "percent") %>%
  as_tibble()
marriage_P2
# Note: melt() stores the former measure-variable names in a factor column
# (here, year).


# Same reshape with tidyr::gather(). Its default factor_key = FALSE stores the
# key column as character; factor_key = TRUE would store it as a factor.
marriage_P <- data.frame(
  marriage, y1990p, y2000p, y2010p,
  stringsAsFactors = FALSE
)
marriage_P2 <- marriage_P %>%
  gather(key = "year", value = "percent", -marriage) %>%
  as_tibble()
marriage_P2

数字格式转换、老年婚姻状况图及拼图

自然数转为比例

语法

参数

例子

小数点转成百分比

语法

参数

例子

中国老年人婚姻状况变化

拼图

程序改进

利用管道分组计算新值。

合并数据框时应注意数据类型

猜你喜欢

热点阅读