R-Modified Mann-Kendall 趋势分析(改进后
2019-12-05 本文已影响0人
TroyShen
0. 问题导入
很多情况下,我们需要研究时间序列,如降水,气温,GDP,人口等要素历史或未来的演变规律。特别是面向全球的研究,我们往往需要将空间的时空矩阵通过计算趋势值,进而压缩到一张图上来展示我们所感兴趣要素的时空变异规律(图1)。
图1 数据结构及计算流程本文介绍的改进后的Mann-Kendall趋势分析方法已被广泛应用到分析时间序列演变规律过程中的应用(文末附改进后MK趋势分析方法函数)。
1. 示例数据
本次示例数据百度云链接如下,可点击下载。
全球SSP情景GDP数据说明
全球SSP情景GDP数据
2. 数据导入与预处理
在分析计算空间上点的MK趋势之前,我们首先得确定其在真个时间区间内有变化。若无变化,则需要将其去掉,否则计算过程会报错。
那么,如何事先确定一个时间序列是否有变化呢?
解决思路:
- 计算整个序列的平均值
- 计算整个序列的最大值
- 做差,若差为0,则该时间序列无在研究的时间范围内无变化。
特别注意,由于该csv 文件过大(60MB左右),请参考Rstudio-是不是发现你的Rstudio读取大的CSV越来越慢了这篇文章,采用fread函数导入数据。
gdp = fread('L:\\JianShu\\2019-12-05\\data_trend\\gdp_world_ssp1.csv')
gdp = as.matrix(gdp)
gdp = apply(gdp,2,as.numeric)
# 提出经纬度信息
long = gdp[,1]
lat = gdp[,2]
gdp = gdp[,-c(1,2)]
# 预判时间序列是否有变化
mean_gdp = apply(gdp,1,mean)
max_gdp = apply(gdp,1,max)
diff_gdp = max_gdp - mean_gdp
#筛选出无变化时间序列的行号,并将其从gdp 矩阵中删除
index_non = which(diff_gdp ==0)
gdp = gdp[-index_non,]
long = long[-index_non]
lat = lat[-index_non]
3. 计算每个空间点改进后的MK趋势值(该过程十分耗时)
mmk_cal <- function(x){
temp = mmkTrend(x)$Zc
return(temp)
}
system.time(mmk_gdp <- apply(gdp,1,mmk_cal))
4. MK趋势值空间可视化
pl_df = data.frame(long = long,lat = lat,mmk = mmk_gdp)
pl_df_m = melt(pl_df,c('long','lat'))
pl_df_m$cuts = cut(pl_df_m$value,breaks = c(0,2,4,6,8,Inf))
fontsize = 12
legend_set<-theme(legend.background = element_rect(fill='white'),
legend.key = element_blank(),
legend.key.size = unit(3,'lines'),
legend.key.height=unit(0.1,"inches"),#图例分类符号高度
legend.key.width=unit(0.5,"inches"),#图例符号的宽度
legend.text=element_text(colour="black",size=fontsize,face = 'bold'),#图例分类标签设置
legend.text.align=0,#0左,1右,0.5居中, 图例分类标签的对齐方式
#legend.title=element_text(colour="black",size=15,face='bold'),#图例标题设置
legend.title = element_blank(),
legend.title.align=1,#图例标题对齐方式
legend.position=c('bottom'),#"none","left","right","bottom","top",or
# two-element numeric vector,(0,0)-(1,1)
legend.direction="horizontal",#"vertical" 图例排列方向
legend.justification=c('center'),#"center" or two-element numeric vector
legend.box="vertical",#"horizontal",对图例的排列方式
legend.box.just="top"#多图例的居中方式
,plot.background = element_rect(color='transparent'))
mycolor = colorRampPalette(brewer.pal(11,'Spectral'))(5)
color_group = paste0('a',1:12)
main_plot = ggplot()+
geom_hline(aes(yintercept = 50),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
geom_hline(aes(yintercept = 0),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
geom_hline(aes(yintercept = -50),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
geom_vline(aes(xintercept = 0),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
geom_vline(aes(xintercept = -100),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
geom_vline(aes(xintercept = 100),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
geom_tile(data = pl_df_m,aes(x = long,y = lat, fill = cuts))+
theme(panel.background = element_rect(fill = 'transparent',color = 'black'),
axis.text = element_text(face='bold',colour='black',size=fontsize,hjust=.5),
axis.title = element_text(face='bold',colour='black',size=fontsize,hjust=.5),
legend.position=c('bottom'),
legend.direction = c('horizontal'))+
scale_fill_manual(values = mycolor)+
coord_fixed(1.3)+
geom_hline(aes(yintercept = 0),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
guides(fill=guide_legend(nrow=1))+
xlab('Longitude')+
ylab('Latitude')
main_plot
图2 全球GDP-MMK趋势分析结果图
5. 所用软件包
library('ggplot2')
library('data.table')
library('RColorBrewer')
library('reshape2')
6. 改进后Mann-Kendall趋势分析函数
mmkTrend <-
function(x, ci = .95) {
x = x
z = NULL
z0 = NULL
pval = NULL
pval0 = NULL
S = 0
Tau = NULL
essf = NULL
ci = ci
if (is.vector(x) == FALSE) {
stop("Input data must be a vector")
}
if (any(is.finite(x) == FALSE)) {
x[-c(which(is.finite(x) == FALSE))] -> x
warning("The input vector contains non-finite numbers. An attempt was made to remove them")
}
n <- length(x)
for (i in 1:(n-1)) {
for (j in (i+1):n) {
S = S + sign(x[j]-x[i])
}
}
acf(rank(lm(x ~ I(1:n))$resid), lag.max=(n-1), plot=FALSE)$acf[-1] -> ro
qnorm((1+ci)/2)/sqrt(n) -> sig
rep(NA,length(ro)) -> rof
for (i in 1:(length(ro))) {
if(ro[i] > sig || ro[i] < -sig) {
rof[i] <- ro[i]
} else {
rof[i] = 0
}
}
2 / (n*(n-1)*(n-2)) -> cte
ess=0
for (i in 1:(n-1)) {
ess = ess + (n-i)*(n-i-1)*(n-i-2)*rof[i]
}
essf = 1 + ess*cte
var.S = n*(n-1)*(2*n+5)*(1/18)
if(length(unique(x)) < n) {
unique(x) -> aux
for (i in 1:length(aux)) {
length(which(x == aux[i])) -> tie
if (tie > 1) {
var.S = var.S - tie*(tie-1)*(2*tie+5)*(1/18)
}
}
}
VS = var.S * essf
if (S == 0) {
z = 0
z0 = 0
}
if (S > 0) {
z = (S-1)/sqrt(VS)
z0 = (S-1)/sqrt(var.S)
} else {
z = (S+1)/sqrt(VS)
z0 = (S+1)/sqrt(var.S)
}
pval = 2*pnorm(-abs(z))
pval0 = 2*pnorm(-abs(z0))
Tau = S/(.5*n*(n-1))
rep(NA, times=(n^2-n)/2) -> V
k = 0
for (i in 2:n) {
for (j in 1:i) {
k = k+1
V[k] = (x[i]-x[j])/(i-j)
}
}
median(na.omit(V)) -> slp
return(list("Z" = z0, "p.value" = pval0, "Zc" = z, "Corrected p.value" = pval, "tau" = Tau, "N/N*s" = essf, "Sen's Slope" = slp))
}