R-Modified Mann-Kendall 趋势分析（改进后

2019-12-05 本文已影响0人 TroyShen

0. 问题导入

很多情况下，我们需要研究时间序列，如降水，气温，GDP，人口等要素历史或未来的演变规律。特别是面向全球的研究，我们往往需要将空间的时空矩阵通过计算趋势值，进而压缩到一张图上来展示我们所感兴趣要素的时空变异规律（图1）。

图1 数据结构及计算流程

本文介绍的改进后的Mann-Kendall趋势分析方法已被广泛应用到分析时间序列演变规律过程中的应用（文末附改进后MK趋势分析方法函数）。

1. 示例数据

本次示例数据百度云链接如下，可点击下载。
全球SSP情景GDP数据说明
 全球SSP情景GDP数据

2. 数据导入与预处理

在分析计算空间上点的MK趋势之前，我们首先得确定其在真个时间区间内有变化。若无变化，则需要将其去掉，否则计算过程会报错。

那么，如何事先确定一个时间序列是否有变化呢？

解决思路：

计算整个序列的平均值
计算整个序列的最大值
做差，若差为0，则该时间序列无在研究的时间范围内无变化。

特别注意，由于该csv 文件过大（60MB左右），请参考Rstudio-是不是发现你的Rstudio读取大的CSV越来越慢了这篇文章，采用fread函数导入数据。

gdp = fread('L:\\JianShu\\2019-12-05\\data_trend\\gdp_world_ssp1.csv')
gdp = as.matrix(gdp)
gdp = apply(gdp,2,as.numeric)
# 提出经纬度信息
long = gdp[,1]
lat = gdp[,2]
gdp = gdp[,-c(1,2)]
# 预判时间序列是否有变化
mean_gdp = apply(gdp,1,mean)
max_gdp = apply(gdp,1,max)  
diff_gdp = max_gdp - mean_gdp
#筛选出无变化时间序列的行号，并将其从gdp 矩阵中删除
index_non = which(diff_gdp ==0)
gdp = gdp[-index_non,]
long = long[-index_non]
lat  = lat[-index_non]

3. 计算每个空间点改进后的MK趋势值（该过程十分耗时）

mmk_cal <- function(x){
  temp = mmkTrend(x)$Zc
  return(temp)
}
system.time(mmk_gdp <- apply(gdp,1,mmk_cal))

4. MK趋势值空间可视化

pl_df = data.frame(long = long,lat = lat,mmk = mmk_gdp)

pl_df_m = melt(pl_df,c('long','lat'))
pl_df_m$cuts = cut(pl_df_m$value,breaks = c(0,2,4,6,8,Inf))

fontsize = 12

legend_set<-theme(legend.background = element_rect(fill='white'),
                  legend.key = element_blank(),
                  legend.key.size = unit(3,'lines'),
                  legend.key.height=unit(0.1,"inches"),#图例分类符号高度
                  legend.key.width=unit(0.5,"inches"),#图例符号的宽度
                  legend.text=element_text(colour="black",size=fontsize,face = 'bold'),#图例分类标签设置
                  legend.text.align=0,#0左，1右，0.5居中， 图例分类标签的对齐方式
                  #legend.title=element_text(colour="black",size=15,face='bold'),#图例标题设置
                  legend.title = element_blank(),
                  legend.title.align=1,#图例标题对齐方式
                  legend.position=c('bottom'),#"none","left","right","bottom","top",or 
                  # two-element numeric vector,(0,0)-(1,1)
                  legend.direction="horizontal",#"vertical" 图例排列方向
                  legend.justification=c('center'),#"center" or two-element numeric vector
                  legend.box="vertical",#"horizontal",对图例的排列方式
                  legend.box.just="top"#多图例的居中方式
                  ,plot.background = element_rect(color='transparent'))

mycolor = colorRampPalette(brewer.pal(11,'Spectral'))(5)
color_group = paste0('a',1:12)

main_plot = ggplot()+
  geom_hline(aes(yintercept = 50),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
  geom_hline(aes(yintercept = 0),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
  geom_hline(aes(yintercept = -50),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
  geom_vline(aes(xintercept = 0),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
  geom_vline(aes(xintercept = -100),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
  geom_vline(aes(xintercept = 100),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
  geom_tile(data = pl_df_m,aes(x = long,y = lat, fill = cuts))+
  theme(panel.background = element_rect(fill = 'transparent',color = 'black'),
        axis.text = element_text(face='bold',colour='black',size=fontsize,hjust=.5),
        axis.title = element_text(face='bold',colour='black',size=fontsize,hjust=.5),
        legend.position=c('bottom'),
        legend.direction = c('horizontal'))+
  scale_fill_manual(values = mycolor)+
  coord_fixed(1.3)+
  geom_hline(aes(yintercept = 0),linetype = 'dashed',alpha = 0.5,lwd = 0.5,color = 'black')+
  guides(fill=guide_legend(nrow=1))+
  xlab('Longitude')+
  ylab('Latitude')

main_plot

图2 全球GDP-MMK趋势分析结果图

5. 所用软件包

library('ggplot2')
library('data.table')
library('RColorBrewer')
library('reshape2')

6. 改进后Mann-Kendall趋势分析函数

mmkTrend <-
  function(x, ci = .95) {
    x = x
    z = NULL
    z0 = NULL
    pval = NULL
    pval0 = NULL
    S = 0
    Tau = NULL
    essf = NULL
    ci = ci
    if (is.vector(x) == FALSE) {
      stop("Input data must be a vector")
    }
    if (any(is.finite(x) == FALSE)) {
      x[-c(which(is.finite(x) == FALSE))] -> x
      warning("The input vector contains non-finite numbers. An attempt was made to remove them")
    }
    n <- length(x)
    for (i in 1:(n-1)) {
      for (j in (i+1):n) {
        S = S + sign(x[j]-x[i])
      }
    }
    acf(rank(lm(x ~ I(1:n))$resid), lag.max=(n-1), plot=FALSE)$acf[-1] -> ro
    qnorm((1+ci)/2)/sqrt(n) -> sig
    rep(NA,length(ro)) -> rof
    for (i in 1:(length(ro))) {
      if(ro[i] > sig || ro[i] < -sig) {
        rof[i] <- ro[i]
      } else {
        rof[i] = 0
      }
    }
    2 / (n*(n-1)*(n-2)) -> cte
    ess=0
    for (i in 1:(n-1)) {          
      ess = ess + (n-i)*(n-i-1)*(n-i-2)*rof[i]
    }
    essf = 1 + ess*cte
    var.S = n*(n-1)*(2*n+5)*(1/18) 
    if(length(unique(x)) < n) {
      unique(x) -> aux
      for (i in 1:length(aux)) {
        length(which(x == aux[i])) -> tie
        if (tie > 1) {
          var.S = var.S - tie*(tie-1)*(2*tie+5)*(1/18)  
        }
      }
    }
    VS = var.S * essf            
    if (S == 0) {
      z = 0
      z0 = 0
    }
    if (S > 0) {
      z = (S-1)/sqrt(VS) 
      z0 = (S-1)/sqrt(var.S)
    } else {
      z = (S+1)/sqrt(VS) 
      z0 = (S+1)/sqrt(var.S)
    }      
    pval = 2*pnorm(-abs(z))
    pval0 = 2*pnorm(-abs(z0)) 
    Tau = S/(.5*n*(n-1))
    rep(NA, times=(n^2-n)/2) -> V
    k = 0
    for (i in 2:n) {
      for (j in 1:i) {
        k = k+1
        V[k] = (x[i]-x[j])/(i-j)
        
      }
    }
    median(na.omit(V)) -> slp
    return(list("Z" = z0, "p.value" = pval0, "Zc" = z, "Corrected p.value" = pval, "tau" = Tau, "N/N*s" = essf, "Sen's Slope" = slp))
}