【生信技能树】2019-12-22作业:自定义函数实现melt和dcast

2019-12-23  本文已影响0人  猫叽先森
# NOTE: the original script started with rm(list = ls()). It was dropped
# because wiping the caller's workspace is a destructive side effect that
# nothing below depends on.
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)
## Load the test data (expected to provide the expression table `a`)
load('example.Rdata')
a
#          GSM3781295 GSM3781296 GSM3781297 GSM3781298
#NM_214711   1.972502   1.939664   2.047662   2.208423
#NM_214710   2.688776   3.037042   5.280936   3.070719
#NM_214462   6.311992   1.704874   7.960758   1.901060
#NM_214461   6.102595   3.608155   6.625782   3.861742
library(reshape2)
# Work on a copy so `a` itself stays untouched for the later comparisons.
aa <- a
# melt() needs the probe ids as a regular column, not as row names.
aa$probe_id <- rownames(aa)
# Reference result: long format with one row per (probe, sample) cell.
b <- melt(data = aa,
          id.vars = c("probe_id"),
          variable.name = "samples",
          value.name = "exprs")
# melt() returns `samples` as a factor; convert it to character.
b$samples <- as.character(b$samples)
b
#    probe_id    samples    exprs
#1  NM_214711 GSM3781295 1.972502
#2  NM_214710 GSM3781295 2.688776
#3  NM_214462 GSM3781295 6.311992
#4  NM_214461 GSM3781295 6.102595
#5  NM_214711 GSM3781296 1.939664
#6  NM_214710 GSM3781296 3.037042
#7  NM_214462 GSM3781296 1.704874
#8  NM_214461 GSM3781296 3.608155
#9  NM_214711 GSM3781297 2.047662
#10 NM_214710 GSM3781297 5.280936
#11 NM_214462 GSM3781297 7.960758
#12 NM_214461 GSM3781297 6.625782
#13 NM_214711 GSM3781298 2.208423
#14 NM_214710 GSM3781298 3.070719
#15 NM_214462 GSM3781298 1.901060
#16 NM_214461 GSM3781298 3.861742
save(b,file = 'b.Rdata')

#' Reshape a wide expression table into long format (hand-written melt).
#'
#' Every column of `df` contributes one run of rows to the result, in column
#' order; within a run the rows follow the row order of `df`.
#'
#' @param df A data frame whose row names identify probes and whose column
#'   names identify samples.
#' @return A data frame with the columns `probe_id` (row name of the cell),
#'   `samples` (column name of the cell) and `exprs` (cell value), holding
#'   `nrow(df) * ncol(df)` rows. An input without rows or without columns
#'   yields a zero-row data frame with the same three columns.
melt_by_cat <- function(df) {
  # Degenerate shapes: `1:ncol(df)` would iterate over c(1, 0) for a
  # zero-column input, and a zero-row input cannot take a scalar column.
  if (nrow(df) == 0L || ncol(df) == 0L) {
    return(data.frame(
      probe_id = character(0),
      samples = character(0),
      exprs = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  probe_ids <- rownames(df)
  sample_ids <- colnames(df)

  # Build one long-format piece per column ...
  pieces <- lapply(seq_len(ncol(df)), function(i) {
    piece <- as.data.frame(df[, i])
    colnames(piece) <- c("exprs")
    piece$probe_id <- probe_ids
    piece$samples <- sample_ids[i]
    piece
  })

  # ... and bind them in a single call instead of growing the result with
  # rbind() inside a loop, which copies the accumulated frame every time.
  long <- do.call(rbind, pieces)

  # Put the columns into the same order melt() uses.
  long[c("probe_id", "samples", "exprs")]
}
# Run the hand-written melt on the example table.
re1 <- melt_by_cat(df = a)
## Check whether re1 matches b, the result obtained from reshape2::melt()
identical(re1, b)
#[1] TRUE

# Reference result: cast the long table back to wide format, one row per
# probe and one column per sample. value.var is named explicitly so dcast()
# does not have to guess the value column (and print a message about it).
d <- dcast(data = b, probe_id ~ samples, value.var = "exprs")
d
#   probe_id GSM3781295 GSM3781296 GSM3781297 GSM3781298
#1 NM_214461   6.102595   3.608155   6.625782   3.861742
#2 NM_214462   6.311992   1.704874   7.960758   1.901060
#3 NM_214710   2.688776   3.037042   5.280936   3.070719
#4 NM_214711   1.972502   1.939664   2.047662   2.208423

#' Reshape a long expression table into wide format (hand-written dcast).
#'
#' The layout of `df0` is positional: column 1 is the key (probe id),
#' column 2 must be named `samples`, and column 3 holds the values.
#'
#' @param df0 A long-format data frame laid out as described above.
#' @return A data frame with the key column followed by one column per
#'   distinct value of `df0$samples` (in order of first appearance), rows
#'   sorted by the key. Only keys present for every sample are kept, because
#'   the per-sample tables are combined with an inner `merge()`. An input
#'   without any samples yields a zero-row data frame holding only the key
#'   column.
dcast_by_cat <- function(df0) {
  sample_ids <- unique(df0$samples)
  key <- colnames(df0)[1]

  # Nothing to spread: return just the (empty) key column.
  if (length(sample_ids) == 0L) {
    return(df0[0, 1, drop = FALSE])
  }

  # Two-column table (key, value) for one sample, with the value column
  # renamed to the sample id.
  sample_table <- function(i) {
    one <- df0[df0$samples == sample_ids[i], ]
    one <- one[, -2]
    colnames(one)[2] <- sample_ids[i]
    one
  }

  wide <- sample_table(1)
  # seq_along(...)[-1] is empty for a single sample, whereas
  # 2:length(...) would count down 2, 1 and index a non-existent sample.
  for (i in seq_along(sample_ids)[-1]) {
    # Join on the key explicitly rather than on whatever names coincide.
    wide <- merge(wide, sample_table(i), by = key)
  }

  # merge() sorts by the key and renumbers rows; do the same when no merge
  # took place so the output shape does not depend on the sample count.
  if (length(sample_ids) == 1L) {
    wide <- wide[order(wide[[key]]), ]
    rownames(wide) <- NULL
  }

  wide
}
# Run the hand-written dcast on the long table.
re2 <- dcast_by_cat(df0 = b)
## Check whether re2 matches d, the result obtained from reshape2::dcast()
identical(re2, d)
#[1] TRUE

收获:

  1. 学习了melt()和dcast()最基本的用法
    1)melt()得到的数据框,总行数=原数据框行数*列数,
    每行3个数据,分别对应原数据框每个单元格的行名,列名,数值
    b <- melt(data = aa,id.vars = c("probe_id"),variable.name = "samples",value.name = "exprs")

    2)dcast()得到的数据框,
    d <- dcast(data = b,probe_id ~samples)
    以~号前的列做行名,~号后为列名,重排数据

存在问题:

  1. 还是未能掌握apply函数的使用方法,尝试使用apply()改写melt(),得到的结果是一个List
#' Reshape a wide expression table into long format using the apply family.
#'
#' Iterates over the column indices with `lapply()` and binds the per-column
#' pieces once at the end. Iterating over indices (instead of `apply()` over
#' the data itself) keeps the column name available for each piece and avoids
#' coercing the data frame to a matrix.
#'
#' @param df A data frame whose row names identify probes and whose column
#'   names identify samples.
#' @return A data frame with the columns `probe_id`, `samples` and `exprs`,
#'   one row per cell of `df`, ordered column by column. An input without
#'   rows or without columns yields a zero-row data frame with the same
#'   three columns.
melt_by_cat_2 <- function(df) {
  if (nrow(df) == 0L || ncol(df) == 0L) {
    return(data.frame(
      probe_id = character(0),
      samples = character(0),
      exprs = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  probe_ids <- rownames(df)
  sample_ids <- colnames(df)

  # A counter incremented inside the callback would only change a local copy
  # on each call, so the column index is passed in explicitly instead.
  pieces <- lapply(seq_len(ncol(df)), function(i) {
    data.frame(
      probe_id = probe_ids,
      samples = sample_ids[i],
      exprs = df[[i]],
      stringsAsFactors = FALSE
    )
  })

  # The callback's value is what lapply() collects; the pieces have to be
  # combined here, after the iteration, to obtain a single data frame.
  do.call(rbind, pieces)
}
# Try the second melt implementation on the example table `a`.
re <- melt_by_cat_2(df = a)
上一篇下一篇

猜你喜欢

热点阅读