R

脚本 | R | 每个基因在不同处理下表达量的相关系数

2022-03-07  本文已影响0人  shwzhao

通常求表达量的相关系数,是每两个样本间或每两个基因间的,最后得到相关性矩阵。但是我想求一个基因在不同处理下的相关系数,或者特定的一对基因之间相关系数,怎么办?


方法1. tidyverserowwise()来解决

更新:别用了,几万个基因来做,慢得想吐,我要疯了!

> library(tidyverse)
> df <- tibble(geneid=paste("gene", 1:6, sep=""),
             a = runif(6),
             b = runif(6),
             c = runif(6),
             d = runif(6),
             e = runif(6),
             u = runif(6),
             v = runif(6),
             x = runif(6),
             y = runif(6),
             z = runif(6)) 
> df
# A tibble: 6 × 11
  geneid       a      b      c        d     e     u     v      x     y      z
  <chr>    <dbl>  <dbl>  <dbl>    <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
1 gene1  0.552   0.125  0.0159 0.960    0.506 0.708 0.336 0.658  0.380 0.312
2 gene2  0.00347 0.996  0.250  0.785    0.584 0.822 0.241 0.394  0.287 0.411
3 gene3  0.314   0.0522 0.922  0.127    0.678 0.486 0.531 0.554  0.519 0.400
4 gene4  0.498   0.594  0.883  0.000890 0.634 0.631 0.638 0.908  0.985 0.0191
5 gene5  0.368   0.797  0.697  0.117    0.513 0.817 0.371 0.0788 0.819 0.939
6 gene6  0.673   0.399  0.738  0.778    0.632 0.469 0.251 0.339  0.516 0.324
> df_cor <- df %>%
  rowwise() %>%
  mutate(a_cor = cor(c_across(a:e), c_across(u:z))) %>%
  select(geneid, a_cor)
> df_cor 
# A tibble: 6 × 2
# Rowwise:
  geneid  a_cor
  <chr>   <dbl>
1 gene1  -0.239
2 gene2  -0.874
3 gene3  -0.163
4 gene4  -0.310
5 gene5  -0.706
6 gene6   0.760
> cor(c(0.552,0.125,0.0159,0.960,0.506), c(0.708,0.336,0.658,0.380,0.312)); cor(c(0.00347,0.996,0.250,0.785,0.584), c(0.822,0.241,0.394,0.287,0.411)); cor(c(0.314,0.0522,0.922,0.127,0.678), c(0.486,0.531,0.554,0.519,0.400)); cor(c(0.498,0.594,0.883,0.000890,0.634), c(0.631,0.638,0.908,0.985,0.0191)); cor(c(0.368,0.797,0.697,0.117,0.513), c(0.817,0.371,0.0788,0.819,0.939)); cor(c(0.673,0.399,0.738,0.778,0.632), c(0.469,0.251,0.339,0.516,0.324))
[1] -0.2383802
[1] -0.8744735
[1] -0.1592421
[1] -0.3098914
[1] -0.7068544
[1] 0.7592879

方法2. 果然还是得group_by()

非常的快,我很喜欢!

> df1 <- tibble(geneid=paste("gene", 1:6, sep=""),
              a = runif(6),
              b = runif(6),
              c = runif(6),
              d = runif(6),
              e = runif(6)) %>%
  pivot_longer(-geneid,
               names_to="Samples",
               values_to="Values")
> df2 <- tibble(geneid=paste("gene", 1:6, sep=""),
              a = runif(6),
              b = runif(6),
              c = runif(6),
              d = runif(6),
              e = runif(6)) %>%
  pivot_longer(-geneid,
               names_to="Samples",
               values_to="Values")
> df1 %>%
  left_join(df2, by=c("geneid", "Samples"))
# A tibble: 30 × 4
   geneid Samples Values.x Values.y
   <chr>  <chr>      <dbl>    <dbl>
 1 gene1  a          0.670   0.370
 2 gene1  b          0.920   0.686
 3 gene1  c          0.791   0.0458
 4 gene1  d          0.750   0.293
 5 gene1  e          0.397   0.987
 6 gene2  a          0.233   0.840
 7 gene2  b          0.288   0.871
 8 gene2  c          0.640   0.805
 9 gene2  d          0.309   0.303
10 gene2  e          0.709   0.947
# … with 20 more rows
> df1 %>%
  left_join(df2, by=c("geneid", "Samples")) %>%
  group_by(geneid) %>%
  transmute(corV = cor(Values.x, Values.y)) %>%
  distinct()
# A tibble: 6 × 2
# Groups:   geneid [6]
  geneid    corV
  <chr>    <dbl>
1 gene1  -0.539
2 gene2   0.377
3 gene3  -0.0584
4 gene4   0.578
5 gene5  -0.668
6 gene6   0.340
> cor(c(0.670,0.920,0.791,0.750,0.397), c(0.370,0.686,0.0458,0.293,0.987)); cor(c(0.233,0.288,0.640,0.309,0.709), c(0.840,0.871,0.805,0.303,0.947)); cor(c(0.684,0.300,0.0482,0.558,0.873), c(0.741,0.264,0.535,0.624,0.235)); cor(c(0.124,0.990,0.779,0.163,0.691), c(0.630,0.934,0.588,0.362,0.428)); cor(c(0.785,0.501,0.170,0.434,0.149), c(0.301,0.118,0.511,0.454,0.522)); cor(c(0.194,0.109,0.225,0.518,0.306), c(0.637,0.342,0.628,0.570,0.483))
[1] -0.5396631
[1] 0.3766399
[1] -0.05769485
[1] 0.5792299
[1] -0.6678438
[1] 0.3404695
上一篇下一篇

猜你喜欢

热点阅读