R,笔记01
2018-12-15 本文已影响0人
按着易得
设置随机数的种子值
# Fix the RNG seed so the draw below is repeatable.
# NOTE(review): the exact numbers drawn can differ between R versions (the
# default sampling algorithm changed in R 3.6.0) -- confirm before relying on
# any specific value or position mentioned in these notes.
set.seed(811)
# Draw 10 distinct integers from 1..100 (sampling without replacement).
f <- sample(1:100, size = 10)
sort(f)            # the values of f in ascending order
order(f)           # the indices into f that would put it in ascending order
f[order(f)]        # indexing by order(f) reproduces sort(f)
which(f == 28)     # index of the element equal to 28 (integer(0) if absent)
f[which(f == 28)]  # the element stored at that index, i.e. 28 when present
grep用法
# Small character vector used to compare pattern matching with exact matching.
d <- c("TP53", "ERBB2", "BRCA1")
# grep() returns the indices of the elements that match the pattern.
grep(pattern = "ERBB2", x = d)
# grepl() returns one TRUE/FALSE per element: does the pattern match it?
# This is a regular-expression match, not an equality test.
grepl(pattern = "ERBB2", x = d)
# Logical indexing keeps only the elements where the pattern matched.
d[grepl(pattern = "ERBB2", x = d)]
# %in% tests each element of d for exact equality with "ERBB2" (TRUE/FALSE).
d %in% "ERBB2"
# Keep only the elements of d that are exactly "ERBB2".
d[d %in% "ERBB2"]
载入数据
# Remove every object from the current environment.
# NOTE(review): this wipes the whole workspace; avoid it in shared scripts.
rm(list = ls())
# Load the saved objects from a file given by a relative path.
load(file = "01_TCGA_BRCA_exprSet.Rdata")
# Same file loaded through an absolute path; the leading path component here
# appears to be a placeholder to be replaced with the real directory.
load(file = "绝对路径/01_TCGA_BRCA_exprSet.Rdata")
###################
# Other functions for reading tabular text data, listed as reminders only;
# they need input arguments (e.g. a file) to actually run.
read.csv()
read.table()
## Clicking a data file in the RStudio Files pane and choosing to load it
## also works.
创建数据框
# Three parallel character vectors; element i of each belongs to the same row.
cancerID <- c("HCC", "NSCLC", "BC")
targetgene <- c("VEGFR", "EGFR", "HER2")
targetdrug <- c("sorafenib", "gefitinib", "Herceptin")
# Combine the vectors column-wise; each column keeps its vector's name.
cancer <- data.frame(
  cancerID = cancerID,
  targetgene = targetgene,
  targetdrug = targetdrug
)
cancer
class(cancer)  # class() reports the class of an object; used very often
################
## str(): prints a compact description of an object's structure
## - content types: num, chr, int, ...
## - data frame layout: how many rows and columns, etc.
subset用法
# Preview the first rows of `test`; the printed output is pasted below.
# NOTE(review): `test` is not created anywhere in these notes -- presumably it
# comes from the loaded .Rdata file; confirm.
head(test)
subgroup sample OR4F5 SAMD11 NOC2L KLHL17 PLEKHN1 PERM1 HES4 ISG15
TCGA-AC-A3OD-01B-06R-A22O-07 LumA Tumor 6.169828 7.234873 10.74941 8.489675 8.043983 7.645314 6.654734 9.276203
TCGA-AR-A251-01A-12R-A169-07 Basal Tumor 6.169828 7.790335 12.30692 9.080771 8.805406 7.891789 8.424512 10.088208
TCGA-BH-A0B5-11A-23R-A12P-07 Normal Normal 6.169828 7.004001 11.95045 8.042766 6.505176 6.505176 7.589666 9.197538
TCGA-E2-A1IK-01A-11R-A144-07 LumA Tumor 6.169828 7.176634 12.27498 8.748516 8.213874 7.764995 8.244910 13.769407
TCGA-E2-A15C-01A-31R-A12D-07 LumA Tumor 6.169828 9.627965 12.16211 8.791762 9.088827 8.465383 8.061344 10.174720
TCGA-B6-A2IU-01A-32R-A18M-07 LumA Tumor 6.169828 7.332757 12.15743 9.090721 8.61255
# subset(x, subset, select): the first argument is the data, the second is a
# logical condition that picks rows, the third picks the columns to keep.
# It returns the part of a vector, matrix or data frame meeting the condition.
# Here: operate on `test`, keep only rows whose subgroup is "Normal", and
# restrict the result to columns 1 through 5.
subset(test, subgroup == "Normal", select = 1:5)
subgroup sample OR4F5 SAMD11 NOC2L
TCGA-BH-A0B5-11A-23R-A12P-07 Normal Normal 6.169828 7.004001 11.95045
TCGA-BH-A209-11A-42R-A157-07 Normal Normal 6.169828 7.365634 11.15970
strsplit 和 paste用法
> load(file = "01_TCGA_BRCA_exprSet.Rdata")
> rownames(exprSet)[1]
[1] "TCGA-AC-A3OD-01B-06R-A22O-07"
> strsplit(rownames(exprSet)[1],split = "")
[[1]]
[1] "T" "C" "G" "A" "-" "A" "C" "-" "A" "3" "O" "D" "-" "0" "1" "B" "-" "0" "6" "R" "-" "A" "2" "2" "O" "-" "0" "7"
> class(strsplit(rownames(exprSet)[1],split = ""))
[1] "list"
> unlist(strsplit(rownames(exprSet)[1],split = ""))
[1] "T" "C" "G" "A" "-" "A" "C" "-" "A" "3" "O" "D" "-" "0" "1" "B" "-" "0" "6" "R" "-" "A" "2" "2" "O" "-" "0" "7"
> class(unlist(strsplit(rownames(exprSet)[1],split = "")))
[1] "character"
> # 我想把这个样本ID(行名)的第14、15个字符取出来
> unlist(strsplit(rownames(exprSet)[1],split = ""))[14:15]
[1] "0" "1"
> ## 拆分代码理解rownames(exprSet)[1]为获取exprSet的第一行行名
> ## 加上strsplit,就是分割行名
> ## unlist是把strsplit返回的list展开成字符向量(chr),再用[14:15]取出第14、15号元素
>
> # 这两个字符能不能合在一起?
> paste(unlist(strsplit(rownames(exprSet)[1],split = ""))[14:15],
+ collapse = "")
[1] "01"
> ##请把前四个字符粘在一起
> paste(unlist(strsplit(rownames(exprSet)[1],split = ""))[1:4],
+ collapse = "")
[1] "TCGA"
> # 而paste0直接把东西粘在一起
> paste0("A","B")
[1] "AB"
> ## 实际上,有一个专门的函数就是用来截取字符串的
> substring(rownames(exprSet)[1],14,15)
[1] "01"