生信技能树 R语言 初级作业题目(上)

2019-06-15  本文已影响0人  Ashu

之前一直没有系统地学习过R语言。5月26日去西安参加了生信技能树的R语言培训课,下面是Jimmy大神布置的作业题。像我这样的纯小白初学者,完全负基础,学了又忘,忘了又学,只能继续搬砖,一点点去理解、巩固基础。这里花了不少时间整理出一个详细一点的版本,方便和我一样的小白理解:

# Character vector holding a single string.
a1 <- c("good morning")
a1
# Numeric vector.
a2 <- c(1, 5, 8, 16, 21, 25)
a2
# Character vector with six elements.
a3 <- c("a", "b", "c", "d", "e", "f")
a3
# Mixing strings and numbers: c() coerces every element to character,
# so class(a4) is "character".
a4 <- c("a", "b", "c", 1, 2, 3)
a4
# Logical vector. Spelled with TRUE/FALSE because T and F are ordinary
# variables that can be reassigned, while TRUE/FALSE are reserved words.
a5 <- c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE)
a5
# Complex vector.
a6 <- c(1+0i)
a6
#run:
> a1 <- c("good morning")#字符串向量
> a1
[1] "good morning"
> a2 <- c(1,5,8,16,21,25)#数值型向量
> a2
[1]  1  5  8 16 21 25
> a3 <- c("a","b","c","d","e","f")#字符型向量
> a3
[1] "a" "b" "c" "d" "e" "f"
> a4 <- c("a","b","c",1,2,3)#数值和字符混合,但class默认是字符型
> a4
[1] "a" "b" "c" "1" "2" "3"
> a5 <- c(T,F,T,T,F,F)#逻辑值向量
> a5
[1]  TRUE FALSE  TRUE  TRUE FALSE FALSE
> a6 <- c(1+0i)#复数向量
> a6
[1] 1+0i
# Build a 5 x 4 integer matrix from 1..20; values are filled column by column
# (the default for matrix()).
y <- matrix(seq_len(20), nrow = 5, ncol = 4)
y
#run
> y <- matrix(1:20,nrow=5,ncol=4)
> y
     [,1] [,2] [,3] [,4]
[1,]    1    6   11   16
[2,]    2    7   12   17
[3,]    3    8   13   18
[4,]    4    9   14   19
[5,]    5   10   15   20
# Labels for each of the three dimensions of the array.
dim1 <- c("A1", "A2")
dim2 <- c("B1", "B2", "B3")
dim3 <- c("C1", "C2", "C3", "C4")
# A 2 x 3 x 4 array filled with 1..24; dimnames attaches the labels above
# to the first, second and third dimension respectively.
z <- array(
  1:24,
  dim = c(2, 3, 4),
  dimnames = list(dim1, dim2, dim3)
)
z
#run
> dim1 <- c("A1","A2")
> dim2 <- c("B1","B2","B3")
> dim3 <- c("C1","C2","C3","C4")
> z <- array(1:24,c(2,3,4),dimnames = list(dim1,dim2,dim3))
> z
, , C1

   B1 B2 B3
A1  1  3  5
A2  2  4  6

, , C2

   B1 B2 B3
A1  7  9 11
A2  8 10 12

, , C3

   B1 B2 B3
A1 13 15 17
A2 14 16 18

, , C4

   B1 B2 B3
A1 19 21 23
A2 20 22 24

数组是矩阵的一个自然推广。

# One vector per column; all four have the same length (4 patients).
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type1", "Type2", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
# Combine the vectors into a data frame; column names are spelled out
# explicitly and match the variable names.
patientdata <- data.frame(
  patientID = patientID,
  age = age,
  diabetes = diabetes,
  status = status
)
patientdata
#run
> patientID <- c(1,2,3,4)
> age <- c(25,34,28,52)
> diabetes <- c("Type1","Type1","Type2","Type1")
> status <- c("Poor","Improved","Excellent","Poor")
> patientdata <- data.frame(patientID,age,diabetes,status)
> patientdata
  patientID age diabetes    status
1         1  25    Type1      Poor
2         2  34    Type1  Improved
3         3  28    Type2 Excellent
4         4  52    Type1      Poor

数据框不同的列可包含数值型,字符型的数据

# Four objects of different kinds: a string, a numeric vector,
# a 5 x 2 matrix and a character vector.
g <- "My First List"
h <- c(25, 26, 18, 39)
j <- matrix(1:10, nrow = 5)
k <- c("one", "two", "three")
# Only the first two components are named; the matrix and the character
# vector are stored unnamed and are reached by position ([[3]], [[4]]).
mylist <- list(
  title = g,
  ages = h,
  j,
  k
)
mylist
#run
> g <- "My First List"
> h <- c(25,26,18,39)
> j <- matrix(1:10,nrow = 5)
> k <- c("one","two","three")
> mylist <- list(title=g,ages=h,j,k)
> mylist
$title
[1] "My First List"

$ages
[1] 25 26 18 39

[[3]]
     [,1] [,2]
[1,]    1    6
[2,]    2    7
[3,]    3    8
[4,]    4    9
[5,]    5   10

[[4]]
[1] "one"   "two"   "three"

列表可包含几个向量,矩阵,数据框,甚至组合的列表。

# One vector per column; all six have the same length (4 patients).
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type1", "Type2", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
gender <- c("male", "female", "female", "male")
# Must be named `income` (not `incomes`): data.frame() below refers to
# `income`, and a mismatched name fails with "object 'income' not found".
income <- c("8k", "12k", "4.5k", "7k")
patientdata <- data.frame(patientID, age, diabetes, status, gender, income)
patientdata
# Rows 1 and 3, all columns.
patientdata[c(1, 3), ]
# All rows, columns 4 (status) and 6 (income).
patientdata[, c(4, 6)]
# Rows 1 and 3 of columns 4 and 6.
patientdata[c(1, 3), c(4, 6)]
#run
> patientID <- c(1,2,3,4)
> age <- c(25,34,28,52)
> diabetes <- c("Type1","Type1","Type2","Type1")
> status <- c("Poor","Improved","Excellent","Poor")
> gender <- c("male","female","female","male")
> income <- c("8k","12k","4.5k","7k")
> patientdata <- data.frame(patientID,age,diabetes,status,gender,income)
> patientdata
  patientID age diabetes    status gender income
1         1  25    Type1      Poor   male     8k
2         2  34    Type1  Improved female    12k
3         3  28    Type2 Excellent female   4.5k
4         4  52    Type1      Poor   male     7k
> patientdata[c(1,3),]
  patientID age diabetes    status gender income
1         1  25    Type1      Poor   male     8k
3         3  28    Type2 Excellent female   4.5k
> patientdata[,c(4,6)]
     status income
1      Poor     8k
2  Improved    12k
3 Excellent   4.5k
4      Poor     7k
> patientdata[c(1,3),c(4,6)]
     status income
1      Poor     8k
3 Excellent   4.5k
# List the available built-in datasets.
data()
# rivers: lengths of the major rivers of North America.
rivers
# First and last six values (six is the default, written out here).
head(rivers, n = 6L)
tail(rivers, n = 6L)
# Number of elements in rivers.
length(rivers)
# Compact view of the object's structure.
str(rivers)
# Descriptive statistics: minimum, quartiles, median, mean and maximum.
summary(rivers)
#run
> head(rivers)
[1] 735 320 325 392 524 450
> tail(rivers)
[1]  500  720  270  430  671 1770
> length(rivers)#rivers有多少对象元素
[1] 141
> str(rivers)#查看河流的结构
 num [1:141] 735 320 325 392 524 ...
> summary(rivers)#获取描述性统计量(最小值/最大值/四分位数/数值型变量/因子向量/逻辑值向量)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  135.0   310.0   425.0   591.2   680.0  3710.0 

"head"和"tail"一般默认读前6行和后6行。

# Read the tab-separated SRA run table directly from the course URL.
# header = TRUE takes column names from the first line; fill = TRUE pads
# rows that have fewer fields than the others instead of raising an error.
# TRUE is spelled out (rather than T) because T is a reassignable variable.
SraRunTable <- read.table(
  "http://www.bio-info-trainee.com/tmp/5years/SraRunTable.txt",
  fill = TRUE,
  header = TRUE,
  sep = "\t"
)
# Number of rows and columns.
dim(SraRunTable)
# Type of the column-name vector (not of the table's contents).
class(colnames(SraRunTable))
#run
> SraRunTable <- read.table("http://www.bio-info-trainee.com/tmp/5years/SraRunTable.txt",fill=TRUE,header = T,sep = "\t")
> dim(SraRunTable)
[1] 768  31
> class(colnames(SraRunTable))
[1] "character"

768行,31列;列名(colnames)的类型为字符型(character)。

# Load the sample annotation table from sample.csv in the working directory.
# NOTE: the variable name `sample` is also the name of base::sample();
# calls like sample(x) still find the function, but the name is easy to confuse.
sample <- read.csv(file = "sample.csv")
# Show the column names of the annotation table.
colnames(sample)
#run
> sample <-read.csv("sample.csv")
> colnames(sample)
 [1] "Accession"           "Title"               "Sample.Type"         "Taxonomy"           
 [5] "Channels"            "Platform"            "Series"              "Supplementary.Types"
 [9] "Supplementary.Links" "SRA.Accession"       "Contact"             "Release.Date" 
# Read the tab-separated SRA run table from the course URL
# (header row present; short rows are padded via fill = TRUE).
SraRunTable <- read.table(
  "http://www.bio-info-trainee.com/tmp/5years/SraRunTable.txt",
  fill = TRUE,
  header = TRUE,
  sep = "\t"
)
# Load the sample annotation table from the working directory.
sample <- read.csv("sample.csv")
# Join the two tables: rows are matched where SraRunTable$Sample_Name
# equals sample$Accession. The key appears once in the result.
m <- merge(SraRunTable, sample, by.x = "Sample_Name", by.y = "Accession")
# Inspect the structure of the merged data frame.
str(m)
#run
> str(m)
'data.frame':   768 obs. of  42 variables

合并后的数据框有768个观测(行),42个变量(列)。

课程分享

生信技能树全球公益巡讲

https://mp.weixin.qq.com/s/E9ykuIbc-2Ja9HOY0bn_6g

B站公益74小时生信工程师教学视频合辑

https://mp.weixin.qq.com/s/IyFK7l_WBAiUgqQi8O7Hxw

招学徒:

https://mp.weixin.qq.com/s/KgbilzXnFjbKKunuw7NVfw

上一篇 下一篇

猜你喜欢

热点阅读