快乐学习笔记
2018-10-03 本文已影响0人
liyin_d64b
Linux学习笔记
2018-10-3
ls #list
-l #长格式
-h #human readable
-rw------- #权限位(第1位为文件类型,其后9位依次为属主/属组/其他用户的权限:r 读,w 写,x 执行)
-a #显示以.开头的隐藏文件
. #表示当前目录
.. #表示上一个目录
cd ~gcdong #到gcdong的家目录去看看发现他安装了R,miniconda等
type #显示命令类型
date #时间管理
man #manual 查看命令手册;/keyword 搜索关键词,n 下一个,N 前一个,q 退出
- 马哥练习题:
- 查看echo是内部还是外部命令
type echo
-
作用?显示提示信息,用于一些批处理命令中需要提示用户的地方。比如前一条命令执行会花很长时间,通常会用echo显示一条信息,让用户知道这一步比较慢,需要稍微等待一会。
-
如何换行
echo -e "hello \nworld"
- 横向tab
echo -e "hello\tworld" #\t 为横向制表符,需配合 -e 才会被解释
- 纵向tab,
echo -e "hello\vworld" #\v 为纵向制表符,需配合 -e 才会被解释
- printf
printf "hello\n"
printf "hello"
- 练习题
- 在任意文件夹下面创建形如
1/2/3/4/5/6/7/8/9
格式的文件夹系列。
mkdir -p 1/2/3/4/5/6/7/8/9
-p means no error if existing, make parent directories as needed
- 在创建好的文件夹下面,比如我的是
/Users/jimmy/tmp/1/2/3/4/5/6/7/8/9
,里面创建文本文件 me.txt
pwd
touch me.txt
- 在文本文件
me.txt
里面输入内容:
vim me.txt
Go to: http://www.biotrainee.com/
I love bioinfomatics.
And you ?
:w
:q
more me.txt
- 删除上面创建的文件夹 1/2/3/4/5/6/7/8/9 及文本文件 me.txt
pwd
cd -
rm -r 1
五、在任意文件夹下面创建 folder1~5这5个文件夹,然后每个文件夹下面继续创建 folder1~5这5个文件夹:
pwd
mkdir -p folder_{1..5}/folder_{1..5}
ls */
R for data science
> library(tidyverse)
> library(nycflights13)
> by_day <- group_by(flights, year, month, day)
> summarise(by_day, delay = mean(dep_delay, na.rm = TRUE))
# A tibble: 365 x 4
# Groups: year, month [?]
year month day delay
<int> <int> <int> <dbl>
1 2013 1 1 11.5
2 2013 1 2 13.9
3 2013 1 3 11.0
4 2013 1 4 8.95
5 2013 1 5 5.73
6 2013 1 6 7.15
7 2013 1 7 5.42
8 2013 1 8 2.55
9 2013 1 9 2.28
10 2013 1 10 2.84
# ... with 355 more rows
> by_dest <- group_by(flights, dest)
> delay <- summarise(by_dest,
+ count=n(),
+ dist = mean(distance, na.rm = TRUE),
+ delay = mean(arr_delay, na.rm = TRUE)
+ )
> delay
# A tibble: 105 x 4
dest count dist delay
<chr> <int> <dbl> <dbl>
1 ABQ 254 1826 4.38
2 ACK 265 199 4.85
3 ALB 439 143 14.4
4 ANC 8 3370 -2.5
5 ATL 17215 757. 11.3
6 AUS 2439 1514. 6.02
7 AVL 275 584. 8.00
8 BDL 443 116 7.05
9 BGR 375 378 8.03
10 BHM 297 866. 16.9
# ... with 95 more rows
>
> myda <- read.table("1.txt",sep = "\t",header = T)
> a <- group_by(myda, Hugo_Symbol)
> a
# A tibble: 4,630 x 3
# Groups: Hugo_Symbol [21]
Hugo_Symbol Variant_Classification Tumor_Sample_Barcode
<fct> <fct> <fct>
1 KEAP1 "3UTR\tTCGA-94-7557-01A-11D-2122-08\nSYNE1\t3UTR" TCGA-85-7843-01A-11D-2122-08
2 RYR2 "3UTR\tTCGA-56-8307-01A-11D-2293-08\nPDCD1\t3UTR" TCGA-77-8009-01A-11D-2184-08
3 FAM135B "3UTR\tTCGA-37-3792-01A-01D-0983-08\nTTN\t3UTR" TCGA-94-A5I6-01A-21D-A27K-08
4 PIK3CA "3UTR\tTCGA-63-A5MM-01A-11D-A26M-08\nKEAP1\t3UTR" TCGA-56-8082-01A-11D-2244-08
5 FAM135B "3UTR\tTCGA-85-8352-01A-31D-2323-08\nMUC16\t3UTR" TCGA-33-4533-01A-01D-1267-08
6 CDKN2A "3UTR\tTCGA-46-6025-01A-11D-1817-08\nCDKN2A\t3UTR" TCGA-94-A5I4-01A-11D-A26M-08
7 KMT2D "3UTR\tTCGA-98-A53I-01A-31D-A25L-08\nFAM135B\t3UTR" TCGA-66-2782-01A-01D-1522-08
8 KEAP1 "3UTR\tTCGA-43-2576-01A-01D-1522-08\nFAM135B\t3UTR" TCGA-43-5670-01A-21D-2122-08
9 CDKN2A "3UTR\tTCGA-85-8664-01A-11D-2395-08\nSYNE1\t3UTR" TCGA-85-8071-01A-11D-2244-08
10 PDCD1 "3UTR\tTCGA-77-A5GH-01A-11D-A27K-08\nCSMD3\t3UTR" TCGA-22-4593-01A-21D-1817-08
# ... with 4,620 more rows
> my <- summarise(a,count=n())
> my
# A tibble: 21 x 2
Hugo_Symbol count
<fct> <int>
1 CD274 1
2 CDKN2A 83
3 CSMD3 369
4 FAM135B 186
5 HLA-A 10
6 KEAP1 60
7 KMT2D 150
8 LRP1B 274
9 MUC16 453
10 NFE2L2 79
# ... with 11 more rows
> arrange(my,desc(count))
# A tibble: 21 x 2
Hugo_Symbol count
<fct> <int>
1 TTN 1212
2 MUC16 453
3 TP53 421
4 CSMD3 369
5 RYR2 316
6 SYNE1 290
7 LRP1B 274
8 USH2A 268
9 ZFHX4 255
10 FAM135B 186
# ... with 11 more rows
>
>
>
> myda <- read.table("1.txt",sep = "\t",header = T) %>%
+ group_by(Hugo_Symbol) %>%
+ summarise(count=n()) %>%
+ arrange(desc(count))
> myda
# A tibble: 21 x 2
Hugo_Symbol count
<fct> <int>
1 TTN 1212
2 MUC16 453
3 TP53 421
4 CSMD3 369
5 RYR2 316
6 SYNE1 290
7 LRP1B 274
8 USH2A 268
9 ZFHX4 255
10 FAM135B 186
# ... with 11 more rows
>
> delay
# A tibble: 105 x 4
dest count dist delay
<chr> <int> <dbl> <dbl>
1 ABQ 254 1826 4.38
2 ACK 265 199 4.85
3 ALB 439 143 14.4
4 ANC 8 3370 -2.5
5 ATL 17215 757. 11.3
6 AUS 2439 1514. 6.02
7 AVL 275 584. 8.00
8 BDL 443 116 7.05
9 BGR 375 378 8.03
10 BHM 297 866. 16.9
# ... with 95 more rows
>
> by_dest <- group_by(flights, dest)
>
> delay <- summarise(by_dest,
+ count = n(),
+ dist = mean(distance, na.rm = TRUE),
+ delay = mean(arr_delay, na.rm = TRUE)
+ )
> delay <- filter(delay, count > 20, dest != "HNL")
> delay
# A tibble: 96 x 4
dest count dist delay
<chr> <int> <dbl> <dbl>
1 ABQ 254 1826 4.38
2 ACK 265 199 4.85
3 ALB 439 143 14.4
4 ATL 17215 757. 11.3
5 AUS 2439 1514. 6.02
6 AVL 275 584. 8.00
7 BDL 443 116 7.05
8 BGR 375 378 8.03
9 BHM 297 866. 16.9
10 BNA 6333 758. 11.8
# ... with 86 more rows
>
> ggplot(delay,mapping = aes(x=dist,y=delay))+geom_point(aes(size=count),alpha=1/4)+geom_smooth(se=FALSE)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
>
>
>
>
>
>
> flights %>%
+ group_by(year, month, day) %>%
+ summarise(mean = mean(dep_delay))
# A tibble: 365 x 4
# Groups: year, month [?]
year month day mean
<int> <int> <int> <dbl>
1 2013 1 1 NA
2 2013 1 2 NA
3 2013 1 3 NA
4 2013 1 4 NA
5 2013 1 5 NA
6 2013 1 6 NA
7 2013 1 7 NA
8 2013 1 8 NA
9 2013 1 9 NA
10 2013 1 10 NA
# ... with 355 more rows
> flights %>%
+ group_by(year, month, day) %>%
+ summarise(mean = mean(dep_delay, na.rm = TRUE))
# A tibble: 365 x 4
# Groups: year, month [?]
year month day mean
<int> <int> <int> <dbl>
1 2013 1 1 11.5
2 2013 1 2 13.9
3 2013 1 3 11.0
4 2013 1 4 8.95
5 2013 1 5 5.73
6 2013 1 6 7.15
7 2013 1 7 5.42
8 2013 1 8 2.55
9 2013 1 9 2.28
10 2013 1 10 2.84
# ... with 355 more rows
>
>
> not_cancelled <- flights %>%
+ filter(!is.na(dep_delay), !is.na(arr_delay))
> not_cancelled %>%
+ group_by(year, month, day) %>%
+ summarise(mean = mean(dep_delay))
# A tibble: 365 x 4
# Groups: year, month [?]
year month day mean
<int> <int> <int> <dbl>
1 2013 1 1 11.4
2 2013 1 2 13.7
3 2013 1 3 10.9
4 2013 1 4 8.97
5 2013 1 5 5.73
6 2013 1 6 7.15
7 2013 1 7 5.42
8 2013 1 8 2.56
9 2013 1 9 2.30
10 2013 1 10 2.84
# ... with 355 more rows
>
>
>
>
> delays <- not_cancelled %>%
+ group_by(tailnum) %>%
+ summarise(
+ delay = mean(arr_delay)
+ )
> delays
# A tibble: 4,037 x 2
tailnum delay
<chr> <dbl>
1 D942DN 31.5
2 N0EGMQ 9.98
3 N10156 12.7
4 N102UW 2.94
5 N103US -6.93
6 N104UW 1.80
7 N10575 20.7
8 N105UW -0.267
9 N107US -5.73
10 N108UW -1.25
# ... with 4,027 more rows
>
>
> ggplot(data = delays, mapping = aes(x = delay))+ geom_freqpoly(binwidth = 10)
>
>
>
> a <- arrange(delays, desc(delay))
> a
# A tibble: 4,037 x 2
tailnum delay
<chr> <dbl>
1 N844MH 320
2 N911DA 294
3 N922EV 276
4 N587NW 264
5 N851NW 219
6 N928DN 201
7 N7715E 188
8 N654UA 185
9 N665MQ 175.
10 N427SW 157
# ... with 4,027 more rows
>
>
> delays <- not_cancelled %>%
+ group_by(tailnum) %>%
+ summarise(
+ delay = mean(arr_delay, na.rm = TRUE),
+ n = n()
+ )
> delays
# A tibble: 4,037 x 3
tailnum delay n
<chr> <dbl> <int>
1 D942DN 31.5 4
2 N0EGMQ 9.98 352
3 N10156 12.7 145
4 N102UW 2.94 48
5 N103US -6.93 46
6 N104UW 1.80 46
7 N10575 20.7 269
8 N105UW -0.267 45
9 N107US -5.73 41
10 N108UW -1.25 60
# ... with 4,027 more rows
>
>
> ggplot(data = delays, mapping = aes(x = delay, y = n)) +
+ geom_point(alpha = 1/10)