快乐学习笔记

2018-10-03 本文已影响0人 liyin_d64b

Linux学习笔记

2018-10-3
ls #list
-l #长格式
-h #human readable
-rw------- #(第1位是文件类型，其后每3位一组；r 读，w 写，x 执行)
-a   #显示以.开头的隐藏文件
. #表示当前目录
.. #表示上一个目录
cd ~gcdong  #到gcdong的家目录去看看发现他安装了R，miniconda等
type #显示命令类型
date #时间管理
man #manual 查看命令手册；/keyword 搜索关键字，n 下一个，N 前一个，q 退出

马哥练习题：

查看echo是内部还是外部命令

type echo

echo 的作用？显示提示信息，用于批处理命令中需要给用户看提示的地方。比如前一条命令执行会花很长时间，通常会用 echo 显示一条信息，让用户知道这个时候比较慢，需要稍微等待一会。
如何换行

echo -e "hello \nworld"

横向tab

echo -e "hello\tworld"

纵向tab

echo -e "hello\vworld"

printf

printf "hello\n"
printf "hello"

练习题

在任意文件夹下面创建形如 1/2/3/4/5/6/7/8/9 格式的文件夹系列。

mkdir -p 1/2/3/4/5/6/7/8/9
-p means no error if existing, make parent directories as needed

在创建好的文件夹下面，比如我的是 /Users/jimmy/tmp/1/2/3/4/5/6/7/8/9 ，里面创建文本文件 me.txt

cd 1/2/3/4/5/6/7/8/9
pwd
touch me.txt

在文本文件 me.txt 里面输入内容:

vim me.txt
i    #按 i 进入插入模式，然后输入下面三行
Go to: http://www.biotrainee.com/
I love bioinfomatics.
And you ?
Esc  #按 Esc 回到普通模式
:w
:q
more me.txt

删除上面创建的文件夹 1/2/3/4/5/6/7/8/9 及文本文件 me.txt

pwd
cd -
rm -r 1

五、在任意文件夹下面创建 folder1~5这5个文件夹，然后每个文件夹下面继续创建 folder1~5这5个文件夹：

pwd
mkdir -p folder_{1..5}/folder_{1..5}
ls */

R for data science

> library(tidyverse)
> library(nycflights13)
> by_day <- group_by(flights, year, month, day)
> summarise(by_day, delay = mean(dep_delay, na.rm = TRUE))
# A tibble: 365 x 4
# Groups:   year, month [?]
    year month   day delay
   <int> <int> <int> <dbl>
 1  2013     1     1 11.5 
 2  2013     1     2 13.9 
 3  2013     1     3 11.0 
 4  2013     1     4  8.95
 5  2013     1     5  5.73
 6  2013     1     6  7.15
 7  2013     1     7  5.42
 8  2013     1     8  2.55
 9  2013     1     9  2.28
10  2013     1    10  2.84
# ... with 355 more rows
> by_dest <- group_by(flights, dest)
> delay <- summarise(by_dest,
+                    count=n(),
+                    dist = mean(distance, na.rm = TRUE),
+                    delay = mean(arr_delay, na.rm = TRUE)
+ )
> delay
# A tibble: 105 x 4
   dest  count  dist  delay
   <chr> <int> <dbl>  <dbl>
 1 ABQ     254 1826    4.38
 2 ACK     265  199    4.85
 3 ALB     439  143   14.4 
 4 ANC       8 3370   -2.5 
 5 ATL   17215  757.  11.3 
 6 AUS    2439 1514.   6.02
 7 AVL     275  584.   8.00
 8 BDL     443  116    7.05
 9 BGR     375  378    8.03
10 BHM     297  866.  16.9 
# ... with 95 more rows
> 
> myda <- read.table("1.txt",sep = "\t",header = T)
> a <- group_by(myda, Hugo_Symbol)
> a
# A tibble: 4,630 x 3
# Groups:   Hugo_Symbol [21]
   Hugo_Symbol Variant_Classification                              Tumor_Sample_Barcode        
   <fct>       <fct>                                               <fct>                       
 1 KEAP1       "3UTR\tTCGA-94-7557-01A-11D-2122-08\nSYNE1\t3UTR"   TCGA-85-7843-01A-11D-2122-08
 2 RYR2        "3UTR\tTCGA-56-8307-01A-11D-2293-08\nPDCD1\t3UTR"   TCGA-77-8009-01A-11D-2184-08
 3 FAM135B     "3UTR\tTCGA-37-3792-01A-01D-0983-08\nTTN\t3UTR"     TCGA-94-A5I6-01A-21D-A27K-08
 4 PIK3CA      "3UTR\tTCGA-63-A5MM-01A-11D-A26M-08\nKEAP1\t3UTR"   TCGA-56-8082-01A-11D-2244-08
 5 FAM135B     "3UTR\tTCGA-85-8352-01A-31D-2323-08\nMUC16\t3UTR"   TCGA-33-4533-01A-01D-1267-08
 6 CDKN2A      "3UTR\tTCGA-46-6025-01A-11D-1817-08\nCDKN2A\t3UTR"  TCGA-94-A5I4-01A-11D-A26M-08
 7 KMT2D       "3UTR\tTCGA-98-A53I-01A-31D-A25L-08\nFAM135B\t3UTR" TCGA-66-2782-01A-01D-1522-08
 8 KEAP1       "3UTR\tTCGA-43-2576-01A-01D-1522-08\nFAM135B\t3UTR" TCGA-43-5670-01A-21D-2122-08
 9 CDKN2A      "3UTR\tTCGA-85-8664-01A-11D-2395-08\nSYNE1\t3UTR"   TCGA-85-8071-01A-11D-2244-08
10 PDCD1       "3UTR\tTCGA-77-A5GH-01A-11D-A27K-08\nCSMD3\t3UTR"   TCGA-22-4593-01A-21D-1817-08
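# ... with 4,620 more rows
# 注：上面 Variant_Classification 一列里出现了 "\t" 和 "\n"，说明文件被错误解析：read.table 默认把单引号当作引号字符（quote = "\"'"），3'UTR 中的 ' 使相邻两行被合并成一个字段，后面的计数因此不准确。读入时加上 quote = "" 可以避免，例如 read.table("1.txt", sep = "\t", header = TRUE, quote = "")。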
> my <- summarise(a,count=n())
> my
# A tibble: 21 x 2
   Hugo_Symbol count
   <fct>       <int>
 1 CD274           1
 2 CDKN2A         83
 3 CSMD3         369
 4 FAM135B       186
 5 HLA-A          10
 6 KEAP1          60
 7 KMT2D         150
 8 LRP1B         274
 9 MUC16         453
10 NFE2L2         79
# ... with 11 more rows
> arrange(my,desc(count))
# A tibble: 21 x 2
   Hugo_Symbol count
   <fct>       <int>
 1 TTN          1212
 2 MUC16         453
 3 TP53          421
 4 CSMD3         369
 5 RYR2          316
 6 SYNE1         290
 7 LRP1B         274
 8 USH2A         268
 9 ZFHX4         255
10 FAM135B       186
# ... with 11 more rows
> 
> 
> 
> myda <- read.table("1.txt",sep = "\t",header = T) %>%
+   group_by(Hugo_Symbol) %>%
+   summarise(count=n()) %>%
+   arrange(desc(count))
> myda
# A tibble: 21 x 2
   Hugo_Symbol count
   <fct>       <int>
 1 TTN          1212
 2 MUC16         453
 3 TP53          421
 4 CSMD3         369
 5 RYR2          316
 6 SYNE1         290
 7 LRP1B         274
 8 USH2A         268
 9 ZFHX4         255
10 FAM135B       186
# ... with 11 more rows
> 
> delay
# A tibble: 105 x 4
   dest  count  dist  delay
   <chr> <int> <dbl>  <dbl>
 1 ABQ     254 1826    4.38
 2 ACK     265  199    4.85
 3 ALB     439  143   14.4 
 4 ANC       8 3370   -2.5 
 5 ATL   17215  757.  11.3 
 6 AUS    2439 1514.   6.02
 7 AVL     275  584.   8.00
 8 BDL     443  116    7.05
 9 BGR     375  378    8.03
10 BHM     297  866.  16.9 
# ... with 95 more rows
> 
> by_dest <- group_by(flights, dest)
> 
> delay <- summarise(by_dest,
+                    count = n(),
+                    dist = mean(distance, na.rm = TRUE),
+                    delay = mean(arr_delay, na.rm = TRUE)
+ )
> delay <- filter(delay, count > 20, dest != "HNL")
> delay
# A tibble: 96 x 4
   dest  count  dist delay
   <chr> <int> <dbl> <dbl>
 1 ABQ     254 1826   4.38
 2 ACK     265  199   4.85
 3 ALB     439  143  14.4 
 4 ATL   17215  757. 11.3 
 5 AUS    2439 1514.  6.02
 6 AVL     275  584.  8.00
 7 BDL     443  116   7.05
 8 BGR     375  378   8.03
 9 BHM     297  866. 16.9 
10 BNA    6333  758. 11.8 
# ... with 86 more rows
> 
> ggplot(delay,mapping = aes(x=dist,y=delay))+geom_point(aes(size=count),alpha=1/4)+geom_smooth(se=FALSE)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
> 
> 
> 
> 
> 
> 
> flights %>% 
+   group_by(year, month, day) %>% 
+   summarise(mean = mean(dep_delay))
# A tibble: 365 x 4
# Groups:   year, month [?]
    year month   day  mean
   <int> <int> <int> <dbl>
 1  2013     1     1    NA
 2  2013     1     2    NA
 3  2013     1     3    NA
 4  2013     1     4    NA
 5  2013     1     5    NA
 6  2013     1     6    NA
 7  2013     1     7    NA
 8  2013     1     8    NA
 9  2013     1     9    NA
10  2013     1    10    NA
# ... with 355 more rows
> flights %>% 
+   group_by(year, month, day) %>% 
+   summarise(mean = mean(dep_delay, na.rm = TRUE))
# A tibble: 365 x 4
# Groups:   year, month [?]
    year month   day  mean
   <int> <int> <int> <dbl>
 1  2013     1     1 11.5 
 2  2013     1     2 13.9 
 3  2013     1     3 11.0 
 4  2013     1     4  8.95
 5  2013     1     5  5.73
 6  2013     1     6  7.15
 7  2013     1     7  5.42
 8  2013     1     8  2.55
 9  2013     1     9  2.28
10  2013     1    10  2.84
# ... with 355 more rows
> 
> 
> not_cancelled <- flights %>% 
+   filter(!is.na(dep_delay), !is.na(arr_delay))
> not_cancelled %>% 
+   group_by(year, month, day) %>% 
+   summarise(mean = mean(dep_delay))
# A tibble: 365 x 4
# Groups:   year, month [?]
    year month   day  mean
   <int> <int> <int> <dbl>
 1  2013     1     1 11.4 
 2  2013     1     2 13.7 
 3  2013     1     3 10.9 
 4  2013     1     4  8.97
 5  2013     1     5  5.73
 6  2013     1     6  7.15
 7  2013     1     7  5.42
 8  2013     1     8  2.56
 9  2013     1     9  2.30
10  2013     1    10  2.84
# ... with 355 more rows
> 
> 
> 
> 
> delays <- not_cancelled %>% 
+   group_by(tailnum) %>% 
+   summarise(
+     delay = mean(arr_delay)
+   )
> delays
# A tibble: 4,037 x 2
   tailnum   delay
   <chr>     <dbl>
 1 D942DN   31.5  
 2 N0EGMQ    9.98 
 3 N10156   12.7  
 4 N102UW    2.94 
 5 N103US   -6.93 
 6 N104UW    1.80 
 7 N10575   20.7  
 8 N105UW   -0.267
 9 N107US   -5.73 
10 N108UW   -1.25 
# ... with 4,027 more rows
> 
> 
> ggplot(data = delays, mapping = aes(x = delay))+ geom_freqpoly(binwidth = 10) 
> 
> 
> 
> a <- arrange(delays, desc(delay))
> a
# A tibble: 4,037 x 2
   tailnum delay
   <chr>   <dbl>
 1 N844MH   320 
 2 N911DA   294 
 3 N922EV   276 
 4 N587NW   264 
 5 N851NW   219 
 6 N928DN   201 
 7 N7715E   188 
 8 N654UA   185 
 9 N665MQ   175.
10 N427SW   157 
# ... with 4,027 more rows
> 
> 
> delays <- not_cancelled %>% 
+   group_by(tailnum) %>% 
+   summarise(
+     delay = mean(arr_delay, na.rm = TRUE),
+     n = n()
+   )
> delays
# A tibble: 4,037 x 3
   tailnum   delay     n
   <chr>     <dbl> <int>
 1 D942DN   31.5       4
 2 N0EGMQ    9.98    352
 3 N10156   12.7     145
 4 N102UW    2.94     48
 5 N103US   -6.93     46
 6 N104UW    1.80     46
 7 N10575   20.7     269
 8 N105UW   -0.267    45
 9 N107US   -5.73     41
10 N108UW   -1.25     60
# ... with 4,027 more rows
> 
> 
> ggplot(data = delays, mapping = aes(x = delay, y = n)) + 
+   geom_point(alpha = 1/10)

快乐学习笔记

Linux学习笔记

R for data science

猜你喜欢

热点阅读