生信星球培训第六十四期

学习小组day6笔记-钟能能

2020-06-10  本文已影响0人  熊熊趴下就是钟能能

R中的包

1.如何安装并使用R包(以dplyr包为例)

install.packages("dplyr")   #下载并安装dplyr包  注意:函数名是install.packages(末尾有s),括号中的包名一定要用英文双引号" "括起来
library(dplyr)  #加载dplyr包
help(package=dplyr) #查看dplyr包的使用帮助

2.dplyr包的使用示例

> ls("package:dplyr")       #查看dplyr包中包含的函数
  [1] "%>%"                   "across"                "add_count"            
  [4] "add_count_"            "add_row"               "add_rownames"         
  [7] "add_tally"             "add_tally_"            "all_equal"            
 [10] "all_of"                "all_vars"              "anti_join"            
 [13] "any_of"                "any_vars"              "arrange"              
 [16] "arrange_"              "arrange_all"           "arrange_at"           
 [19] "arrange_if"            "as.tbl"                "as_data_frame"        
 [22] "as_label"              "as_tibble"             "auto_copy"            
 [25] "band_instruments"      "band_instruments2"     "band_members"         
 [28] "bench_tbls"            "between"               "bind_cols"            
 [31] "bind_rows"             "c_across"              "case_when"            
 [34] "changes"               "check_dbplyr"          "coalesce"             
 [37] "collapse"              "collect"               "combine"              
 [40] "common_by"             "compare_tbls"          "compare_tbls2"        
 [43] "compute"               "contains"              "copy_to"              
 [46] "count"                 "count_"                "cumall"               
 [49] "cumany"                "cume_dist"             "cummean"              
 [52] "cur_column"            "cur_data"              "cur_group"            
 [55] "cur_group_id"          "cur_group_rows"        "current_vars"         
 [58] "data_frame"            "data_frame_"           "db_analyze"           
 [61] "db_begin"              "db_commit"             "db_create_index"      
 [64] "db_create_indexes"     "db_create_table"       "db_data_type"         
 [67] "db_desc"               "db_drop_table"         "db_explain"           
 [70] "db_has_table"          "db_insert_into"        "db_list_tables"       
 [73] "db_query_fields"       "db_query_rows"         "db_rollback"          
 [76] "db_save_query"         "db_write_table"        "dense_rank"           
 [79] "desc"                  "dim_desc"              "distinct"             
 [82] "distinct_"             "distinct_all"          "distinct_at"          
 [85] "distinct_if"           "distinct_prepare"      "do"                   
 [88] "do_"                   "dplyr_col_modify"      "dplyr_reconstruct"    
 [91] "dplyr_row_slice"       "ends_with"             "enexpr"               
 [94] "enexprs"               "enquo"                 "enquos"               
 [97] "ensym"                 "ensyms"                "eval_tbls"            
[100] "eval_tbls2"            "everything"            "explain"              
[103] "expr"                  "failwith"              "filter"               
[106] "filter_"               "filter_all"            "filter_at"            
[109] "filter_if"             "first"                 "frame_data"           
[112] "full_join"             "funs"                  "funs_"                
[115] "glimpse"               "group_by"              "group_by_"            
[118] "group_by_all"          "group_by_at"           "group_by_drop_default"
[121] "group_by_if"           "group_by_prepare"      "group_cols"           
[124] "group_data"            "group_indices"         "group_indices_"       
[127] "group_keys"            "group_map"             "group_modify"         
[130] "group_nest"            "group_rows"            "group_size"           
[133] "group_split"           "group_trim"            "group_vars"           
[136] "group_walk"            "grouped_df"            "groups"               
[139] "id"                    "ident"                 "if_else"              
[142] "inner_join"            "intersect"             "is.grouped_df"        
[145] "is.src"                "is.tbl"                "is_grouped_df"        
[148] "lag"                   "last"                  "last_col"             
[151] "lead"                  "left_join"             "location"             
[154] "lst"                   "lst_"                  "make_tbl"             
[157] "matches"               "min_rank"              "mutate"               
[160] "mutate_"               "mutate_all"            "mutate_at"            
[163] "mutate_each"           "mutate_each_"          "mutate_if"            
[166] "n"                     "n_distinct"            "n_groups"             
[169] "na_if"                 "near"                  "nest_by"              
[172] "nest_join"             "new_grouped_df"        "nth"                  
[175] "ntile"                 "num_range"             "one_of"               
[178] "order_by"              "percent_rank"          "progress_estimated"   
[181] "pull"                  "quo"                   "quo_name"             
[184] "quos"                  "recode"                "recode_factor"        
[187] "relocate"              "rename"                "rename_"              
[190] "rename_all"            "rename_at"             "rename_if"            
[193] "rename_vars"           "rename_vars_"          "rename_with"          
[196] "right_join"            "row_number"            "rows_delete"          
[199] "rows_insert"           "rows_patch"            "rows_update"          
[202] "rows_upsert"           "rowwise"               "same_src"             
[205] "sample_frac"           "sample_n"              "select"               
[208] "select_"               "select_all"            "select_at"            
[211] "select_if"             "select_var"            "select_vars"          
[214] "select_vars_"          "semi_join"             "setdiff"              
[217] "setequal"              "show_query"            "slice"                
[220] "slice_"                "slice_head"            "slice_max"            
[223] "slice_min"             "slice_sample"          "slice_tail"           
[226] "sql"                   "sql_escape_ident"      "sql_escape_string"    
[229] "sql_join"              "sql_select"            "sql_semi_join"        
[232] "sql_set_op"            "sql_subquery"          "sql_translate_env"    
[235] "src"                   "src_df"                "src_local"            
[238] "src_mysql"             "src_postgres"          "src_sqlite"           
[241] "src_tbls"              "starts_with"           "starwars"             
[244] "storms"                "summarise"             "summarise_"           
[247] "summarise_all"         "summarise_at"          "summarise_each"       
[250] "summarise_each_"       "summarise_if"          "summarize"            
[253] "summarize_"            "summarize_all"         "summarize_at"         
[256] "summarize_each"        "summarize_each_"       "summarize_if"         
[259] "sym"                   "syms"                  "tally"                
[262] "tally_"                "tbl"                   "tbl_df"               
[265] "tbl_nongroup_vars"     "tbl_ptype"             "tbl_sum"              
[268] "tbl_vars"              "tibble"                "top_frac"             
[271] "top_n"                 "transmute"             "transmute_"           
[274] "transmute_all"         "transmute_at"          "transmute_if"         
[277] "tribble"               "trunc_mat"             "type_sum"             
[280] "ungroup"               "union"                 "union_all"            
[283] "validate_grouped_df"   "vars"                  "with_groups"          
[286] "with_order"            "wrap_dbplyr_obj"  
> datamtcars <- mtcars[1:10,1:4]    #以R中自带的数据mtcars为例
> datamtcars <- data.frame(rnames=rownames(datamtcars),datamtcars)
> datamtcars
                             rnames  mpg cyl  disp  hp
Mazda RX4                 Mazda RX4 21.0   6 160.0 110
Mazda RX4 Wag         Mazda RX4 Wag 21.0   6 160.0 110
Datsun 710               Datsun 710 22.8   4 108.0  93
Hornet 4 Drive       Hornet 4 Drive 21.4   6 258.0 110
Hornet Sportabout Hornet Sportabout 18.7   8 360.0 175
Valiant                     Valiant 18.1   6 225.0 105
Duster 360               Duster 360 14.3   8 360.0 245
Merc 240D                 Merc 240D 24.4   4 146.7  62
Merc 230                   Merc 230 22.8   4 140.8  95
Merc 280                   Merc 280 19.2   6 167.6 123
> summarise(datamtcars,avg=mean(mpg))               # dplyr包summarise举例
    avg
1 20.37
> summarise(datamtcars,sum(mpg))
  sum(mpg)
1    203.7
> summarise(datamtcars,mean(mpg))
  mean(mpg)
1     20.37
> head(mtcars,20) %>% tail(10)    #管道操作 %>% (RStudio快捷键Ctrl + Shift + M)
                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
> iris %>% group_by(Species) %>% summarise(sum=sum(Petal.Width)) #管道操作示例2
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 3 x 2
  Species      sum
  <fct>      <dbl>
1 setosa      12.3
2 versicolor  66.3
3 virginica  101. 
> mutate(ex3,new=Sepal.Length+Petal.Length)  #mutate新增列(注:ex3在本文中未给出定义,从输出结果看应是iris第11~20行、行名重新编号后的10行子集)
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species new
1           5.4         3.7          1.5         0.2  setosa 6.9
2           4.8         3.4          1.6         0.2  setosa 6.4
3           4.8         3.0          1.4         0.1  setosa 6.2
4           4.3         3.0          1.1         0.1  setosa 5.4
5           5.8         4.0          1.2         0.2  setosa 7.0
6           5.7         4.4          1.5         0.4  setosa 7.2
7           5.4         3.9          1.3         0.4  setosa 6.7
8           5.1         3.5          1.4         0.3  setosa 6.5
9           5.7         3.8          1.7         0.3  setosa 7.4
10          5.1         3.8          1.5         0.3  setosa 6.6

更多dplyr函数的用法,可以通过R的帮助功能查看,例如 ?mutate 查看单个函数,或 help(package="dplyr") 查看整个包的帮助索引。

上一篇下一篇

猜你喜欢

热点阅读