学习小组day6笔记-钟能能
2020-06-10 本文已影响0人
熊熊趴下就是钟能能
R中的包
1.如何安装并使用R包(以dplyr包为例)
install.packages("dplyr") #下载dplyr包 注意:括号中的包名一定要用英文引号 " " 括起来
library(dplyr) #加载dplyr包
help(package=dplyr) #查看dplyr包的使用帮助
2.dplyr包的使用示例
> ls("package:dplyr") #查看dplyr包中包含的函数
[1] "%>%" "across" "add_count"
[4] "add_count_" "add_row" "add_rownames"
[7] "add_tally" "add_tally_" "all_equal"
[10] "all_of" "all_vars" "anti_join"
[13] "any_of" "any_vars" "arrange"
[16] "arrange_" "arrange_all" "arrange_at"
[19] "arrange_if" "as.tbl" "as_data_frame"
[22] "as_label" "as_tibble" "auto_copy"
[25] "band_instruments" "band_instruments2" "band_members"
[28] "bench_tbls" "between" "bind_cols"
[31] "bind_rows" "c_across" "case_when"
[34] "changes" "check_dbplyr" "coalesce"
[37] "collapse" "collect" "combine"
[40] "common_by" "compare_tbls" "compare_tbls2"
[43] "compute" "contains" "copy_to"
[46] "count" "count_" "cumall"
[49] "cumany" "cume_dist" "cummean"
[52] "cur_column" "cur_data" "cur_group"
[55] "cur_group_id" "cur_group_rows" "current_vars"
[58] "data_frame" "data_frame_" "db_analyze"
[61] "db_begin" "db_commit" "db_create_index"
[64] "db_create_indexes" "db_create_table" "db_data_type"
[67] "db_desc" "db_drop_table" "db_explain"
[70] "db_has_table" "db_insert_into" "db_list_tables"
[73] "db_query_fields" "db_query_rows" "db_rollback"
[76] "db_save_query" "db_write_table" "dense_rank"
[79] "desc" "dim_desc" "distinct"
[82] "distinct_" "distinct_all" "distinct_at"
[85] "distinct_if" "distinct_prepare" "do"
[88] "do_" "dplyr_col_modify" "dplyr_reconstruct"
[91] "dplyr_row_slice" "ends_with" "enexpr"
[94] "enexprs" "enquo" "enquos"
[97] "ensym" "ensyms" "eval_tbls"
[100] "eval_tbls2" "everything" "explain"
[103] "expr" "failwith" "filter"
[106] "filter_" "filter_all" "filter_at"
[109] "filter_if" "first" "frame_data"
[112] "full_join" "funs" "funs_"
[115] "glimpse" "group_by" "group_by_"
[118] "group_by_all" "group_by_at" "group_by_drop_default"
[121] "group_by_if" "group_by_prepare" "group_cols"
[124] "group_data" "group_indices" "group_indices_"
[127] "group_keys" "group_map" "group_modify"
[130] "group_nest" "group_rows" "group_size"
[133] "group_split" "group_trim" "group_vars"
[136] "group_walk" "grouped_df" "groups"
[139] "id" "ident" "if_else"
[142] "inner_join" "intersect" "is.grouped_df"
[145] "is.src" "is.tbl" "is_grouped_df"
[148] "lag" "last" "last_col"
[151] "lead" "left_join" "location"
[154] "lst" "lst_" "make_tbl"
[157] "matches" "min_rank" "mutate"
[160] "mutate_" "mutate_all" "mutate_at"
[163] "mutate_each" "mutate_each_" "mutate_if"
[166] "n" "n_distinct" "n_groups"
[169] "na_if" "near" "nest_by"
[172] "nest_join" "new_grouped_df" "nth"
[175] "ntile" "num_range" "one_of"
[178] "order_by" "percent_rank" "progress_estimated"
[181] "pull" "quo" "quo_name"
[184] "quos" "recode" "recode_factor"
[187] "relocate" "rename" "rename_"
[190] "rename_all" "rename_at" "rename_if"
[193] "rename_vars" "rename_vars_" "rename_with"
[196] "right_join" "row_number" "rows_delete"
[199] "rows_insert" "rows_patch" "rows_update"
[202] "rows_upsert" "rowwise" "same_src"
[205] "sample_frac" "sample_n" "select"
[208] "select_" "select_all" "select_at"
[211] "select_if" "select_var" "select_vars"
[214] "select_vars_" "semi_join" "setdiff"
[217] "setequal" "show_query" "slice"
[220] "slice_" "slice_head" "slice_max"
[223] "slice_min" "slice_sample" "slice_tail"
[226] "sql" "sql_escape_ident" "sql_escape_string"
[229] "sql_join" "sql_select" "sql_semi_join"
[232] "sql_set_op" "sql_subquery" "sql_translate_env"
[235] "src" "src_df" "src_local"
[238] "src_mysql" "src_postgres" "src_sqlite"
[241] "src_tbls" "starts_with" "starwars"
[244] "storms" "summarise" "summarise_"
[247] "summarise_all" "summarise_at" "summarise_each"
[250] "summarise_each_" "summarise_if" "summarize"
[253] "summarize_" "summarize_all" "summarize_at"
[256] "summarize_each" "summarize_each_" "summarize_if"
[259] "sym" "syms" "tally"
[262] "tally_" "tbl" "tbl_df"
[265] "tbl_nongroup_vars" "tbl_ptype" "tbl_sum"
[268] "tbl_vars" "tibble" "top_frac"
[271] "top_n" "transmute" "transmute_"
[274] "transmute_all" "transmute_at" "transmute_if"
[277] "tribble" "trunc_mat" "type_sum"
[280] "ungroup" "union" "union_all"
[283] "validate_grouped_df" "vars" "with_groups"
[286] "with_order" "wrap_dbplyr_obj"
> datamtcars <- mtcars[1:10,1:4] #以R中自带的数据mtcars为例
> datamtcars <- data.frame(rnames=rownames(datamtcars),datamtcars)
> datamtcars
rnames mpg cyl disp hp
Mazda RX4 Mazda RX4 21.0 6 160.0 110
Mazda RX4 Wag Mazda RX4 Wag 21.0 6 160.0 110
Datsun 710 Datsun 710 22.8 4 108.0 93
Hornet 4 Drive Hornet 4 Drive 21.4 6 258.0 110
Hornet Sportabout Hornet Sportabout 18.7 8 360.0 175
Valiant Valiant 18.1 6 225.0 105
Duster 360 Duster 360 14.3 8 360.0 245
Merc 240D Merc 240D 24.4 4 146.7 62
Merc 230 Merc 230 22.8 4 140.8 95
Merc 280 Merc 280 19.2 6 167.6 123
> summarise(datamtcars,avg=mean(mpg)) # dplyr包summarise举例
avg
1 20.37
> summarise(datamtcars,sum(mpg))
sum(mpg)
1 203.7
> summarise(datamtcars,mean(mpg))
mean(mpg)
1 20.37
> head(mtcars,20) %>% tail(10) #管道操作 %>% (快捷键Ctrl + Shift + M)
mpg cyl disp hp drat wt qsec vs am gear carb
Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
> iris %>% group_by(Species) %>% summarise(sum=sum(Petal.Width)) #管道操作示例2
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 3 x 2
Species sum
<fct> <dbl>
1 setosa 12.3
2 versicolor 66.3
3 virginica 101.
> mutate(ex3,new=Sepal.Length+Petal.Length) #mutate新增列(注:ex3在本笔记中未给出定义,从输出看应是取自iris第11-20行的10行数据)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species new
1 5.4 3.7 1.5 0.2 setosa 6.9
2 4.8 3.4 1.6 0.2 setosa 6.4
3 4.8 3.0 1.4 0.1 setosa 6.2
4 4.3 3.0 1.1 0.1 setosa 5.4
5 5.8 4.0 1.2 0.2 setosa 7.0
6 5.7 4.4 1.5 0.4 setosa 7.2
7 5.4 3.9 1.3 0.4 setosa 6.7
8 5.1 3.5 1.4 0.3 setosa 6.5
9 5.7 3.8 1.7 0.3 setosa 7.4
10 5.1 3.8 1.5 0.3 setosa 6.6
更多dplyr包的操作,可通过R的帮助功能(例如 ?mutate 或 help(package=dplyr))查看各个函数的用法。