R语言使用group_by和summarise统计
2018-10-15 本文已影响0人
liyin_d64b
> library(nycflights13)
> library(tidyverse)
> rt <- read.table("text.txt",header = T,sep = "\t")
> myda <- group_by(rt,Tumor_Sample) #想按哪个变量分组,就group_by哪个变量(这里按Tumor_Sample分组,下一步在summarise中对每组的score求和)
> realdata <- summarise(myda,naw=sum(score))
> realdata
# A tibble: 491 x 2
Tumor_Sample naw
<fct> <dbl>
1 TCGA-18-3406 95.7
2 TCGA-18-3407 31.1
3 TCGA-18-3408 36.9
4 TCGA-18-3409 1447.
5 TCGA-18-3410 110.
6 TCGA-18-3411 151.
7 TCGA-18-3412 53.0
8 TCGA-18-3414 171.
9 TCGA-18-3415 93.0
10 TCGA-18-3416 135.
# ... with 481 more rows
> daily <- group_by(flights, year, month, day)
> daily
# A tibble: 336,776 x 19
# Groups: year, month, day [365]
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum origin
<int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr> <chr>
1 2013 1 1 517 515 2 830 819 11 UA 1545 N14228 EWR
2 2013 1 1 533 529 4 850 830 20 UA 1714 N24211 LGA
3 2013 1 1 542 540 2 923 850 33 AA 1141 N619AA JFK
4 2013 1 1 544 545 -1 1004 1022 -18 B6 725 N804JB JFK
5 2013 1 1 554 600 -6 812 837 -25 DL 461 N668DN LGA
6 2013 1 1 554 558 -4 740 728 12 UA 1696 N39463 EWR
7 2013 1 1 555 600 -5 913 854 19 B6 507 N516JB EWR
8 2013 1 1 557 600 -3 709 723 -14 EV 5708 N829AS LGA
9 2013 1 1 557 600 -3 838 846 -8 B6 79 N593JB JFK
10 2013 1 1 558 600 -2 753 745 8 AA 301 N3ALAA LGA
# ... with 336,766 more rows, and 6 more variables: dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
# time_hour <dttm>
> (per_day <- summarise(daily, flights = n()))
# A tibble: 365 x 4
# Groups: year, month [?]
year month day flights
<int> <int> <int> <int>
1 2013 1 1 842
2 2013 1 2 943
3 2013 1 3 914
4 2013 1 4 915
5 2013 1 5 720
6 2013 1 6 832
7 2013 1 7 933
8 2013 1 8 899
9 2013 1 9 902
10 2013 1 10 932
# ... with 355 more rows
> (per_month <- summarise(per_day, flights = sum(flights)))
# A tibble: 12 x 3
# Groups: year [?]
year month flights
<int> <int> <int>
1 2013 1 27004
2 2013 2 24951
3 2013 3 28834
4 2013 4 28330
5 2013 5 28796
6 2013 6 28243
7 2013 7 29425
8 2013 8 29327
9 2013 9 27574
10 2013 10 28889
11 2013 11 27268
12 2013 12 28135
> myda <- group_by(flights,year,month) %>%
+ summarise(flights = n())
> myda
# A tibble: 12 x 3
# Groups: year [?]
year month flights
<int> <int> <int>
1 2013 1 27004
2 2013 2 24951
3 2013 3 28834
4 2013 4 28330
5 2013 5 28796
6 2013 6 28243
7 2013 7 29425
8 2013 8 29327
9 2013 9 27574
10 2013 10 28889
11 2013 11 27268
12 2013 12 28135
> myda==per_month
year month flights
[1,] TRUE TRUE TRUE
[2,] TRUE TRUE TRUE
[3,] TRUE TRUE TRUE
[4,] TRUE TRUE TRUE
[5,] TRUE TRUE TRUE
[6,] TRUE TRUE TRUE
[7,] TRUE TRUE TRUE
[8,] TRUE TRUE TRUE
[9,] TRUE TRUE TRUE
[10,] TRUE TRUE TRUE
[11,] TRUE TRUE TRUE
[12,] TRUE TRUE TRUE
> daily %>%
+ ungroup() %>% # no longer grouped by date
+ summarise(flights = n()) # all flights
# A tibble: 1 x 1
flights
<int>
1 336776