R语言使用group_by和summarise统计

2018-10-15 本文已影响0人 liyin_d64b
> library(nycflights13)
> library(tidyverse)
> rt <- read.table("text.txt",header = T,sep = "\t")
> myda <- group_by(rt,Tumor_Sample) #想按哪一列分组汇总，就group_by哪一列（这里按Tumor_Sample分组，下一步对每组的score求和）
> realdata <- summarise(myda,naw=sum(score))
> realdata
# A tibble: 491 x 2
   Tumor_Sample    naw
   <fct>         <dbl>
 1 TCGA-18-3406   95.7
 2 TCGA-18-3407   31.1
 3 TCGA-18-3408   36.9
 4 TCGA-18-3409 1447. 
 5 TCGA-18-3410  110. 
 6 TCGA-18-3411  151. 
 7 TCGA-18-3412   53.0
 8 TCGA-18-3414  171. 
 9 TCGA-18-3415   93.0
10 TCGA-18-3416  135. 
# ... with 481 more rows
> daily <- group_by(flights, year, month, day)
> daily
# A tibble: 336,776 x 19
# Groups:   year, month, day [365]
    year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum origin
   <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>     <dbl> <chr>    <int> <chr>   <chr> 
 1  2013     1     1      517            515         2      830            819        11 UA        1545 N14228  EWR   
 2  2013     1     1      533            529         4      850            830        20 UA        1714 N24211  LGA   
 3  2013     1     1      542            540         2      923            850        33 AA        1141 N619AA  JFK   
 4  2013     1     1      544            545        -1     1004           1022       -18 B6         725 N804JB  JFK   
 5  2013     1     1      554            600        -6      812            837       -25 DL         461 N668DN  LGA   
 6  2013     1     1      554            558        -4      740            728        12 UA        1696 N39463  EWR   
 7  2013     1     1      555            600        -5      913            854        19 B6         507 N516JB  EWR   
 8  2013     1     1      557            600        -3      709            723       -14 EV        5708 N829AS  LGA   
 9  2013     1     1      557            600        -3      838            846        -8 B6          79 N593JB  JFK   
10  2013     1     1      558            600        -2      753            745         8 AA         301 N3ALAA  LGA   
# ... with 336,766 more rows, and 6 more variables: dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
#   time_hour <dttm>
> (per_day   <- summarise(daily, flights = n()))
# A tibble: 365 x 4
# Groups:   year, month [?]
    year month   day flights
   <int> <int> <int>   <int>
 1  2013     1     1     842
 2  2013     1     2     943
 3  2013     1     3     914
 4  2013     1     4     915
 5  2013     1     5     720
 6  2013     1     6     832
 7  2013     1     7     933
 8  2013     1     8     899
 9  2013     1     9     902
10  2013     1    10     932
# ... with 355 more rows
> (per_month <- summarise(per_day, flights = sum(flights)))
# A tibble: 12 x 3
# Groups:   year [?]
    year month flights
   <int> <int>   <int>
 1  2013     1   27004
 2  2013     2   24951
 3  2013     3   28834
 4  2013     4   28330
 5  2013     5   28796
 6  2013     6   28243
 7  2013     7   29425
 8  2013     8   29327
 9  2013     9   27574
10  2013    10   28889
11  2013    11   27268
12  2013    12   28135
> myda <- group_by(flights,year,month) %>%
+   summarise(flights = n())
> myda
# A tibble: 12 x 3
# Groups:   year [?]
    year month flights
   <int> <int>   <int>
 1  2013     1   27004
 2  2013     2   24951
 3  2013     3   28834
 4  2013     4   28330
 5  2013     5   28796
 6  2013     6   28243
 7  2013     7   29425
 8  2013     8   29327
 9  2013     9   27574
10  2013    10   28889
11  2013    11   27268
12  2013    12   28135
> myda==per_month
      year month flights
 [1,] TRUE  TRUE    TRUE
 [2,] TRUE  TRUE    TRUE
 [3,] TRUE  TRUE    TRUE
 [4,] TRUE  TRUE    TRUE
 [5,] TRUE  TRUE    TRUE
 [6,] TRUE  TRUE    TRUE
 [7,] TRUE  TRUE    TRUE
 [8,] TRUE  TRUE    TRUE
 [9,] TRUE  TRUE    TRUE
[10,] TRUE  TRUE    TRUE
[11,] TRUE  TRUE    TRUE
[12,] TRUE  TRUE    TRUE

> daily %>% 
+   ungroup() %>%             # no longer grouped by date
+   summarise(flights = n())  # all flights
# A tibble: 1 x 1
  flights
    <int>
1  336776
R语言使用group_by和summarise统计

猜你喜欢

热点阅读