R, dplyr

dplyr

> install.packages("dplyr")
> library(dplyr)
> head(airquality) 
Ozone Solar.R Wind Temp Month Day 
1 41 190  7.4 67 5 1 
2 36 118  8.0 72 5 2 
3 12 149 12.6 74 5 3 
4 18 313 11.5 62 5 4 
5 NA  NA 14.3 56 5 5 
6 28  NA 14.9 66 5 6

> air <- filter(airquality, Month==6)
> head(air) 
Ozone Solar.R Wind Temp Month Day 
1 NA 286  8.6 78 6 1 
2 NA 287  9.7 74 6 2 
3 NA 242 16.1 67 6 3 
4 NA 186  9.2 84 6 4 
5 NA 220  8.6 85 6 5 
6 NA 264 14.3 79 6 6

> air <- filter(airquality, Month==6 & Temp > 90)
> head(air) 
Ozone Solar.R Wind Temp Month Day 
1 NA 259 10.9 93 6 11 
2 NA 250  9.2 92 6 12

> air <- filter(airquality, Ozone > 80 | Temp > 90)
> head(air) 
Ozone Solar.R Wind Temp Month Day 
1 115 223  5.7 79 5 30 
2  NA 259 10.9 93 6 11 
3  NA 250  9.2 92 6 12 
4 135 269  4.1 84 7  1 
5  97 267  6.3 92 7  8 
6  97 272  5.7 92 7  9

> slice(airquality, 6:10)
 Ozone Solar.R Wind Temp Month Day 
1 28  NA 14.9 66 5  6 
2 23 299  8.6 65 5  7 
3 19  99 13.8 59 5  8 
4  8  19 20.1 61 5  9 
5 NA 194  8.6 69 5 10

> air <- arrange(airquality, Temp)
> head(air) 
Ozone Solar.R Wind Temp Month Day 
1 NA  NA 14.3 56 5  5 
2  6  78 18.4 57 5 18 
3 NA  66 16.6 57 5 25 
4 NA  NA  8.0 57 5 27 
5 18  65 13.2 58 5 15 
6 NA 266 14.9 58 5 26

> air <- arrange(airquality, desc(Temp)) 
> head(air) 
Ozone Solar.R Wind Temp Month Day 
1  76 203  9.7 97 8 28 
2  84 237  6.3 96 8 30 
3 118 225  2.3 94 8 29 
4  85 188  6.3 94 8 31 
5  NA 259 10.9 93 6 11 
6  73 183  2.8 93 9  3

> air <- select(airquality, Month, Day, Temp)
> head(air)
 Month Day Temp 
1  5 1 67 
2  5 2 72 
3  5 3 74 
4  5 4 62 
5  5 5 56 
6  5 6 66

> air <- select(airquality, Temp:Day)      # Temp~Day까지 추출 
> air <- select(airquality, -(Temp:Day))   # Temp~Day까지 제외하고 추출 

> air <- select(airquality, Solar=Solar.R) # 이름 바꿔서 추출 
> head(air) 
Solar 
1 190 
2 118 
3 149 
4 313 
5  NA 
6  NA 

> air <- rename(airquality, Solar=Solar.R) 
> head(air) 
Ozone Solar Wind Temp Month Day 
1 41 190  7.4 67 5 1 
2 36 118  8.0 72 5 2 
3 12 149 12.6 74 5 3 
4 18 313 11.5 62 5 4 
5 NA  NA 14.3 56 5 5 
6 28  NA 14.9 66 5 6

> distinct(select(airquality, Month)) # 중복 빼고 추출 
  Month 
1 5 
2 6 
3 7 
4 8 
5 9

> air <- mutate(airquality, Temp.C = (Temp-32)/1.8) # 변수 새로 만들기 
> head(air) 
Ozone Solar.R Wind Temp Month Day Temp.C   
1 41 190  7.4 67 5 1 19.44444   
2 36 118  8.0 72 5 2 22.22222   
3 12 149 12.6 74 5 3 23.33333   
4 18 313 11.5 62 5 4 16.66667   
5 NA  NA 14.3 56 5 5 13.33333  
6 28  NA 14.9 66 5 6 18.88889   

> summarise(airquality, mean(Temp, na.rm=TRUE)) 
mean(Temp)              
1 77.88235

# summarise()에서 mean 대신 median, sd, max, min 등의 요약 함수도 같은 방식으로 사용할 수 있다.

> sample_n(airquality, 5) # 5개 추출 
Ozone Solar.R Wind Temp Month Day 
1  9  24 13.8 81 8  2 
2 NA 332 13.8 80 6 14 
3 NA 264 14.3 79 6  6 
4  9  24 10.9 71 9 14 
5 39 323 11.5 87 6 10 

> sample_frac(airquality, 0.05) # 5% 추출 
Ozone Solar.R Wind Temp Month Day 
1 78 197  5.1 92 9  2 
2 13 112 11.5 71 9 15 
3 24 238 10.3 68 9 19 
4 36 118  8.0 72 5  2 
5 NA 250  9.2 92 6 12 
6 12 120 11.5 73 6 19 
7 32  92 12.0 61 5 24 
8  9  36 14.3 72 8 22

# sample_n(), sample_frac()에 replace=TRUE 인자를 추가하면 복원추출 
> sample_frac(airquality, 0.05, replace=TRUE) # 복원추출 

> air.group <- group_by(airquality, Month) 
> air.group 
# A tibble: 153 × 6 
# Groups: Month [5] 
Ozone Solar.R Wind Temp Month Day 
<int> <int> <dbl> <int> <int> <int> 
1 41 190 7.4 67 5 1 
2 36 118 8 72 5 2 
3 12 149 12.6 74 5 3 
4 18 313 11.5 62 5 4 
5 NA NA 14.3 56 5 5 
6 28 NA 14.9 66 5 6 
7 23 299 8.6 65 5 7 
8 19 99 13.8 59 5 8 
9 8 19 20.1 61 5 9 
10 NA 194 8.6 69 5 10 
# … with 143 more rows 
# i Use `print(n = ...)` to see more rows