Select a subset
select.Rd
Selects rows and variables by specifying a condition using a formula.
Arguments
- from
a data.frame object with variables
- formula
a formula indicating the operation to create new variables. See the Details section for an explanation.
- as
a character vector with names of new variables.
- na.remove
a logical value indicating whether NA values should be removed
- na.return
a logical value indicating whether only the observations with NA values should be shown
- ...
further arguments
Details
The formula is composed of two parts:
column_names ~ row_conditions
the left-hand side contains the names of the columns to select, and the right-hand side contains the conditions used to select the rows, each wrapped in the I()
function.
For example:
column_names1 + column_names2 ~ I(column_names1 == "a") + I(column_names2 > 4)
first, the rows are selected where the observations in column_names1
are equal to "a"
and the observations in column_names2
are greater than 4
; then the columns column_names1
and column_names2
are returned.
If na.remove
is set to TRUE
, the observations with missing values are removed after the subsetting.
Examples
data("airquality")
dt <- airquality
## Select columns and filter rows
select(from = dt, formula = .~ I(Ozone > 10 & Wind > 10))
#> Ozone Solar.R Wind Temp Month Day
#> 3 12 149 12.6 74 5 3
#> 4 18 313 11.5 62 5 4
#> NA NA NA NA NA NA NA
#> 6 28 NA 14.9 66 5 6
#> 8 19 99 13.8 59 5 8
#> 14 14 274 10.9 68 5 14
#> 15 18 65 13.2 58 5 15
#> 16 14 334 11.5 64 5 16
#> 17 34 307 12.0 66 5 17
#> 19 30 322 11.5 68 5 19
#> 22 11 320 16.6 73 5 22
#> 24 32 92 12.0 61 5 24
#> NA.1 NA NA NA NA NA NA
#> NA.2 NA NA NA NA NA NA
#> 28 23 13 12.0 67 5 28
#> 29 45 252 14.9 81 5 29
#> NA.3 NA NA NA NA NA NA
#> NA.4 NA NA NA NA NA NA
#> 40 71 291 13.8 90 6 9
#> 41 39 323 11.5 87 6 10
#> NA.5 NA NA NA NA NA NA
#> NA.6 NA NA NA NA NA NA
#> NA.7 NA NA NA NA NA NA
#> 47 21 191 14.9 77 6 16
#> 48 37 284 20.7 72 6 17
#> 50 12 120 11.5 73 6 19
#> 51 13 137 10.3 76 6 20
#> NA.8 NA NA NA NA NA NA
#> NA.9 NA NA NA NA NA NA
#> NA.10 NA NA NA NA NA NA
#> NA.11 NA NA NA NA NA NA
#> 67 40 314 10.9 83 7 6
#> 74 27 175 14.9 81 7 13
#> NA.12 NA NA NA NA NA NA
#> 78 35 274 10.3 82 7 17
#> 81 63 220 11.5 85 7 20
#> NA.13 NA NA NA NA NA NA
#> 88 52 82 12.0 86 7 27
#> 100 89 229 10.3 90 8 8
#> NA.14 NA NA NA NA NA NA
#> 104 44 192 11.5 86 8 12
#> 105 28 273 11.5 82 8 13
#> NA.15 NA NA NA NA NA NA
#> 108 22 71 10.3 77 8 16
#> 111 31 244 10.9 78 8 19
#> 112 44 190 10.3 78 8 20
#> 113 21 259 15.5 77 8 21
#> NA.16 NA NA NA NA NA NA
#> 129 32 92 15.5 84 9 6
#> 130 20 252 10.9 80 9 7
#> 131 23 220 10.3 78 9 8
#> 132 21 230 10.9 75 9 9
#> 134 44 236 14.9 81 9 11
#> 135 21 259 15.5 76 9 12
#> 138 13 112 11.5 71 9 15
#> 140 18 224 13.8 67 9 17
#> 141 13 27 10.3 76 9 18
#> 142 24 238 10.3 68 9 19
#> 144 13 238 12.6 64 9 21
#> 146 36 139 10.3 81 9 23
#> 148 14 20 16.6 63 9 25
#> NA.17 NA NA NA NA NA NA
#> 151 14 191 14.3 75 9 28
#> 153 20 223 11.5 68 9 30
select(from = dt, formula = Ozone ~ I(Wind > 10))
#> Ozone
#> 3 12
#> 4 18
#> 5 NA
#> 6 28
#> 8 19
#> 9 8
#> 14 14
#> 15 18
#> 16 14
#> 17 34
#> 18 6
#> 19 30
#> 22 11
#> 24 32
#> 25 NA
#> 26 NA
#> 28 23
#> 29 45
#> 34 NA
#> 37 NA
#> 40 71
#> 41 39
#> 42 NA
#> 45 NA
#> 46 NA
#> 47 21
#> 48 37
#> 50 12
#> 51 13
#> 58 NA
#> 59 NA
#> 60 NA
#> 65 NA
#> 67 40
#> 73 10
#> 74 27
#> 75 NA
#> 76 7
#> 78 35
#> 81 63
#> 84 NA
#> 88 52
#> 94 9
#> 100 89
#> 103 NA
#> 104 44
#> 105 28
#> 107 NA
#> 108 22
#> 111 31
#> 112 44
#> 113 21
#> 114 9
#> 115 NA
#> 129 32
#> 130 20
#> 131 23
#> 132 21
#> 134 44
#> 135 21
#> 137 9
#> 138 13
#> 140 18
#> 141 13
#> 142 24
#> 144 13
#> 146 36
#> 147 7
#> 148 14
#> 150 NA
#> 151 14
#> 153 20
select(from = dt, formula = Ozone + Wind~ I(Ozone > 10))
#> Ozone Wind
#> 1 41 7.4
#> 2 36 8.0
#> 3 12 12.6
#> 4 18 11.5
#> NA NA NA
#> 6 28 14.9
#> 7 23 8.6
#> 8 19 13.8
#> NA.1 NA NA
#> 12 16 9.7
#> 13 11 9.2
#> 14 14 10.9
#> 15 18 13.2
#> 16 14 11.5
#> 17 34 12.0
#> 19 30 11.5
#> 20 11 9.7
#> 22 11 16.6
#> 24 32 12.0
#> NA.2 NA NA
#> NA.3 NA NA
#> NA.4 NA NA
#> 28 23 12.0
#> 29 45 14.9
#> 30 115 5.7
#> 31 37 7.4
#> NA.5 NA NA
#> NA.6 NA NA
#> NA.7 NA NA
#> NA.8 NA NA
#> NA.9 NA NA
#> NA.10 NA NA
#> 38 29 9.7
#> NA.11 NA NA
#> 40 71 13.8
#> 41 39 11.5
#> NA.12 NA NA
#> NA.13 NA NA
#> 44 23 8.0
#> NA.14 NA NA
#> NA.15 NA NA
#> 47 21 14.9
#> 48 37 20.7
#> 49 20 9.2
#> 50 12 11.5
#> 51 13 10.3
#> NA.16 NA NA
#> NA.17 NA NA
#> NA.18 NA NA
#> NA.19 NA NA
#> NA.20 NA NA
#> NA.21 NA NA
#> NA.22 NA NA
#> NA.23 NA NA
#> NA.24 NA NA
#> NA.25 NA NA
#> 62 135 4.1
#> 63 49 9.2
#> 64 32 9.2
#> NA.26 NA NA
#> 66 64 4.6
#> 67 40 10.9
#> 68 77 5.1
#> 69 97 6.3
#> 70 97 5.7
#> 71 85 7.4
#> NA.27 NA NA
#> 74 27 14.9
#> NA.28 NA NA
#> 77 48 6.9
#> 78 35 10.3
#> 79 61 6.3
#> 80 79 5.1
#> 81 63 11.5
#> 82 16 6.9
#> NA.29 NA NA
#> NA.30 NA NA
#> 85 80 8.6
#> 86 108 8.0
#> 87 20 8.6
#> 88 52 12.0
#> 89 82 7.4
#> 90 50 7.4
#> 91 64 7.4
#> 92 59 9.2
#> 93 39 6.9
#> 95 16 7.4
#> 96 78 6.9
#> 97 35 7.4
#> 98 66 4.6
#> 99 122 4.0
#> 100 89 10.3
#> 101 110 8.0
#> NA.31 NA NA
#> NA.32 NA NA
#> 104 44 11.5
#> 105 28 11.5
#> 106 65 9.7
#> NA.33 NA NA
#> 108 22 10.3
#> 109 59 6.3
#> 110 23 7.4
#> 111 31 10.9
#> 112 44 10.3
#> 113 21 15.5
#> NA.34 NA NA
#> 116 45 9.7
#> 117 168 3.4
#> 118 73 8.0
#> NA.35 NA NA
#> 120 76 9.7
#> 121 118 2.3
#> 122 84 6.3
#> 123 85 6.3
#> 124 96 6.9
#> 125 78 5.1
#> 126 73 2.8
#> 127 91 4.6
#> 128 47 7.4
#> 129 32 15.5
#> 130 20 10.9
#> 131 23 10.3
#> 132 21 10.9
#> 133 24 9.7
#> 134 44 14.9
#> 135 21 15.5
#> 136 28 6.3
#> 138 13 11.5
#> 139 46 6.9
#> 140 18 13.8
#> 141 13 10.3
#> 142 24 10.3
#> 143 16 8.0
#> 144 13 12.6
#> 145 23 9.2
#> 146 36 10.3
#> 148 14 16.6
#> 149 30 6.9
#> NA.36 NA NA
#> 151 14 14.3
#> 152 18 8.0
#> 153 20 11.5
## Keep all rows and select columns
select(from = dt, formula = Ozone ~ .)
#> Ozone
#> 1 41
#> 2 36
#> 3 12
#> 4 18
#> 5 NA
#> 6 28
#> 7 23
#> 8 19
#> 9 8
#> 10 NA
#> 11 7
#> 12 16
#> 13 11
#> 14 14
#> 15 18
#> 16 14
#> 17 34
#> 18 6
#> 19 30
#> 20 11
#> 21 1
#> 22 11
#> 23 4
#> 24 32
#> 25 NA
#> 26 NA
#> 27 NA
#> 28 23
#> 29 45
#> 30 115
#> 31 37
#> 32 NA
#> 33 NA
#> 34 NA
#> 35 NA
#> 36 NA
#> 37 NA
#> 38 29
#> 39 NA
#> 40 71
#> 41 39
#> 42 NA
#> 43 NA
#> 44 23
#> 45 NA
#> 46 NA
#> 47 21
#> 48 37
#> 49 20
#> 50 12
#> 51 13
#> 52 NA
#> 53 NA
#> 54 NA
#> 55 NA
#> 56 NA
#> 57 NA
#> 58 NA
#> 59 NA
#> 60 NA
#> 61 NA
#> 62 135
#> 63 49
#> 64 32
#> 65 NA
#> 66 64
#> 67 40
#> 68 77
#> 69 97
#> 70 97
#> 71 85
#> 72 NA
#> 73 10
#> 74 27
#> 75 NA
#> 76 7
#> 77 48
#> 78 35
#> 79 61
#> 80 79
#> 81 63
#> 82 16
#> 83 NA
#> 84 NA
#> 85 80
#> 86 108
#> 87 20
#> 88 52
#> 89 82
#> 90 50
#> 91 64
#> 92 59
#> 93 39
#> 94 9
#> 95 16
#> 96 78
#> 97 35
#> 98 66
#> 99 122
#> 100 89
#> 101 110
#> 102 NA
#> 103 NA
#> 104 44
#> 105 28
#> 106 65
#> 107 NA
#> 108 22
#> 109 59
#> 110 23
#> 111 31
#> 112 44
#> 113 21
#> 114 9
#> 115 NA
#> 116 45
#> 117 168
#> 118 73
#> 119 NA
#> 120 76
#> 121 118
#> 122 84
#> 123 85
#> 124 96
#> 125 78
#> 126 73
#> 127 91
#> 128 47
#> 129 32
#> 130 20
#> 131 23
#> 132 21
#> 133 24
#> 134 44
#> 135 21
#> 136 28
#> 137 9
#> 138 13
#> 139 46
#> 140 18
#> 141 13
#> 142 24
#> 143 16
#> 144 13
#> 145 23
#> 146 36
#> 147 7
#> 148 14
#> 149 30
#> 150 NA
#> 151 14
#> 152 18
#> 153 20
select(from = dt, formula = Ozone + Wind ~ NULL)
#> Ozone Wind
#> 1 41 7.4
#> 2 36 8.0
#> 3 12 12.6
#> 4 18 11.5
#> 5 NA 14.3
#> 6 28 14.9
#> 7 23 8.6
#> 8 19 13.8
#> 9 8 20.1
#> 10 NA 8.6
#> 11 7 6.9
#> 12 16 9.7
#> 13 11 9.2
#> 14 14 10.9
#> 15 18 13.2
#> 16 14 11.5
#> 17 34 12.0
#> 18 6 18.4
#> 19 30 11.5
#> 20 11 9.7
#> 21 1 9.7
#> 22 11 16.6
#> 23 4 9.7
#> 24 32 12.0
#> 25 NA 16.6
#> 26 NA 14.9
#> 27 NA 8.0
#> 28 23 12.0
#> 29 45 14.9
#> 30 115 5.7
#> 31 37 7.4
#> 32 NA 8.6
#> 33 NA 9.7
#> 34 NA 16.1
#> 35 NA 9.2
#> 36 NA 8.6
#> 37 NA 14.3
#> 38 29 9.7
#> 39 NA 6.9
#> 40 71 13.8
#> 41 39 11.5
#> 42 NA 10.9
#> 43 NA 9.2
#> 44 23 8.0
#> 45 NA 13.8
#> 46 NA 11.5
#> 47 21 14.9
#> 48 37 20.7
#> 49 20 9.2
#> 50 12 11.5
#> 51 13 10.3
#> 52 NA 6.3
#> 53 NA 1.7
#> 54 NA 4.6
#> 55 NA 6.3
#> 56 NA 8.0
#> 57 NA 8.0
#> 58 NA 10.3
#> 59 NA 11.5
#> 60 NA 14.9
#> 61 NA 8.0
#> 62 135 4.1
#> 63 49 9.2
#> 64 32 9.2
#> 65 NA 10.9
#> 66 64 4.6
#> 67 40 10.9
#> 68 77 5.1
#> 69 97 6.3
#> 70 97 5.7
#> 71 85 7.4
#> 72 NA 8.6
#> 73 10 14.3
#> 74 27 14.9
#> 75 NA 14.9
#> 76 7 14.3
#> 77 48 6.9
#> 78 35 10.3
#> 79 61 6.3
#> 80 79 5.1
#> 81 63 11.5
#> 82 16 6.9
#> 83 NA 9.7
#> 84 NA 11.5
#> 85 80 8.6
#> 86 108 8.0
#> 87 20 8.6
#> 88 52 12.0
#> 89 82 7.4
#> 90 50 7.4
#> 91 64 7.4
#> 92 59 9.2
#> 93 39 6.9
#> 94 9 13.8
#> 95 16 7.4
#> 96 78 6.9
#> 97 35 7.4
#> 98 66 4.6
#> 99 122 4.0
#> 100 89 10.3
#> 101 110 8.0
#> 102 NA 8.6
#> 103 NA 11.5
#> 104 44 11.5
#> 105 28 11.5
#> 106 65 9.7
#> 107 NA 11.5
#> 108 22 10.3
#> 109 59 6.3
#> 110 23 7.4
#> 111 31 10.9
#> 112 44 10.3
#> 113 21 15.5
#> 114 9 14.3
#> 115 NA 12.6
#> 116 45 9.7
#> 117 168 3.4
#> 118 73 8.0
#> 119 NA 5.7
#> 120 76 9.7
#> 121 118 2.3
#> 122 84 6.3
#> 123 85 6.3
#> 124 96 6.9
#> 125 78 5.1
#> 126 73 2.8
#> 127 91 4.6
#> 128 47 7.4
#> 129 32 15.5
#> 130 20 10.9
#> 131 23 10.3
#> 132 21 10.9
#> 133 24 9.7
#> 134 44 14.9
#> 135 21 15.5
#> 136 28 6.3
#> 137 9 10.9
#> 138 13 11.5
#> 139 46 6.9
#> 140 18 13.8
#> 141 13 10.3
#> 142 24 10.3
#> 143 16 8.0
#> 144 13 12.6
#> 145 23 9.2
#> 146 36 10.3
#> 147 7 10.3
#> 148 14 16.6
#> 149 30 6.9
#> 150 NA 13.2
#> 151 14 14.3
#> 152 18 8.0
#> 153 20 11.5