Skip to contents

Add new variables by mutating the input variables using a formula.

Usage

add(from, formula, as = NULL,
    position = c("right", "left"),
    na.remove = FALSE, logic_convert = TRUE,...)

Arguments

from

a data.frame object with variables

formula

a formula indicating the operations used to create the new variables. See the Details section for an explanation.

as

a character vector with names of new variables.

position

whether the new variables are placed at the end (right) or at the beginning (left) of the input data.

na.remove

a logical value indicating whether NA values should be removed.

logic_convert

a logical value indicating whether new logical variables are converted to 0 or 1.

...

further arguments

Details

The formula is composed of two parts:

~ new_variables

the right-hand side specifies the new variables to add, computed from the existing variables, using the I() function.

For example:

~ I(log(column_names1)) + I(column_names2/100)

log(column_names1) and column_names2/100 are added to the data.

If na.remove is set to TRUE, the new variables are created and added to the input dataset, and then the observations with missing values are removed.

Value

Returns a data.frame object with the original and the new variables.

Author

Alessio Serafini

Examples


data("airquality")
dt <- airquality

head(add(from = dt, formula =   ~ log(Ozone)))
#>   Ozone Solar.R Wind Temp Month Day    Var.1
#> 1    41     190  7.4   67     5   1 3.713572
#> 2    36     118  8.0   72     5   2 3.583519
#> 3    12     149 12.6   74     5   3 2.484907
#> 4    18     313 11.5   62     5   4 2.890372
#> 5    NA      NA 14.3   56     5   5       NA
#> 6    28      NA 14.9   66     5   6 3.332205
head(add(from = dt, formula =   ~ log(Ozone) +  log(Wind)))
#>   Ozone Solar.R Wind Temp Month Day    Var.1    Var.2
#> 1    41     190  7.4   67     5   1 3.713572 2.001480
#> 2    36     118  8.0   72     5   2 3.583519 2.079442
#> 3    12     149 12.6   74     5   3 2.484907 2.533697
#> 4    18     313 11.5   62     5   4 2.890372 2.442347
#> 5    NA      NA 14.3   56     5   5       NA 2.660260
#> 6    28      NA 14.9   66     5   6 3.332205 2.701361
head(add(from = dt, formula =   ~ log(Ozone), as = "Ozone_1"))
#>   Ozone Solar.R Wind Temp Month Day  Ozone_1
#> 1    41     190  7.4   67     5   1 3.713572
#> 2    36     118  8.0   72     5   2 3.583519
#> 3    12     149 12.6   74     5   3 2.484907
#> 4    18     313 11.5   62     5   4 2.890372
#> 5    NA      NA 14.3   56     5   5       NA
#> 6    28      NA 14.9   66     5   6 3.332205


head(add(from = dt, formula =  Ozone + Wind ~ log()))
#>   Ozone Solar.R Wind Temp Month Day    Var.1    Var.2
#> 1    41     190  7.4   67     5   1 3.713572 2.001480
#> 2    36     118  8.0   72     5   2 3.583519 2.079442
#> 3    12     149 12.6   74     5   3 2.484907 2.533697
#> 4    18     313 11.5   62     5   4 2.890372 2.442347
#> 5    NA      NA 14.3   56     5   5       NA 2.660260
#> 6    28      NA 14.9   66     5   6 3.332205 2.701361
head(add(from = dt, formula =  ~ log()))
#>   Ozone Solar.R Wind Temp Month Day    Var.1    Var.2    Var.3    Var.4
#> 1    41     190  7.4   67     5   1 3.713572 5.247024 2.001480 4.204693
#> 2    36     118  8.0   72     5   2 3.583519 4.770685 2.079442 4.276666
#> 3    12     149 12.6   74     5   3 2.484907 5.003946 2.533697 4.304065
#> 4    18     313 11.5   62     5   4 2.890372 5.746203 2.442347 4.127134
#> 5    NA      NA 14.3   56     5   5       NA       NA 2.660260 4.025352
#> 6    28      NA 14.9   66     5   6 3.332205       NA 2.701361 4.189655
#>      Var.5     Var.6
#> 1 1.609438 0.0000000
#> 2 1.609438 0.6931472
#> 3 1.609438 1.0986123
#> 4 1.609438 1.3862944
#> 5 1.609438 1.6094379
#> 6 1.609438 1.7917595
head(add(from = dt, formula =  .~ log(), position = "left"))
#>      Var.1    Var.2    Var.3    Var.4    Var.5     Var.6 Ozone Solar.R Wind
#> 1 3.713572 5.247024 2.001480 4.204693 1.609438 0.0000000    41     190  7.4
#> 2 3.583519 4.770685 2.079442 4.276666 1.609438 0.6931472    36     118  8.0
#> 3 2.484907 5.003946 2.533697 4.304065 1.609438 1.0986123    12     149 12.6
#> 4 2.890372 5.746203 2.442347 4.127134 1.609438 1.3862944    18     313 11.5
#> 5       NA       NA 2.660260 4.025352 1.609438 1.6094379    NA      NA 14.3
#> 6 3.332205       NA 2.701361 4.189655 1.609438 1.7917595    28      NA 14.9
#>   Temp Month Day
#> 1   67     5   1
#> 2   72     5   2
#> 3   74     5   3
#> 4   62     5   4
#> 5   56     5   5
#> 6   66     5   6

head(add(from = dt, formula =  .~ log(), na.remove = TRUE))
#>   Ozone Solar.R Wind Temp Month Day    Var.1    Var.2    Var.3    Var.4
#> 1    41     190  7.4   67     5   1 3.713572 5.247024 2.001480 4.204693
#> 2    36     118  8.0   72     5   2 3.583519 4.770685 2.079442 4.276666
#> 3    12     149 12.6   74     5   3 2.484907 5.003946 2.533697 4.304065
#> 4    18     313 11.5   62     5   4 2.890372 5.746203 2.442347 4.127134
#> 7    23     299  8.6   65     5   7 3.135494 5.700444 2.151762 4.174387
#> 8    19      99 13.8   59     5   8 2.944439 4.595120 2.624669 4.077537
#>      Var.5     Var.6
#> 1 1.609438 0.0000000
#> 2 1.609438 0.6931472
#> 3 1.609438 1.0986123
#> 4 1.609438 1.3862944
#> 7 1.609438 1.9459101
#> 8 1.609438 2.0794415

head(add(from = dt, formula =   ~ I((Ozone>5))))
#>   Ozone Solar.R Wind Temp Month Day Var.1
#> 1    41     190  7.4   67     5   1     1
#> 2    36     118  8.0   72     5   2     1
#> 3    12     149 12.6   74     5   3     1
#> 4    18     313 11.5   62     5   4     1
#> 5    NA      NA 14.3   56     5   5    NA
#> 6    28      NA 14.9   66     5   6     1
head(add(from = dt, formula =   ~ I((Ozone>5)), logic_convert = FALSE ))
#>   Ozone Solar.R Wind Temp Month Day Var.1
#> 1    41     190  7.4   67     5   1  TRUE
#> 2    36     118  8.0   72     5   2  TRUE
#> 3    12     149 12.6   74     5   3  TRUE
#> 4    18     313 11.5   62     5   4  TRUE
#> 5    NA      NA 14.3   56     5   5    NA
#> 6    28      NA 14.9   66     5   6  TRUE

head(add(from = dt, formula = Ozone + Wind ~ C(Ozone-Ozone)))
#>   Ozone Solar.R Wind Temp Month Day Var.1 Var.2
#> 1    41     190  7.4   67     5   1     0     0
#> 2    36     118  8.0   72     5   2     0     0
#> 3    12     149 12.6   74     5   3     0     0
#> 4    18     313 11.5   62     5   4     0     0
#> 5    NA      NA 14.3   56     5   5    NA    NA
#> 6    28      NA 14.9   66     5   6     0     0
head(add(from = dt, formula =  ~ C(log(Ozone))))
#>   Ozone Solar.R Wind Temp Month Day    Var.1    Var.2    Var.3    Var.4
#> 1    41     190  7.4   67     5   1 3.713572 3.713572 3.713572 3.713572
#> 2    36     118  8.0   72     5   2 3.583519 3.583519 3.583519 3.583519
#> 3    12     149 12.6   74     5   3 2.484907 2.484907 2.484907 2.484907
#> 4    18     313 11.5   62     5   4 2.890372 2.890372 2.890372 2.890372
#> 5    NA      NA 14.3   56     5   5       NA       NA       NA       NA
#> 6    28      NA 14.9   66     5   6 3.332205 3.332205 3.332205 3.332205
#>      Var.5    Var.6
#> 1 3.713572 3.713572
#> 2 3.583519 3.583519
#> 3 2.484907 2.484907
#> 4 2.890372 2.890372
#> 5       NA       NA
#> 6 3.332205 3.332205
head(add(from = dt, formula =  ~ C(5)))
#>   Ozone Solar.R Wind Temp Month Day Var.1 Var.2 Var.3 Var.4 Var.5 Var.6
#> 1    41     190  7.4   67     5   1     5     5     5     5     5     5
#> 2    36     118  8.0   72     5   2     5     5     5     5     5     5
#> 3    12     149 12.6   74     5   3     5     5     5     5     5     5
#> 4    18     313 11.5   62     5   4     5     5     5     5     5     5
#> 5    NA      NA 14.3   56     5   5     5     5     5     5     5     5
#> 6    28      NA 14.9   66     5   6     5     5     5     5     5     5
head(add(from = dt, formula = Ozone + Wind ~ C(Ozone-Ozone)))
#>   Ozone Solar.R Wind Temp Month Day Var.1 Var.2
#> 1    41     190  7.4   67     5   1     0     0
#> 2    36     118  8.0   72     5   2     0     0
#> 3    12     149 12.6   74     5   3     0     0
#> 4    18     313 11.5   62     5   4     0     0
#> 5    NA      NA 14.3   56     5   5    NA    NA
#> 6    28      NA 14.9   66     5   6     0     0
head(add(from = dt, formula =  Ozone + Wind ~ C(log(Ozone))))
#>   Ozone Solar.R Wind Temp Month Day    Var.1    Var.2
#> 1    41     190  7.4   67     5   1 3.713572 3.713572
#> 2    36     118  8.0   72     5   2 3.583519 3.583519
#> 3    12     149 12.6   74     5   3 2.484907 2.484907
#> 4    18     313 11.5   62     5   4 2.890372 2.890372
#> 5    NA      NA 14.3   56     5   5       NA       NA
#> 6    28      NA 14.9   66     5   6 3.332205 3.332205



foo <- function(x, a = 100){return(x-x + a)}

head(add(from = dt, formula =  Ozone + Month~ I(foo(a = 100))))
#> Error in foo(Ozone, a = 100): could not find function "foo"
head(add(from = dt, formula =  Ozone + Month~ foo()))
#> Error in foo(Ozone, ): could not find function "foo"
head(add(from = dt, formula =  ~ I(foo(Ozone, a = 100))))
#> Error in foo(Ozone, Ozone, a = 100): could not find function "foo"