Rename variable
data <- use_data_titanic(count = FALSE)
glimpse(data)
#> Rows: 2,201
#> Columns: 4
#> $ Class <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"…
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male…
#> $ Age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"…
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "…
data <- data %>% clean_var(Age, name = "age")
glimpse(data)
#> Rows: 2,201
#> Columns: 4
#> $ Class <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"…
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male…
#> $ age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"…
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "…
Replace NA values
data <- use_data_beer()
data %>% describe(energy_kcal_100ml)
#> variable = energy_kcal_100ml
#> type = double
#> na = 11 of 161 (6.8%)
#> unique = 34
#> min|max = 20 | 62
#> q05|q95 = 24 | 56.65
#> q25|q75 = 37 | 44
#> median = 42
#> mean = 39.89333
Set min/max values
data <- create_data_person()
data %>% describe(age)
#> variable = age
#> type = integer
#> na = 0 of 1 000 (0%)
#> unique = 80
#> min|max = 16 | 95
#> q05|q95 = 21 | 92
#> q25|q75 = 37 | 76
#> median = 55
#> mean = 55.845
Rescale 0 to 1
Cleaning text
Drop variables
- `drop_var_no_variance()`: Drop all variables with no variance
- `drop_var_not_numeric()`: Drop all not numeric variables
- `drop_var_low_variance()`: Drop all variables with low variance
- `drop_var_by_names()`: Drop variables by name
- `drop_var_with_na()`: Drop all variables with NA-values
data <- use_data_beer()
data %>% describe_tbl()
#> 161 observations with 11 variables
#> 19 observations containing missings (NA)
#> 5 variables containing missings (NA)
#> 1 variables with no variance
data %>%
drop_var_no_variance() %>%
describe_tbl()
#> 161 observations with 10 variables
#> 19 observations containing missings (NA)
#> 5 variables containing missings (NA)
#> 0 variables with no variance
data %>%
drop_var_with_na() %>%
describe_tbl()
#> 161 observations with 6 variables
#> 0 observations containing missings (NA)
#> 0 variables containing missings (NA)
#> 1 variables with no variance
Drop observations
- `drop_obs_with_na()`: Drop all observations with NA-values
data %>%
drop_obs_with_na() %>%
describe_tbl()
#> 142 observations with 11 variables
#> 0 observations containing missings (NA)
#> 0 variables containing missings (NA)
#> 1 variables with no variance
- `drop_obs_if()`: Drop all observations where the expression is true
data %>%
count_pct(type)
#> # A tibble: 3 × 4
#> type n total pct
#> <chr> <int> <int> <dbl>
#> 1 Alkoholfrei 27 161 16.8
#> 2 Bock 8 161 4.97
#> 3 Rest 126 161 78.3
data %>%
drop_obs_if(type == "Alkoholfrei") %>%
count_pct(type)
#> # A tibble: 2 × 4
#> type n total pct
#> <chr> <int> <int> <dbl>
#> 1 Bock 8 134 5.97
#> 2 Rest 126 134 94.0