## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE ) ## ----------------------------------------------------------------------------- library(missRanger) set.seed(3) iris_NA <- generateNA(iris, p = 0.1) head(iris_NA) imp <- missRanger(iris_NA, num.trees = 100) head(imp) ## ----------------------------------------------------------------------------- imp <- missRanger(iris_NA, pmm.k = 5, num.trees = 100, verbose = 0) head(imp) ## ----------------------------------------------------------------------------- imp <- missRanger(iris_NA, pmm.k = 5, num.trees = 200, mtry = 1, verbose = 0) ## ----------------------------------------------------------------------------- imp <- missRanger( iris_NA, pmm.k = 5, num.trees = 100, keep_forests = TRUE, verbose = 0 ) imp summary(imp) # Out-of-sample application # saveRDS(imp, file = "imputation_model.rds") # imp <- readRDS("imputation_model.rds") predict(imp, head(iris_NA)) ## ----------------------------------------------------------------------------- # Impute all variables with all (default) m <- missRanger(iris_NA, formula = . ~ ., pmm.k = 5, num.trees = 100, verbose = 0) # Don't use Species for imputation m <- missRanger(iris_NA, . ~ . - Species, pmm.k = 5, num.trees = 100, verbose = 0) # Impute Sepal.Length by Species (or not?) m <- missRanger(iris_NA, Sepal.Length ~ Species, pmm.k = 5, num.trees = 100) head(m) # Only univariate imputation was done! Why? Because Species contains missing values # itself and needs to appear on the LHS as well: m <- missRanger(iris_NA, Sepal.Length + Species ~ Species, pmm.k = 5, num.trees = 100) head(m) # Impute all variables univariately m <- missRanger(iris_NA, . ~ 1, verbose = 0)