## ----knitrOpts----------------------------------------------------------------
library(knitr)
suggested_packages <- c("geosphere", "nycflights13", "dplyr", "ggplot2", "microbenchmark")
opts_chunk$set(eval = all(vapply(suggested_packages, requireNamespace, quietly = TRUE, FUN.VALUE = FALSE)))

## ----loadPackages-------------------------------------------------------------
# tryCatch({
#   library(geosphere)
#   library(nycflights13)
#   library(dplyr, warn.conflicts = FALSE)
#   library(ggplot2)
#   library(microbenchmark)
#   library(data.table, warn.conflicts = FALSE)
#   library(magrittr)
#   library(hutils, warn.conflicts = FALSE)
# },
# # requireNamespace does not detect errors like
# # package ‘dplyr’ was installed by an R version with different internals; it needs to be reinstalled for use with this R version
# error = function(e) {
#   opts_chunk$set(eval = FALSE)
# })
# options(digits = 4)

## ----aliases------------------------------------------------------------------
# OR(OR(TRUE,
#       stop("Never happens")),  ## short-circuits
#    AND(FALSE,
#        stop("Never happens")))

## ----compare_if_else----------------------------------------------------------
# my_check <- function(values) {
#   all(vapply(values[-1], function(x) identical(values[[1]], x), logical(1)))
# }
# 
# set.seed(2)
# cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE)
# yes <- sample(letters, size = 100e3, replace = TRUE)
# no <- sample(letters, size = 100e3, replace = TRUE)
# na <- sample(letters, size = 100e3, replace = TRUE)
# 
# microbenchmark(dplyr =  dplyr::if_else(cnd, yes, no, na),
#                hutils = hutils::if_else(cnd, yes, no, na),
#                check = my_check) %>%
#   print
# 
# cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE)
# yes <- sample(letters, size = 1, replace = TRUE)
# no <- sample(letters, size = 100e3, replace = TRUE)
# na <- sample(letters, size = 1, replace = TRUE)
# 
# microbenchmark(dplyr =  dplyr::if_else(cnd, yes, no, na),
#                hutils = hutils::if_else(cnd, yes, no, na),
#                check = my_check) %>%
#   print

## ----compare_coalesce---------------------------------------------------------
# x <- sample(c(letters, NA), size = 100e3, replace = TRUE)
# A <- sample(c(letters, NA), size = 100e3, replace = TRUE)
# B <- sample(c(letters, NA), size = 100e3, replace = TRUE)
# C <- sample(c(letters, NA), size = 100e3, replace = TRUE)
# 
# microbenchmark(dplyr =  dplyr::coalesce(x, A, B, C),
#                hutils = hutils::coalesce(x, A, B, C),
#                check = my_check) %>%
#   print

## ----compare_coalesce_short_circuit_x-----------------------------------------
# x <- sample(c(letters), size = 100e3, replace = TRUE)
# 
# microbenchmark(dplyr =  dplyr::coalesce(x, A, B, C),
#                hutils = hutils::coalesce(x, A, B, C),
#                check = my_check) %>%
#   print

## ----compare_coalesce_short_circuit_A-----------------------------------------
# x <- sample(c(letters, NA), size = 100e3, replace = TRUE)
# A <- sample(c(letters), size = 100e3, replace = TRUE)
# 
# microbenchmark(dplyr =  dplyr::coalesce(x, A, B, C),
#                hutils = hutils::coalesce(x, A, B, C),
#                check = my_check) %>%
#   print

## ----canonical_drop_DT--------------------------------------------------------
# DT <- data.table(A = 1:5, B = 1:5, C = 1:5)
# DT[, A := NULL]

## ----drop_col_hutils----------------------------------------------------------
# DT <- data.table(A = 1:5, B = 1:5, C = 1:5)
# DT %>%
#   drop_col("A") %>%
#   drop_col("B")
# 
# # or
# DT <- data.table(A = 1:5, B = 1:5, C = 1:5)
# DT %>%
#   drop_cols(c("A", "B"))

## ----drop_colr----------------------------------------------------------------
# flights <- as.data.table(flights)
# 
# flights %>%
#   drop_colr("time") %>%
#   drop_colr("arr(?!_delay)", perl = TRUE)

## ----drop_constant_cols-------------------------------------------------------
# flights %>%
#   .[origin == "JFK"] %>%
#   drop_constant_cols

## ----drop_empty_cols----------------------------------------------------------
# planes %>%
#   as.data.table %>%
#   .[!complete.cases(.)]
# 
# planes %>%
#   as.data.table %>%
#   .[!complete.cases(.)] %>%
#   # drops speed
#   drop_empty_cols

## ----duplicated_rows----------------------------------------------------------
# flights %>%
#   # only the 'second' of the duplicates is returned
#   .[duplicated(., by = c("origin", "dest"))]
# 
# flights %>%
#   # Both rows are returned and (by default)
#   # duplicates are presented adjacently
#   duplicated_rows(by = c("origin", "dest"))

## ----haversine_distance-------------------------------------------------------
# DT1 <- data.table(lat_orig = runif(1e5, -80, 80),
#                   lon_orig = runif(1e5, -179, 179),
#                   lat_dest = runif(1e5, -80, 80),
#                   lon_dest = runif(1e5, -179, 179))
# 
# DT2 <- copy(DT1)
# 
# microbenchmark(DT1[, distance := haversine_distance(lat_orig, lon_orig,
#                                                     lat_dest, lon_dest)],
# 
#                DT2[, distance := distHaversine(cbind(lon_orig, lat_orig),
#                                                cbind(lon_orig, lat_orig))])
# rm(DT1, DT2)

## ----mutate-other, results='asis'---------------------------------------------
# set.seed(1)
# DT <- data.table(Fruit = sample(c("apple", "pear", "orange", "tomato", "eggplant"),
#                                 size = 20,
#                                 prob = c(0.45, 0.25, 0.15, 0.1, 0.05),
#                                 replace = TRUE),
#                  Price = rpois(20, 10))
# 
# kable(mutate_other(DT, "Fruit", n = 3)[])

## ----iris-veriscolor----------------------------------------------------------
# iris <- as.data.table(iris)
# iris[Species %in% c("setosa", "versicolour")] %$%
#   mean(Sepal.Length / Sepal.Width)

## ----iris-versicolor, error=TRUE----------------------------------------------
try({
# iris <- as.data.table(iris)
# iris[Species %ein% c("setosa", "versicolour")] %$%
#   mean(Sepal.Length / Sepal.Width)
})

## ----pin----------------------------------------------------------------------
# identical(iris[grep("v", Species)],
#           iris[Species %pin% "v"])

## ----pin-multi----------------------------------------------------------------
# iris[Species %pin% c("ver", "vir")] %>%
#   head

## -----------------------------------------------------------------------------
# DT <- data.table(x = 1:5,
#                  y = letters[1:5],
#                  AB = c(NA, TRUE, FALSE, TRUE, FALSE))
# select_which(DT, anyNA, .and.dots = "y")

## -----------------------------------------------------------------------------
# dt <- data.table(y = !sample(0:1, size = 100, replace = TRUE),
#                  x = runif(100))
# dt[, pred := predict(lm(y ~ x, data = .SD), newdata = .SD)]
# 
# dt[, auc(y, pred)]

## ----select_grep--------------------------------------------------------------
# flights %>%
#   select_grep("arr")

## ----select_grep-and----------------------------------------------------------
# flights %>%
#   select_grep("arr", .and = "year", .but.not = "arr_time")

## -----------------------------------------------------------------------------
# RQ(dplyr, "dplyr must be installed")
# RQ("dplyr", "dplyr needs installing", "dplyr installed.")

## ----ahull-1------------------------------------------------------------------
# if (!identical(Sys.info()[["sysname"]], "Darwin"))
#   ggplot(data.table(x = c(0, 1, 2, 3, 4), y = c(0, 1, 2, 0.1, 0))) +
#   geom_area(aes(x, y)) +
#   geom_rect(data = ahull(, c(0, 1, 2, 3, 4), c(0, 1, 2, 0.1, 0)),
#             aes(xmin = xmin,
#                 xmax = xmax,
#                 ymin = ymin,
#                 ymax = ymax),
#             color = "red")

## ----ahull-demos, fig.width = 8, fig.height = 6-------------------------------
# set.seed(101)
# ahull_dt <-
#   data.table(x = c(0:100) / 100,
#              y = cumsum(rnorm(101, 0.05)))
# if (!identical(Sys.info()[["sysname"]], "Darwin"))
# ggplot(ahull_dt) +
#   geom_area(aes(x, y)) +
#   geom_rect(data = ahull(ahull_dt),
#             aes(xmin = xmin,
#                 xmax = xmax,
#                 ymin = ymin,
#                 ymax = ymax),
#             color = "red") +
#   geom_rect(data = ahull(ahull_dt,
#                          incl_negative = TRUE),
#             aes(xmin = xmin,
#                 xmax = xmax,
#                 ymin = ymin,
#                 ymax = ymax),
#             color = "blue") +
#   geom_rect(data = ahull(ahull_dt,
#                          incl_negative = TRUE,
#                          minH = 4),
#             aes(xmin = xmin,
#                 xmax = xmax,
#                 ymin = ymin,
#                 ymax = ymax),
#             color = "green") +
#   geom_rect(data = ahull(ahull_dt,
#                          incl_negative = TRUE,
#                          minW = 0.25),
#             aes(xmin = xmin,
#                 xmax = xmax,
#                 ymin = ymin,
#                 ymax = ymax),
#             color = "white",
#             fill = NA)
# 
# 

## ----weighted_quantile-ex-----------------------------------------------------
# x <- 1:10
# w <- c(rep(1, 5), rep(2, 5))
# quantile(x, prob = c(0.25, 0.75), names = FALSE)
# 
# weighted_quantile(x, w, p = c(0.25, 0.75))

## ----mutate_ntile-ex----------------------------------------------------------
# flights %>%
#   as.data.table %>%
#   .[, .(year, month, day, origin, dest, distance)] %>%
#   mutate_ntile(distance, n = 5L)
# 

## ----mutate_ntile-ex-charonly-------------------------------------------------
# flights %>%
#   as.data.table %>%
#   .[, .(year, month, day, origin, dest, distance)] %>%
#   mutate_ntile(distance, n = 5L)

## ----mutate_ntile-ex-2--------------------------------------------------------
# flights %>%
#   as.data.table %>%
#   mutate_ntile("distance",
#                n = 5L,
#                character.only = TRUE) %>%
#   .[, dep_delay := coalesce(dep_delay, 0)] %>%
#   .[, .(avgDelay = mean(dep_delay)), keyby = "distanceQuintile"]
# 

## ----longest-affix------------------------------------------------------------
# trim_common_affixes(c("CurrentHousingCosts(weekly)",
#                       "CurrentFuelCosts(weekly)"))

## ----swap---------------------------------------------------------------------
# a <- 1
# b <- 2
# a %<->% b
# identical(c(a, b), c(2, 1))

## ----average-bearing----------------------------------------------------------
# average_bearing(0, 270)  # NW
# mean(c(0, 270))          # SE (i.e. wrong)

## ----Mode-eg------------------------------------------------------------------
# Mode(c(1, 1, 1, 2, 3))

## ----samp-eg------------------------------------------------------------------
# DT <- data.table(x = c(5, 2, 3),
#                  y = c(5, 3, 4))
# DT[, .(Base = sample(.BY[["x"]]:.BY[["y"]])), keyby = .(x, y)]
# DT[, .(Base = samp(.BY[["x"]]:.BY[["y"]])), keyby = .(x, y)]