## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(data.table) library(xgboost) library(xgb2sql) df <- data.frame(ggplot2::diamonds) head(df) ## ----------------------------------------------------------------------------- out <- onehot2sql(df) print(out$meta) head(out$model.matrix) ## ----------------------------------------------------------------------------- cat(out$sql) ## ----------------------------------------------------------------------------- d2 <- data.table(ggplot2::diamonds) # change column class d2[, cut:=factor(cut, ordered=FALSE)] d2[, clarity:=as.character(clarity)] # create IDate column d2[, tsdt:=as.IDate('2017-01-05')] d2[1:3, tsdt:=tsdt-1] # add NAs d2[1, clarity:=NA] d2[2, depth:=NA] head(d2) out2 <- onehot2sql(d2) head(out2$model.matrix) ## ----------------------------------------------------------------------------- newdata <- d2[1:5,] # newdata has columns with new elements newdata[5, clarity:='NEW']; newdata[1,tsdt:=as.IDate('2018-05-01')] # newdata has a new column newdata[, new_col:=1] # newdata is lacking a column newdata[, cut:=NULL] head(newdata) onehot2sql(newdata, meta=out2$meta)$model.matrix ## ----------------------------------------------------------------------------- x <- out$model.matrix[,colnames(out$model.matrix)!='price'] y <- out$model.matrix[,colnames(out$model.matrix)=='price'] bst <- xgboost(x = x, y = y, max_depth = 2, learning_rate = .3, nrounds = 2, objective = 'reg:squarederror') booster2sql(bst, output_file_name='xgb.txt') ## ----warning=FALSE, message=FALSE--------------------------------------------- cat(readChar('xgb.txt', file.info('xgb.txt')$size)) ## ----------------------------------------------------------------------------- xgb.dump(bst) ## ----------------------------------------------------------------------------- booster2sql(bst, output_file_name='onehot-xgb.txt', input_onehot_query=out$sql) ## ----warning=FALSE, message=FALSE--------------------------------------------- cat(readChar('onehot-xgb.txt', file.info('onehot-xgb.txt')$size))