# Script form of a knitr vignette: each `## ----label----` line marks the
# start of a vignette chunk. Bare object names at top level (e.g. `df_ehr`)
# are intentional: they auto-print the object when the script is run.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----cooc_data----------------------------------------------------------------
library(nlpembeds)

# Toy input table: one row per (Patient, Month, Parent_Code) with a Count.
df_ehr <- data.frame(
  Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
  Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
  Parent_Code = c("C1", "C2", "C2", "C1", "C1", "C1", "C2", "C3", "C4"),
  Count = 1:9
)
df_ehr

## ----cooc_call----------------------------------------------------------------
# Run build_df_cooc() on the full table with its default arguments.
spm_cooc <- build_df_cooc(df_ehr)
spm_cooc

## ----cooc_month1--------------------------------------------------------------
# Same call restricted to patient 1, month 1, with min_code_freq = 0.
cooc_1 <- build_df_cooc(
  subset(df_ehr, Patient == 1 & Month == 1),
  min_code_freq = 0
)
cooc_1

## ----cooc_month2--------------------------------------------------------------
# Patient 1 again, this time the rows with Month == 3 (default min_code_freq).
# NOTE(review): the chunk label says "month2" but the filter is Month == 3 --
# presumably the second month in which patient 1 has more than one code;
# confirm against the vignette text.
cooc_2 <- build_df_cooc(subset(df_ehr, Patient == 1 & Month == 3))
cooc_2

## ----cooc_sum-----------------------------------------------------------------
# Element-wise sum of the two single-month results.
cooc_1 + cooc_2

## ----pmi----------------------------------------------------------------------
m_pmi <- get_pmi(spm_cooc)
m_pmi

## ----svd----------------------------------------------------------------------
m_svd <- get_svd(m_pmi, embedding_dim = 2)
m_svd

## ----sql_data-----------------------------------------------------------------
library(RSQLite)

# Write the toy table to a temporary SQLite file as table 'df_monthly'.
test_db_path <- tempfile()
test_db <- dbConnect(SQLite(), test_db_path)
dbWriteTable(test_db, "df_monthly", df_ehr, overwrite = TRUE)

###
# optional, done automatically by sql_cooc if table 'df_uniq_codes' not found
# and parameter autoindex set to TRUE
dbExecute(test_db, "CREATE INDEX patient_idx ON df_monthly (Patient)")

# One-column data frame of the distinct Parent_Code values.
df_uniq_codes <- unique(df_ehr["Parent_Code"])
dbWriteTable(test_db, "df_uniq_codes", df_uniq_codes, overwrite = TRUE)
###

dbDisconnect(test_db)

## ----sql_cooc-----------------------------------------------------------------
# sql_cooc() reads from the input database file and writes to a second
# temporary file.
output_db_path <- tempfile()
sql_cooc(input_path = test_db_path, output_path = output_db_path)

## ----read_sql-----------------------------------------------------------------
# Read table 'df_monthly' back from the output database.
test_db <- dbConnect(SQLite(), output_db_path)
spm_cooc <- dbGetQuery(test_db, "select * from df_monthly;")
dbDisconnect(test_db)
spm_cooc

## ----sql_pmi------------------------------------------------------------------
m_pmi <- get_pmi(spm_cooc)
m_pmi

## ----read_sql_cooc------------------------------------------------------------
# Convert the queried result with build_spm_cooc_sym(), then to a dense matrix
# for printing.
spm_cooc <- build_spm_cooc_sym(spm_cooc)
m_cooc <- as.matrix(spm_cooc)
m_cooc

## ----dicts_data---------------------------------------------------------------
# Rename codes C1 and C2. Plain assignment is used instead of magrittr's
# `%<>%` so this script only needs the packages it attaches itself.
# NOTE(review): the new identifiers presumably match entries in the packaged
# dictionaries used below -- confirm.
df_ehr$Parent_Code <- ifelse(
  df_ehr$Parent_Code == "C1", "C0000545", df_ehr$Parent_Code
)
df_ehr$Parent_Code <- ifelse(
  df_ehr$Parent_Code == "C2", "C0000578", df_ehr$Parent_Code
)
df_ehr

## ----dicts_data_write---------------------------------------------------------
# Fresh temporary database holding the renamed table.
test_db_path <- tempfile()
test_db <- dbConnect(SQLite(), test_db_path)
dbWriteTable(test_db, "df_monthly", df_ehr)
dbDisconnect(test_db)

## ----dicts_cooc---------------------------------------------------------------
# Dictionary files shipped in the package's 'dictionaries' directory.
codes_dict_fpaths <- list.files(
  system.file("dictionaries", package = "nlpembeds"),
  full.names = TRUE
)

# Reuses output_db_path from the earlier run, hence overwrite_output = TRUE.
sql_cooc(
  input_path = test_db_path,
  output_path = output_db_path,
  exclude_dict_pattern = "C[0-9]",
  codes_dict_fpaths = codes_dict_fpaths,
  autoindex = TRUE,
  overwrite_output = TRUE
)

## ----dicts_cooc_read----------------------------------------------------------
test_db <- dbConnect(SQLite(), output_db_path)
spm_cooc <- dbGetQuery(test_db, "select * from df_monthly;")
dbDisconnect(test_db)
spm_cooc

## ----oom_sql_pmi--------------------------------------------------------------
# m_pmi = get_pmi(spm_cooc)
# m_pmi

## ----oom_read_sql_cooc--------------------------------------------------------
# spm_cooc = build_spm_cooc_sym(spm_cooc)
# m_cooc = as.matrix(spm_cooc)
# m_cooc

## -----------------------------------------------------------------------------
# remotes::install_git('https://github.com/jwood000/RcppAlgos@v2.4.0.git')