## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----libraries, message=FALSE------------------------------------------------- library(PHEindicatormethods) library(dplyr) library(tidyr) ## ----Execute SQL Query and load results into r object------------------------- pops <- data.frame( indicator = rep(c("Ind1", "Ind2", "Ind3", "Ind4"), each = 19 * 2 * 5), period = rep(2012:2016, each = 19 * 2, times = 4), region = rep(rep(c("Area1", "Area2"), each = 19), times = 20), ageband = rep(c(0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90), times = 40), pop = sample(10000:20000, 19 * 2 * 5 * 4, replace = TRUE), esp2013 = rep(esp2013, times = 40)) head(pops) deaths <- data.frame( indicator = rep(c("Ind1", "Ind2", "Ind3", "Ind4"), each = 19 * 2 * 5), period = rep(2012:2016, each = 19 * 2), region = rep(rep(c("Area1", "Area2"), each = 19), times = 5), ageband = rep(c(0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90), times = 10), dths = sample(200, 19 * 2 * 5 * 4, replace = TRUE)) head(deaths) ## ----create reference column-------------------------------------------------- df <- left_join(pops, deaths, by = c("indicator", "period", "region", "ageband")) %>% group_by(indicator, period, region) ## ----calculate DSRs----------------------------------------------------------- calculate_dsr( data = df, x = dths, # name of field containing count of events n = pop, # name of field containing population denominators stdpop = esp2013 # name of field containing standard populations ) ## ----alternative dsr---------------------------------------------------------- calculate_dsr( data = df, x = dths, n = pop, stdpop = esp2013, type = "standard", confidence = 99.8, multiplier = 10000 ) ## ----specify stdpop as field name--------------------------------------------- # duplicate data for males and females and apply different standard populations # to each sex df_f <- df %>% mutate( sex = "F", esp_dummy = c(5000, 5500, 5500, 5500, 6000, 6000, 6500, 7000, 7000, 7000, 7000, 6500, 5500, 5000, 4500, 4000, 3000, 2000, 1500) ) df_m <- df %>% mutate( sex = "M", esp_dummy = c(5000, 5500, 5500, 5500, 6000, 6000, 6500, 7000, 7000, 7000, 7000, 6500, 6500, 6000, 5500, 4000, 2000, 1000, 500) ) df_mf <- df_f %>% bind_rows(df_m) %>% group_by(sex, .add = TRUE) %>% select(!"esp2013") # add sex to the grouping variables then calculate the DSRs dsrs_mf <- calculate_dsr( df_mf, x = dths, n = pop, stdpop = esp_dummy ) head(dsrs_mf) ## ----------------------------------------------------------------------------- # Generate some dummy data # breakdown original dataset to show event frequencies and to count unique individuals df_freq <- df %>% mutate( f3 = floor((dths * 0.1) / 3), # 10% of events in individuals with 3 events f2 = floor((dths * 0.2) / 2), # 20% of events in individuals with 2 events f1 = (dths - (3 * f3) - (2 * f2))) %>% # 70% of events in individuals with 1 event select(!"dths") %>% pivot_longer( cols = c("f1", "f2", "f3"), names_to = "eventfrequency", values_to = "uniqueindividuals", names_prefix = "f") %>% mutate(eventfrequency = as.integer(eventfrequency) ) # calculate the dsrs - notice that output DSRs values match those calculated # earlier for the same data frame but confidence intervals are wider df_freq %>% group_by(eventfrequency, .add = TRUE) %>% calculate_dsr( x = uniqueindividuals, # count of unique individuals experiencing the frequency of events in eventfreq n = pop, stdpop = esp2013, independent_events = FALSE, # calculate CIs assuming events are not independent eventfreq = eventfrequency, # name of column containing the event frequencies (e.g. 1, 2, ...) ageband = ageband # name of column containing age bands ) ## ----test data---------------------------------------------------------------- pops2 <- data.frame( ageband = c( 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90), pop = c(30, 35, 35, 35, 40, 40, 45, 50, 50, 50, 60, 60, 70, 75, 70, 60, 20, 20, 15), esp2013 = esp2013 ) deaths2 <- data.frame( ageband = c(0, 5, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90), dths = c(1, 1, 1, 1, 3, 3, 3, 3, 10, 10, 10, 10, 8, 8, 8, 8) ) df2 <- left_join(pops2, deaths2, by = "ageband") head(df2) calculate_dsr( df2, x = dths, n = pop, stdpop = esp2013 )