## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#",
  fig.path = "figures/",
  fig.height = 4.5, 
  fig.width = 6 
)

## ----setup, eval = FALSE------------------------------------------------------
# # Attempt to load a package, if the package was not available, install and load
# if(!require("diemr", character.only = TRUE)){
#     install.packages("diemr", dependencies = TRUE)
#     library("diemr", character.only = TRUE)
# }

## ----eval = FALSE-------------------------------------------------------------
# filepaths <- system.file("extdata", "data7x3.txt", package = "diemr")
# # Analysing six individuals
# samples <- 1:6
# # Assuming diploid markers of all individuals
# ploidies = rep(list(rep(2, nchar(readLines(filepaths[1])[1]) - 1)), length(filepaths))
# CheckDiemFormat(files = filepaths,
#                 ChosenInds = samples,
#                 ploidy = ploidies)
# # File check passed: TRUE
# # Ploidy check passed: TRUE

## ----eval = FALSE-------------------------------------------------------------
# res <- diem(files = filepaths,
#             ploidy = ploidies,
#             markerPolarity = list(c(FALSE, FALSE, TRUE)),
#             ChosenInds = samples,
#             nCores = 1)

## ----eval = FALSE-------------------------------------------------------------
# res$DI
# #   newPolarity         DI   Support Marker
# # 1       FALSE  -4.872256 15.930181      1
# # 2        TRUE  -3.520647 18.633399      2
# # 3        TRUE -13.274822  6.130625      3

## ----eval = FALSE-------------------------------------------------------------
# genotypes <- importPolarized(file = filepaths,
#                              changePolarity = res$markerPolarity,
#                              ChosenInds = samples)
# 
# h <- apply(X = res$I4,
#            MARGIN = 1,
#            FUN = pHetErrOnStateCount)[1, ]
# 
# plotPolarized(genotypes = genotypes,
#               HI = h[samples])

## ----echo = FALSE, fig.width = 6, fig.height = 4.5, fig.align = 'center'------
genotypes <- diemr::importPolarized(file = system.file("extdata", "data7x3.txt",
                         package = "diemr"), 
                             changePolarity = c(FALSE, TRUE, TRUE), 
                             ChosenInds = 1:6)
h <- c(.5,0,.33,.5,.83,1)
diemr::plotPolarized(genotypes = genotypes,
              HI = h)

## ----eval = FALSE-------------------------------------------------------------
# S0011222
# S1210001
# S02221U0

## ----eval = FALSE-------------------------------------------------------------
# filepaths2 <- c(system.file("extdata", "data7x3.txt", package = "diemr"),
#                 system.file("extdata", "data7x10.txt", package = "diemr"))
# 
# ploidies2 <- list(rep(2, 7),
#                   c(2, 1, 2, 2, 2, 1, 2))
# 
# CheckDiemFormat(files = filepaths2,
#                 ChosenInds = samples,
#                 ploidy = ploidies2)
# # File check passed: TRUE
# # Ploidy check passed: TRUE
# 
# # Set random seed for repeatibility of null polarities (optional)
# set.seed(39583782)
# 
# # Run diem with verbose = TRUE to store hybrid indices with ploidy-aware allele counts
# res2 <- diem(files = filepaths2,
#              ploidy = ploidies2,
#              markerPolarity = FALSE,
#              ChosenInds = samples,
#              nCores = 1,
#              verbose = TRUE)

## ----eval = FALSE-------------------------------------------------------------
# # Import polarized genotypes for all compartments
# genotypes2 <- importPolarized(files = filepaths2,
#                        changePolarity = res2$markerPolarity,
#                        ChosenInds = samples)
# 
# # Load individual hybrid indices from a stored file
# h2 <- unlist(read.table("diagnostics/HIwithOptimalPolarities.txt"))
# 
# # Plot the polarised genotypes
# plotPolarized(genotypes = genotypes2,
#               HI = h2[samples])

## ----echo = FALSE, warning = FALSE,fig.width = 6, fig.height = 4.5, fig.align = 'center'----
genotypes2 <- diemr::importPolarized(files = c(system.file("extdata", "data7x3.txt", package = "diemr"), 
                 system.file("extdata", "data7x10.txt", package = "diemr")),
                       changePolarity = c(FALSE, TRUE, TRUE,T,T,T,T,F,T,T,F,T,T), 
                       ChosenInds = 1:6)
h2 <- apply(genotypes2, 1, \(x) diemr::pHetErrOnStateCount(diemr::sStateCount(x)))[1, ]
diemr::plotPolarized(genotypes = genotypes2,
              HI = h2, lwd = 2)

## ----eval = FALSE-------------------------------------------------------------
# # Select a threshold for the top diagnostic markers.
# threshold <- quantile(res2$DI$DI, prob = 0.6)
# # Create a vector identifying the diagnostic markers at the given threshold
# markers <- res2$DI$DI > threshold
# # Import only the selected markers
# genotypes3 <- importPolarized(files = filepaths2,
#                        changePolarity = res2$markerPolarity,
#                        ChosenInds = samples,
#                        ChosenSites = markers)
# # Calculate hybrid index from diagnostic markers
# h3 <- apply(genotypes3, 1, FUN = function(x) pHetErrOnStateCount(sStateCount(x)))[1, ]
# # Plot diagnostic markers
# plotPolarized(genotypes3, h3)

## ----echo = FALSE, warning = FALSE,fig.width = 6, fig.height = 4.5, fig.align = 'center'----
diemr::plotPolarized(genotypes = genotypes2[, c(TRUE,TRUE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE)],
              HI = c(.3,0,.5,.3,.4,1), lwd = 2)

## ----eval = FALSE-------------------------------------------------------------
# install.packages(package = "diemr_1.4.2.tar.gz",
#                  repos = NULL, type = "source")

## ----eval = FALSE-------------------------------------------------------------
# apply(res$I4, MARGIN = 1, FUN = pHetErrOnStateCount)
# #          [,1] [,2]      [,3]      [,4]      [,5]      [,6]
# # p   0.5000000    0 0.3333333 0.5000000 0.8333333 1.0000000
# # Het 0.3333333    0 0.6666667 0.3333333 0.3333333 0.0000000
# # Err 0.0000000    0 0.0000000 0.0000000 0.0000000 0.3333333

## ----eval = FALSE-------------------------------------------------------------
# samples2 <- c(1, 3:6)
# CheckDiemFormat(files = filepaths,
#                 ChosenInds = samples2,
#                 ploidy = ploidies)
# # File check passed: TRUE
# # Ploidy check passed: TRUE
# 
# res2 <- diem(files = filepaths,
#              ChosenInds = samples2,
#              ploidy = ploidies,
#              nCores = 1,
#              markerPolarity = list(c(FALSE, FALSE, TRUE)))
# 
# # calculate hybrid indices from updated I4
# h.res2 <- apply(res2$I4,
#                 MARGIN = 1,
#                 FUN = pHetErrOnStateCount)[1, ]
# 
# # set names for the hybrid index values
# h.res2 <- setNames(h.res2, nm = samples2)
# #    1    3    4    5    6
# # 0.50 0.33 0.50 0.83 1.00
# 
# # calculate the center of the maximum hybrid index change
# diffs <- data.frame(rollmean = zoo::rollmean(sort(h.res2), k = 2),
#                     diff = diff(sort(h.res2), lag = 1))
# h.res2.c <- diffs$rollmean[which.max(diffs$diff)]
# # [1] 0.6666667

## ----eval = FALSE-------------------------------------------------------------
# # Select 40% of markers with the highest diagnostic index
# threshold <- quantile(res2$DI$DI, prob = 0.6)
# genotypes3 <- genotypes2[, res2$DI$DI > threshold]
# # Recalculate I4 and hybrid indices
# h3 <- apply(genotypes3,
#             MARGIN = 1,
#             FUN = \(x) pHetErrOnStateCount(sStateCount(x)))[1, ]
# # Plot the polarised markers
# plotPolarized(genotypes3, h3)

## ----echo = FALSE-------------------------------------------------------------
genotypes3 <- genotypes2[, c(1,2,4,10,11)]
h3 <- apply(genotypes3, 
            MARGIN = 1,
            FUN = \(x) diemr::pHetErrOnStateCount(diemr::sStateCount(x)))[1, ]
diemr::plotPolarized(genotypes3, h3)