## ----HTML, include = FALSE---------------------------------------------------- Zc <- "ZC" nH2O <- "nH2O" ## ----setup, include = FALSE--------------------------------------------------- oldopt <- options(width = 80) ## Use pngquant to optimize PNG images library(knitr) knit_hooks$set(pngquant = hook_pngquant) pngquant <- "--speed=1 --quality=0-25" if (!nzchar(Sys.which("pngquant"))) pngquant <- NULL # To make warnings appear in text box 20230619 # https://selbydavid.com/2017/06/18/rmarkdown-alerts/ knitr::knit_hooks$set( error = function(x, options) { paste('\n\n
', gsub('##', '\n', gsub('^##\ Error', '**Error:**', x)), '
', sep = '\n') }, warning = function(x, options) { paste('\n\n
', gsub('##', '\n', gsub('^##\ Warning:', '**Warning:**', x)), '
', sep = '\n') }, message = function(x, options) { paste('\n\n
', gsub('##', '\n', x), '
', sep = '\n') } ) ## Don't evaluate chunks if phyloseq is not available 20230619 #if(!requireNamespace("phyloseq", quietly = TRUE)) { # knitr::opts_chunk$set(eval = FALSE) # warning("The **phyloseq** package is not available, so this vignette shows only the code without the results.") #} ## ----load_packages, message = FALSE------------------------------------------- require(chem16S) require(phyloseq) require(ggplot2) theme_set(theme_bw()) # For composing plots and making a common legend (plot_layout()) require(patchwork) # For annotating plots with regression coefficients (stat_poly_line()) require(ggpmisc) ## ----load_GlobalPatterns------------------------------------------------------ data(GlobalPatterns) Human = get_variable(GlobalPatterns, "SampleType") %in% c("Feces", "Mock", "Skin", "Tongue") sample_data(GlobalPatterns)$Human <- factor(Human) ## ----plot_metrics2.GlobalPatterns, collapse = TRUE---------------------------- p2 <- plot_ps_metrics2(GlobalPatterns, color = "SampleType", shape = "Human", refdb = "RefSeq_206") ## ----plot_metrics2.GlobalPatterns_geom, fig.width = 6, fig.height = 4, fig.align = "center", pngquant = pngquant---- p2 + geom_polygon(aes(fill = SampleType), alpha = 0.5) + geom_point(size = 3) + guides(colour = guide_legend(override.aes = list(shape = c(17, 19, 19, 17, 19, 19, 17, 19, 17)))) ## ----read_Humboldt------------------------------------------------------------ psfile <- system.file("extdata/DADA2-GTDB_220/FEN+22/ps_FEN+22.rds", package = "chem16S") ps <- readRDS(psfile) ps <- prune_samples(sample_names(ps) != "SRR1346095", ps) ps ## ----plot_metrics2.Humboldt, fig.width = 6, fig.height = 4, fig.align = "center", pngquant = pngquant---- plot_ps_metrics2(ps, refdb = "GTDB_220", color = "Location") + geom_polygon(aes(fill = Location), alpha = 0.5) + geom_point(size = 3) ## ----check_patchwork, echo = FALSE-------------------------------------------- # Don't evaluate remaining chunks if patchwork is not available 20230627 if(!requireNamespace("patchwork", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) warning("The **patchwork** package is not available, so the remaining plots are not shown.") } ## ----plot_metrics2.Humboldt_redox_OM, fig.width = 6, fig.height = 5, fig.align = "center", pngquant = pngquant---- p2 <- plot_ps_metrics2(ps, refdb = "GTDB_220", color = "Sediment_redox") + geom_point(size = 4) + labs(color = "Sediment redox (Eh)") p3 <- plot_ps_metrics2(ps, refdb = "GTDB_220", color = "Organic_matter") + geom_point(size = 4) + labs(color = "Organic matter (%)") p2 / p3 ## ----sample.data.and.chemical.metrics.for.communities------------------------- sample.data.and.chemical.metrics.for.communities <- cbind(sample_data(ps), ps_metrics(ps, refdb = "GTDB_220")) ## ----check_ggpmisc, echo = FALSE---------------------------------------------- # Don't evaluate remaining chunks if ggpmisc is not available 20230627 if(!requireNamespace("ggpmisc", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) warning("The **ggpmisc** package is not available, so the remaining plots are not shown.") } ## ----scatter_plot, eval = FALSE----------------------------------------------- # # Defuse (enquo) and Inject (!!) from https://www.tidyverse.org/blog/2018/07/ggplot2-tidy-evaluation/ # # Regression line and equation from https://stackoverflow.com/questions/7549694/add-regression-line-equation-and-r2-on-graph # scatter_plot <- function(data = sample.data.and.chemical.metrics.for.communities, x, y, xlab, ylab) { # x <- enquo(x) # y <- enquo(y) # ggplot(data, aes(x = !!x, y = !!y, color = .data[["Location"]])) + # geom_point() + xlab(xlab) + ylab(ylab) + # # Override aes to plot one regression line for samples from all locations # stat_poly_line(aes(x = !!x, y = !!y), inherit.aes = FALSE) + # stat_poly_eq(aes(x = !!x, y = !!y), inherit.aes = FALSE, label.x = "center") # } # # sp1 <- scatter_plot(x = Sediment_redox, y = Zc, xlab = "Sediment redox (mV)", ylab = chemlab("Zc")) # sp2 <- scatter_plot(x = Sediment_redox, y = nH2O, xlab = "Sediment redox (mV)", ylab = chemlab("nH2O")) # sp3 <- scatter_plot(x = Organic_matter, y = Zc, xlab = "Organic matter (%)", ylab = chemlab("Zc")) # sp4 <- scatter_plot(x = Organic_matter, y = nH2O, xlab = "Organic matter (%)", ylab = chemlab("nH2O")) # sp1 + sp2 + sp3 + sp4 + plot_layout(guides = "collect") ## ----scatter_plot, echo = FALSE, fig.width = 7, fig.height = 6, fig.align = "center", pngquant = pngquant---- # Defuse (enquo) and Inject (!!) from https://www.tidyverse.org/blog/2018/07/ggplot2-tidy-evaluation/ # Regression line and equation from https://stackoverflow.com/questions/7549694/add-regression-line-equation-and-r2-on-graph scatter_plot <- function(data = sample.data.and.chemical.metrics.for.communities, x, y, xlab, ylab) { x <- enquo(x) y <- enquo(y) ggplot(data, aes(x = !!x, y = !!y, color = .data[["Location"]])) + geom_point() + xlab(xlab) + ylab(ylab) + # Override aes to plot one regression line for samples from all locations stat_poly_line(aes(x = !!x, y = !!y), inherit.aes = FALSE) + stat_poly_eq(aes(x = !!x, y = !!y), inherit.aes = FALSE, label.x = "center") } sp1 <- scatter_plot(x = Sediment_redox, y = Zc, xlab = "Sediment redox (mV)", ylab = chemlab("Zc")) sp2 <- scatter_plot(x = Sediment_redox, y = nH2O, xlab = "Sediment redox (mV)", ylab = chemlab("nH2O")) sp3 <- scatter_plot(x = Organic_matter, y = Zc, xlab = "Organic matter (%)", ylab = chemlab("Zc")) sp4 <- scatter_plot(x = Organic_matter, y = nH2O, xlab = "Organic matter (%)", ylab = chemlab("nH2O")) sp1 + sp2 + sp3 + sp4 + plot_layout(guides = "collect") ## ----load_Qinghai.Tibet------------------------------------------------------- psfile2 <- system.file("extdata/DADA2-GTDB_220/ZFZ+23/ps_ZFZ+23.rds", package = "chem16S") ps2 <- readRDS(psfile2) data.and.metrics <- cbind(sample_data(ps2), ps_metrics(ps2, refdb = "GTDB_220")) ps2 ## ----scatter_plot_2, eval = FALSE--------------------------------------------- # scatter_plot_2 <- function(data = data.and.metrics, x, y, xlab, ylab) { # x <- enquo(x) # y <- enquo(y) # ggplot(data, aes(x = !!x, y = !!y)) + # geom_point() + xlab(xlab) + ylab(ylab) + # stat_poly_line() + # stat_poly_eq(label.x = "center") # } # sp1 <- scatter_plot_2(x = ORP, y = Zc, xlab = "ORP (mV)", ylab = chemlab("Zc")) # sp2 <- scatter_plot_2(x = ORP, y = nH2O, xlab = "ORP (mV)", ylab = chemlab("nH2O")) # sp3 <- scatter_plot_2(x = T, y = Zc, xlab = "T (°C)", ylab = chemlab("Zc")) # sp4 <- scatter_plot_2(x = T, y = nH2O, xlab = "T (°C)", ylab = chemlab("nH2O")) # sp1 + sp2 + sp3 + sp4 ## ----scatter_plot_2, echo = FALSE, fig.width = 7, fig.height = 6, fig.align = "center", pngquant = pngquant---- scatter_plot_2 <- function(data = data.and.metrics, x, y, xlab, ylab) { x <- enquo(x) y <- enquo(y) ggplot(data, aes(x = !!x, y = !!y)) + geom_point() + xlab(xlab) + ylab(ylab) + stat_poly_line() + stat_poly_eq(label.x = "center") } sp1 <- scatter_plot_2(x = ORP, y = Zc, xlab = "ORP (mV)", ylab = chemlab("Zc")) sp2 <- scatter_plot_2(x = ORP, y = nH2O, xlab = "ORP (mV)", ylab = chemlab("nH2O")) sp3 <- scatter_plot_2(x = T, y = Zc, xlab = "T (°C)", ylab = chemlab("Zc")) sp4 <- scatter_plot_2(x = T, y = nH2O, xlab = "T (°C)", ylab = chemlab("nH2O")) sp1 + sp2 + sp3 + sp4 ## ----merge_datasets----------------------------------------------------------- taxa_names(ps2) <- paste0(taxa_names(ps2), "b") ps_merged <- merge_phyloseq(ps, ps2) ps_merged ## ----plot_metrics2.merged, fig.width = 6, fig.height = 4, fig.align = "center", pngquant = pngquant---- sample_data(ps_merged)$Environment <- ifelse(is.na(sample_data(ps_merged)$Depth), "Hot spring", "Marine sediment") plot_ps_metrics2(ps_merged, refdb = "GTDB_220", color = "Environment", shape = "Environment") + geom_point(size = 3) ## ----more_datasets, eval = FALSE---------------------------------------------- # # Get data for the Baltic Sea salinity gradient from Herlemann et al., 2016 # RDPfile <- system.file("extdata/RDP/HLA+16.tab.xz", package = "chem16S") # RDP <- read_RDP(RDPfile) # map <- map_taxa(RDP, refdb = "RefSeq_206") # metrics <- get_metrics(RDP, map, refdb = "RefSeq_206") # mdatfile <- system.file("extdata/metadata/HLA+16.csv", package = "chem16S") # mdat <- get_metadata(mdatfile, metrics) # # Take out brackish samples (6-20 PSU) # ibrackish <- mdat$metadata$pch == 20 # mdat$metadata <- mdat$metadata[!ibrackish, ] # mdat$metrics <- mdat$metrics[!ibrackish, ] # # Keep the values used for the plot # mdat$metadata <- mdat$metadata[, c("name", "pch", "col")] # mdat$metrics <- mdat$metrics[, c("Zc", "nH2O")] # # Change symbols # isalt <- mdat$metadata$pch == 21 # ifresh <- mdat$metadata$pch == 24 # mdat$metadata$pch[isalt] <- 24 # mdat$metadata$pch[ifresh] <- 21 # mdat$metadata$col[isalt] <- 3 # mdat$metadata$col[ifresh] <- 4 # # Append hot spring and sediment values # FEN22 <- readRDS(system.file("extdata/DADA2-GTDB_220/FEN+22/ps_FEN+22.rds", package = "chem16S")) # FEN22 <- prune_samples(sample_names(FEN22) != "SRR1346095", FEN22) # FEN22_metrics <- ps_metrics(FEN22)[, c("Zc", "nH2O")] # ZFZ23 <- readRDS(system.file("extdata/DADA2-GTDB_220/ZFZ+23/ps_ZFZ+23.rds", package = "chem16S")) # ZFZ23_metrics <- ps_metrics(ZFZ23)[, c("Zc", "nH2O")] # mdat$metadata <- rbind(mdat$metadata, # data.frame(name = "sediment", pch = 25, col = rep(7, nrow(FEN22_metrics)))) # mdat$metadata <- rbind(mdat$metadata, # data.frame(name = "hot spring", pch = 22, col = rep(2, nrow(ZFZ23_metrics)))) # mdat$metrics <- rbind(mdat$metrics, FEN22_metrics, ZFZ23_metrics) # # Change colors # mdat$metadata$col[mdat$metadata$col == 2] <- (red <- "#db2828") # mdat$metadata$col[mdat$metadata$col == 3] <- (green <- "#21ba45") # mdat$metadata$col[mdat$metadata$col == 4] <- (blue <- "#2185d0") # mdat$metadata$col[mdat$metadata$col == 7] <- (yellow <- "#fbbd08") # # Create bold axis labels # Zclab <- quote(bolditalic(Z)[bold(C)]) # nH2Olab <- quote(bolditalic(n)[bold(H[2]*O)]) # # Make the plot # par(mar = c(4, 4, 1, 1)) # par(cex.lab = 1.2, mgp = c(2.8, 1, 0)) # pm <- plot_metrics(mdat, title = FALSE, xlab = Zclab, ylab = nH2Olab) # # Add a legend # legend <- c("Hot spring", "Freshwater", "Marine water", "Marine sediment") # pch <- c(22, 21, 24, 25) # pt.bg <- c(red, blue, green, yellow) # legend("bottomleft", legend, pch = pch, col = 1, pt.bg = pt.bg, bg = "white", bty = "n") ## ----more_datasets, message = FALSE, results = "hide", echo = FALSE, fig.width = 5, fig.height = 4, fig.align = "center", pngquant = pngquant---- # Get data for the Baltic Sea salinity gradient from Herlemann et al., 2016 RDPfile <- system.file("extdata/RDP/HLA+16.tab.xz", package = "chem16S") RDP <- read_RDP(RDPfile) map <- map_taxa(RDP, refdb = "RefSeq_206") metrics <- get_metrics(RDP, map, refdb = "RefSeq_206") mdatfile <- system.file("extdata/metadata/HLA+16.csv", package = "chem16S") mdat <- get_metadata(mdatfile, metrics) # Take out brackish samples (6-20 PSU) ibrackish <- mdat$metadata$pch == 20 mdat$metadata <- mdat$metadata[!ibrackish, ] mdat$metrics <- mdat$metrics[!ibrackish, ] # Keep the values used for the plot mdat$metadata <- mdat$metadata[, c("name", "pch", "col")] mdat$metrics <- mdat$metrics[, c("Zc", "nH2O")] # Change symbols isalt <- mdat$metadata$pch == 21 ifresh <- mdat$metadata$pch == 24 mdat$metadata$pch[isalt] <- 24 mdat$metadata$pch[ifresh] <- 21 mdat$metadata$col[isalt] <- 3 mdat$metadata$col[ifresh] <- 4 # Append hot spring and sediment values FEN22 <- readRDS(system.file("extdata/DADA2-GTDB_220/FEN+22/ps_FEN+22.rds", package = "chem16S")) FEN22 <- prune_samples(sample_names(FEN22) != "SRR1346095", FEN22) FEN22_metrics <- ps_metrics(FEN22)[, c("Zc", "nH2O")] ZFZ23 <- readRDS(system.file("extdata/DADA2-GTDB_220/ZFZ+23/ps_ZFZ+23.rds", package = "chem16S")) ZFZ23_metrics <- ps_metrics(ZFZ23)[, c("Zc", "nH2O")] mdat$metadata <- rbind(mdat$metadata, data.frame(name = "sediment", pch = 25, col = rep(7, nrow(FEN22_metrics)))) mdat$metadata <- rbind(mdat$metadata, data.frame(name = "hot spring", pch = 22, col = rep(2, nrow(ZFZ23_metrics)))) mdat$metrics <- rbind(mdat$metrics, FEN22_metrics, ZFZ23_metrics) # Change colors mdat$metadata$col[mdat$metadata$col == 2] <- (red <- "#db2828") mdat$metadata$col[mdat$metadata$col == 3] <- (green <- "#21ba45") mdat$metadata$col[mdat$metadata$col == 4] <- (blue <- "#2185d0") mdat$metadata$col[mdat$metadata$col == 7] <- (yellow <- "#fbbd08") # Create bold axis labels Zclab <- quote(bolditalic(Z)[bold(C)]) nH2Olab <- quote(bolditalic(n)[bold(H[2]*O)]) # Make the plot par(mar = c(4, 4, 1, 1)) par(cex.lab = 1.2, mgp = c(2.8, 1, 0)) pm <- plot_metrics(mdat, title = FALSE, xlab = Zclab, ylab = nH2Olab) # Add a legend legend <- c("Hot spring", "Freshwater", "Marine water", "Marine sediment") pch <- c(22, 21, 24, 25) pt.bg <- c(red, blue, green, yellow) legend("bottomleft", legend, pch = pch, col = 1, pt.bg = pt.bg, bg = "white", bty = "n") ## ----cleanup, include = FALSE------------------------------------------------- options(oldopt)