## ----setup, include = FALSE---------------------------------------------------
oldopt <- options(width = 80)
## Use pngquant to optimize PNG images
library(knitr)
knit_hooks$set(pngquant = hook_pngquant)
pngquant <- "--speed=1 --quality=0-25"
if (!nzchar(Sys.which("pngquant"))) pngquant <- NULL
## ----HTML, include = FALSE----------------------------------------------------
Zc <- "ZC"
nH2O <- "nH2O"
## ----load_packages, message = FALSE-------------------------------------------
library(chem16S)
## ----taxon_AA-----------------------------------------------------------------
taxon_AA <- read.csv(system.file("RefDB/RefSeq_206/taxon_AA.csv.xz", package = "chem16S"))
ranks <- taxon_AA$protein
table(ranks)[unique(ranks)]
## ----Zc_boxplot, fig.width = 5, fig.height = 5, fig.align = "center", pngquant = pngquant----
taxon_Zc <- canprot::calc_metrics(taxon_AA, "Zc")[, 1]
Zc_list <- sapply( unique(ranks), function(rank) taxon_Zc[ranks == rank] )
opar <- par(mar = c(4, 7, 1, 1))
boxplot(Zc_list, horizontal = TRUE, las = 1, xlab = chemlab("Zc"))
par(opar)
## ----phylum_to_genus----------------------------------------------------------
taxnames <- read.csv(system.file("RefDB/RefSeq_206/taxonomy.csv.xz", package = "chem16S"))
phylum_to_genus <- function(phylum) na.omit(unique(taxnames$genus[taxnames$phylum == phylum]))
get_Zc <- function(genera) na.omit(taxon_Zc[match(genera, taxon_AA$organism)])
sapply(sapply(sapply(c("Crenarchaeota", "Euryarchaeota"), phylum_to_genus), get_Zc), mean)
## ----class_to_genus-----------------------------------------------------------
class_to_genus <- function(class) na.omit(unique(taxnames$genus[taxnames$class == class]))
sapply(sapply(sapply(c("Methanococci", "Halobacteria"), class_to_genus), get_Zc), mean)
## ----phylum_Zc, fig.width = 7, fig.height = 6, fig.align = "center", pngquant = pngquant----
taxnames2 <- taxnames[taxnames$superkingdom != "Viruses", ]
taxnames3 <- taxnames2[!duplicated(taxnames2$genus), ]
(top20_phyla <- head(sort(table(taxnames3$phylum), decreasing = TRUE), 20))
Zc_list <- sapply(sapply(names(top20_phyla), phylum_to_genus), get_Zc)
order_Zc <- order(sapply(Zc_list, mean))
Zc_list <- Zc_list[order_Zc]
opar <- par(mar = c(4, 13, 1, 1))
boxplot(Zc_list, horizontal = TRUE, las = 1, xlab = chemlab("Zc"))
par(opar)
## ----class_Zc, fig.width = 10, fig.height = 5, fig.align = "center", pngquant = pngquant----
opar <- par(mfrow = c(1, 2), mar = c(4, 10, 1, 1))
for(phylum in c("Euryarchaeota", "Proteobacteria")) {
taxnames4 <- taxnames3[taxnames3$phylum == phylum, ]
classes <- na.omit(unique(taxnames4$class))
Zc_list <- sapply(sapply(classes, class_to_genus), get_Zc)
order_Zc <- order(sapply(Zc_list, mean))
Zc_list <- Zc_list[order_Zc]
boxplot(Zc_list, horizontal = TRUE, las = 1, xlab = chemlab("Zc"))
}
par(opar)
## ----class_nH2O, fig.width = 10, fig.height = 5, fig.align = "center", pngquant = pngquant----
taxon_nH2O <- canprot::calc_metrics(taxon_AA, "nH2O")[, 1]
get_nH2O <- function(genera) na.omit(taxon_nH2O[match(genera, taxon_AA$organism)])
opar <- par(mfrow = c(1, 2), mar = c(4, 10, 1, 1))
for(phylum in c("Euryarchaeota", "Proteobacteria")) {
taxnames4 <- taxnames3[taxnames3$phylum == phylum, ]
classes <- na.omit(unique(taxnames4$class))
nH2O_list <- sapply(sapply(classes, class_to_genus), get_nH2O)
order_nH2O <- order(sapply(nH2O_list, mean))
nH2O_list <- nH2O_list[order_nH2O]
boxplot(nH2O_list, horizontal = TRUE, las = 1, xlab = chemlab("nH2O"))
}
par(opar)
## ----viruses_top50------------------------------------------------------------
(top50_phyla <- head(sort(table(taxnames$phylum), decreasing = TRUE), 50))
## ----viruses_plot, fig.width = 7, fig.height = 5, fig.align = "center", pngquant = pngquant----
Zc_mean <- sapply(sapply(sapply(names(top50_phyla), phylum_to_genus), get_Zc), mean)
nH2O_mean <- sapply(sapply(sapply(names(top50_phyla), phylum_to_genus), get_nH2O), mean)
domain <- taxnames$superkingdom[match(names(top50_phyla), taxnames$phylum)]
pchs <- c(24, 21, 23)
pch <- sapply(domain, switch, Archaea = pchs[1], Bacteria = pchs[2], Viruses = pchs[3])
bgs <- topo.colors(3, alpha = 0.5)
bg <- sapply(domain, switch, Archaea = bgs[1], Bacteria = bgs[2], Viruses = bgs[3])
opar <- par(mar = c(4, 4, 1, 1))
plot(Zc_mean, nH2O_mean, xlab = chemlab("Zc"), ylab = chemlab("nH2O"), pch = pch, bg = bg)
ilow <- nH2O_mean < -0.77 & domain == "Bacteria"
xadj <- c(-0.9, -0.8, 0.8, 1, -0.8)
yadj <- c(0, 1, 1, -1, -1)
text(Zc_mean[ilow] + 0.02 * xadj, nH2O_mean[ilow] + 0.005 * yadj, names(top50_phyla[ilow]), cex = 0.9)
legend("bottomleft", c("Archaea", "Bacteria", "Viruses"), pch = pchs, pt.bg = bgs)
par(opar)
## ----other_metrics, fig.width = 8, fig.height = 5, fig.align = "center", pngquant = pngquant----
AAcomp <- taxon_AA[match(classes, taxon_AA$organism), ]
metrics <- canprot::calc_metrics(AAcomp, c("HC", "OC", "NC", "SC", "GRAVY", "pI", "MW", "plength"))
layout(rbind(c(1, 2, 5), c(3, 4, 5)), widths = c(2, 2, 1.5))
opar <- par(mar = c(4.5, 4, 1, 1), cex = 1)
plot(metrics$OC, metrics$HC, col = 1:10, pch = 1:10, xlab = "O/C", ylab = "H/C")
plot(metrics$NC, metrics$SC, col = 1:10, pch = 1:10, xlab = "N/C", ylab = "S/C")
plot(metrics$pI, metrics$GRAVY, col = 1:10, pch = 1:10, xlab = "pI", ylab = "GRAVY")
plot(metrics$plength, metrics$MW, col = 1:10, pch = 1:10, xlab = "Length", ylab = "MW")
plot.new()
legend("right", classes, col = 1:10, pch = 1:10, bty = "n", xpd = NA)
par(opar)
## ----cleanup, include = FALSE-------------------------------------------------
options(oldopt)