Chapter 9 Other dataset
9.1 Fungi data
Another ITS sequencing dataset (Gao et al. 2019) is also stored in the example data of the package. Here, we use it as an example to show the use of FUNGuild database(Nguyen et al. 2016). FungalTraits (Põlme et al. 2020) database is also available for identifying fungal traits.
library(microeco)
library(magrittr)
# load ITS data
data(sample_info_ITS)
data(otu_table_ITS)
data(taxonomy_table_ITS)
# create microtable object
<- microtable$new(sample_table = sample_info_ITS, otu_table = otu_table_ITS, tax_table = taxonomy_table_ITS)
mt_fungi # remove the taxa not assigned in the Kingdom "k__Fungi"
$tax_table %<>% base::subset(Kingdom == "k__Fungi")
mt_fungi# use tidy_dataset() to make OTUs and samples information consistent across files
$tidy_dataset() mt_fungi
Then, let’s perform the fungal trait prediction.
<- trans_func$new(mt_fungi)
t1 # Both FungalTraits and FUNGuild are available
$cal_spe_func(fungi_database = "FungalTraits")
t1$cal_spe_func_perc(abundance_weighted = TRUE)
t1# return: t1$res_spe_func_perc
Then we show an example on the differential test of traits across groups.
# clone mt_fungi for differential test
<- clone(mt_fungi)
tmp_mt # we need assign the table to taxa_abund manually
$taxa_abund$func <- as.data.frame(t(t1$res_spe_func_perc), check.names = FALSE)
tmp_mt# use trans_diff class to perform differential test
<- trans_diff$new(dataset = tmp_mt, method = "anova", group = "sample_type", taxa_level = "func")
t2 $plot_diff_abund(add_sig = T, simplify_names = FALSE) + ggplot2::ylab("Relative abundance (%)") t2
Then we show the distribution of traits across modules in network.
# create trans_network object
<- trans_network$new(dataset = mt_fungi, cal_cor = "WGCNA", taxa_level = "OTU", filter_thres = 0.000001, cor_method = "spearman")
t1 # create correlation network
$cal_network(COR_p_thres = 0.05, COR_cut = 0.6)
t1# add modules
$cal_module()
t1# convert module info to microtable object
<- t1$trans_comm(use_col = "module")
meco_module # create trans_func object
<- trans_func$new(meco_module)
t2 # identify species traits, automatically select database for prokaryotes or fungi
# fungi_database = "FungalTraits" for the FungalTraits database
$cal_spe_func(fungi_database = "FUNGuild")
t2# calculate abundance-unweighted functional redundancy of each trait for each network module
$cal_spe_func_perc(abundance_weighted = FALSE)
t2# plot the functional redundancy of network modules
$plot_spe_func_perc(order_x = paste0("M", 1:10)) t2
9.2 Metagenomic data
In the file2meco package part, we provide several examples to transform the output files of some famous metagenomic tools (e.g. HUMAnN and kraken2) to the microtable object directly. Here, we show some detailed examples using KEGG pathway results.
library(microeco)
library(file2meco)
library(magrittr)
?humann2meco<- system.file("extdata", "example_metagenome_sample_info.tsv", package="file2meco")
sample_file_path <- system.file("extdata", "example_metagenome_match_table.tsv", package="file2meco")
match_file_path # use KEGG pathway based HUMAnN result
<- system.file("extdata", "example_HUMAnN_KEGG_abund.tsv", package="file2meco")
abund_file_path # match_table parameter can be used to replace sample names
<- humann2meco(abund_file_path, db = "KEGG", sample_table = sample_file_path, match_table = match_file_path)
test # remove the unclassified pathway in Level.1
$tax_table %<>% subset(Level.1 != "unclassified")
test$tidy_dataset()
test# rel = FALSE donot use relative abundance, use the raw RPK
$cal_abund(select_cols = 1:3, rel = FALSE)
test# use_percentage = FALSE disable percentage for relative abundance
<- trans_abund$new(test, taxrank = "Level.2", ntaxa = 10, use_percentage = FALSE)
test1 $plot_bar(facet = "Group", bar_full = FALSE, xtext_angle = 30) + ggplot2::ylab("Abundance (RPK)") test1
Then, we select both function and taxa to see which taxa those high abundant pathways come from.
# This operation is more flexible
$cal_abund(select_cols = c("Level.1", "Phylum", "Genus"), rel = FALSE)
test$taxa_abund$Level.1 %<>% .[!grepl("unclass", rownames(.)), ]
test$taxa_abund$Phylum %<>% .[!grepl("unclass", rownames(.)), ]
test<- trans_abund$new(test, taxrank = "Phylum", ntaxa = 10, use_percentage = FALSE)
test1 $plot_bar(facet = "Group", bar_full = FALSE, xtext_angle = 30) + ggplot2::ylab("Abundance (RPK)") test1
Let’s run LEfSe to find some functional biomarkers to differentiate two groups.
# functional biomarker
$cal_abund(select_cols = 1:3, rel = TRUE)
test<- trans_diff$new(test, method = "lefse", group = "Group")
test1 $plot_diff_bar(threshold = 3) test1
9.3 Gut microbiome
We use mouse gut data stored in file2meco package to show the input of QIIME2 file and the use of metabolic trait database NJC19 database(Lim et al. 2020).
library(microeco)
library(file2meco)
library(ggplot2)
# use data files inside the file2meco package.
<- system.file("extdata", "dada2_table.qza", package="file2meco")
abund_file_path <- system.file("extdata", "sample-metadata.tsv", package="file2meco")
sample_file_path <- system.file("extdata", "taxonomy.qza", package="file2meco")
taxonomy_file_path # construct microtable object
<- qiime2meco(abund_file_path, sample_table = sample_file_path, taxonomy_table = taxonomy_file_path)
tmp $tidy_dataset()
tmp# revise the species names in tax_table as the information in the example file is not standard
<- tmp$tax_table$Species != "s__"
select_rows $tax_table$Species[select_rows] <- paste0("s__", gsub("g__", "", tmp$tax_table$Genus[select_rows]), " ", gsub("s__", "", tmp$tax_table$Species[select_rows]))
tmp# taxonomic abundance
$cal_abund()
tmp
# create object of trans_func
<- trans_func$new(tmp)
data2 # Select NJC19 database
$cal_spe_func(prok_database = "NJC19")
data2# get the trait percentage data
$cal_spe_func_perc(abundance_weighted = FALSE)
data2# inset the trait percentage result into taxa_abund of microtable object
$taxa_abund$Trait <- as.data.frame(t(data2$res_spe_func_perc))
tmp# use trans_abund to plot
<- trans_abund$new(dataset = tmp, taxrank = "Trait", ntaxa = 10, use_percentage = FALSE)
t1 $plot_box(group = "donor_status", xtext_angle = 30) + ylab("Relative population abundance (%)") + theme(axis.text.x = element_text(size = 13)) t1
# differential abundance test of the traits percentage and use random forest to find biomarkers
<- trans_diff$new(dataset = tmp, method = "rf", group = "donor_status", taxa_level = "Trait")
t1 $plot_diff_bar(use_number = 1:30) t1