Chapter 4 Composition-based class
The trans_abund class and trans_venn class are organised into the section ‘Composition-based class’, since they are mainly used to show the composition information of communities.
4.1 trans_abund class
The trans_abund class has several functions to visualize taxonomic abundance based on the ggplot2 package.
4.1.1 Example
We first show the bar plot example.
# create trans_abund object
# select top 8 abundant Phyla.
<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 8) t1
## The transformed abundance data is stored in object$data_abund ...
# t1 object now include the transformed abundance data t1$abund_data and other elements for the following plotting
As the sample number is large, we do not show the sample names in x axis and add the facet to show abundance according to groups.
$plot_bar(others_color = "grey70", facet = "Group", xtext_keep = FALSE, legend_text_italic = FALSE)
t1# return a ggplot2 object
Two or more facets are supported with the facet parameter by providing a vector with multiple elements.
# require package ggh4x, first run install.packages("ggh4x") if not installed
$plot_bar(others_color = "grey70", facet = c("Group", "Type"), xtext_keep = FALSE, legend_text_italic = FALSE, barwidth = 1) t1
The default operation can filter all the unclassified taxa (i.e. p__ or g__ in tax_table that has been processed by tidy_taxonomy
function),
as those unknown taxa are generally meaningless.
However sometimes, these unknown taxa may be meaningful for users.
For example, if one want to isolate some unknown species, it is valuable to check the abundance of those unknown taxa.
At this time, please see this topic (https://github.com/ChiLiubio/microeco/issues/165) to resolve the issue that how to show unknown taxa with hierarchical taxonomy classification.
The alluvial plot is also implemented in the plot_bar function with use_alluvium parameter.
<- trans_abund$new(dataset = mt, taxrank = "Genus", ntaxa = 8)
t1 # require ggalluvial package
# use_alluvium = TRUE make the alluvial plot, clustering =TRUE can be used to reorder the samples by clustering
# bar_type = FALSE can remove 'others'
$plot_bar(bar_full = FALSE, use_alluvium = TRUE, clustering = TRUE, xtext_angle = 30, xtext_size = 3, color_values = RColorBrewer::brewer.pal(8, "Set2")) t1
The bar plot can also be performed with group mean values.
Note that, from v0.16.0, the parameter group_morestats = TRUE
can be used to add more summary statistics in the return data_abund
when groupmean
parameter is provided.
# The groupmean parameter can be used to obtain the group-mean barplot.
<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 10, groupmean = "Group")
t1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18)) g1
The box plot is an excellent way to intuitionally show abundance distribution across groups.
# show 15 taxa at Class level
<- trans_abund$new(dataset = mt, taxrank = "Class", ntaxa = 15)
t1 $plot_box(group = "Group", xtext_angle = 30) t1
Then we show the heatmap with the high abundant genera.
# show 40 taxa at Genus level
<- trans_abund$new(dataset = mt, taxrank = "Genus", ntaxa = 40)
t1 <- t1$plot_heatmap(facet = "Group", xtext_keep = FALSE, withmargin = FALSE, plot_breaks = c(0.01, 0.1, 1, 10))
g1
g1+ theme(axis.text.y = element_text(face = 'italic')) g1
Line chart is very useful to show the abundance change of taxa along time, space or other gradients.
<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 5)
t1 $plot_line()
t1<- trans_abund$new(dataset = mt, taxrank = "Genus", ntaxa = 5, groupmean = "Type")
t1 $plot_line(position = position_dodge(0.3), xtext_angle = 0) t1
Then, we show the pie chart with the group mean values.
<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 6, groupmean = "Group")
t1 # all pie chart in one row
$plot_pie(facet_nrow = 1)
t1$plot_pie(facet_nrow = 1, add_label = TRUE) t1
The donut and radar charts are implemented from v0.17.0. Please install the dependent packages according to the steps (https://chiliubio.github.io/microeco_tutorial/intro.html#dependence).
<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 8, groupmean = "Group")
t1 $plot_donut(label = FALSE)
t1$plot_donut(label = TRUE) t1
<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 8, groupmean = "Group")
t1 $plot_radar(values.radar = c("0%", "25%", "50%"), grid.min = 0, grid.mid = 0.25, grid.max = 0.5)
t1<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 8, groupmean = "Type")
t1 $plot_radar(values.radar = c("0%", "25%", "50%"), grid.min = 0, grid.mid = 0.25, grid.max = 0.5) t1
The ternary plot can be used for the case with three samples/groups.
<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 8, groupmean = "Group")
t1 $plot_tern() t1
When the hierarchical abundance data of two levels is needed to be shown in bar plot, the nested legend can be used.
# require ggnested package; see https://chiliubio.github.io/microeco_tutorial/intro.html#dependence
<- trans_abund$new(dataset = mt, taxrank = "Class", ntaxa = 10, high_level = "Phylum", delete_taxonomy_prefix = FALSE)
test1 $plot_bar(ggnested = TRUE, facet = c("Group", "Type"), xtext_angle = 30)
test1# fixed subclass number in each phylum
<- trans_abund$new(dataset = mt, taxrank = "Class", ntaxa = 30, show = 0, high_level = "Phylum", high_level_fix_nsub = 4)
test1 $plot_bar(ggnested = TRUE, xtext_angle = 30, facet = c("Group", "Type"))
test1$plot_bar(ggnested = TRUE, xtext_angle = 0, facet = c("Group", "Type"), coord_flip = TRUE)
test1# sum others in each phylum
<- trans_abund$new(dataset = mt, taxrank = "Class", ntaxa = 20, show = 0, high_level = "Phylum", high_level_fix_nsub = 3, delete_taxonomy_prefix = FALSE)
test1 $plot_bar(ggnested = TRUE, high_level_add_other = TRUE, xtext_angle = 30, facet = c("Group", "Type")) test1
The coord_flip
parameter in plot_bar
function can be changed to make the coordinate axis flipped.
The clustering plot can also be added in the bar plot.
In this case, the coordinate axis will be flipped automatically for better visualization.
<- trans_abund$new(dataset = mt, taxrank = "Phylum", ntaxa = 10, groupmean = "Group")
t1 <- t1$plot_bar(coord_flip = TRUE)
g1 <- g1 + theme_classic() + theme(axis.title.x = element_text(size = 16), axis.ticks.y = element_blank(), axis.line.y = element_blank())
g1
g1<- t1$plot_bar(clustering_plot = TRUE)
g1 # In this case, g1 (aplot object) is the combination of different ggplot objects
# to adjust the main plot, please select g1[[1]]
1]] <- g1[[1]] + theme_classic() + theme(axis.title.x = element_text(size = 16), axis.ticks.y = element_blank(), axis.line.y = element_blank())
g1[[
g1# save the figure
ggsave("test.png", g1, width = 8, height = 5)
4.1.2 Key points
- trans_abund$new: creating trans_abund object can invoke taxa_abund in microtable for transformation
- color_values parameter: color_values parameter in each function is used for colors selection
- input_taxaname parameter: input_taxaname parameter in trans_abund$new can be used to select interested customized taxa instead of abundance-based filtering
- use_percentage parameter: use_percentage parameter in trans_abund$new - whether show the abundance percentage
- order_x parameter: order the sample (group) names in x axis in the bar plot
4.2 trans_venn class
The trans_venn class is developed for venn analysis, i.e. shared and unique taxa across samples/groups.
4.2.1 Example
This part can be performed using samples or groups at OTU/ASV level or higher taxonomic level. To analyze the unique and shared OTUs of groups, we first merge samples according to the “Group” column of sample_table.
# merge samples as one community for each group
<- mt$merge_samples("Group")
tmp # tmp is a new microtable object
# create trans_venn object
<- trans_venn$new(tmp, ratio = NULL)
t1 $plot_venn() t1
# create venn plot with more information
<- trans_venn$new(tmp, ratio = "seqratio")
t1 $plot_venn()
t1# The integer is OTU number
# The percentage data is the sequence number/total sequence number
When the groups are too many to show with venn plot, using petal plot is better.
To assign different colors in petals, please provide multiple colors to petal_color
parameter.
# use "Type" column in sample_table
<- mt$merge_samples("Type")
tmp <- trans_venn$new(tmp)
t1 $plot_venn(petal_plot = TRUE, petal_color = RColorBrewer::brewer.pal(8, "Dark2"))
t1$plot_venn(petal_plot = TRUE, petal_center_size = 50, petal_r = 1.5, petal_a = 3, petal_move_xy = 3.8, petal_color_center = "#BEBADA") t1
Another way to plot the results is to use plot_bar
function, which is especially useful for a large number of samples/groups.
This way is generally called UpSet plot. Please see the help document for more parameters to adjust the plot.
<- mt$merge_samples("Type")
tmp
tmp<- trans_venn$new(dataset = tmp)
t1 # only show some sets with large intersection numbers
$data_summary %<>% .[.[, 1] > 20, ]
t1<- t1$plot_bar(left_plot = TRUE, bottom_height = 0.5, left_width = 0.15, up_bar_fill = "grey50", left_bar_fill = "grey50", bottom_point_color = "black")
g1
g1# g1 is aplot class and can be saved with ggplot2::ggsave, aplot::ggsave or cowplot::save_plot function
# as g1 is comprised of several sub-plots, please adjust the details for each sub-plot
1]]
g1[[2]] g1[[
Generally, after getting the intersection results, we do not know who those shared or unique taxa are. The composition of the unique or shared species may account for the different and similar parts of ecological characteristics across groups(Mendes et al. 2011). So, it is interesting to further analyze the composition of unique and shared species. For this goal, we first transform the results of venn plot to the traditional feature-sample table, that is, another object of microtable class.
<- mt$merge_samples("Group")
tmp <- trans_venn$new(tmp) t1
## The details of each venn part is stored in object$data_details ...
## The venn summary table used for plot is stored in object$data_summary ...
# transform venn results to the sample-species table, here do not consider abundance, only use presence/absence.
<- t1$trans_comm(use_frequency = TRUE)
t2 # t2 is a new microtable class, each part is considered a sample
class(t2)
## [1] "microtable" "R6"
We use bar plot to show the composition at the Genus level.
# calculate taxa abundance, that is, the frequency
$cal_abund()
t2# transform and plot
<- trans_abund$new(dataset = t2, taxrank = "Genus", ntaxa = 8)
t3 $plot_bar(bar_full = FALSE, legend_text_italic = T, xtext_angle = 30, color_values = RColorBrewer::brewer.pal(8, "Set2"),
t3order_x = c("IW", "CW", "TW", "IW&CW", "IW&TW", "CW&TW", "IW&CW&TW")) + ylab("Frequency (%)")
We also try to use pie chart to show the compositions at the Phylum level.
<- trans_abund$new(dataset = t2, taxrank = "Phylum", ntaxa = 8)
t3 $data_abund$Sample %<>% factor(., levels = unique(.))
t3$plot_pie(facet_nrow = 3, color_values = c(RColorBrewer::brewer.pal(8, "Dark2"), "grey50")) t3
Other examples:
To reorder samples in the plots, please manipulate the sample_table in the object to adjust the orders.
<- mt$merge_samples("Type")
test $sample_table %<>% .[c("YML", "NE", "NW", "NC", "QTP", "SC"), , drop = FALSE]
test$tidy_dataset()
test# The columns of otu_table can also be reordered according to the sample_table after running tidy_dataset function
<- trans_venn$new(test)
t1 $plot_bar(sort_samples = FALSE) t1
The parameter sort_samples = TRUE
in plot_bar
function can be applied to sort samples in the y axis according to the number of features.
The left bar plot can be removed when the parameter left_plot = FALSE
.
<- mt$merge_samples("Type")
test <- trans_venn$new(test)
t1 # remove left bar in the UpSet plot
$plot_bar(left_plot = FALSE)
t1# sort samples in the axis according to the number of features
$plot_bar(sort_samples = TRUE)
t1# original orders in test$sample_table
$plot_bar(sort_samples = FALSE) t1
<- mt$merge_samples("Type")
test <- trans_venn$new(test)
t1 $plot_bar(left_bar_fill = RColorBrewer::brewer.pal(8, "Dark2"), left_bar_alpha = 0.2, bottom_background_fill = RColorBrewer::brewer.pal(8, "Dark2"), bottom_background_alpha = 0.2)
t1$data_summary %<>% .[.$Counts > 1, ]
t1$plot_bar(left_width = 0.15, left_bar_fill = "grey50", up_bar_fill = "grey50", up_bar_width = 0.5, left_bar_width = 0.5, bottom_height = 0.5, bottom_point_color = "black", bottom_point_size = 8) t1