Chapter 3 Enrichment anaysis
To explore important biological significance or related biological pathway for each cluster, you can supply your own enrichment results with data frame format(two columns: cluster number and term name) or use enrichCluster to do batch enrichment analysis for each cluster.
3.1 Enrichment for non-model organisms
Adjust id.trans/toType/readable parameters if you have created your own orgdb object:
enrich <- enrichCluster(object = cm,
OrgDb = org.myorg.eg.db,
type = "BP",
id.trans = F,
toType = "GID",
readable = F,
pvalueCutoff = 0.5,
topn = 5)
You can set TERM2GENE and TERM2NAME if you have data frame with pathway-gene and pathway-pathwat name data:
3.2 Own enrichment data
Here we load an example enrichment data frame:
# load term info
data("termanno")
# check
head(termanno,4)
# id term
# 1 C1 developmental process
# 2 C1 anatomical structure development
# 3 C1 multicellular organism development
# 4 C2 system development
Add enrichment results annotation beside the heatmap for each cluster:
# anno with GO terms
pdf('testHTterm.pdf',height = 10,width = 10)
visCluster(object = ck,
plot.type = "both",
column_names_rot = 45,
annoTerm.data = termanno)
dev.off()
Put the line anntation to the left:
# change the line annotation side
pdf('testHTtermCmls.pdf',height = 10,width = 10)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
annoTerm.data = termanno,
line.side = "left")
dev.off()
Remove the dend tree:
# remove tree
pdf('testHTtermCmlsrt.pdf',height = 10,width = 10)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
annoTerm.data = termanno,
line.side = "left",
show_row_dend = F)
dev.off()
You can supply enrichment data with pvalue column to show in plot, the pathway label size reflects the relative pvalue:
# load GO term data
data("termanno2")
# check
head(termanno2,3)
# id term pval
# 1 C1 developmental process 3.17e-69
# 2 C1 anatomical structure development 1.44e-68
# 3 C1 multicellular organism development 1.36e-66
# adjust term colors and text size
pdf('termlfts.pdf',height = 10,width = 12,onefile = F)
visCluster(object = ck,
plot.type = "both",
column_names_rot = 45,
markGenes = markGenes,
markGenes.side = "left",
genes.gp = c('italic',fontsize = 12,col = "black"),
annoTerm.data = termanno2,
line.side = "left",
go.col = rep(ggsci::pal_d3()(8),each = 3),
go.size = "pval")
dev.off()
3.3 Batch enrichment analysis
enrichCluster can do batch enrichment analysis for each cluster using clusterpfofiler package. It will extract top 5 pathway terms(ordered by pvalue).
library(org.Mm.eg.db)
# enrich for clusters
enrich <- enrichCluster(object = ck,
OrgDb = org.Mm.eg.db,
type = "BP",
pvalueCutoff = 0.05,
topn = 5,
seed = 5201314)
# check
head(enrich,3)
# group Description pvalue ratio
# GO:0009150 C1 purine ribonucleotide metabolic process 4.287676e-08 12
# GO:0009259...2 C1 ribonucleotide metabolic process 6.782860e-08 12
# GO:0019693...3 C1 ribose phosphate metabolic process 9.382227e-08 12
# plot
pdf('termenrich.pdf',height = 10,width = 11,onefile = F)
visCluster(object = ck,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
genes.gp = c('italic',fontsize = 12,col = "black"),
annoTerm.data = enrich,
line.side = "left",
go.col = rep(ggsci::pal_d3()(8),each = 5),
go.size = "pval")
dev.off()
3.4 Add annotation for specific groups
You can define subgroup.anno to highlight specific groups you want.
# annotation for specific clusters
pdf('subc.pdf',height = 10,width = 12,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
genes.gp = c('italic',fontsize = 12,col = "black"),
annoTerm.data = enrich,
line.side = "left",
go.col = rep(ggsci::pal_d3()(8),each = 5),
go.size = "pval",
subgroup.anno = c("C4","C6","C8"))
dev.off()
3.5 Add barplot for enrichment annotation
The barplot height will use the enrichment data frame last column to plot:
pdf('bar.pdf',height = 10,width = 12,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
genes.gp = c('italic',fontsize = 12,col = "black"),
annoTerm.data = enrich,
line.side = "left",
go.col = rep(ggsci::pal_d3()(8),each = 5),
go.size = "pval",
add.bar = T)
dev.off()
Match the enrichment annotation right side and barplot annotation left side:
# change side
pdf('barc.pdf',height = 10,width = 12,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
genes.gp = c('italic',fontsize = 12,col = "black"),
annoTerm.data = enrich,
line.side = "left",
go.col = rep(ggsci::pal_d3()(8),each = 5),
add.bar = T,
go.size = "pval",
annoTerm.mside = "left")
dev.off()
Adjust cluster label position in barplot:
# change barplot cluster label position
pdf('barp.pdf',height = 10,width = 12,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
genes.gp = c('italic',fontsize = 12,col = "black"),
annoTerm.data = enrich,
line.side = "left",
go.col = rep(ggsci::pal_d3()(8),each = 5),
go.size = "pval",
add.bar = T,
textbar.pos = c(0.8,0.2))
dev.off()
Add annotation for specific cluster:
# sub-clusters with barplot
pdf('subcb.pdf',height = 10,width = 12,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
genes.gp = c('italic',fontsize = 12,col = "black"),
annoTerm.data = enrich,
line.side = "left",
go.col = rep(ggsci::pal_d3()(8),each = 5),
go.size = "pval",
add.bar = T,
textbar.pos = c(0.8,0.2),
subgroup.anno = c("C4","C6","C8"))
dev.off()
3.6 Add KEGG annotation
Sometimes you may want to perform both GO and KEGG enrichment results, here is an example:
# using mfuzz for clustering
# mfuzz
cm <- clusterData(exp = exps,
cluster.method = "mfuzz",
cluster.num = 6)
# GO enrich for clusters
enrich.go <- enrichCluster(object = cm,
OrgDb = org.Mm.eg.db,
type = "BP",
pvalueCutoff = 0.05,
topn = 5,
seed = 5201314)
# check
head(enrich.go,3)
# group Description pvalue ratio
# GO:0044282 C1 small molecule catabolic process 3.376035e-32 10.683761
# GO:0046395 C1 carboxylic acid catabolic process 8.123713e-20 6.623932
# GO:0016054 C1 organic acid catabolic process 1.057149e-19 6.623932
# KEGG enrich for clusters
enrich.kegg <- enrichCluster(object = cm,
OrgDb = org.Mm.eg.db,
type = "KEGG",
organism = "mmu",
pvalueCutoff = 0.05,
topn = 5,
seed = 5201314)
# check
head(enrich.kegg,3)
# group Description pvalue ratio
# mmu04146 C1 Peroxisome - Mus musculus (house mouse) 1.087835e-16 8.880309
# mmu01200...2 C1 Carbon metabolism - Mus musculus (house mouse) 3.447857e-14 9.266409
# mmu00620 C1 Pyruvate metabolism - Mus musculus (house mouse) 1.466749e-10 5.019305
# add gene name
markGenes = rownames(exps)[sample(1:nrow(exps),30,replace = F)]
# plot
pdf('gokegg.pdf',height = 10,width = 16,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = enrich.go,
go.col = rep(jjAnno::useMyCol("calm",n = 6),each = 5),
annoKegg.data = enrich.kegg,
kegg.col = rep(jjAnno::useMyCol("stallion",n = 6),each = 5),
line.side = "left",
sample.group = rep(c("sample1","sample2","sample3"),each = 2))
dev.off()
Format the annotation panel:
# change textbox style
pdf('gokeggsq.pdf',height = 12,width = 18,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = enrich.go,
go.col = rep(jjAnno::useMyCol("calm",n = 6),each = 5),
by.go = "anno_block",
annoKegg.data = enrich.kegg,
kegg.col = rep(jjAnno::useMyCol("stallion",n = 6),each = 5),
by.kegg = "anno_block",
word_wrap = F,add_new_line = F,
line.side = "left",
sample.group = rep(c("sample1","sample2","sample3"),each = 2))
dev.off()
Add both barplot for GO and KEGG:
# change style
pdf('gokeggbarNew.pdf',height = 10,width = 20,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = enrich.go,
go.col = rep(jjAnno::useMyCol("calm",n = 6),each = 5),
add.bar = T,
by.go = "anno_block",
annoKegg.data = enrich.kegg,
kegg.col = rep(jjAnno::useMyCol("stallion",n = 6),each = 5),
add.kegg.bar = T,
by.kegg = "anno_block",
word_wrap = F,add_new_line = F,
line.side = "left",
sample.group = rep(c("sample1","sample2","sample3"),each = 2))
dev.off()
The another solution is to combine the GO and KEGG resluts and plot in a single panel with different colors to distinguish them:
# combine go and kegg data
library(dplyr)
all <- rbind(enrich.go %>% group_by(group) %>% slice_head(n = 2) %>%
mutate(Description = paste("BP: ",Description,sep = "")),
enrich.kegg %>% group_by(group) %>% slice_head(n = 2) %>%
mutate(Description = paste("KEGG: ",Description,sep = ""))) %>%
arrange(group)
# plot
pdf('gokeggcomb.pdf',height = 10,width = 16,onefile = F)
visCluster(object = cm,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = all,
go.col = c(rep(jjAnno::useMyCol("calm",n = 6),each = 2),
rep(jjAnno::useMyCol("circus",n = 6),each = 2)),
line.side = "left",
word_wrap = F,add_new_line = F,
by.go = "anno_block",
add.bar = T,
sample.group = rep(c("sample1","sample2","sample3"),each = 2))
dev.off()