Chapter 7 Add custom annotation
I was thinking that ClusterGVis has some limitations when adding annotations to heatmaps. It would be better if we could customize and add relevant plots to each cluster, rather than being restricted to the data within the heatmap. So, you simply need to create the plot (using ggplot2), and then insert it into the relevant cluster, not limiting it to the heatmap data.
For example, when displaying marker genes in single-cell analysis, you could insert a feature plot (scatterplot) of the marker genes on the right side. Of course, any other kind of plot could also be used.
Additionally, I’ve added a new gglist parameter to ClusterGVis. You just need to provide a list of ggplot objects, making sure that the order corresponds correctly.
Create a scatter plot for marker genes for each celtype:
# find markers for every cluster compared to all remaining cells
# report only the positive ones
pbmc.markers.all <- Seurat::FindAllMarkers(pbmc,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25)
# get top 10 genes
pbmc.markers <- pbmc.markers.all %>%
dplyr::group_by(cluster) %>%
dplyr::top_n(n = 4, wt = avg_log2FC)
# prepare data from seurat object
st.data <- prepareDataFromscRNA(object = pbmc,
diffData = pbmc.markers,
showAverage = TRUE)
# loop plot
lapply(unique(pbmc.markers$cluster), function(x){
tmp <- pbmc.markers |> dplyr::filter(cluster == x)
# plot
p <- Seurat::FeaturePlot(object = pbmc,
features = tmp$gene,
ncol = 4)
return(p)
}) -> gglist
# assign names
names(gglist) <- paste("C",1:9,sep = "")
Insert scatter plot:
pdf('sc_ggplot.pdf',height = 16,width = 18,onefile = F)
visCluster(object = st.data,
plot.type = "both",
line.side = "left",
column_names_rot = 45,
markGenes = pbmc.markers$gene,
cluster.order = c(1:9),
ggplot.panel.arg = c(4,1,24,"grey90",NA),
gglist = gglist)
dev.off()
Use scRNAtoolVis to generate scatter plot:
library(scRNAtoolVis)
# loop plot
lapply(unique(pbmc.markers$cluster), function(x){
tmp <- pbmc.markers |> dplyr::filter(cluster == x)
# plot
p <-
featureCornerAxes(object = pbmc,
reduction = 'umap',
groupFacet = NULL,
relLength = 0.65,
relDist = 0.2,
cornerTextSize = 2.5,
features = tmp$gene,
show.legend = F)
return(p)
}) -> gglist
# assign names
names(gglist) <- paste("C",1:9,sep = "")
# insert scatter plot
pdf('sc_ggplot_scRNAtoolVis.pdf',height = 16,width = 16,onefile = F)
visCluster(object = st.data,
plot.type = "both",
line.side = "left",
column_names_rot = 45,
markGenes = pbmc.markers$gene,
cluster.order = c(1:9),
ggplot.panel.arg = c(5,1,13,"grey90",NA),
gglist = gglist)
dev.off()
Add custom enrichment plot:
# enrich for clusters
enrich <- enrichCluster(object = st.data,
OrgDb = org.Hs.eg.db,
type = "BP",
organism = "hsa",
pvalueCutoff = 0.5,
topn = 5,
seed = 5201314)
# check
head(enrich,3)
# group Description pvalue ratio
# GO:0002573 C1 myeloid leukocyte differentiation 0.0003941646 66.66667
# GO:0050870 C1 positive regulation of T cell activation 0.0005222343 66.66667
# GO:1903039 C1 positive regulation of leukocyte cell-cell adhesion 0.0006265618 66.66667
# barplot
palette = c("Grays","Light Grays","Blues2","Blues3","Purples2","Purples3","Reds2","Reds3","Greens2")
# loop
lapply(seq_along(unique(enrich$group)), function(x){
tmp <- enrich |> dplyr::filter(group == unique(enrich$group)[x]) |>
dplyr::arrange(desc(pvalue))
tmp$Description <- factor(tmp$Description,levels = tmp$Description)
# plot
p <-
ggplot(tmp) +
geom_col(aes(x = -log10(pvalue),y = Description,fill = -log10(pvalue)),
width = 0.75) +
geom_line(aes(x = log10(ratio),y = as.numeric(Description)),color = "grey50") +
geom_point(aes(x = log10(ratio),y = Description),size = 3,color = "orange") +
theme_bw() +
scale_y_discrete(position = "right",
labels = function(x) stringr::str_wrap(x, width = 40)) +
scale_x_continuous(sec.axis = sec_axis(~.,name = "log10(ratio)")) +
colorspace::scale_fill_binned_sequential(palette = palette[x]) +
ylab("")
return(p)
}) -> gglist
# assign names
names(gglist) <- paste("C",1:9,sep = "")
# insert bar plot
pdf('sc_ggplot_go.pdf',height = 20,width = 16,onefile = F)
visCluster(object = st.data,
plot.type = "both",
line.side = "left",
column_names_rot = 45,
markGenes = pbmc.markers$gene,
cluster.order = c(1:9),
ggplot.panel.arg = c(5,0.5,16,"grey90",NA),
gglist = gglist)
dev.off()
Add GO and KEGG enrichment annotation:
# enrich for clusters
enrich <- enrichCluster(object = st.data,
OrgDb = org.Hs.eg.db,
type = "BP",
organism = "hsa",
pvalueCutoff = 0.5,
topn = 5,
seed = 5201314)
# check
head(enrich,3)
# group Description pvalue ratio
# GO:0002573 C1 myeloid leukocyte differentiation 0.0003941646 66.66667
# GO:0050870 C1 positive regulation of T cell activation 0.0005222343 66.66667
# GO:1903039 C1 positive regulation of leukocyte cell-cell adhesion 0.0006265618 66.66667
enrich.KEGG <- enrichCluster(object = st.data,
OrgDb = org.Hs.eg.db,
type = "KEGG",
organism = "hsa",
pvalueCutoff = 0.9,
topn = 5,
seed = 5201314)
# check
head(enrich.KEGG,3)
# group Description pvalue ratio
# hsa00640 C1 Propanoate metabolism 0.01115231 33.33333
# hsa05216 C1 Thyroid cancer 0.01288734 33.33333
# hsa00620 C1 Pyruvate metabolism 0.01635129 33.33333
palette = c("Grays","Light Grays","Blues2","Blues3","Purples2","Purples3","Reds2","Reds3","Greens2")
# loop
lapply(seq_along(unique(enrich$group)), function(x){
# go plot
tmp <- enrich |> dplyr::filter(group == unique(enrich$group)[x]) |>
dplyr::arrange(desc(pvalue))
tmp$Description <- factor(tmp$Description,levels = tmp$Description)
# plot
p <-
ggplot(tmp) +
geom_col(aes(x = -log10(pvalue),y = Description,fill = -log10(pvalue)),
width = 0.75) +
geom_line(aes(x = log10(ratio),y = as.numeric(Description)),color = "grey50") +
geom_point(aes(x = log10(ratio),y = Description),size = 3,color = "orange") +
theme_bw() +
scale_y_discrete(position = "right",
labels = function(x) stringr::str_wrap(x, width = 40)) +
scale_x_continuous(sec.axis = sec_axis(~.,name = "log10(ratio)")) +
colorspace::scale_fill_binned_sequential(palette = palette[x]) +
ylab("")
# plot kegg
tmp.kg <- enrich.KEGG |> dplyr::filter(group == unique(enrich.KEGG$group)[x]) |>
dplyr::arrange(desc(pvalue))
tmp.kg$Description <- factor(tmp.kg$Description,levels = tmp.kg$Description)
# plot
pk <-
ggplot(tmp.kg) +
geom_segment(aes(x = 0,xend = -log10(pvalue),y = Description,yend = Description),
lty = "dashed",linewidth = 0.75) +
geom_point(aes(x = -log10(pvalue),y = Description,color = -log10(pvalue)),size = 5) +
theme_bw() +
scale_y_discrete(position = "right",
labels = function(x) stringr::str_wrap(x, width = 40)) +
colorspace::scale_color_binned_sequential(palette = palette[x]) +
ylab("") + xlab("-log10(pvalue)")
# combine
cb <- cowplot::plot_grid(plotlist = list(p,pk))
return(cb)
}) -> gglist
# assign names
names(gglist) <- paste("C",1:9,sep = "")
# insert bar plot
pdf('sc_ggplot_gokegg.pdf',height = 20,width = 22,onefile = F)
visCluster(object = st.data,
plot.type = "both",
line.side = "left",
column_names_rot = 45,
markGenes = pbmc.markers$gene,
cluster.order = c(1:9),
ggplot.panel.arg = c(5,0.5,32,"grey90",NA),
gglist = gglist)
dev.off()