Chapter 6 Parse output results from GSEA software

GseaVis can also parse the output of the GSEA software and redraw the data to produce publication-quality plots.

6.1 GSEA software

Here we show an example of using the GSEA software to perform an enrichment analysis:

6.2 Parse GSEA results

readGseaFile can parse the GSEA software output directory and return a list containing multiple results about enrichment:

# Parse the GSEA software output directory once; the returned list is reused
# by the examples that follow.
intergrated <- readGseaFile(filePath = "kegg_analysis.Gsea/")

# Print the parsed list. The output reproduced below is in print() format
# (a `$meta` tibble followed by the named `$glist` vector) rather than str()
# format, so print() is the call that matches it.
print(intergrated)
# $meta
# # A tibble: 40 × 11
#    ID            Description setSize enrichmentScore   NES  pvalue p.adjust qvalue  rank leading_edge
#    <chr>         <chr>         <dbl>           <dbl> <dbl>   <dbl>    <dbl>  <dbl> <dbl> <chr>
#  1 KEGG_DNA_REP… KEGG_DNA_R…      36          -0.742 -1.65 0.00196    0.108  0.1    5640 tags=72%, l…
#  2 KEGG_PYRIMID… KEGG_PYRIM…      97          -0.612 -1.58 0.00178    0.176  0.291  8892 tags=52%, l…
#  3 KEGG_AMINOAC… KEGG_AMINO…      41          -0.686 -1.55 0.00914    0.179  0.412  8244 tags=56%, l…
#  4 KEGG_CYSTEIN… KEGG_CYSTE…      34          -0.683 -1.49 0.0148     0.298  0.682  9190 tags=59%, l…
#  5 KEGG_HEMATOP… KEGG_HEMAT…      85          -0.596 -1.49 0.00515    0.257  0.715  4563 tags=26%, l…
#  6 KEGG_RNA_POL… KEGG_RNA_P…      28          -0.714 -1.49 0.0120     0.215  0.717  8772 tags=64%, l…
#  7 KEGG_RNA_DEG… KEGG_RNA_D…      59          -0.624 -1.48 0.00530    0.204  0.752 14222 tags=64%, l…
#  8 KEGG_PROTEAS… KEGG_PROTE…      46          -0.637 -1.48 0.0125     0.181  0.758 13047 tags=65%, l…
#  9 KEGG_PROXIMA… KEGG_PROXI…      23          -0.734 -1.47 0.0343     0.175  0.775  7113 tags=52%, l…
# 10 KEGG_CELL_CY… KEGG_CELL_…     124          -0.541 -1.45 0.00350    0.203  0.862  8600 tags=44%, l…
# # ℹ 30 more rows
# # ℹ 1 more variable: core_enrichment <chr>
# # ℹ Use `print(n = ...)` to see more rows
#
# $glist
# AC068790.9     AC135048.2     AC142381.2           DGKI        TMEM265       RPL18P10
# 4.968889       4.951065       4.811294       4.624895       4.367372       4.308214
# MKRN9P           NPNT        C1orf21         TSPAN7       GOLGA8IP        ADAMTS9
# ... (output truncated)

Check the results:

# Enrichment table: first three rows, first five columns
head(intergrated[["meta"]][1:3, 1:5])
# # A tibble: 3 × 5
#   ID                               Description                      setSize enrichmentScore   NES
#   <chr>                            <chr>                              <dbl>           <dbl> <dbl>
# 1 KEGG_DNA_REPLICATION             KEGG_DNA_REPLICATION                  36          -0.742 -1.65
# 2 KEGG_PYRIMIDINE_METABOLISM       KEGG_PYRIMIDINE_METABOLISM            97          -0.612 -1.58
# 3 KEGG_AMINOACYL_TRNA_BIOSYNTHESIS KEGG_AMINOACYL_TRNA_BIOSYNTHESIS      41          -0.686 -1.55

# Gene list (named numeric vector): first six entries
head(intergrated[["glist"]])
# AC068790.9 AC135048.2 AC142381.2       DGKI    TMEM265   RPL18P10
#   4.968889   4.951065   4.811294   4.624895   4.367372   4.308214

# Term-to-gene table: first three rows
head(intergrated[["gset"]][1:3, ])
#                   term  gene
# 1 KEGG_DNA_REPLICATION  DNA2
# 2 KEGG_DNA_REPLICATION POLE4
# 3 KEGG_DNA_REPLICATION POLE3

Alternatively, you can supply the path to gseaNb directly:

# Gene set IDs used by the plotting examples
setid <- c(
  "KEGG_RIBOSOME",
  "KEGG_HEDGEHOG_SIGNALING_PATHWAY",
  "KEGG_DNA_REPLICATION",
  "KEGG_PYRIMIDINE_METABOLISM"
)

# Plot the first term, handing gseaNb the GSEA output directory itself
gseaNb(
  filePath = "kegg_analysis.Gsea/",
  geneSetID = setid[1]
)

Let's compare the other terms:

# Draw one enrichment plot per term in `setid`, each with a p-value label.
# Iterating over `setid` itself (instead of a hard-coded 1:4) keeps the loop
# correct if terms are added or removed.
plist <- lapply(setid, function(id) {
  gseaNb(filePath = intergrated,
         geneSetID = id,
         addPval = TRUE,  # TRUE rather than T: T is an ordinary, reassignable variable
         pvalX = 0.6,
         pvalY = 0.6)
})

# combine the individual plots into a single row
cowplot::plot_grid(plotlist = plist, nrow = 1, align = "hv")

Multiple terms plot:

# multiple terms drawn in one plot
gseaNb(filePath = intergrated,
       geneSetID = setid,
       # one curve colour per term: palette size follows length(setid)
       # instead of a hard-coded 4
       curveCol = ggsci::pal_npg()(length(setid)),
       subPlot = 2,
       addPval = TRUE,  # TRUE rather than T: T is an ordinary, reassignable variable
       pvalX = 1,
       pvalY = 1)

New style plot:

# new style plot for a single term, with gene labels enabled
# (logical flags are spelled TRUE: T is an ordinary, reassignable variable)
gseaNb(filePath = intergrated,
       geneSetID = setid[1],
       newGsea = TRUE,
       addGene = TRUE,
       markTopgene = TRUE)

Multiple facet plot:

# multiple terms of new style, with p-value labels
# (logical flags are spelled TRUE: T is an ordinary, reassignable variable)
gseaNb(filePath = intergrated,
       geneSetID = setid,
       newGsea = TRUE,
       rmHt = TRUE,
       addPval = TRUE,
       pvalX = 0.9,
       pvalY = 0.6)