7  Coverage plot

7.1 Introduction

The coverage_plot function in omicscope simplifies the creation of genomic coverage plots. Instead of the cumbersome process of converting BAM files to BigWig, this function works directly with your alignment files, saving you valuable time and effort. It produces crisp, publication-level graphics suitable for various sequencing applications. This makes it an essential tool not just for RNA-seq, but for anyone working with data from ATAC-seq, ChIP-seq, or CUT&Tag experiments.

7.2 Make a coverage plot

7.3 Gene plot

Using aligned BAM files and a GTF annotation file, we can specify genes for visualization via the target_gene parameter. Here, we’ll examine the expression changes of the pluripotency gene Nanog and the housekeeping gene Actb on Day 0 versus Day 10 of stem cell differentiation:

library(omicScope)
library(ggplot2)

gtf <- rtracklayer::import("../test-bam/Mus_musculus.GRCm38.102.gtf.gz")

# test
coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/0b.sorted.bam",
                             "../test-bam/10a.sorted.bam",
                             "../test-bam/10b.sorted.bam"),
              sample_name <- c("day0-rep1","day0-rep2","day10-rep1","day10-rep2"),
              gtf_file = gtf,
              target_gene = c("Nanog","Actb"))

In the bottom panel, the arrows on the gene structure indicate the direction of transcription. Arrows pointing to the right represent genes on the positive (+) strand, while those pointing to the left represent genes on the negative (-) strand.

To simplify the y-axis and show only a range label instead of the full axis, set both the add_range_label and remove_labelY parameters to TRUE:

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/0b.sorted.bam",
                             "../test-bam/10a.sorted.bam",
                             "../test-bam/10b.sorted.bam"),
              sample_name <- c("day0-rep1","day0-rep2","day10-rep1","day10-rep2"),
              gtf_file = gtf,
              target_gene = c("Nanog","Actb"),
              add_range_label = TRUE,
              remove_labelY = TRUE)

Use the sample_order and gene_order parameters to reorder the samples and genes:

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/0b.sorted.bam",
                             "../test-bam/10a.sorted.bam",
                             "../test-bam/10b.sorted.bam"),
              sample_name <- c("day0-rep1","day0-rep2","day10-rep1","day10-rep2"),
              gtf_file = gtf,
              target_gene = c("Nanog","Actb"),
              sample_order = c("day0-rep1","day10-rep1","day0-rep2","day10-rep2"),
              gene_order = c("Actb","Nanog"))

Use the group_name parameter to assign samples to groups. Setting merge_group to TRUE will then display the mean coverage across biological replicates for each group:

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/0b.sorted.bam",
                             "../test-bam/10a.sorted.bam",
                             "../test-bam/10b.sorted.bam"),
              sample_name <- c("day0-rep1","day0-rep2","day10-rep1","day10-rep2"),
              group_name = c("Day-0","Day-0","Day-10","Day-10"),
              gtf_file = gtf,
              target_gene = c("Nanog","Actb"),
              merge_group = TRUE)

Set collapse_exon = TRUE to collapse all of a gene’s transcripts for visualization:

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/0b.sorted.bam",
                             "../test-bam/10a.sorted.bam",
                             "../test-bam/10b.sorted.bam"),
              sample_name <- c("day0-rep1","day0-rep2","day10-rep1","day10-rep2"),
              group_name = c("Day-0","Day-0","Day-10","Day-10"),
              gtf_file = gtf,
              target_gene = c("Nanog","Actb"),
              merge_group = TRUE,
              collapse_exon = TRUE,
              exon_linewidth = 8)

If you want to make the gene structure panel at the bottom shorter, you can use the theme_sub_panel function:

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/0b.sorted.bam",
                             "../test-bam/10a.sorted.bam",
                             "../test-bam/10b.sorted.bam"),
              sample_name <- c("day0-rep1","day0-rep2","day10-rep1","day10-rep2"),
              group_name = c("Day-0","Day-0","Day-10","Day-10"),
              gtf_file = gtf,
              target_gene = c("Nanog","Actb"),
              merge_group = TRUE,
              collapse_exon = TRUE,
              exon_linewidth = 8) +
    theme_sub_panel(heights = unit(c(3,3,1),"cm"))

Modify colors using standard ggplot2 syntax, or set them directly with the exon_col (for exons) and arrow_col (for intron lines) parameters:

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/0b.sorted.bam",
                             "../test-bam/10a.sorted.bam",
                             "../test-bam/10b.sorted.bam"),
              sample_name <- c("day0-rep1","day0-rep2","day10-rep1","day10-rep2"),
              group_name = c("Day-0","Day-0","Day-10","Day-10"),
              gtf_file = gtf,
              target_gene = c("Nanog","Actb"),
              merge_group = TRUE,
              exon_col = "#05339C",arrow_col = "#05339C") +
    scale_fill_manual(values = c(Actb = "#FAB12F",Nanog = "#696FC7")) +
    scale_color_manual(values = c(Actb = "#FAB12F",Nanog = "#696FC7"))

7.3.1 Genomic region plot

Besides plotting coverage for specific genes, the function also supports visualization of user-defined genomic regions. You can specify the target_region parameter with genomic coordinates in the format: chromosome:start_position-end_position (e.g., “chr1:1000000-2000000”):

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/10a.sorted.bam"),
              sample_name <- c("day0-rep1","day10-rep1"),
              gtf_file = gtf,
              target_region = c("1:127061844-128745904",
                                "2:121127504-121354559")
              )

You can also set add_gene_label = FALSE to suppress gene name labeling:

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/10a.sorted.bam"),
              sample_name <- c("day0-rep1","day10-rep1"),
              gtf_file = gtf,
              target_region = c("1:127061844-128745904",
                                "2:121127504-121354559"),
              add_range_label = TRUE,
              remove_labelY = TRUE,
              add_gene_label = FALSE)

Collapse all exons into a single track:

coverage_plot(bam_file <-  c("../test-bam/0a.sorted.bam",
                             "../test-bam/10a.sorted.bam"),
              sample_name <- c("day0-rep1","day10-rep1"),
              gtf_file = gtf,
              target_region = c("1:127061844-128745904",
                                "2:121127504-121354559"),
              add_gene_label = FALSE,
              collapse_exon = TRUE,
              exon_linewidth = 6)

7.4 New features

We are constantly working to enhance the coverage_plot function. Below is a description of the latest features and improvements.

7.4.1 Input bigwig files

In addition to directly processing BAM files, coverage_plot now supports BigWig files. As a popular format for visualization with a smaller file size, BigWig provides a lightweight alternative for generating coverage plots.

library(omicScope)

gtf <- rtracklayer::import("../../index-data/Mus_musculus.GRCm38.102.gtf.gz")

coverage_plot(bw_file = c("../7.bw-data/scrinput.bw","../7.bw-data/scrDinput.bw"),
              sample_name = c("dmso","treat"),
              gtf_file = gtf,
              target_gene = "Nanog")

Plotting a specific genomic region:

coverage_plot(bw_file = c("../7.bw-data/scrinput.bw","../7.bw-data/scrDinput.bw"),
              sample_name = c("dmso","treat"),
              gtf_file = gtf,
              target_region = "5:76176615-76376575")

7.4.2 UTR region visualization

As you may notice in the plot above, UTR regions are rendered with thinner lines than CDS regions. This new feature allows for better visual distinction between the coding and non-coding portions of a gene. To render all exons with a uniform width, you can set show_utr = FALSE:

coverage_plot(bw_file = c("../7.bw-data/scrinput.bw","../7.bw-data/scrDinput.bw"),
              sample_name = c("dmso","treat"),
              gtf_file = gtf,
              target_gene = "Nanog",
              show_utr = FALSE)

7.4.3 Highlight specific region

Sometimes, you may want to visually emphasize specific genomic areas. To do this, simply prepare a data frame containing the coordinates of your target regions and pass it to the highlight_region parameter.

In the example below, we’ll highlight several exons within the Nanog gene:

g <- data.frame(gtf) |> 
  dplyr::filter(gene_name == "Nanog" & type == "exon" & transcript_id == "ENSMUST00000012540") |> 
  dplyr::arrange(dplyr::desc(width)) |> 
  dplyr::select(seqnames,start,end) |> 
  dplyr::mutate(type = factor(1:n()))
  
g
#   seqnames     start       end type
# 1        6 122713142 122714633    1
# 2        6 122707568 122707933    2
# 3        6 122711538 122711803    3
# 4        6 122712629 122712715    4

Plot highlight region:

coverage_plot(bam_file = c("scrinput.bam","scrinput.bam"),
              sample_name = c("dmso","treat"),
              gtf_file = gtf,
              target_gene = "Nanog",
              highlight_region = g)

The name of the column to use for color mapping:

coverage_plot(bam_file = c("scrinput.bam","scrDinput.bam"),
              sample_name = c("dmso","treat"),
              gtf_file = gtf,
              target_gene = "Nanog",
              highlight_region = g,
              highlight_col_aes = "type")