Integrate Differential Expression Results and Peak Annotation Results of ChIP-seq and ATAC-seq.

DEbCA(
  de.res,
  chip.peak.res,
  atac.peak.res,
  peak.anno.key = c("Promoter", "5' UTR", "3' UTR", "Exon", "Intron", "Downstream",
    "Distal Intergenic", "All"),
  signif = "padj",
  signif.threshold = 0.05,
  l2fc.threshold = 1,
  label.key = NULL,
  merge.key = c("geneId", "ENSEMBL", "SYMBOL")
)

Arguments

de.res

Data frame contains all genes of differential expression analysis.

chip.peak.res

Dataframe contains peak annotation results of ChIP-seq data.

atac.peak.res

Dataframe contains peak annotation results of ATAC-seq data.

peak.anno.key

Peak location, chosen from "Promoter", "5' UTR", "3' UTR", "Exon", "Intron", "Downstream", "Distal Intergenic","All". Default: "Promoter".

signif

Significance criterion. For DESeq2 results, can be chosen from padj, pvalue. For edgeR results, can be chosen from FDR, PValue. Default: padj.

signif.threshold

Significance threshold to get differentially expressed genes. Default: 0.05.

l2fc.threshold

Log2 fold change threshold to get differentially expressed genes. Default: 1.

label.key

Which column to use as label. Default: NULL (use rownames of deres).

merge.key

The columns used for merging, chosen from geneId, ENSEMBL, SYMBOL. Default: geneId.

Value

Dataframe contains integrated results. The results will contain eleven categories: UP, DOWN, ChIP, ATAC, UPbChIP, DOWNbChIP UPbATAC, DOWNbATAC, ChIPbATAC, UPbPeak, DOWNbPeak.

Examples

library(DEbPeak) library(DESeq2) # ChIP-Seq data peak.file <- system.file("extdata", "debchip_peaks.bed", package = "DEbPeak") peak.df <- GetConsensusPeak(peak.file = peak.file) peak.profile <- PeakProfile(peak.df, species = "Mouse", by = "gene", region.type = "body", nbin = 800)
#> >> preparing promoter regions... 2023-07-02 17时06分49秒 #> >> preparing tag matrix... 2023-07-02 17时06分49秒 #> >> preparing start_site regions by ... 2023-07-02 17时06分49秒 #> >> preparing tag matrix... 2023-07-02 17时06分49秒 #> >> generating figure... 2023-07-02 17时06分58秒
#> >> done... 2023-07-02 17时06分58秒
#> >> binning method is used...2023-07-02 17时06分58秒 #> >> preparing start_site regions by gene... 2023-07-02 17时06分58秒 #> >> preparing tag matrix by binning... 2023-07-02 17时06分58秒 #> >> Running bootstrapping for tag matrix... 2023-07-02 17时07分05秒 #> >> binning method is used...2023-07-02 17时07分05秒 #> >> preparing body regions by gene... 2023-07-02 17时07分05秒 #> >> preparing tag matrix by binning... 2023-07-02 17时07分05秒 #> >> preparing matrix with extension from (TSS-20%)~(TTS+20%)... 2023-07-02 17时07分05秒 #> >> 1 peaks(0.1536098%), having lengths smaller than 800bp, are filtered... 2023-07-02 17时07分08秒 #> >> Running bootstrapping for tag matrix... 2023-07-02 17时07分46秒
peak.anno <- AnnoPeak( peak.df = peak.df, species = "Mouse", seq.style = "UCSC", up.dist = 20000, down.dist = 20000 )
#> >> preparing features information... 2023-07-02 17时07分47秒 #> >> identifying nearest features... 2023-07-02 17时07分47秒 #> >> calculating distance from peak to TSS... 2023-07-02 17时07分48秒 #> >> assigning genomic annotation... 2023-07-02 17时07分48秒 #> >> adding gene annotation... 2023-07-02 17时07分50秒
#> 'select()' returned 1:many mapping between keys and columns
#> >> assigning chromosome lengths 2023-07-02 17时07分50秒 #> >> done... 2023-07-02 17时07分50秒
#> Warning: Removed 6 rows containing non-finite values (`stat_count()`).
# RNA-Seq data count.file <- system.file("extdata", "debchip_count.txt", package = "DEbPeak") meta.file <- system.file("extdata", "debchip_meta.txt", package = "DEbPeak") count.matrix <- read.table(file = count.file, header = TRUE, sep = "\t") meta.info <- read.table(file = meta.file, header = TRUE) # create DESeqDataSet object dds <- DESeq2::DESeqDataSetFromMatrix( countData = count.matrix, colData = meta.info, design = ~condition )
#> Warning: some variables in design formula are characters, converting to factors
# set control level dds$condition <- relevel(dds$condition, ref = "NF") # conduct differential expressed genes analysis dds <- DESeq(dds)
#> estimating size factors
#> estimating dispersions
#> gene-wise dispersion estimates
#> mean-dispersion relationship
#> final dispersion estimates
#> fitting model and testing
# extract results dds.results <- results(dds, contrast = c("condition", "RX", "NF")) dds.results.ordered <- dds.results[order(dds.results$log2FoldChange, decreasing = TRUE), ] # Integrated with RNA-Seq debchip.res <- DEbPeak( de.res = dds.results.ordered, peak.res = peak.anno[["df"]], peak.anno.key = "Promoter", merge.key = "SYMBOL" )
#> Differential expression analysis with DESeq2!
# ATAC-seq data atac.peak.file <- system.file("extdata", "debatac_peaks.bed", package = "DEbPeak") atac.peak.df <- GetConsensusPeak(peak.file = atac.peak.file) atac.peak.profile <- PeakProfile(atac.peak.df, species = "Mouse", by = "gene", region.type = "body", nbin = 800)
#> >> preparing promoter regions... 2023-07-02 17时08分00秒 #> >> preparing tag matrix... 2023-07-02 17时08分00秒 #> >> preparing start_site regions by ... 2023-07-02 17时08分00秒 #> >> preparing tag matrix... 2023-07-02 17时08分00秒 #> >> generating figure... 2023-07-02 17时08分07秒
#> >> done... 2023-07-02 17时08分12秒
#> >> binning method is used...2023-07-02 17时08分14秒 #> >> preparing start_site regions by gene... 2023-07-02 17时08分14秒 #> >> preparing tag matrix by binning... 2023-07-02 17时08分14秒 #> >> Running bootstrapping for tag matrix... 2023-07-02 17时08分58秒 #> >> binning method is used...2023-07-02 17时08分58秒 #> >> preparing body regions by gene... 2023-07-02 17时08分58秒 #> >> preparing tag matrix by binning... 2023-07-02 17时08分58秒
#> Warning: GRanges object contains 1 out-of-bound range located on sequence chrX. #> Note that ranges located on a sequence whose length is unknown (NA) or #> on a circular sequence are not considered out-of-bound (use #> seqlengths() and isCircular() to get the lengths and circularity flags #> of the underlying sequences). You can use trim() to trim these ranges. #> See ?`trim,GenomicRanges-method` for more information.
#> Warning: GRanges object contains 1 out-of-bound range located on sequence chrX. #> Note that ranges located on a sequence whose length is unknown (NA) or #> on a circular sequence are not considered out-of-bound (use #> seqlengths() and isCircular() to get the lengths and circularity flags #> of the underlying sequences). You can use trim() to trim these ranges. #> See ?`trim,GenomicRanges-method` for more information.
#> >> preparing matrix with extension from (TSS-20%)~(TTS+20%)... 2023-07-02 17时08分58秒 #> >> 16 peaks(0.296077%), having lengths smaller than 800bp, are filtered... 2023-07-02 17时09分04秒 #> >> Running bootstrapping for tag matrix... 2023-07-02 17时12分02秒
atac.peak.anno <- AnnoPeak( peak.df = atac.peak.df, species = "Mouse", seq.style = "UCSC", up.dist = 20000, down.dist = 20000 )
#> >> preparing features information... 2023-07-02 17时12分03秒 #> >> identifying nearest features... 2023-07-02 17时12分03秒 #> >> calculating distance from peak to TSS... 2023-07-02 17时12分03秒 #> >> assigning genomic annotation... 2023-07-02 17时12分03秒 #> >> adding gene annotation... 2023-07-02 17时12分05秒
#> 'select()' returned 1:many mapping between keys and columns
#> >> assigning chromosome lengths 2023-07-02 17时12分06秒 #> >> done... 2023-07-02 17时12分06秒
#> Warning: Removed 23 rows containing non-finite values (`stat_count()`).
debpeak.res <- DEbCA( de.res = dds.results.ordered, chip.peak.res = peak.anno[["df"]], atac.peak.res = atac.peak.anno[["df"]], peak.anno.key = "Promoter", merge.key = "SYMBOL" )
#> Differential expression analysis with DESeq2!