Integrate Differential Expression Results with Peak Annotation/Differential Expression Results.

DEbPeak(
  de.res,
  peak.res,
  peak.mode = c("consensus", "diff"),
  peak.anno.key = c("Promoter", "5' UTR", "3' UTR", "Exon", "Intron", "Downstream",
    "Distal Intergenic", "All"),
  signif = "padj",
  signif.threshold = 0.05,
  l2fc.threshold = 1,
  label.key = NULL,
  peak.signif = "padj",
  peak.signif.threshold = 0.05,
  peak.l2fc.threshold = 1,
  merge.key = c("geneId", "ENSEMBL", "SYMBOL"),
  species = c("Human", "Mouse", "Rat", "Fly", "Arabidopsis", "Yeast", "Zebrafish",
    "Worm", "Bovine", "Pig", "Chicken", "Rhesus", "Canine", "Xenopus", "Anopheles",
    "Chimp", "E coli strain Sakai", "Myxococcus xanthus DK 1622"),
  enhancer = FALSE,
  seq.style = "Ensembl",
  gtf.file = NULL,
  dis.threshold = 250000,
  n.cores = 1
)

Arguments

de.res	Dataframe contains all genes of differential expression analysis.
peak.res	Dataframe contains all peak annotation (`peak.mode` is consensus) or differential analysis results of peak-related data (`peak.mode` is diff).
peak.mode	The source of peak results, choose from consensus (peak annotation) and diff (differential analysis). Default: consensus.
peak.anno.key	Peak location, chosen from "Promoter", "5' UTR", "3' UTR", "Exon", "Intron", "Downstream", "Distal Intergenic","All". Used when `peak.mode` is consensus. Default: "Promoter".
signif	Significance criterion for RNA-seq results. For DESeq2 results, can be chosen from padj, pvalue. For edgeR results, can be chosen from FDR, PValue. Default: padj.
signif.threshold	Significance threshold for RNA-seq to get differentially expressed genes. Default: 0.05.
l2fc.threshold	Log2 fold change threshold for RNA-seq to get differentially expressed genes. Default: 1.
label.key	Which column to use as label. Default: NULL (use rownames of `de.res`).
peak.signif	Significance criterion for peak-associated results. For DESeq2 results, can be chosen from padj, pvalue. For edgeR results, can be chosen from FDR, PValue. Default: padj.
peak.signif.threshold	Significance threshold for peak-associated results to get differentially accessible/binding peaks. Default: 0.05.
peak.l2fc.threshold	Log2 fold change threshold for peak-associated results to get differentially accessible/binding peaks. Default: 1.
merge.key	The columns used for merging, chosen from geneId (ENTREZID), ENSEMBL, SYMBOL. Default: geneId.
species	Species used, chosen from "Human","Mouse","Rat","Fly","Arabidopsis", "Yeast","Zebrafish","Worm","Bovine","Pig","Chicken","Rhesus", "Canine","Xenopus","Anopheles","Chimp","E coli strain Sakai","Myxococcus xanthus DK 1622". Default: "Human".
enhancer	Logical value. Whether the data is enhancer-related data. Default: FALSE.
seq.style	The style of sequence, chosen from UCSC, NCBI, Ensembl, None. This should be compatible with the genome and gtf file you used to generate count matrix and peak files. Default: "UCSC".
gtf.file	GTF file used to create TxDb object. Useful when specie you used is not available in `species`. Default: NULL.
dis.threshold	Distance threshold. Default: 250000.
n.cores	The number of cores to be used for this job. Default:1.

Value

Dataframe contains integrated results. The results will contain five categories: UPbPeak, DOWNbPeak, UP, DOWN, Peak when peak.mode is consensus and Down_Up, Up_Up, Down_Down, Up_Down when peak.mode is diff. RNA in front.

Examples

library(DEbPeak)
library(DESeq2)
library(openxlsx)
### consensus mode
# ChIP-Seq data
peak.file <- system.file("extdata", "debchip_peaks.bed", package = "DEbPeak")
peak.df <- GetConsensusPeak(peak.file = peak.file)
peak.profile <- PeakProfile(peak.df, species = "Mouse", by = "gene", region.type = "body", nbin = 800)
#> >> preparing promoter regions...	 2023-07-02 17时12分20秒 
#> >> preparing tag matrix...		 2023-07-02 17时12分20秒 
#> >> preparing start_site regions by ... 2023-07-02 17时12分20秒
#> >> preparing tag matrix...  2023-07-02 17时12分20秒 
#> >> generating figure...		 2023-07-02 17时12分28秒 
#> >> done...			 2023-07-02 17时12分28秒 
#> >> binning method is used...2023-07-02 17时12分28秒
#> >> preparing start_site regions by gene... 2023-07-02 17时12分28秒
#> >> preparing tag matrix by binning...  2023-07-02 17时12分28秒 
#> >> Running bootstrapping for tag matrix...		 2023-07-02 17时12分37秒 
#> >> binning method is used...2023-07-02 17时12分37秒
#> >> preparing body regions by gene... 2023-07-02 17时12分37秒
#> >> preparing tag matrix by binning...  2023-07-02 17时12分37秒 
#> >> preparing matrix with extension from (TSS-20%)~(TTS+20%)... 2023-07-02 17时12分37秒
#> >> 1 peaks(0.1536098%), having lengths smaller than 800bp, are filtered... 2023-07-02 17时12分40秒
#> >> Running bootstrapping for tag matrix...		 2023-07-02 17时13分21秒 
peak.anno <- AnnoPeak(
  peak.df = peak.df, species = "Mouse",
  seq.style = "UCSC", up.dist = 20000, down.dist = 20000
)
#> >> preparing features information...		 2023-07-02 17时13分22秒 
#> >> identifying nearest features...		 2023-07-02 17时13分22秒 
#> >> calculating distance from peak to TSS...	 2023-07-02 17时13分22秒 
#> >> assigning genomic annotation...		 2023-07-02 17时13分22秒 
#> >> adding gene annotation...			 2023-07-02 17时13分25秒 
#> 'select()' returned 1:many mapping between keys and columns
#> >> assigning chromosome lengths			 2023-07-02 17时13分25秒 
#> >> done...					 2023-07-02 17时13分25秒 
#> Warning: Removed 6 rows containing non-finite values (`stat_count()`).
# RNA-Seq data
count.file <- system.file("extdata", "debchip_count.txt", package = "DEbPeak")
meta.file <- system.file("extdata", "debchip_meta.txt", package = "DEbPeak")
count.matrix <- read.table(file = count.file, header = TRUE, sep = "\t")
meta.info <- read.table(file = meta.file, header = TRUE)
# create DESeqDataSet object
dds <- DESeq2::DESeqDataSetFromMatrix(
  countData = count.matrix, colData = meta.info,
  design = ~condition
)
#> Warning: some variables in design formula are characters, converting to factors
# set control level
dds$condition <- relevel(dds$condition, ref = "NF")
# conduct differential expressed genes analysis
dds <- DESeq(dds)
#> estimating size factors
#> estimating dispersions
#> gene-wise dispersion estimates
#> mean-dispersion relationship
#> final dispersion estimates
#> fitting model and testing
# extract results
dds.results <- results(dds, contrast = c("condition", "RX", "NF"))
dds.results.ordered <- dds.results[order(dds.results$log2FoldChange, decreasing = TRUE), ]
# Integrated with RNA-Seq
debchip.res <- DEbPeak(
  de.res = dds.results.ordered, peak.res = peak.anno[["df"]],
  peak.anno.key = "Promoter", merge.key = "SYMBOL"
)
#> Differential expression analysis with DESeq2!
### diff mode
# ATAC-Seq data
peak.matrix.file <- system.file("extdata", "RA_ATAC_count.txt", package = "DEbPeak")
peak.meta.file <- system.file("extdata", "RA_ATAC_meta.txt", package = "DEbPeak")
peak.matrix <- read.table(file = peak.matrix.file, header = TRUE, sep = "\t")
peak.meta <- read.table(file = peak.meta.file, header = TRUE)
dds.peak <- DESeq2::DESeqDataSetFromMatrix(
  countData = peak.matrix, colData = peak.meta,
  design = ~condition
)
#> Warning: some variables in design formula are characters, converting to factors
# set control level
dds.peak$condition <- relevel(dds.peak$condition, ref = "WT")
# conduct differential expressed genes analysis
dds.peak <- DESeq(dds.peak)
#> estimating size factors
#> estimating dispersions
#> gene-wise dispersion estimates
#> mean-dispersion relationship
#> -- note: fitType='parametric', but the dispersion trend was not well captured by the
#>    function: y = a/x + b, and a local regression fit was automatically substituted.
#>    specify fitType='local' or 'mean' to avoid this message next time.
#> final dispersion estimates
#> fitting model and testing
# extract results
dds.peak.results <- results(dds.peak, contrast = c("condition", "KO", "WT"))
dds.peak.results.ordered <- dds.peak.results[order(dds.peak.results$log2FoldChange, decreasing = TRUE), ]
# RNA-seq results
# rna.diff.file <- system.file("extdata", "RA_RNA_diff.xlsx", package = "DEbPeak")
rna.diff.file <- system.file("extdata", "RA_RNA_diff.txt", package = "DEbPeak")
# rna.diff <- openxlsx::read.xlsx(xlsxFile = rna.diff.file, rowNames = T, check.names = FALSE)
rna.diff <- read.table(file = rna.diff.file, header = TRUE, sep = "\t")
# integrate differential expression results of RNA-seq and ATAC-seq
debatac.res <- DEbPeak(
  de.res = rna.diff, peak.res = dds.peak.results.ordered, peak.mode = "diff", peak.anno.key = "All", l2fc.threshold = 0,
  peak.l2fc.threshold = 0, species = "Mouse", merge.key = "SYMBOL"
)
#> Differential expression analysis with DESeq2!
#> 'select()' returned 1:many mapping between keys and columns
#> Warning: 1.63% of input gene IDs are fail to map...
#> Differential expression analysis with DESeq2!
#> 'select()' returned 1:many mapping between keys and columns
#> Warning: 13.86% of input gene IDs are fail to map...