# Console history fragments from working on 06_pathway_analysis.R.
# GOdata, resultElim, gene_list and anal_dir are not created in this fragment,
# so they must already exist in the session for these statements to run.
print(GOdata) #print summary of GOdata object
# Re-run the whole pathway-analysis script from disk (issued three times in a row).
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/06_pathway_analysis.R")
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/06_pathway_analysis.R")
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/06_pathway_analysis.R")
####~fin~~~~~~~~~~~~~~~~~~~~~####
# Close every open connection; this also ends any active sink() diversion.
closeAllConnections()
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/06_pathway_analysis.R")
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/06_pathway_analysis.R")
##~~~KS elim~~~~~~~~~~~~~~~~~####
# Run the "elim" algorithm with the Kolmogorov-Smirnov statistic on GOdata.
resultElimKS = runTest(GOdata, algorithm = "elim", statistic = "ks")
##~~~~elim KS~~~~~~~~~~~~~~~~####
# Plot the GO graph around the 5 most significant nodes of the KS result;
# output files are prefixed with the value of gene_list.
printGraph(GOdata,
resultElimKS,
firstSigNodes = 5,
fn.prefix = gene_list,
useInfo = "all",
pdfSW = TRUE)
##~~~~elim~~~~~~~~~~~~~~~~~~~####
# Same plot for resultElim, using the same file prefix.
printGraph(GOdata,
resultElim,
firstSigNodes = 5,
fn.prefix = gene_list,
useInfo = "all",
pdfSW = TRUE)
##~~~~elim KS~~~~~~~~~~~~~~~~####
# Repeat of the KS graph call above.
printGraph(GOdata,
resultElimKS,
firstSigNodes = 5,
fn.prefix = gene_list,
useInfo = "all",
pdfSW = TRUE)
# Auto-print the KS result at the console.
resultElimKS
# Read the three gene-ID lists for the current analysis folder (anal_dir), one ID per line.
gene_lists = list(
sig_genes = read_lines(paste("../06_dge_analysis/",anal_dir,"/genes_sig.txt",sep = "")),
sig_up = read_lines(paste("../06_dge_analysis/",anal_dir,"/genes_sig_up.txt",sep = "")),
sig_down = read_lines(paste("../06_dge_analysis/",anal_dir,"/genes_sig_down.txt",sep = ""))
)
# pathway analysis script
# by J.
####~load libraries~~~~~~~~~~####
library(rstudioapi)
library(topGO)
library(genefilter)
library(Rgraphviz)
library(readr)
library(svglite)
library(ggplot2)
####~housekeeping~~~~~~~~~~~~####
rm(list = ls()) # clear the environment
# Work relative to the folder that holds the script currently open in RStudio.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
output <- "../07_dge_pathway_analysis"
dir.create(output, showWarnings = FALSE) # the folder already exists on re-runs; no warning needed
setwd(output) # set wd to output folder
GO_file <- "../02_reference_data/gene2GO.map"
analyses <- c("PREGvNON", "PREGvPRE", "PREGvPOST", "POSTvPRE")
count <- 0
###~~logfile~~~~~~~~~~~~~~~~~####
# Copy console output to a time-stamped log file (split = TRUE keeps it on screen too).
log_file <- file(paste0(Sys.time(), ".log"))
sink(log_file, append = TRUE, type = "output", split = TRUE)
analysis <- "PREGvNON"
####~make directory~~~~~~~~~~####
count <- count + 1
# Zero-pad the analysis index to two digits, e.g. "01_PREGvNON".
anal_dir <- sprintf("%02d_%s", count, analysis)
dir.create(anal_dir, showWarnings = FALSE)
###~~gene lists~~~~~~~~~~~~~~####
# One gene ID per line; the universe is the background set for enrichment.
gene_universe <- read_lines(paste0("../06_dge_analysis/", anal_dir, "/gene_universe.txt"))
gene_lists <- lapply(
  c(sig_genes = "genes_sig.txt",
    sig_up = "genes_sig_up.txt",
    sig_down = "genes_sig_down.txt"),
  function(file_name) {
    read_lines(paste0("../06_dge_analysis/", anal_dir, "/", file_name))
  }
)
# Turn a vector of gene IDs into a 0/1 factor over the gene universe.
#
# Args:
#   gene_list: character vector of gene IDs of interest.
#
# Reads the global `gene_universe` (character vector of all tested gene IDs).
#
# Returns: a factor with one element per universe gene, "1" where the gene is
#   in `gene_list` and "0" otherwise, named by gene ID.
makeMyGeneLists <- function(gene_list) {
  geneList <- factor(as.integer(gene_universe %in% gene_list))
  names(geneList) <- gene_universe
  geneList # return the factor explicitly; an assignment as last statement would return its right-hand side
}
# Build one 0/1 factor per gene list.
myGeneLists <- lapply(gene_lists, makeMyGeneLists)
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/08_dtu_PREGvPRE_part_ii.R")
# Histogram of the Fisher p-values (pvalFisher must already exist in the session).
pvhFisher <- hist(pvalFisher, 50, xlab = "p-values")
# hist() returns a base-graphics "histogram" object, not a ggplot, so it is
# redrawn on an svglite device instead of being handed to ggsave().
svglite("pvh_fisher.svg")
plot(pvhFisher, xlab = "p-values")
invisible(dev.off())
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/08_dtu_PREGvPRE_part_ii.R")
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/10_dtu_PREGvPOST_part_ii.R")
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/12_dtu_POSTvPRE_part_ii.R")
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/12_dtu_POSTvPRE_part_ii.R")
####~load libraries~~~~~~~~~~####
library(rstudioapi)
library(topGO)
library(genefilter)
library(Rgraphviz)
library(readr)
library(svglite)
library(ggplot2)
####~housekeeping~~~~~~~~~~~~####
rm(list = ls()) # clear the environment
# Work relative to the folder that holds the script currently open in RStudio.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
output <- "../07_dge_pathway_analysis"
dir.create(output, showWarnings = FALSE) # the folder already exists on re-runs; no warning needed
setwd(output) # set wd to output folder
GO_file <- "../02_reference_data/gene2GO.map" # path relative to the output folder
analyses <- c("PREGvNON", "PREGvPRE", "PREGvPOST", "POSTvPRE")
count <- 0
###~~logfile~~~~~~~~~~~~~~~~~####
# Copy console output to a time-stamped log file (split = TRUE keeps it on screen too).
log_file <- file(paste0(Sys.time(), ".log"))
sink(log_file, append = TRUE, type = "output", split = TRUE)
analysis <- "PREGvNON"
####~make directory~~~~~~~~~~####
count <- count + 1
# Zero-pad the analysis index to two digits, e.g. "01_PREGvNON".
anal_dir <- sprintf("%02d_%s", count, analysis)
dir.create(anal_dir, showWarnings = FALSE)
setwd(anal_dir) # from here on, paths are relative to the per-analysis folder
####~load data~~~~~~~~~~~~~~~####
###~~gene lists~~~~~~~~~~~~~~####
# Input is read only after anal_dir is set and the wd has moved one level down,
# hence the "../../" prefix.
gene_universe <- read_lines(paste0("../../06_dge_analysis/", anal_dir, "/gene_universe.txt"))
gene_lists <- lapply(
  c(sig_genes = "genes_sig.txt",
    sig_up = "genes_sig_up.txt",
    sig_down = "genes_sig_down.txt"),
  function(file_name) {
    read_lines(paste0("../../06_dge_analysis/", anal_dir, "/", file_name))
  }
)
###~~make gene2GO~~~~~~~~~~~~####
# GO_file is relative to the output folder, so step up one level from the analysis folder.
gene2GO <- readMappings(paste0("../", GO_file))
####~make geneLists~~~~~~~~~~####
# Turn a vector of gene IDs into a 0/1 factor over the gene universe.
#
# Args:
#   gene_list: character vector of gene IDs of interest.
#
# Reads the global `gene_universe` (character vector of all tested gene IDs).
#
# Returns: a factor with one element per universe gene, "1" where the gene is
#   in `gene_list` and "0" otherwise, named by gene ID.
makeMyGeneLists <- function(gene_list) {
  geneList <- factor(as.integer(gene_universe %in% gene_list))
  names(geneList) <- gene_universe
  geneList # return the factor explicitly; an assignment as last statement would return its right-hand side
}
# Apply makeMyGeneLists to every gene list, then inspect the result in the data viewer.
myGeneLists <- lapply(X = gene_lists, FUN = makeMyGeneLists)
View(myGeneLists)
####~make geneLists~~~~~~~~~~####
# Encode membership of the universe genes in `gene_list` as a named 0/1 factor.
#
# Args:
#   gene_list: character vector of gene IDs of interest.
#
# Reads the global `gene_universe`.
#
# Returns: a factor, one element per universe gene ("1" = in gene_list,
#   "0" = not), named by gene ID.
makeMyGeneLists <- function(gene_list) {
  is_member <- is.element(gene_universe, gene_list)
  flags <- factor(as.integer(is_member))
  names(flags) <- gene_universe
  flags
}
# One named 0/1 factor per gene list (sig_genes, sig_up, sig_down).
myGeneLists <- lapply(X = gene_lists, FUN = makeMyGeneLists)
####~make topGOdata~~~~~~~~~~####
# Build a topGOdata object (Biological Process ontology) from a 0/1 gene factor.
#
# Args:
#   geneList: named factor over the gene universe, as produced by makeMyGeneLists.
#
# Reads the globals `analysis` (label appended to the description) and
# `gene2GO` (gene-to-GO mapping passed to annFUN.gene2GO).
#
# Returns: the topGOdata object, invisibly, so a direct console call does not
#   auto-print its summary.
makeMyTGDOs <- function(geneList) {
  label <- paste0("GO term analysis (BP)", analysis)
  tgd <- new(
    "topGOdata",
    ontology = "BP",
    description = label,
    allGenes = geneList,
    annot = annFUN.gene2GO,
    gene2GO = gene2GO
  )
  invisible(tgd)
}
# One topGOdata object per gene list, then inspect the list in the data viewer.
myTGDOs <- lapply(X = myGeneLists, FUN = makeMyTGDOs)
View(myTGDOs)
####~enrichment tests~~~~~~~~~~####
# Run the topGO enrichment tests on one topGOdata object and write all results
# into the current working directory.
#
# Args:
#   GOdata: topGOdata object to test.
#   name:   label used in every output file name. Pass a distinct label per
#           gene list; calls that share a label overwrite each other's files.
#
# Files written:
#   - topGO_sig<name>.csv             GenTable() summary of the tested GO terms
#   - graph files prefixed <name>     printGraph() output, one per test
#   - GO_enrichment_tests_<name>.txt  printed summary of GOdata and each result
#
# Returns: NULL, invisibly; the function is called for its side effects.
myVastRiches <- function(GOdata, name = "GOdata") {
  ##~~~run the tests~~~~~~~~~~~~~####
  resultFisher <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
  resultWeight <- runTest(GOdata, algorithm = "weight", statistic = "fisher")
  resultElim <- runTest(GOdata, algorithm = "elim", statistic = "fisher")
  resultElimKS <- runTest(GOdata, algorithm = "elim", statistic = "ks")
  # Entry 4 of the classic test's stats (number of significant GO terms) sets
  # how many nodes the table reports.
  resultFisher.stats <- geneData(resultFisher)

  ###~~all significant nodes~~~####
  GO_sig <- GenTable(GOdata,
                     weight = resultWeight,
                     classic = resultFisher,
                     elim = resultElim,
                     elim_ks = resultElimKS,
                     orderBy = "weight",
                     ranksOf = "weight",
                     topNodes = resultFisher.stats[4],
                     numChar = 1000)
  write_csv(GO_sig, paste0("topGO_sig", name, ".csv"))

  ###~~nodes graphs~~~~~~~~~~~~####
  # The KS run gets its own prefix because it uses the same algorithm ("elim")
  # as the Fisher elim run.
  graph_runs <- list(
    list(result = resultFisher, prefix = name),
    list(result = resultWeight, prefix = name),
    list(result = resultElim, prefix = name),
    list(result = resultElimKS, prefix = paste0(name, "_KS"))
  )
  for (run in graph_runs) {
    printGraph(GOdata,
               run$result,
               firstSigNodes = 5,
               fn.prefix = run$prefix,
               useInfo = "all",
               pdfSW = TRUE)
  }

  ###~~print file~~~~~~~~~~~~####
  sink(paste0("GO_enrichment_tests_", name, ".txt"))
  on.exit(sink(), add = TRUE) # restore the previous output target even if a print() fails
  print("Details of topGOdata object")
  print(GOdata) # print summary of GOdata object
  print("Results summary:")
  print(resultFisher)
  print(resultWeight)
  print(resultElim)
  print(resultElimKS)
  invisible(NULL)
}
# Successive console attempts at running myVastRiches over every topGOdata object.
# First form: only the objects are passed, so the function receives no list name.
lapply(myTGDOs, myVastRiches)
# Show the list names (sig_genes, sig_up, sig_down come from gene_lists).
names(myTGDOs)
# Positional form: the names go to the function's first argument, the objects to its second.
mapply(myVastRiches, names(myTGDOs), myTGDOs)
# Keep the list names in a variable; the base function names() remains callable.
names = names(myTGDOs)
# Named form: each call gets name = <list name> and GOdata = <object>.
mapply(myVastRiches, name = names, GOdata = myTGDOs)
# Named form without the name argument.
mapply(myVastRiches, GOdata = myTGDOs)
####~enrichment tests~~~~~~~~~~####
# Run the topGO enrichment tests for one gene list and write all results into
# the current working directory.
#
# Args:
#   name:   label of the gene list; used in every output file name.
#   GOdata: topGOdata object to test.
#
# Files written:
#   - topGO_sig<name>.csv             GenTable() summary of the tested GO terms
#   - graph files prefixed <name>     printGraph() output, one per test
#   - GO_enrichment_tests_<name>.txt  printed summary of GOdata and each result
#
# Returns: NULL, invisibly; the function is called for its side effects.
myVastRiches <- function(name, GOdata) {
  ##~~~run the tests~~~~~~~~~~~~~####
  resultFisher <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
  resultWeight <- runTest(GOdata, algorithm = "weight", statistic = "fisher")
  resultElim <- runTest(GOdata, algorithm = "elim", statistic = "fisher")
  resultElimKS <- runTest(GOdata, algorithm = "elim", statistic = "ks")
  # Entry 4 of the classic test's stats (number of significant GO terms) sets
  # how many nodes the table reports.
  resultFisher.stats <- geneData(resultFisher)

  ###~~all significant nodes~~~####
  GO_sig <- GenTable(GOdata,
                     weight = resultWeight,
                     classic = resultFisher,
                     elim = resultElim,
                     elim_ks = resultElimKS,
                     orderBy = "weight",
                     ranksOf = "weight",
                     topNodes = resultFisher.stats[4],
                     numChar = 1000)
  # Use the `name` argument directly for the file name.
  write_csv(GO_sig, paste0("topGO_sig", name, ".csv"))

  ###~~nodes graphs~~~~~~~~~~~~####
  # The KS run gets its own prefix because it uses the same algorithm ("elim")
  # as the Fisher elim run.
  graph_runs <- list(
    list(result = resultFisher, prefix = name),
    list(result = resultWeight, prefix = name),
    list(result = resultElim, prefix = name),
    list(result = resultElimKS, prefix = paste0(name, "_KS"))
  )
  for (run in graph_runs) {
    printGraph(GOdata,
               run$result,
               firstSigNodes = 5,
               fn.prefix = run$prefix,
               useInfo = "all",
               pdfSW = TRUE)
  }

  ###~~print file~~~~~~~~~~~~####
  # One summary file per gene list, so successive calls do not overwrite each other.
  sink(paste0("GO_enrichment_tests_", name, ".txt"))
  on.exit(sink(), add = TRUE) # restore the previous output target even if a print() fails
  print("Details of topGOdata object")
  print(GOdata) # print summary of GOdata object
  print("Results summary:")
  print(resultFisher)
  print(resultWeight)
  print(resultElim)
  print(resultElimKS)
  invisible(NULL)
}
# Run the enrichment workflow once per topGOdata object, pairing each object
# with its list name.
names <- names(myTGDOs)
mapply(FUN = myVastRiches, name = names, GOdata = myTGDOs)
####~enrichment tests~~~~~~~~~~####
# Run the topGO enrichment tests for one gene list and write all results into
# the current working directory.
#
# Args:
#   name:   label of the gene list; used in every output file name.
#   GOdata: topGOdata object to test.
#
# Files written:
#   - topGO_sig<name>.csv             GenTable() summary of the tested GO terms
#   - graph files prefixed <name>     printGraph() output, one per test
#   - GO_enrichment_tests_<name>.txt  printed summary of GOdata and each result
#
# Returns: NULL, invisibly; the function is called for its side effects.
myVastRiches <- function(name, GOdata) {
  ##~~~run the tests~~~~~~~~~~~~~####
  resultFisher <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
  resultWeight <- runTest(GOdata, algorithm = "weight", statistic = "fisher")
  resultElim <- runTest(GOdata, algorithm = "elim", statistic = "fisher")
  resultElimKS <- runTest(GOdata, algorithm = "elim", statistic = "ks")
  # Entry 4 of the classic test's stats (number of significant GO terms) sets
  # how many nodes the table reports.
  resultFisher.stats <- geneData(resultFisher)

  ###~~all significant nodes~~~####
  GO_sig <- GenTable(GOdata,
                     weight = resultWeight,
                     classic = resultFisher,
                     elim = resultElim,
                     elim_ks = resultElimKS,
                     orderBy = "weight",
                     ranksOf = "weight",
                     topNodes = resultFisher.stats[4],
                     numChar = 1000)
  write_csv(GO_sig, paste0("topGO_sig", name, ".csv"))

  ###~~nodes graphs~~~~~~~~~~~~####
  # The KS run gets its own prefix because it uses the same algorithm ("elim")
  # as the Fisher elim run.
  graph_runs <- list(
    list(result = resultFisher, prefix = name),
    list(result = resultWeight, prefix = name),
    list(result = resultElim, prefix = name),
    list(result = resultElimKS, prefix = paste0(name, "_KS"))
  )
  for (run in graph_runs) {
    printGraph(GOdata,
               run$result,
               firstSigNodes = 5,
               fn.prefix = run$prefix,
               useInfo = "all",
               pdfSW = TRUE)
  }

  ###~~print file~~~~~~~~~~~~####
  # One summary file per gene list, so successive calls do not overwrite each other.
  sink(paste0("GO_enrichment_tests_", name, ".txt"))
  on.exit(sink(), add = TRUE) # restore the previous output target even if a print() fails
  print("Details of topGOdata object")
  print(GOdata) # print summary of GOdata object
  print("Results summary:")
  print(resultFisher)
  print(resultWeight)
  print(resultElim)
  print(resultElimKS)
  invisible(NULL)
}
# Run the enrichment workflow once per topGOdata object, pairing each with its list name.
mapply(myVastRiches, name = names, GOdata = myTGDOs)
##~~~~elim KS~~~~~~~~~~~~~~~~####
# Stand-alone console run of the KS graph call; GOdata, resultElimKS and name
# are not created in this fragment, so they must already exist in the session.
printGraph(GOdata,
resultElimKS,
firstSigNodes = 5,
fn.prefix = paste(name,"_KS",sep = ""),
useInfo = "all",
pdfSW = TRUE)
# Repeat of the run above.
mapply(myVastRiches, name = names, GOdata = myTGDOs)
####~enrichment tests~~~~~~~~~~####
# Run the topGO enrichment tests for one gene list and write all results into
# the current working directory.
#
# Args:
#   name:   label of the gene list; used in every output file name.
#   GOdata: topGOdata object to test.
#
# Files written:
#   - topGO_sig<name>.csv             GenTable() summary of the tested GO terms
#   - graph files prefixed <name>     printGraph() output for the three Fisher tests
#   - GO_enrichment_tests_<name>.txt  printed summary of GOdata and each result
#
# The elim/KS result is tabulated and printed but no graph is drawn for it.
#
# Returns: NULL, invisibly; the function is called for its side effects.
myVastRiches <- function(name, GOdata) {
  ##~~~run the tests~~~~~~~~~~~~~####
  resultFisher <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
  resultWeight <- runTest(GOdata, algorithm = "weight", statistic = "fisher")
  resultElim <- runTest(GOdata, algorithm = "elim", statistic = "fisher")
  resultElimKS <- runTest(GOdata, algorithm = "elim", statistic = "ks")
  # Entry 4 of the classic test's stats (number of significant GO terms) sets
  # how many nodes the table reports.
  resultFisher.stats <- geneData(resultFisher)

  ###~~all significant nodes~~~####
  GO_sig <- GenTable(GOdata,
                     weight = resultWeight,
                     classic = resultFisher,
                     elim = resultElim,
                     elim_ks = resultElimKS,
                     orderBy = "weight",
                     ranksOf = "weight",
                     topNodes = resultFisher.stats[4],
                     numChar = 1000)
  write_csv(GO_sig, paste0("topGO_sig", name, ".csv"))

  ###~~nodes graphs~~~~~~~~~~~~####
  for (result in list(resultFisher, resultWeight, resultElim)) {
    printGraph(GOdata,
               result,
               firstSigNodes = 5,
               fn.prefix = name,
               useInfo = "all",
               pdfSW = TRUE)
  }

  ###~~print file~~~~~~~~~~~~####
  # One summary file per gene list, so successive calls do not overwrite each other.
  sink(paste0("GO_enrichment_tests_", name, ".txt"))
  on.exit(sink(), add = TRUE) # restore the previous output target even if a print() fails
  print("Details of topGOdata object")
  print(GOdata) # print summary of GOdata object
  print("Results summary:")
  print(resultFisher)
  print(resultWeight)
  print(resultElim)
  print(resultElimKS)
  invisible(NULL)
}
# Run the enrichment workflow once per topGOdata object, pairing each with its list name.
mapply(myVastRiches, name = names, GOdata = myTGDOs)
####~fin~~~~~~~~~~~~~~~~~~~~~####
# Close every open connection; this also ends any active sink() diversion.
closeAllConnections()
# Repeat of the run above, issued after the connections were closed.
mapply(myVastRiches, name = names, GOdata = myTGDOs)
####~enrichment tests~~~~~~~~~~####
# Run the topGO enrichment tests for one gene list and write all results into
# the current working directory.
#
# Args:
#   name:   label of the gene list; used in every output file name.
#   GOdata: topGOdata object to test.
#
# Files written:
#   - topGO_sig<name>.csv             GenTable() summary of the tested GO terms
#   - graph files prefixed <name>     printGraph() output for the three Fisher tests
#   - GO_enrichment_tests_<name>.txt  printed summary of GOdata and each result
#
# The elim/KS result is tabulated and printed but no graph is drawn for it.
#
# Returns: NULL, invisibly; the function is called for its side effects.
myVastRiches <- function(name, GOdata) {
  ##~~~run the tests~~~~~~~~~~~~~####
  resultFisher <- runTest(GOdata, algorithm = "classic", statistic = "fisher")
  resultWeight <- runTest(GOdata, algorithm = "weight", statistic = "fisher")
  resultElim <- runTest(GOdata, algorithm = "elim", statistic = "fisher")
  resultElimKS <- runTest(GOdata, algorithm = "elim", statistic = "ks")
  # Entry 4 of the classic test's stats (number of significant GO terms) sets
  # how many nodes the table reports.
  resultFisher.stats <- geneData(resultFisher)

  ###~~all significant nodes~~~####
  GO_sig <- GenTable(GOdata,
                     weight = resultWeight,
                     classic = resultFisher,
                     elim = resultElim,
                     elim_ks = resultElimKS,
                     orderBy = "weight",
                     ranksOf = "weight",
                     topNodes = resultFisher.stats[4],
                     numChar = 1000)
  write_csv(GO_sig, paste0("topGO_sig", name, ".csv"))

  ###~~nodes graphs~~~~~~~~~~~~####
  for (result in list(resultFisher, resultWeight, resultElim)) {
    printGraph(GOdata,
               result,
               firstSigNodes = 5,
               fn.prefix = name,
               useInfo = "all",
               pdfSW = TRUE)
  }

  ###~~print file~~~~~~~~~~~~####
  # One summary file per gene list, so successive calls do not overwrite each other.
  sink(paste0("GO_enrichment_tests_", name, ".txt"))
  on.exit(sink(), add = TRUE) # restore the previous output target even if a print() fails
  print("Details of topGOdata object")
  print(GOdata) # print summary of GOdata object
  print("Results summary:")
  print(resultFisher)
  print(resultWeight)
  print(resultElim)
  print(resultElimKS)
  invisible(NULL)
}
names = names(myTGDOs) #get names for each part of analysis - can put your own here if you want
# Run the enrichment workflow once per topGOdata object, pairing each with its name.
mapply(myVastRiches, name = names, GOdata = myTGDOs)
# Re-run the whole pathway-analysis script from disk (issued twice).
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/06_pathway_analysis.R")
source("~/Library/CloudStorage/Dropbox/Elmer Lab/Projects/Zootoca_WV_pregnancy_RNAseq/02_data/02_bulk_RNAseq/01_scripts/06_pathway_analysis.R")
####~housekeeping~~~~~~~~~~~~####
rm(list=ls()) #clear the environment
# Requires an RStudio session: the path comes from the document active in the editor.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #set wd to Scripts folder
