# pathway analysis script
# by J.

####~load libraries~~~~~~~~~~####
library(topGO)
library(genefilter)
library(Rgraphviz)
library(readr)
library(svglite)

####~housekeeping~~~~~~~~~~~~####
rm(list = ls()) #clear the environment
# The working directory is derived from the document open in the editor via
# rstudioapi, so this script is meant to be run from inside RStudio.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #set wd to "Scripts" folder

###~~set output directory~~~~####
# Every relative path below is resolved from inside the output folder, because
# the working directory is moved there straight after the folder is created.
output = "../10_wgcna_pathway_analysis/"
dir.create(output, showWarnings = FALSE) #no warning when re-running into an existing folder
setwd(output)

###~~specify modules~~~~~~~~~####
# WGCNA module colours to analyse; one results sub-folder is produced per module.
modules = c("yellow", "black", "pink", "red", "brown", "turquoise", "cyan",
            "midnightblue", "salmon", "greenyellow", "purple")

####~specify data~~~~~~~~~~~~####
em_file = "../04_edger/cpm.csv" #expression matrix, gene IDs in the first column
wgcna_folder = "../09_wgcna/" #specify folder with module gene lists
GO_annot_file = "../02_reference_data/gene2GO.map" #gene to GO mapping file

###~~logfile~~~~~~~~~~~~~~~~~####
# The connection is opened explicitly in append mode ("at"). When sink() is
# handed an unopened connection it opens it in write mode itself, and its
# `append` argument only applies to file names, so the previous
# sink(..., append=TRUE) silently overwrote the log on a same-day re-run.
# The log is also named after this analysis instead of the variance-partition
# script it was copied from.
log_file = file(paste0("10_wgcna_pathway_analysis_", Sys.Date(), ".log"), open = "at")
sink(log_file, type = "output", split = TRUE) #split = TRUE keeps output on the console too

####~load data~~~~~~~~~~~~~~~####
em = read.csv(em_file, row.names = 1)
all_genes = row.names(em) #get gene universe from expression matrix rownames
gene2GO = readMappings(GO_annot_file)

####~enrichment testing~~~~~~####
# Run the topGO enrichment tests for one gene set and write all result files
# into the current working directory.
#
# Arguments:
#   name    label used in every output file name (here: the module colour)
#   GOdata  topGOdata object to test
#
# Output files:
#   topGO_<name>.csv                 GenTable combining all four tests
#   graph files from printGraph()    one call each for classic, weight and elim,
#                                    all with fn.prefix = name
#   GO_enrichment_tests_<name>.txt   printed summaries of GOdata and the results
#
# Returns NULL invisibly; the function is called for its side effects.
# It is defined once, ahead of the loop, rather than re-created per module.
myVastRiches = function(name, GOdata) {
  resultFisher = runTest(GOdata, algorithm = "classic", statistic = "fisher")
  resultWeight = runTest(GOdata, algorithm = "weight", statistic = "fisher")
  resultElim = runTest(GOdata, algorithm = "elim", statistic = "fisher")
  # NOTE(review): the gene list built in the loop below is a 0/1 factor, not a
  # numeric score - confirm the KS statistic is meaningful for that input.
  resultElimKS = runTest(GOdata, algorithm = "elim", statistic = "ks")

  # Element 4 of the classic test's geneData() is, per the original author's
  # note, the number of significant GO terms; it sets the table length.
  n_sig_terms = geneData(resultFisher)[4]

  # all significant nodes
  GO_sig = GenTable(GOdata,
                    weight = resultWeight,
                    classic = resultFisher,
                    elim = resultElim,
                    elim_ks = resultElimKS,
                    orderBy = "weight",
                    ranksOf = "weight",
                    topNodes = n_sig_terms,
                    numChar = 1000)
  write_csv(GO_sig, paste0("topGO_", name, ".csv"))

  # nodes graphs: classic, weight, elim (same settings for each)
  for (result in list(resultFisher, resultWeight, resultElim)) {
    printGraph(GOdata,
               result,
               firstSigNodes = 5,
               fn.prefix = name,
               useInfo = "all",
               pdfSW = TRUE)
  }

  # print file; on.exit() removes this diversion even if a print() fails, so
  # later output cannot end up in the wrong file
  sink(paste0("GO_enrichment_tests_", name, ".txt"))
  on.exit(sink(), add = TRUE)
  print("Details of topGOdata object")
  print(GOdata) #print summary of GOdata object
  print("Results summary:")
  print(resultFisher)
  print(resultWeight)
  print(resultElim)
  print(resultElimKS)

  invisible(NULL)
}

####~start loop~~~~~~~~~~~~~~####
for (module in modules) {
  dir.create(module, showWarnings = FALSE) #no warning when the folder already exists

  # The working directory is still the output folder here, so the gene list
  # can be read with wgcna_folder as given (no extra "../" prefix needed).
  sig_genes_file = paste0(wgcna_folder, "genes-", module, ".txt")
  sig_genes = read_lines(sig_genes_file) #get vector with sig genes

  ####~make topGOdata objects~~####
  ###~~make geneLists~~~~~~~~~~####
  in_module = all_genes %in% sig_genes
  # The factor below must contain both 0 and 1; fail early with a message that
  # names the module instead of a less specific error further down.
  if (length(unique(in_module)) < 2) {
    stop("Module '", module, "': genes in ", sig_genes_file,
         " must cover some, but not all, genes of the expression matrix",
         call. = FALSE)
  }
  geneList_all = factor(as.integer(in_module)) #1 = gene is in the module, 0 = it is not
  names(geneList_all) = all_genes

  ###~~make topGOdata~~~~~~~~~~####
  GOdata_all = new("topGOdata",
                   ontology = "BP",
                   description = paste0("GOtermanalysis (BP) ", module),
                   allGenes = geneList_all,
                   annot = annFUN.gene2GO,
                   gene2GO = gene2GO)

  ####~run tests in module dir~####
  # Results are written to the working directory, so step into the module
  # folder for the call only and always step back out, even after an error.
  old_wd = setwd(module)
  tryCatch(
    myVastRiches(name = module, GOdata = GOdata_all),
    finally = setwd(old_wd)
  )
}

####~fin~~~~~~~~~~~~~~~~~~~~~####
# Close every open user connection; this also removes any active sink()
# diversion, so console output is no longer copied to the log file.
closeAllConnections()
