# pathway analysis script
# by J.

####~load libraries~~~~~~~~~~####
library(rstudioapi)
library(topGO)
library(genefilter)
library(Rgraphviz)
library(readr)
library(svglite)
library(ggplot2)

####~housekeeping~~~~~~~~~~~~####
rm(list = ls()) # clear the environment
# Every path below is relative, so start from the folder holding this script
# (the "Scripts" folder). getActiveDocumentContext() needs an RStudio session.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
output = "../07_dge_pathway_analysis"
# Create the output folder only when it is missing: re-running the script no
# longer raises an "already exists" warning, while a genuine failure to create
# the folder is still reported.
if (!dir.exists(output)) {
  dir.create(output)
}
setwd(output) # results are written relative to the output folder
GO_file = "../02_reference_data/gene2GO.map" # relative to the output folder
analyses = c("PREGvNON", "PREGvPRE", "PREGvPOST", "POSTvPRE") # processed in this order
count = 0 # running index of the current analysis, used as folder prefix

###~~logfile~~~~~~~~~~~~~~~~~####
# Name the log after a colon-free timestamp. Pasting Sys.time() directly puts
# spaces and ":" into the file name, which Windows file systems reject.
log_file = file(paste0(format(Sys.time(), "%Y-%m-%d_%H-%M-%S"), ".log"))
# Copy console output into the log; split = TRUE keeps it visible on screen.
sink(log_file, append = TRUE, type = "output", split = TRUE)

####~start loop~~~~~~~~~~~~~~####
for (analysis in analyses) {

  ####~make directory~~~~~~~~~~####
  count = count + 1
  # Folder name is "<index>_<analysis>", with the index zero-padded to two
  # digits (a leading 0 below 10). The same name is used further down to find
  # the matching input folder under 06_dge_analysis.
  anal_dir = sprintf("%02d_%s", count, analysis)
  # Only create the folder when it is missing so re-runs do not warn about it.
  if (!dir.exists(anal_dir)) {
    dir.create(anal_dir)
  }
  setwd(anal_dir)
  
  ####~load data~~~~~~~~~~~~~~~####
  ###~~gene lists~~~~~~~~~~~~~~####
  # Inputs are read from the folder of the same name in the DGE analysis step.
  dge_dir = file.path("../../06_dge_analysis", anal_dir)
  gene_universe = read_lines(file.path(dge_dir, "gene_universe.txt"))
  # One character vector per gene set; the element names are reused later as
  # labels in the output file names.
  gene_files = c(
    sig_genes = "genes_sig.txt",
    sig_up = "genes_sig_up.txt",
    sig_down = "genes_sig_down.txt"
  )
  gene_lists = lapply(gene_files, function(gene_file) {
    read_lines(file.path(dge_dir, gene_file))
  })
  
  ###~~make gene2GO~~~~~~~~~~~~####
  # GO_file is relative to the output folder, which is one level up from here.
  gene2GO = readMappings(file.path("..", GO_file))
  
  ####~make geneLists~~~~~~~~~~####
  # Convert a vector of selected gene IDs into a membership factor over the
  # whole gene universe: one entry per element of gene_universe, 1 when that
  # gene occurs in gene_list and 0 otherwise, named by gene ID. This is the
  # object handed to topGO as allGenes.
  makeMyGeneLists = function(gene_list) {
    membership = as.integer(is.element(gene_universe, gene_list))
    setNames(factor(membership), gene_universe)
  }
  
  # Apply makeMyGeneLists to every entry of gene_lists; lapply keeps the list
  # names, so each resulting factor stays labelled with its gene set.
  myGeneLists = lapply(gene_lists, makeMyGeneLists) #...from lists of genes
  
  ####~make topGOdata~~~~~~~~~~####
  # Wrap one allGenes factor in a topGOdata object for the "BP" ontology,
  # annotated through the gene2GO mapping loaded above for this analysis.
  # `analysis` (the current loop value) only appears in the description text.
  makeMyTGDOs = function(geneList) {
    new("topGOdata",
        ontology = "BP",
        # default sep = " ": the original sep = "" glued the analysis name
        # straight onto "(BP)" in the printed description
        description = paste("GO term analysis (BP)", analysis),
        allGenes = geneList,
        annot = annFUN.gene2GO,
        gene2GO = gene2GO)
  }
  
  # One topGOdata object per gene set; lapply keeps the list names.
  myTGDOs = lapply(myGeneLists, makeMyTGDOs)

  ####~enrichment tests~~~~~~~~~~####
  # Run the enrichment tests on one topGOdata object and write the results
  # into the current working directory.
  #
  # name:   label of the gene set; becomes part of every output file name.
  # GOdata: topGOdata object to test.
  #
  # Outputs: "topGO_<name>.csv" (GenTable combining all four tests), one graph
  # per Fisher variant through printGraph(), and
  # "GO_enrichment_tests_<name>.txt" holding the printed summaries.
  # Called for these side effects only; returns NULL invisibly.
  myVastRiches = function(name, GOdata) {
    ##~~~Fisher classic~~~~~~~~~~####
    resultFisher = runTest(GOdata, algorithm = "classic", statistic = "fisher")
    # entry no. 4 of this test's geneData is the number of significant GO terms
    n_sig_terms = geneData(resultFisher)[4]
    
    ##~~~Fisher weight~~~~~~~~~~~####
    resultWeight = runTest(GOdata, algorithm = "weight", statistic = "fisher")
    
    ##~~~Fisher elim~~~~~~~~~~~~~####
    resultElim = runTest(GOdata, algorithm = "elim", statistic = "fisher")
    
    ##~~~KS elim~~~~~~~~~~~~~~~~~####
    # NOTE(review): KS is a score-based statistic; when GOdata comes from a 0/1
    # membership factor (as makeMyGeneLists produces) confirm that this test is
    # meaningful for the analysis.
    resultElimKS = runTest(GOdata, algorithm = "elim", statistic = "ks")
    
    ###~~all significant nodes~~~####
    # Rows are ordered and ranked by the weight test, while the number of rows
    # is the count of significant terms from the classic Fisher test.
    GO_sig = GenTable(GOdata,
                      weight = resultWeight,
                      classic = resultFisher,
                      elim = resultElim,
                      elim_ks = resultElimKS,
                      orderBy = "weight",
                      ranksOf = "weight",
                      topNodes = n_sig_terms,
                      numChar = 1000)
    write_csv(GO_sig, paste0("topGO_", name, ".csv"))
    
    ###~~nodes graphs~~~~~~~~~~~~####
    # Same graph settings for classic, weight and elim Fisher results, in that
    # order (the KS result is not plotted).
    for (fisher_result in list(resultFisher, resultWeight, resultElim)) {
      printGraph(GOdata,
                 fisher_result,
                 firstSigNodes = 5,
                 fn.prefix = name,
                 useInfo = "all",
                 pdfSW = TRUE)
    }
    
    ###~~print file~~~~~~~~~~~~####
    sink(paste0("GO_enrichment_tests_", name, ".txt"))
    # Remove this diversion when the function exits, even if a print() below
    # fails; otherwise all later output would keep going into this report file.
    on.exit(sink(), add = TRUE)
    print("Details of topGOdata object")
    print(GOdata) # summary of the topGOdata object
    print("Results summary:")
    print(resultFisher)
    print(resultWeight)
    print(resultElim)
    print(resultElimKS)
    invisible(NULL)
  }
  
  # Labels used in the output file names - swap in your own vector if you want
  set_labels = names(myTGDOs)
  # Run the tests once per (label, topGOdata) pair; only the files written by
  # myVastRiches matter, so the collected return values are discarded.
  invisible(Map(myVastRiches, name = set_labels, GOdata = myTGDOs))

  ####~end of loop~~~~~~~~~~~~~####
  setwd("..") # step back out of this analysis' folder
}

####~fin~~~~~~~~~~~~~~~~~~~~~####
# Close every open connection; this also ends the console-to-log diversion
# that was started with sink() in the logfile section.
closeAllConnections()
