main.bar.color = "black",
sets.bar.color = "grey")
# Display the current upsetDTUvsDGE object.
upsetDTUvsDGE
# Rebuild the plot from upsetData using the six DTU/DGE contrast sets,
# with intersections ordered by frequency and custom text scaling.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 1.3, 1, 1, 2, 0.75)
)
# UpSet plot over the six DTU/DGE contrast sets, ordered by frequency.
# The original statement ended in `),` -- a stray comma after the closing
# parenthesis of a top-level call, which R rejects as a syntax error.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 1.3, 1, 1, 2, 0.75)
)
# Text-size tuning. All four attempts use the same six sets, frequency
# ordering and bar colours; only text.scale differs between them.

# Attempt 1: baseline text.scale.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 1.3, 1, 1, 2, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 2: first text.scale entry raised to 10.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(10, 1.3, 1, 1, 2, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 3: second text.scale entry raised to 10 instead.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 10, 1, 1, 2, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 4: first two entries back at 1.3, fifth entry lowered from 2 to 1.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 1.3, 1, 1, 1, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
# Tuning of nintersects, then of the fifth text.scale entry.
# Sets, ordering and bar colours are the same in all four attempts.

# Attempt 1: nintersects = 12.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  nintersects = 12,
  text.scale = c(1.3, 1.3, 1, 1, 1, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 2: nintersects = 11.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  nintersects = 11,
  text.scale = c(1.3, 1.3, 1, 1, 1, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 3: nintersects = 11, fifth text.scale entry set to 2.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  nintersects = 11,
  text.scale = c(1.3, 1.3, 1, 1, 2, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 4: nintersects = 11, fifth text.scale entry set to 1.75.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  nintersects = 11,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
# Tuning of point.size and line.size, with nintersects no longer passed.
# Sets, ordering, text.scale and bar colours are the same in all four attempts.

# Attempt 1: no point.size / line.size arguments.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 2: point.size = 4, line.size = 2.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  point.size = 4,
  line.size = 2,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 3: point.size = 3.5, line.size = 2.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  point.size = 3.5,
  line.size = 2,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 4: point.size = 3.5, line.size = 1.5.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  point.size = 3.5,
  line.size = 1.5,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
# UpSet plot over the six DTU/DGE contrast sets with keep.order = TRUE.
# The original statement was missing the comma after `keep.order = TRUE`
# and was never closed, so it could not be parsed; it is completed here
# using only the arguments that were present.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75)
)
# Same six sets as before, now with keep.order = TRUE added alongside the
# point/line sizes and bar colours; the result is displayed afterwards.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "DTU (preg v post-preg)", "DGE (preg v post-preg)",
    "DTU (preg v pre-preg)", "DGE (preg v pre-preg)",
    "DTU (post-preg v pre-preg)", "DGE (post-preg v pre-preg)"
  ),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  point.size = 3.5,
  line.size = 1.5,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
# Calls without an explicit `sets` vector.

# Attempt 1: neither `sets` nor `nsets` is passed.
upsetDTUvsDGE <- upset(
  upsetData,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  point.size = 3.5,
  line.size = 1.5,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 2: nsets = 6 added.
upsetDTUvsDGE <- upset(
  upsetData,
  nsets = 6,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  point.size = 3.5,
  line.size = 1.5,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 3: point.size lowered to 3 and line.size to 1.
upsetDTUvsDGE <- upset(
  upsetData,
  nsets = 6,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 4: number.angles = 30 added.
upsetDTUvsDGE <- upset(
  upsetData,
  nsets = 6,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 0.75),
  point.size = 3,
  line.size = 1,
  number.angles = 30,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
# Tuning of the last text.scale entry and of number.angles (nsets = 6 kept).

# Attempt 1: last text.scale entry set to 1.5.
upsetDTUvsDGE <- upset(
  upsetData,
  nsets = 6,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.5),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 2: last text.scale entry set to 1.2.
upsetDTUvsDGE <- upset(
  upsetData,
  nsets = 6,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 3: number.angles = 10.
upsetDTUvsDGE <- upset(
  upsetData,
  nsets = 6,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.2),
  number.angles = 10,
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 4: number.angles = 90.
upsetDTUvsDGE <- upset(
  upsetData,
  nsets = 6,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.2),
  number.angles = 90,
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Attempt 5: number.angles dropped again.
upsetDTUvsDGE <- upset(
  upsetData,
  nsets = 6,
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
# UpSet plot using the short contrast labels.
# The original `sets` vector listed "PREGvPOST DTU"/"PREGvPOST DGE" twice and
# never named the PREGvPRE pair; the first pair is corrected to PREGvPRE so
# that all six contrasts are requested exactly once.
# NOTE(review): assumes upsetData has columns with these short labels at this
# point -- confirm against where upsetData is built.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "PREGvPRE DTU", "PREGvPRE DGE",
    "PREGvPOST DTU", "PREGvPOST DGE",
    "POSTvPRE DTU", "POSTvPRE DGE"
  ),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
##~~~UpSet plot~~~~~~~~~~~~~~####
# Named list of the DTU and DGE results for each of the three contrasts.
# The original labelled the PREGvPRE results as "PREGvPOST", producing
# duplicate list names (and duplicate `sets` entries below), so the PREGvPRE
# data was mislabelled and the plot could not tell the two contrasts apart.
DGEvDTU <- list(
  "PREGvPRE DTU" = DTUs$PREGvPRE,
  "PREGvPRE DGE" = DEGs$PREGvPRE,
  "PREGvPOST DTU" = DTUs$PREGvPOST,
  "PREGvPOST DGE" = DEGs$PREGvPOST,
  "POSTvPRE DTU" = DTUs$POSTvPRE,
  "POSTvPRE DGE" = DEGs$POSTvPRE
)
upsetData <- fromList(DGEvDTU) # make upset plot dataset
# Take the set labels from the list itself so the two cannot drift apart.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = names(DGEvDTU),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
##~~~UpSet plot~~~~~~~~~~~~~~####
# Named list of the DTU and DGE results for each of the three contrasts.
# The original labelled the PREGvPRE results as "PREGvPOST", producing
# duplicate list names (and duplicate `sets` entries below), so the PREGvPRE
# data was mislabelled and the plot could not tell the two contrasts apart.
DGEvDTU <- list(
  "PREGvPRE DTU" = DTUs$PREGvPRE,
  "PREGvPRE DGE" = DEGs$PREGvPRE,
  "PREGvPOST DTU" = DTUs$PREGvPOST,
  "PREGvPOST DGE" = DEGs$PREGvPOST,
  "POSTvPRE DTU" = DTUs$POSTvPRE,
  "POSTvPRE DGE" = DEGs$POSTvPRE
)
upsetData <- fromList(DGEvDTU) # make upset plot dataset
# Take the set labels from the list itself so the two cannot drift apart.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = names(DGEvDTU),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
##~~~UpSet plot~~~~~~~~~~~~~~####
# One list entry per contrast and analysis type (DTU or DGE).
DGEvDTU <- list(
  "PREGvPRE DTU" = DTUs$PREGvPRE,
  "PREGvPRE DGE" = DEGs$PREGvPRE,
  "PREGvPOST DTU" = DTUs$PREGvPOST,
  "PREGvPOST DGE" = DEGs$PREGvPOST,
  "POSTvPRE DTU" = DTUs$POSTvPRE,
  "POSTvPRE DGE" = DEGs$POSTvPRE
)
# Convert the list into the data set passed to upset().
upsetData <- fromList(DGEvDTU)
# Plot all six sets ordered by frequency, then display the result.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "PREGvPRE DTU", "PREGvPRE DGE",
    "PREGvPOST DTU", "PREGvPOST DGE",
    "POSTvPRE DTU", "POSTvPRE DGE"
  ),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.75, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
# Same six sets; first text.scale entry lowered to 1. Result displayed after.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "PREGvPRE DTU", "PREGvPRE DGE",
    "PREGvPOST DTU", "PREGvPOST DGE",
    "POSTvPRE DTU", "POSTvPRE DGE"
  ),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1, 1.3, 1, 1, 1.75, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE
###~~PREGvPRE~~~~~~~~~~~~~~~~####
# Load the objects saved in the 08_dtu_preg_v_pre analysis folder.
load("../08_dtu_preg_v_pre/switchListAnalysed.Rdata")
# Fifth text.scale entry lowered to 1.25; sets listed PREGvPRE first.
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "PREGvPRE DTU", "PREGvPRE DGE",
    "PREGvPOST DTU", "PREGvPOST DGE",
    "POSTvPRE DTU", "POSTvPRE DGE"
  ),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.25, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
upsetDTUvsDGE

# Same settings with the contrast pairs listed in the opposite order
# (POSTvPRE first, PREGvPRE last).
upsetDTUvsDGE <- upset(
  upsetData,
  sets = c(
    "POSTvPRE DTU", "POSTvPRE DGE",
    "PREGvPOST DTU", "PREGvPOST DGE",
    "PREGvPRE DTU", "PREGvPRE DGE"
  ),
  order.by = "freq",
  keep.order = TRUE,
  text.scale = c(1.3, 1.3, 1, 1, 1.25, 1.2),
  point.size = 3,
  line.size = 1,
  main.bar.color = "black",
  sets.bar.color = "grey"
)
# The plot is displayed twice, as in the original session.
upsetDTUvsDGE
upsetDTUvsDGE
# Get functional gene annotation using eggNOG protein mappings
# R script
# by J.
####~load libraries~~~~~~~~~~####
library(phylotools)
library(stringr)
library(readr)
library(rstudioapi)
####~housekeeping~~~~~~~~~~~~####
# Empty the workspace, then change directory: first to the folder of the
# document active in RStudio, then to the reference-data folder beside it.
rm(list = ls())
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
setwd("../02_reference_data/")
###~~logfile~~~~~~~~~~~~~~~~~####
# Log file named after today's date; both the output and the message
# streams are diverted to it, and the start time is recorded.
log_file <- file(
  paste0("02_get_functional_annotation_", Sys.Date(), ".log")
)
sink(log_file, append = TRUE, type = "output")
sink(log_file, append = TRUE, type = "message")
Sys.time()
####~get eggnogg annotation~~####
# Read the table without a header: the column names are taken from row 5,
# after which rows 1-5 (everything above the data, header included) go.
eggnog <- read.delim("../02_reference_data/eggnog.tsv", header = FALSE)
colnames(eggnog) <- eggnog[5, ]
eggnog <- eggnog[-seq_len(5), ]
colnames(eggnog)[1] <- "query" # first column name without its hashtag
###~~make prot2gene~~~~~~~~~~####
# Two-column gene/protein table, re-ordered so the protein ID comes first.
gene2prot <- read.delim("../02_reference_data/gene2prot.txt", header = FALSE)
names(gene2prot) <- c("SYMBOL", "REFSEQ")
prot2gene <- gene2prot[, c("REFSEQ", "SYMBOL")]
###~~add symbol to eggnogg~~~####
# Left join on the first column of each table (all eggnog rows are kept).
eggnog_symbols <- merge(eggnog, prot2gene, by = 1, all.x = TRUE)
####~get longest prot only~~~####
# Read all protein sequences and reduce each name to the protein ID
# (everything from the first space onwards is removed).
all_prot <- read.fasta(file = "../02_reference_data/protein.faa")
all_prot$seq.name <- gsub(" .*", "", all_prot$seq.name)
all_prot$length <- str_count(all_prot$seq.text) # sequence length
# Keep ID and length only, then attach the gene symbol for each protein.
prot_lengths <- all_prot[, c(1, 3)]
prot_lengths <- merge(prot_lengths, prot2gene, by = 1, all.x = TRUE)
# Sort by symbol with the longest protein first, so that the first row
# seen for each symbol is its longest protein.
prot_lengths <- prot_lengths[
  order(prot_lengths$SYMBOL, -abs(prot_lengths$length)),
]
prot_longbois <- prot_lengths[!duplicated(prot_lengths$SYMBOL), ]
####~get nogs for longbois~~~####
# Keep only annotation rows whose query is one of the longest proteins,
# and use the gene symbols as row names.
eggnog_longest <- eggnog_symbols[
  eggnog_symbols$query %in% prot_longbois$seq.name,
]
row.names(eggnog_longest) <- eggnog_longest$SYMBOL
####~map file for topGO~~~~~~####
# First pass: symbol and GO terms only, inspected interactively.
gene2GO <- eggnog_longest[, c("SYMBOL", "GOs")] # make gene2GO annotation list
View(gene2GO)
View(eggnog_longest)
####~map file for topGO~~~~~~####
# Second pass: also carry the eggNOG "Preferred_name" column.
gene2GO <- eggnog_longest[, c("SYMBOL", "GOs", "Preferred_name")]
# Trial column: for LOC symbols that have a usable preferred name, build
# "<Preferred_name>_<SYMBOL>"; otherwise keep the symbol.
# The explicit is.na() test matters: `%in%` compares against the *string*
# "NA", so a genuinely missing Preferred_name would otherwise pass the
# filter and produce names such as "NA_LOC123".
gene2GO$newname <- ifelse(
  grepl("^LOC\\d+", gene2GO$SYMBOL) &
    !is.na(gene2GO$Preferred_name) &
    !(gene2GO$Preferred_name %in% c("", "0", "NA", "NaN", "-")),
  paste0(gene2GO$Preferred_name, "_", gene2GO$SYMBOL),
  gene2GO$SYMBOL
)
View(gene2GO)
# Apply the same renaming to SYMBOL itself, so that LOC genes include the
# eggnog predicted gene name.
gene2GO$SYMBOL <- ifelse(
  grepl("^LOC\\d+", gene2GO$SYMBOL) &
    !is.na(gene2GO$Preferred_name) &
    !(gene2GO$Preferred_name %in% c("", "0", "NA", "NaN", "-")),
  paste0(gene2GO$Preferred_name, "_", gene2GO$SYMBOL),
  gene2GO$SYMBOL
)
####~map file for topGO~~~~~~####
gene2GO <- eggnog_longest[, c("SYMBOL", "GOs", "Preferred_name")]
# For LOC symbols that have a usable preferred name, rename to
# "<Preferred_name>_<SYMBOL>" so the eggnog predicted gene name is included.
# The is.na() test is needed because `%in%` only matches the string "NA";
# a genuinely missing name would otherwise yield "NA_LOC...".
gene2GO$SYMBOL <- ifelse(
  grepl("^LOC\\d+", gene2GO$SYMBOL) &
    !is.na(gene2GO$Preferred_name) &
    !(gene2GO$Preferred_name %in% c("", "0", "NA", "NaN", "-")),
  paste0(gene2GO$Preferred_name, "_", gene2GO$SYMBOL),
  gene2GO$SYMBOL
)
# Drop the helper column. The original first ran `gene2GO[-3,]`, which
# removes the third *row* (one gene's annotation) instead of the third
# column; that statement is removed and the columns are kept by name.
gene2GO <- gene2GO[, c("SYMBOL", "GOs")]
####~map file for topGO~~~~~~####
gene2GO <- eggnog_longest[, c("SYMBOL", "GOs", "Preferred_name")]
# For LOC symbols that have a usable preferred name, rename to
# "<Preferred_name>_<SYMBOL>" so the eggnog predicted gene name is included.
# The is.na() test is needed because `%in%` only matches the string "NA";
# a genuinely missing name would otherwise yield "NA_LOC...".
gene2GO$SYMBOL <- ifelse(
  grepl("^LOC\\d+", gene2GO$SYMBOL) &
    !is.na(gene2GO$Preferred_name) &
    !(gene2GO$Preferred_name %in% c("", "0", "NA", "NaN", "-")),
  paste0(gene2GO$Preferred_name, "_", gene2GO$SYMBOL),
  gene2GO$SYMBOL
)
# Remove the extraneous "Preferred_name" column (kept columns named
# explicitly), then write the two-column map without a header row.
gene2GO <- gene2GO[, c("SYMBOL", "GOs")]
write_tsv(gene2GO, file = "../02_reference_data/gene2GO.map", col_names = FALSE)
