# Load packages ---- library(tidyverse) library(data.table) library(vegan) library(qvalue) library(psych) library(adegenet) # Load data ---- setwd("~/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/07.Selection_analyses/RDA/plink_files") gen <- read.table("combined.imputed12.raw", header = TRUE, row.names = 1) gen <- gen[,-c(1:5)] # remove first 5 columns gen <- gen[ , colSums(is.na(gen)) == 0] # remove a column with missing data setwd("~/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/07.Selection_analyses/RDA/") env <- read.csv("lakes_env.csv",header=TRUE,row.names = 1) rownames(gen) <- rownames(env) #Run RDA conditioned by lake and lineage: lakes.rda1 <- rda(gen ~ Lake_type + System, data = env, scale = TRUE) # Testing significance of predictors: vif.cca(lakes.rda1) RsquareAdj(lakes.rda1) summary(eigenvals(lakes.rda1, model = "constrained")) screeplot(lakes.rda1) signif.full <- anova.cca(lakes.rda1, parallel = 4, permutations = 999) # full model signif.full signif.axis <- anova.cca(lakes.rda1, by="axis", parallel = 4, permutations = 999) # per variable signif.axis ##Plotting RDA: cols <- c("Allt na lairige"="#a4dede","Carron"="#008396","Eck"="gray60","Glashan"="#9b2915","Lomond"="gray60","Shira"="#287c71", "Sloy"="#6c966f","Tarsan"="#fe7c73") lake <- env$Lake bg <- c("#1B9E77","#1B9E77","#D95F02","#1B9E77","#D95F02","#1B9E77","#1B9E77","#1B9E77") syst <- env$System shape <- c(21,24) # axes 1 & 2 plot(lakes.rda1, type="n", scaling=3) points(lakes.rda1, display="species", pch=20, cex=0.7, col="gray32", scaling=3) # the SNPs points(lakes.rda1, display="sites", pch=shape[syst], cex=4, col="gray32", scaling=3, bg=bg[lake]) # the wolves text(lakes.rda1, scaling=3, display="bp", col="#0868ac", cex=1) # the predictors legend("bottomright", legend=levels(lake), bty="n", col="gray32", pch=21, cex=1, pt.bg=bg) # Identify candidate SNPs involved in local adaptation: # RDA1 lakes.rda1_scores <- vegan::scores(lakes.rda1, choices = 1:3, display = "species") lakes.rda1_scores <- as.data.frame(lakes.rda1_scores) lakes.rda1_scores$zscore1 <- ((lakes.rda1_scores$RDA1 - (mean(lakes.rda1_scores$RDA1)))/sd(lakes.rda1_scores$RDA1)) head(lakes.rda1_scores) lakes.rda1_scores$zscore2 <- ((lakes.rda1_scores$RDA2 - (mean(lakes.rda1_scores$RDA2)))/sd(lakes.rda1_scores$RDA2)) head(lakes.rda1_scores) write.csv(lakes.rda1_scores, "RDA_loadings_lakes_RDAzscore_laketype.csv") outliers <- lakes.rda1_scores %>% rownames_to_column('SNP') %>% filter(zscore2 >= 2.5 | zscore2 <= -2.5) %>% column_to_rownames('SNP') outliers <- setDT(outliers, keep.rownames = TRUE)[] outliers$rn <- parse_number(outliers$rn) outliers$rn <- sub("\\..*", "", outliers$rn) outliers <- rename(outliers, Locus = rn) write.csv(outliers, "RDA_laketype_outliers.csv", row.names = FALSE)