# Allele frequency of outlier SNPs ---- theme.plot <- function() { theme_bw() + theme(panel.background = element_rect(colour="black",fill="white", size=1), axis.text = element_text(size=12, color = "black"), axis.ticks = element_line(size = 0.5, colour = "black"), axis.ticks.length = unit(3, "mm"), axis.title.y = element_text(size = 30), axis.title.x = element_text(size = 30), axis.text.x = element_text(size=20), axis.text.y = element_text(size=20), legend.title = element_blank(), legend.text = element_blank(), legend.position = "none") } setwd("~/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/06.Population_genetics/combined/vcftools/") eck_freq <- read.table("Eck_AlleleFreq_genomescan.frq",header=TRUE) gla_freq <- read.table("Gla_AlleleFreq_genomescan.frq",header=TRUE) tar_freq <- read.table("Tar_AlleleFreq_genomescan.frq",header=TRUE) lom_freq <- read.table("Lom_AlleleFreq_genomescan.frq",header=TRUE) slo_freq <- read.table("Slo_AlleleFreq_genomescan.frq",header=TRUE) car_freq <- read.table("Car_AlleleFreq_genomescan.frq",header=TRUE) lai_freq <- read.table("Lai_AlleleFreq_genomescan.frq",header=TRUE) shi_freq <- read.table("Shi_AlleleFreq_genomescan.frq",header=TRUE) eck_freq$Lake <- "Eck" gla_freq$Lake <- "Glasahn" tar_freq$Lake <- "Tarsan" lom_freq$Lake <- "Lomond" slo_freq$Lake <- "Sloy" car_freq$Lake <- "Carron" lai_freq$Lake <- "Allt na lairige" shi_freq$Lake <- "Shira" eck_freq$Population <- "Source" gla_freq$Population <- "Refuge" tar_freq$Population <- "Refuge" lom_freq$Population <- "Source" slo_freq$Population <- "Refuge" car_freq$Population <- "Refuge" lai_freq$Population <- "Refuge" shi_freq$Population <- "Refuge" combined_freq <- rbind(eck_freq, gla_freq, tar_freq, lom_freq, slo_freq, car_freq, lai_freq, shi_freq) freq_source <- data.frame((eck_freq$Major + lom_freq$Major)/2) colnames(freq_source) <- "Major" freq_refuge <- data.frame((gla_freq$Major + tar_freq$Major + car_freq$Major + lai_freq$Major + shi_freq$Major + slo_freq$Major)/6) colnames(freq_refuge) <- "Major" allele_freq_change <- data.frame(freq_source$Major - freq_refuge$Major) colnames(allele_freq_change) <- "freq_change" # calculate Z-Score allele_freq_change$z_score <- (allele_freq_change$freq_change - mean(allele_freq_change$freq_change)) / sd(allele_freq_change$freq_change) # add genomic coordinates and locus name allele_freq_change$CHROM <- lom_freq$CHROM allele_freq_change$POS <- lom_freq$POS locus_name <- read.table("../sumstats/combined.whitelist.txt", header = FALSE) allele_freq_change$Locus <- paste0("CLocus_", locus_name$V1) # read in outlier loci outlier_rda <- read.table("../../../07.Selection_analyses/RDA/RDA_laketype_outliers.txt") outlier_baypass <- read.table("../../../07.Selection_analyses/BayPass/baypass_laketype_outliers.txt") # create outlier loci data outliers_combined <- unique(rbind(outlier_rda, outlier_baypass)) outliers_shared <- filter(outlier_rda, V1 %in% outlier_baypass$V1) # add abs freq change allele_freq_change$abs_freq_change <- abs(allele_freq_change$freq_change) allele_freq_change$z_abs_freq_change <- (allele_freq_change$abs_freq_change - mean(allele_freq_change$abs_freq_change)) / sd(allele_freq_change$abs_freq_change) # add SNP number for plotting allele_freq_change$SNP <- 1:nrow(allele_freq_change) # add outlier factor allele_freq_change3 <- allele_freq_change %>% mutate(SNP_type = case_when( Locus %in% outliers_combined$V1 ~ "outlier SNPs", !Locus %in% outliers_combined$V1 ~ "all SNPs" )) allele_freq_change4 <- allele_freq_change3 allele_freq_change4$CHROM <- parse_number(allele_freq_change4$CHROM) # subset datatset for all outlier combinations outlier_dataset_combined <- filter(allele_freq_change3, SNP_type == "outlier SNPs") outlier_dataset_rda <- filter(allele_freq_change3, Locus %in% outlier_rda$V1) outlier_dataset_baypass <- filter(allele_freq_change3, Locus %in% outlier_baypass$V1) outlier_dataset_shared <- filter(allele_freq_change3, Locus %in% outliers_shared$V1) # plot 1 violin plot ggplot() + geom_violin(data = filter(allele_freq_change3, SNP_type == "all SNPs"), aes(x = SNP_type, y = z_score), fill = "gray90") + geom_violin(data = filter(outlier_dataset_combined, freq_change > 0), aes(x = SNP_type, y = z_score), fill = "#36648B") + geom_jitter(data = filter(outlier_dataset_combined, freq_change > 0), aes(x = SNP_type, y = z_score), width = 0.05) + geom_violin(data = filter(outlier_dataset_combined, freq_change < 0), aes(x = SNP_type, y = z_score), fill = "#36648B") + geom_jitter(data = filter(outlier_dataset_combined, freq_change < 0), aes(x = SNP_type, y = z_score), width = 0.05) + geom_hline(yintercept = quantile(allele_freq_change3$z_score, prob = 0.25, na.rm = TRUE), linetype = 'dashed') + geom_hline(yintercept = quantile(allele_freq_change3$z_score, prob = 0.75, na.rm = TRUE), linetype = 'dashed') + theme.plot() + labs(x = NULL, y = "Allele frequency difference\n (z-score)") + ylim(-5,5) # plot 2 nCHR <- length(unique(allele_freq_change$CHROM)) ggplot() + geom_point(data = allele_freq_change3, aes(x = SNP, y = z_abs_freq_change, colour = CHROM), alpha = 0.7) + geom_point(data = outlier_dataset_rda, aes(x = SNP, y = z_abs_freq_change), size = 4, colour = "#1C86EE") + geom_point(data = outlier_dataset_baypass, aes(x = SNP, y = z_abs_freq_change), size = 4, colour = "#FFB90F") + geom_point(data = outlier_dataset_shared, aes(x = SNP, y = z_abs_freq_change), size = 4, colour = "#CD0000") + scale_color_manual(values = rep(c("grey10", "grey70"), nCHR)) + theme.plot() + labs(y = "Absolute allele frequency change\n (z-score)")