## Demultiplex EpiRAD Illumina reads with process_radtags from Stacks pipeline process_radtags -P -c -q -r -p ./EpiRAD_raw_reads/ -o ./demultiplexed_reads/EpiRAD/Intermediate_files3/1.Demultiplexed_reads -b ./files/EpiRAD_barcodes.txt --inline_inline -i gzfastq -y gzfastq --renz_1 pstI --renz_2 hpaII -t 65 ## Use trimmomatic to trim first 5bp from forward reads and first 3bp from reverse reads for infile in ./01.Demultiplexed_reads/*1.fq.gz do base=$(basename $infile .fq.gz) java -jar /usr/local/bin/trimmomatic-0.38.jar SE -threads 16 $infile ./02.Trimmomatic_filtering/$base.fq.gz HEADCROP:5 done for infile in ./01.Demultiplexed_reads/*2.fq.gz do base=$(basename $infile .fq.gz) java -jar /usr/local/bin/trimmomatic-0.38.jar SE -threads 16 $infile ./02.Trimmomatic_filtering/$base.fq.gz HEADCROP:3 done ## Use a loop and Trimmomatic for further filtering for R1 in ./02.Trimmomatic_filtering/*.1.* do R2=${R1//1.fq.gz/2.fq.gz} R1paired=${R1//.fq.gz/_P1.fq.gz} R1unpaired=${R1//.fq.gz/_U1.fq.gz} R2paired=${R2//.fq.gz/_P2.fq.gz} R2unpaired=${R2//.fq.gz/_U2.fq.gz} echo "$R1 $R2" java -jar /usr/local/bin/trimmomatic-0.38.jar PE -threads 4 -phred33 $R1 $R2 $R1paired $R1unpaired $R2paired $R2unpaired LEADING:20 TRAILING:20 MINLEN:60 done ####### now use BWA to align reads of individuals against reference assemblies for R1 in ./02.Trimmomatic_filtering/*P1.fq.gz do R2=${R1//.1_P1.fq.gz/.2_P2.fq.gz} base=$(basename $R1 .1_P1.fq.gz) echo "$base" bwa mem -t 8 /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/genomes/whitefish/EW_assembly.fa $R1 $R2 | \ samtools view -bSq 20 - | \ samtools sort - > ./04.bam_alignments/$base.bam done for bam in ./04.bam_alignments/*.bam do base=$(basename $bam .bam) echo "$base" samtools index ./04.bam_alignments/$base.bam done ####### Use ref_map.pl from STACKS ref_map.pl -T 4 --samples ./04.bam_alignments/ -o ./05.Stacks --popmap ./popmap.txt ########## identify loci that explain batch effect using PCA and pc loadings in adegent populations -P ./5.Stacks_ref -O ./5.Stacks_ref/batch_effect -M ./popmap_ddRAD_epiRAD.txt -t 4 -p 6 -r 0.667 --min-maf 0.05 --max-obs-het 0.6 --write_single_snp --structure --vcf ####### populations -P ./05.Stacks -O ./06.Population_genetics/combined -M ./popmap_ddRAD_epiRAD_filtered.txt -B ./05.Stacks/batch_effect/blacklist_loci.txt -t 4 -p 6 -r 0.75 --min-maf 0.05 --max-obs-het 0.6 --write_single_snp --vcf cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/06.Population_genetics/combined vcftools --vcf populations.snps.vcf --minDP 5 --min-meanDP 8 --max-meanDP 40 --maf 0.05 --max-missing 0.667 --recode --recode-INFO-all --out filtered vcftools --vcf filtered.recode.vcf --missing-indv mawk '$5 > 0.3' out.imiss | cut -f1 > lowDP.indv vcftools --vcf filtered.recode.vcf --remove lowDP.indv --recode --recode-INFO-all --out comb.filt # filter by HWE ./filter_hwe_by_pop.pl -v comb.filt.recode.vcf -p popmap_ddRAD_epiRAD_filtered.txt -o comb.hwe # filter by missing data per population ./pop_missing_filter.sh combined.hwe.recode.vcf popmap_ddRAD_epiRAD_filtered.txt 0.33 1 combined ## create sumstats file with only loci in the combined.recode.vcf cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/6.Population_genetics/combined cut -f 3 combined.recode.vcf | tail -n +16 | cut -f 1 -d":" > ./sumstats/combined.whitelist.txt cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/ populations -P ./05.Stacks -O 06.Population_genetics/combined/sumstats -M ./popmap_ddRAD_epiRAD_combined_final.txt --whitelist ./06.Population_genetics/combined/sumstats/combined.whitelist.txt -t 4 -p 6 -r 0.75 --min-maf 0.05 --max-obs-het 0.6 --vcf ############## ############## create vcf files for the lomond and eck systems ############## cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3 populations -P ./5.Stacks_ref -O ./05.Stacks/lomond -M ./popmap_ddRAD_epiRAD_lom.txt -B ./05.Stacks/blacklist_combined.txt -t 4 -p 4 -r 0.75 --min-maf 0.05 --max-obs-het 0.6 --write_single_snp --vcf populations -P ./5.Stacks_ref -O ./05.Stacks/eck -M ./popmap_ddRAD_epiRAD_eck.txt -B ./05.Stacks/blacklist_combined.txt -t 4 -p 2 -r 0.75 --min-maf 0.05 --max-obs-het 0.6 --write_single_snp --vcf ######### filter vcf files for each system # Lomond cd ./5.Stacks_ref/lomond/ vcftools --vcf populations.snps.vcf --site-mean-depth --out populations.snps.depth vcftools --vcf populations.snps.vcf --depth --out populations.snps vcftools --vcf populations.snps.vcf --freq2 --out populations.snps --max-alleles 2 vcftools --vcf populations.snps.vcf --site-quality --out populations.snps vcftools --vcf populations.snps.vcf --minDP 5 --min-meanDP 8 --max-meanDP 40 --maf 0.05 --max-missing 0.667 --recode --recode-INFO-all --out lom vcftools --vcf lom.recode.vcf --missing-indv mawk '$5 > 0.3' out.imiss | cut -f1 > lowDP.indv vcftools --vcf lom.recode.vcf --remove lowDP.indv --recode --recode-INFO-all --out lom.filt ./filter_hwe_by_pop.pl -v lom.filt.recode.vcf -p popmap_ddRAD_epiRAD_lom.txt -o lom.hwe # filter by missing data per population ./pop_missing_filter.sh lom.hwe.recode.vcf popmap_ddRAD_epiRAD_lom_final.txt 0.33 1 lom # Eck cd ./5.Stacks_ref/eck/ vcftools --vcf populations.snps.vcf --site-mean-depth --out eck.depth vcftools --vcf populations.snps.vcf --depth --out eck.snps vcftools --vcf populations.snps.vcf --freq2 --out eck.snps --max-alleles 2 vcftools --vcf populations.snps.vcf --minDP 5 --min-meanDP 8 --max-meanDP 40 --maf 0.05 --max-missing 0.667 --recode --recode-INFO-all --out eck vcftools --vcf eck.recode.vcf --missing-indv mawk '$5 > 0.3' out.imiss | cut -f1 > lowDP.indv vcftools --vcf eck.recode.vcf --remove lowDP.indv --recode --recode-INFO-all --out eck.filt ./filter_hwe_by_pop.pl -v eck.filt.recode.vcf -p popmap_ddRAD_epiRAD_eck2.txt -o eck.hwe # filter by missing data per population ./pop_missing_filter.sh eck.hwe.recode.vcf popmap_ddRAD_epiRAD_eck_final.txt 0.33 1 eck ############### ############### genetic diversity analysis in STACKS (heterozygosity, nucleotide ############### diversity) using only the loci from the vcf file cd ./06.Population_genetics/lomond/ cut -f 3 lom.recode.vcf | tail -n +16 | cut -f 1 -d":" > ./sumstats/lom.whitelist.txt populations -P ./05.Stacks -O ./06.Population_genetics/lomond/sumstats -M ./popmap_ddRAD_epiRAD_lom_final.txt --whitelist ./06.Population_genetics/lomond/sumstats/lom.whitelist.txt -t 4 -p 5 -r 0.667 --min-maf 0.05 --max-obs-het 0.6 --vcf --fstat --fst-correction p_value -k cd ./06.Population_genetics/eck/ cut -f 3 eck.recode.vcf | tail -n +16 | cut -f 1 -d":" > ./sumstats/eck.whitelist.txt populations -P ./05.Stacks -O ./06.Population_genetics/eck/sumstats -M ./popmap_ddRAD_epiRAD_eck_final.txt --whitelist ./06.Population_genetics/eck/sumstats/eck.whitelist.txt -t 4 -p 3 -r 0.667 --min-maf 0.05 --max-obs-het 0.6 --vcf --fstat --fst-correction p_value -k ############################## ########### Inbreeding with PLINK ############################## # Lomond cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/06.Population_genetics/lomond/inbreeding/ plink --vcf lom.recode.vcf --make-bed --out lom.plink --allow-extra-chr plink --bfile lom.plink --recode --tab --out lom.plink --allow-extra-chr plink --file lom.plink --double-id --allow-extra-chr --indep-pairwise 1000 10 0.2 --out lom.LD plink --file lom.plink --double-id --allow-extra-chr --set-missing-var-ids @:# --extract lom.LD.prune.in --make-bed --out lom.filtered.LD plink --bfile lom.filtered.LD --recode --tab --out lom.filtered.LD --allow-extra-chr plink --file lom.filtered.LD --maf 0.05 --recode --out lom.LD.maf --allow-extra-chr plink --file lom.LD.maf --het 'small-sample' --allow-extra-chr --out lom.LD.maf # Eck cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/6.Population_genetics/eck/inbreeding/maf_stacks plink --vcf eck.recode.vcf --make-bed --out eck.plink --allow-extra-chr plink --bfile eck.plink --recode --tab --out eck.plink --allow-extra-chr plink --file eck.plink --double-id --allow-extra-chr --indep-pairwise 1000 10 0.2 --out eck.LD plink --file eck.plink --double-id --allow-extra-chr --set-missing-var-ids @:# --extract eck.LD.prune.in --make-bed --out eck.filtered.LD plink --bfile eck.filtered.LD --recode --tab --out eck.filtered.LD --allow-extra-chr plink --file eck.filtered.LD --maf 0.05 --recode --out eck.LD.maf --allow-extra-chr plink --file eck.LD.maf --het 'small-sample' --allow-extra-chr --out eck.LD.maf ############################## ########### Relatedness with PLINK ############################## #### Lomond cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/6.Population_genetics/lomond/relatedness/ plink --file lom.LD.maf --make-rel square0 --allow-extra-chr --out lom.LD.maf #### Eck cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/6.Population_genetics/eck/relatedness/maf_stacks plink --file eck.LD.maf --make-rel square0 --allow-extra-chr --out eck.LD.maf ############################## ########### Admixture ############################## # Lomond cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/6.Population_genetics/lomond/admixture plink --vcf lom.recode.vcf --make-bed --out lom --allow-extra-chr plink --bfile lom --double-id --allow-extra-chr --recode12 --tab --out lom12 sh ./run_admixture_loop.sh lom12.ped grep -h CV ./results/log*.out # Eck cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/6.Population_genetics/eck/admixture plink --vcf eck.recode.vcf --make-bed --out eck --allow-extra-chr plink --bfile eck --double-id --allow-extra-chr --recode12 --tab --out eck12 sh ./run_admixture_loop.sh eck12.ped grep -h CV ./results/log*.out ############################### ######## RDA analysis ############################### #### split file according to lake system cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/7.Selection_analyses/RDA/vcf_files vcftools --vcf combined.recode.vcf --keep eck.txt --recode --recode-INFO-all --out eck.samples vcftools --vcf combined.recode.vcf --keep lom.txt --recode --recode-INFO-all --out lom.samples # impute with TASSEL (not shown) # bgzip vcf files bgzip eck.imputed.vcf bgzip lom.imputed.vcf bcftools index eck.imputed.vcf.gz bcftools index lom.imputed.vcf.gz # merge the two vcf files bcftools merge eck.imputed.vcf.gz lom.imputed.vcf.gz -o combined.imputed.vcf.gz bgzip -d combined.imputed.vcf.gz ## cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/7.Selection_analyses/RDA plink --vcf ./vcf_files/combined.imputed.vcf --recodeA --out ./plink_files/combined.imputed12 --allow-extra-chr cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/7.Selection_analyses/RDA plink --vcf ./vcf_files/by_system/eck.imputed.vcf --recodeA --out ./plink_files/eck.imputed12 --allow-extra-chr plink --vcf ./vcf_files/by_system/lom.imputed.vcf --recodeA --out ./plink_files/lom.imputed12 --allow-extra-chr #################################### ########## CREATE TREEMIX FILES ## used for BayPass #################################### cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/7.Selection_analyses/BayPass # convert vcf to bed plink --vcf combined.recode.vcf --make-bed --out combined --double-id --allow-extra-chr # convert from bed to ped plink --bfile combined --recode --tab --out combined --allow-extra-chr # create a frequency file plink --file combined --freq --missing --family --out combined --allow-no-sex --allow-extra-chr gzip *.frq.strat # gzip the file # Run python script python plink2treemix.py combined.frq.strat.gz combined.frq.input.strat.gz ############################################################################################################################################################################################################################################################### ###### vcftools analyses cd /Users/marcocrotti/Desktop/ddRAD_epiRAD_powan/demultiplexed_reads/EpiRAD/Intermediate_files3/6.Population_genetics/lomond ## Allele frequencies vcftools --vcf lom.recode.vcf --out ./vcftools/Lom1_AlleleFreq_genomescan --freq2 --keep ./vcftools/lom1.txt vcftools --vcf lom.recode.vcf --out ./vcftools/Lom_AlleleFreq_genomescan --freq2 --keep ./vcftools/lom.txt vcftools --vcf lom.recode.vcf --out ./vcftools/Slo_AlleleFreq_genomescan --freq2 --keep ./vcftools/slo.txt vcftools --vcf lom.recode.vcf --out ./vcftools/Car_AlleleFreq_genomescan --freq2 --keep ./vcftools/car.txt vcftools --vcf lom.recode.vcf --out ./vcftools/Lai_AlleleFreq_genomescan --freq2 --keep ./vcftools/lai.txt vcftools --vcf lom.recode.vcf --out ./vcftools/Shi_AlleleFreq_genomescan --freq2 --keep ./vcftools/shi.txt cd ./5/Stacks3/eck ## Allele frequencies vcftools --vcf DP5g80maf05.recode.vcf --out ./vcftools/Eck_AlleleFreq_genomescan --freq2 --keep ./vcftools/eck_popmap2.txt vcftools --vcf DP5g80maf05.recode.vcf --out ./vcftools/Tar_AlleleFreq_genomescan --freq2 --keep ./vcftools/tar_popmap.txt vcftools --vcf DP5g80maf05.recode.vcf --out ./vcftools/Gla_AlleleFreq_genomescan --freq2 --keep ./vcftools/gla_popmap.txt