#!/bin/bash
#### Combined data processing script for ONT sequencing - concatenates reads per run and barcode,
#### runs NanoQC on them, then trims with porechop and filters with filtlong
#### Place the script in the top level of the Nanopore sequencing output directory structure, and run it from there
#### NB: do not run this script multiple times in the same folder - it will duplicate reads in the Reads/ folder and cause filtlong to crash
#### Move or delete old output folders before re-running the script
#### (The shebang must stay on the very first line, otherwise it is treated as an ordinary comment)
## concatenate files for each run & barcode
#
# Expected layout, relative to the directory the script is started from:
#   <experiment>/<run>/fastq_pass/barcodeNN/*.fastq.gz
#   <experiment>/<run>/fastq_fail/barcodeNN/*.fastq.gz
# Output: Reads/<run>_<barcode>.fastq.gz (pass reads first, then fail reads).
# Runs with the same name in different experiments are merged into one file.
# Returns non-zero if the output directory or an output file cannot be written.
#
# The body is a subshell "( ... )" so that nullglob and the helper variables
# do not leak into the rest of the script.
concatenate_reads() (
	# Unmatched globs expand to nothing instead of staying literal, which would
	# otherwise produce cat errors and junk files with "*" in their names.
	shopt -s nullglob

	out_dir=Reads
	mkdir -p "$out_dir" || return 1

	# Newline-delimited list of outputs already started in THIS invocation.
	# An output is truncated the first time it is written, so re-running the
	# script regenerates the files instead of appending duplicate reads.
	started=$'\n'

	for experiment in */; do
		experiment=${experiment%/}	# remove the trailing "/" >>>> This is the experiment
		# The output folder lives next to the experiments; never treat it as one.
		if [ "$experiment" = "$out_dir" ]; then
			continue
		fi
		for run_dir in "$experiment"/*/; do
			run_dir=${run_dir%/}
			run=${run_dir##*/}	# everything after the final "/" >>>> This is the run number
			for pass_dir in "$run_dir"/fastq_pass/barcode*/; do
				pass_dir=${pass_dir%/}
				barcode=${pass_dir##*/}	# everything after the final "/" >>>> This is the barcode number
				target="$out_dir/${run}_${barcode}.fastq.gz"
				# Failed reads are included in the analysis - filtlong should deal with
				# low quality reads. Remove the fastq_fail entry if you don't want this!
				for source_dir in "$pass_dir" "$run_dir/fastq_fail/$barcode"; do
					chunks=("$source_dir"/*.fastq.gz)
					# Nothing to add (missing folder or no reads): skip, otherwise
					# cat without arguments would wait on stdin.
					if [ "${#chunks[@]}" -eq 0 ]; then
						continue
					fi
					case "$started" in
						*$'\n'"$target"$'\n'*) ;;	# already started in this run: append
						*)
							: > "$target" || return 1
							started+="$target"$'\n'
							;;
					esac
					cat -- "${chunks[@]}" >> "$target" || return 1
				done
			done
		done
	done
)
# Later stages depend on complete read files, so stop if concatenation failed.
concatenate_reads || exit 1
## run nanoqc on each file
#
# Produces one report folder per concatenated read file:
#   NanoQC/<file name>/  from  ./Reads/<file name>
# Only the *.fastq.gz files directly inside Reads/ are processed, so
# sub-folders of Reads/ are never handed to nanoqc. A nanoqc failure on
# one file is reported on stderr and the remaining files are still processed.
run_nanoqc() {
    local reads name
    mkdir -p NanoQC || return 1
    for reads in Reads/*.fastq.gz; do
        # When nothing matches, the pattern stays literal - skip it.
        [ -f "$reads" ] || continue
        name=${reads##*/}
        nanoqc -o "NanoQC/$name/" "./Reads/$name" ||
            printf 'warning: nanoqc failed for %s\n' "$reads" >&2
    done
}
run_nanoqc
## trimming and filtering
#
# For every Reads/<name>.fastq.gz:
#   1. porechop trims adapters into Reads/temp/<name>.fastq.gz
#   2. filtlong filters the trimmed reads; the gzip-compressed result is
#      written to Reads/filtered/<name>.fastq.gz
# Reads/temp/ is removed at the end. Returns non-zero if any sample failed.
#
# The body is a subshell "( ... )" so that pipefail/nullglob and the helper
# variables do not leak into the rest of the script.
trim_and_filter() (
    set -o pipefail     # notice a filtlong failure even though gzip succeeds
    shopt -s nullglob   # no reads -> loop body is not run on the literal pattern

    temp_dir=Reads/temp
    filtered_dir=Reads/filtered
    mkdir -p "$temp_dir" "$filtered_dir" || return 1

    status=0
    for infile in Reads/*.fastq.gz; do
        name=${infile##*/}
        trimmed="$temp_dir/$name"
        filtered="$filtered_dir/$name"

        # Do not filter a sample whose trimming failed: filtlong would read a
        # missing/partial file and leave an empty result behind.
        if ! porechop -i "$infile" -o "$trimmed" --extra_end_trim 20 -t 8; then
            printf 'warning: porechop failed for %s - sample skipped\n' "$infile" >&2
            status=1
            continue
        fi

        if ! filtlong --min_length 25 --target_bases 5000000000000 --mean_q_weight 9 "$trimmed" |
            gzip > "$filtered"; then
            printf 'warning: filtlong failed for %s - partial output removed\n' "$infile" >&2
            rm -f -- "$filtered"
            status=1
        fi
    done

    rm -rf -- "$temp_dir"
    return "$status"
)
trim_and_filter