##-->-- MSc Population Health Sciences Research Project --<--##

##Socio-economic inequalities within drug-related deaths and drug treatment expenditure in England, 2013 to 2019: 
##an ecological study using routine data.

##Student ID: 2507575
##Ethics Approval MVLS Application No. 200210149
##R 4.0.5

##Script finalised 27/07/2022 and data to be held for 10 years. 
##Any questions please email James Ormond: j.ormond@nhs.net or james.ormond@hotmail.com

rm(list = ls())  ##Removes any objects in the Environment

##### REMOVE ALL USER INSTALLED R PACKAGES: ####

##This code was used to remove existing installed and loaded packages with conflicting functions.
##Only those packages listed in section 1 are used, version numbers stated. 

##This is because the DPLYR package had some function changes between version 1.0.8 and later versions.
##The SII/RII code was taken from a Public Health Scotland example, that used version 1.0.8 of dplyr.
##Please do not use a dplyr package later than 1.0.8 as it will provide incorrect results. 
##Older DPLYR packages can be downloaded here: https://cran.r-project.org/src/contrib/Archive/dplyr/ then manually loaded in.

##Code to remove user installed packages:

# create a list of all installed packages
ip <- as.data.frame(installed.packages())
head(ip)
# if you use MRO, make sure that no packages in this library will be removed
ip <- subset(ip, !grepl("MRO", ip$LibPath))
# we don't want to remove base or recommended packages either\
ip <- ip[!(ip[,"Priority"] %in% c("base", "recommended")),]
# determine the library where the packages are installed
path.lib <- unique(ip$LibPath)
# create a vector with all the names of the packages you want to remove
pkgs.to.remove <- ip[,1]
head(pkgs.to.remove)
# remove the packages
sapply(pkgs.to.remove, remove.packages, lib = path.lib)

##### REMOVE ALL USER INSTALLED R PACKAGES: #####

##### *1. Install & Load Packages* #####

##Install packages required
#if (!require("readxl")) install.packages("readxl")     #version 1.4.0      used to load data                 
#if (!require("ggplot2")) install.packages("ggplot2")   #version 3.3.6      used to visualise data
#if (!require("purrr")) install.packages("purrr")       #version 0.3.4      used for SII/RII
#if (!require("tidyr")) install.packages("tidyr")       #version 1.2.0      used for SII/RII
#if (!require("broom")) install.packages("broom")       #version 0.8.0      used for SII/RII
#if (!require("plm")) install.packages("plm")           #version 2.6-1      used for fixed effects regression 
#if (!require("dplyr")) install.packages("dplyr")       #version 1.0.8      used for SII/RII   #not 1.0.9 (!)
#if (!require("lmtest")) install.packages("lmtest")     #version 0.9-40     used for BP test
#if (!require("epitools")) install.packages("epitools") #version 0.5-10.1   used for Poisson distribution CI rates
#if (!require("Greg")) install.packages("Greg")         ##version 1.4.0     used for confint_robust

##Load packages required
library(readxl)
library(ggplot2)
library(purrr)
library(tidyr)
library(broom)
library(plm) 
library(dplyr)
library(lmtest) 
library(epitools)
library(Greg) 

##### *1. Install & Load Packages END* #####

##### *2. Data Processing / Data Set Preparation* #####

##Change path from: "C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project" to new data path.

##Load Data (Change path if required)
Table1 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 1")  ##Drug Deaths
Table2 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 2")  ##IMD 2015
Table3 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 3")  ##Population Estimates
Table4 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 4")  ##20/21 Net Expenditure
Table5 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 5")  ##19/20 Net Expenditure
Table6 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 6")  ##18/19 Net Expenditure
Table7 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 7")  ##17/18 Net Expenditure
Table8 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 8")  ##16/17 Net Expenditure
Table9 <- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 9")  ##15/16 Net Expenditure
Table10<- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 10") ##14/15 Net Expenditure
Table11<- read_excel("C:/Users/james.ormond/OneDrive - Midlands and Lancashire CSU/Desktop/MSc Project/Data.xlsx", sheet = "Table 11") ##13/14 Net Expenditure

##Tidy Tables
Table1 <- Table1[ -c(11:30) ]    ##removes columns 2012 and before. 
Table2 <- Table2[ -c(3,4,7:10) ] ##removes columns not needed. 
Table3 <- Table3[ -c(12:23) ]    ##removes columns not needed. 
Table4 <- Table4[ -c(1,4,5,8) ]  ##removes columns not needed. 
Table5 <- Table5[ -c(1,4,5,8) ]  ##removes columns not needed. 
Table6 <- Table6[ -c(1,4,5,8) ]  ##removes columns not needed. 
Table7 <- Table7[ -c(1,4,5,8) ]  ##removes columns not needed. 
Table8 <- Table8[ -c(1,4,5,8) ]  ##removes columns not needed. 
Table9 <- Table9[ -c(1,4,5,7) ]  ##removes columns not needed. 
#Table 11 does not have ONS code in file, link to table 10 to assign.
Table12 <- merge(Table10, Table11, by = "E-code", all.x= F, all.y = F) ##Add ONS Code to Table 11
Table11 <-Table12[ -c(1,4:10,12)]  ##removes columns not needed. 
Table10 <-Table10[ -c(1,4,5,7)]  ##removes columns not needed. 
rm(Table12) #remove table used for and to check join

##Rename Area Codes to enable joins
colnames(Table1)[colnames(Table1) == "Area Codes"] <- "Code"
colnames(Table2)[colnames(Table2) == "Upper Tier Local Authority District code (2013)"] <- "Code"
colnames(Table4)[colnames(Table4) == "ONS Code"] <- "Code"
colnames(Table5)[colnames(Table5) == "ONS Code"] <- "Code"
colnames(Table6)[colnames(Table6) == "ONS Code"] <- "Code"
colnames(Table7)[colnames(Table7) == "ONS Code"] <- "Code"
colnames(Table8)[colnames(Table8) == "ONS Code"] <- "Code"
colnames(Table9)[colnames(Table9) == "ONS Code"] <- "Code"
colnames(Table10)[colnames(Table10)== "ONS Code"]<- "Code"
colnames(Table11)[colnames(Table11)== "ONS Code"]<- "Code"

##Re-code Buckinghamshire - See explanation below.
Table1$Code <- ifelse(Table1$Code == "E06000060", "E10000002",Table1$Code)
Table3$Code <- ifelse(Table3$Code == "E06000060", "E10000002",Table3$Code)
Table4$Code <- ifelse(Table4$Code == "E06000060", "E10000002",Table4$Code)

##Re-join 2 Northamptonshire regions - See explanation below.
North_WestNorthamptonshire <- c("E06000061","E06000062")
Table1[Table1$Code %in% North_WestNorthamptonshire,] ##Find North & East Values
Northamptonshire <- data.frame("Code" = c("E10000021"),"Area Names" = c("Northamptonshire"),"2020" = c(28),
                               "2019" = c(34),"2018" = c(40),"2017" = c(49),"2016" = c(45), "2015" = c(36),
                               "2014" = c(36),"2013" = c(37)) #Create record for combined regions. 
Table1[nrow(Table1) + 1,] <- Northamptonshire #add combined data
rm(North_WestNorthamptonshire, Northamptonshire) #Tidy Environment

##Join Tables 1-3 for RQ1
RQ1 <-  merge(Table2, Table1, by = "Code", all.x= F, all.y = F)    ##First joins Deaths to IMD
RQ1 <- merge(RQ1, Table3, by = "Code", all.x= F, all.y = F)        ##Then joins Pop Estimates
RQ1 <- RQ1[ -c(5,14,15) ]  ##removes columns not needed. 
rm(Table1, Table2, Table3) ##Tidy Environment

RQ1<-RQ1[!(RQ1$Code=="E06000053" | RQ1$Code=="E09000001"),] #removes City of London and Isles of Scilly due to small populations.

## 5 UT LA Codes do not link initially:
#E06000028	Bournemouth - Alexiou excluded due to boundary changes
#E10000021	Northamptonshire - Dissolved Apr-21 into North(E06000061) and East(E06000062) Northamptonshire. 
#E06000029	Poole - Alexiou excluded due to boundary changes
#E10000009	Dorset - Alexiou excluded due to boundary changes
#E10000002	Buckinghamshire - Code changed to E06000060.
#Alexiou: https://www.thelancet.com/journals/lanpub/article/PIIS2468-2667(21)00110-9/fulltext

#"A new non-metropolitan district to be known as Buckinghamshire is constituted, being *coterminous* with 
#Buckinghamshire and comprising the areas of the district councils" Apr-20. https://www.legislation.gov.uk/uksi/2019/957/made
##(!)Decision made to keep Buckinghamshire in study by changing Code back to original for ease.## 

##Prepare Expenditure - RQ2
UpperTierCodes <- unique(RQ1$Code) ##unique list of the upper tier LA codes we are interested in. 

##Use the unique LA list to filter all expenditure tables to relevant LAs
Table4 <- Table4[Table4$Code %in% UpperTierCodes,]
Table5 <- Table5[Table5$Code %in% UpperTierCodes,]
Table6 <- Table6[Table6$Code %in% UpperTierCodes,]
Table7 <- Table7[Table7$Code %in% UpperTierCodes,]
Table8 <- Table8[Table8$Code %in% UpperTierCodes,] 
#In Table 9 Code E10000023 North Yorkshire not listed in file. On further inspection listed as E10000022. Possible DQ error.
Table9["Code"][Table9["Code"] == "E10000022"] <- "E10000023" #This fixes DQ error to keep North Yorkshire within the study. 
Table9 <- Table9[Table9$Code %in% UpperTierCodes,] ## 
Table10<- Table10[Table10$Code %in% UpperTierCodes,]
Table11<- Table11[Table11$Code %in% UpperTierCodes,]

##Join Tables 4-11 for RQ2
colnames(Table11)[colnames(Table11)== "Substance misuse - Drug misuse - adults.y"]<- "2013 Spend" ##Rename column
colnames(Table10)[colnames(Table10)== "Substance misuse - Drug misuse - adults"]  <- "2014 Spend" ##Rename column 
RQ2 <-  merge(Table11, Table10, by = "Code", all.x= F, all.y = F)  ##First adds 2013 spend to 2014
colnames(Table9)[colnames(Table9)== "Substance misuse - Drug misuse - adults"]  <- "2015 Spend" ##Rename column 
RQ2 <-  merge(RQ2, Table9, by = "Code", all.x= F, all.y = F)  ##Then adds 2015 spend

#Change columns to numeric to sum:
Table8$`Substance misuse - Treatment for drug misuse in adults`                     <- as.numeric(as.character(Table8$`Substance misuse - Treatment for drug misuse in adults`))
Table8$`Substance misuse - Preventing and reducing harm from drug misuse in adults` <- as.numeric(as.character(Table8$`Substance misuse - Preventing and reducing harm from drug misuse in adults`))
#sum columns:
Table8$'2016 Spend'<- Table8$'Substance misuse - Treatment for drug misuse in adults' + Table8$'Substance misuse - Preventing and reducing harm from drug misuse in adults'
RQ2 <-  merge(RQ2, Table8, by = "Code", all.x= F, all.y = F)  ##Then adds 2016 spend
RQ2 <- RQ2[ -c(4,6,8:10) ]  ##removes columns not needed. 

#Change columns to numeric to sum:
Table7$`Substance misuse - Treatment for drug misuse in adults`                     <- as.numeric(as.character(Table7$`Substance misuse - Treatment for drug misuse in adults`))
Table7$`Substance misuse - Preventing and reducing harm from drug misuse in adults` <- as.numeric(as.character(Table7$`Substance misuse - Preventing and reducing harm from drug misuse in adults`))
#sum columns:
Table7$'2017 Spend'<- Table7$'Substance misuse - Treatment for drug misuse in adults' + Table7$'Substance misuse - Preventing and reducing harm from drug misuse in adults'
RQ2 <-  merge(RQ2, Table7, by = "Code", all.x= F, all.y = F)  ##Then adds 2017 spend
RQ2 <- RQ2[ -c(7:9) ]  ##removes columns not needed. 

#Change columns to numeric to sum:
Table6$`Substance misuse - Treatment for drug misuse in adults`                     <- as.numeric(as.character(Table6$`Substance misuse - Treatment for drug misuse in adults`))
Table6$`Substance misuse - Preventing and reducing harm from drug misuse in adults` <- as.numeric(as.character(Table6$`Substance misuse - Preventing and reducing harm from drug misuse in adults`))
#sum columns:
Table6$'2018 Spend'<- Table6$'Substance misuse - Treatment for drug misuse in adults' + Table6$'Substance misuse - Preventing and reducing harm from drug misuse in adults'
RQ2 <-  merge(RQ2, Table6, by = "Code", all.x= F, all.y = F)  ##Then adds 2018 spend
RQ2 <- RQ2[ -c(8:10) ]  ##removes columns not needed. 

#Change columns to numeric to sum:
Table5$`Substance misuse - Treatment for drug misuse in adults`                     <- as.numeric(as.character(Table5$`Substance misuse - Treatment for drug misuse in adults`))
Table5$`Substance misuse - Preventing and reducing harm from drug misuse in adults` <- as.numeric(as.character(Table5$`Substance misuse - Preventing and reducing harm from drug misuse in adults`))
#sum columns:
Table5$'2019 Spend'<- Table5$'Substance misuse - Treatment for drug misuse in adults' + Table5$'Substance misuse - Preventing and reducing harm from drug misuse in adults'
RQ2 <-  merge(RQ2, Table5, by = "Code", all.x= F, all.y = F)  ##Then adds 2019 spend
RQ2 <- RQ2[ -c(9:11) ]  ##removes columns not needed. 

#Change columns to numeric to sum:
Table4$`Substance misuse - Treatment for drug misuse in adults`                     <- as.numeric(as.character(Table4$`Substance misuse - Treatment for drug misuse in adults`))
Table4$`Substance misuse - Preventing and reducing harm from drug misuse in adults` <- as.numeric(as.character(Table4$`Substance misuse - Preventing and reducing harm from drug misuse in adults`))
#sum columns:
Table4$'2020 Spend'<- Table4$'Substance misuse - Treatment for drug misuse in adults' + Table4$'Substance misuse - Preventing and reducing harm from drug misuse in adults'
RQ2 <-  merge(RQ2, Table4, by = "Code", all.x= F, all.y = F)  ##Then adds 2020 spend
RQ2 <- RQ2[ -c(10:12) ]  ##removes columns not needed. 

rm(Table4, Table5, Table6, Table7, Table8, Table9, Table10, Table11, UpperTierCodes) ##Tidy Environment


##Linear Regression Imputation to create 2020 spend for 3x missing LAs. 
Medway <- c(2299,2751,2519,2608,1393,1393,1383)
Reading <- c(2351,2592,2033,1391,1238,1078,1054)
Slough <- c(1778,1778,982,881,1397,1557,720)
Year <- c(2013,2014,2015,2016,2017,2018,2019)
p <-  as.data.frame(2020) #p = prediction
colnames(p) <- "Year"

Medway <- data.frame(Medway, Year)
Reading <- data.frame(Reading, Year)
Slough <- data.frame(Slough, Year)

model_M <- lm(Medway ~ Year, data = Medway)
model_R <- lm(Reading ~ Year, data = Reading)
model_S <- lm(Slough ~ Year, data = Slough)

predict(model_M, newdata = p) #Predicts 1108       E06000035
predict(model_R, newdata = p) #Predicts 574.7143   E06000038
predict(model_S, newdata = p) #Predcits 841.7143   E06000039

rm(Medway, Reading, Slough, model_M, model_R, model_S, Year, p)

RQ2$`2020 Spend` <- ifelse(RQ2$Code == "E06000035", 1108,    RQ2$`2020 Spend`)
RQ2$`2020 Spend` <- ifelse(RQ2$Code == "E06000038", 574.7143,RQ2$`2020 Spend`)
RQ2$`2020 Spend` <- ifelse(RQ2$Code == "E06000039", 841.7143,RQ2$`2020 Spend`)

##Create Income Deprivation Quintiles:
Quintiles <- RQ1[,1:3 ]
Quintiles <- Quintiles[with(Quintiles, order(`Income - Average score`)),] #Order by Income
Quintiles$NewRank <- seq.int(nrow(Quintiles)) #Create Ranking Order
Quintiles$Quintile <- "Q1" #Create Variable                                        #30 In Q1
Quintiles$Quintile <- ifelse(Quintiles$NewRank < 118, "Q2",Quintiles$Quintile)     #29 in Q2
Quintiles$Quintile <- ifelse(Quintiles$NewRank < 89, "Q3",Quintiles$Quintile)      #29 in Q3
Quintiles$Quintile <- ifelse(Quintiles$NewRank < 60, "Q4",Quintiles$Quintile)      #29 in Q4
Quintiles$Quintile <- ifelse(Quintiles$NewRank < 31, "Q5",Quintiles$Quintile)      #30 in Q5
RQ1 <-  merge(RQ1, Quintiles, by = "Code", all.x= F, all.y = F)  ##Adds Quintile to RQ1
RQ1 <- RQ1[ -c(21:23) ]  ##removes columns not needed. 
RQ2 <-  merge(RQ2, Quintiles, by = "Code", all.x= F, all.y = F)  ##Adds Quintile to RQ2
RQ2 <- RQ2[ -c(11:13) ]  ##removes columns not needed. 
rm(Quintiles)

##Create Population Standard Rates Per 100,000
RQ1$DRP_2020 <- ((RQ1$`2020` / RQ1$`Mid-2020`) * 100000)    # Create Death Rate Per 100k Population 2020
RQ1$DRP_2019 <- ((RQ1$`2019` / RQ1$`Mid-2019`) * 100000)    # Create Death Rate Per 100k Population 2019
RQ1$DRP_2018 <- ((RQ1$`2018` / RQ1$`Mid-2018`) * 100000)    # Create Death Rate Per 100k Population 2018
RQ1$DRP_2017 <- ((RQ1$`2017` / RQ1$`Mid-2017`) * 100000)    # Create Death Rate Per 100k Population 2017
RQ1$DRP_2016 <- ((RQ1$`2016` / RQ1$`Mid-2016`) * 100000)    # Create Death Rate Per 100k Population 2016
RQ1$DRP_2015 <- ((RQ1$`2015` / RQ1$`Mid-2015`) * 100000)    # Create Death Rate Per 100k Population 2015
RQ1$DRP_2014 <- ((RQ1$`2014` / RQ1$`Mid-2014`) * 100000)    # Create Death Rate Per 100k Population 2014
RQ1$DRP_2013 <- ((RQ1$`2013` / RQ1$`Mid-2013`) * 100000)    # Create Death Rate Per 100k Population 2013

##Adjust Spend By GDP Deflator to 20-21 Prices - Allows Real Terms Comparisons:
#2013-14:	85.766 #2014-15:	86.754 #2015-16:	87.292 #2016-17:	89.245
#2017-18:	90.782 #2018-19:	92.553 #2019-20:	94.656 #2020-21:	100.000

RQ2$'2013 Spend_GDP' <-    RQ2$`2013 Spend` * (0.85766)
RQ2$'2014 Spend_GDP' <-    RQ2$`2014 Spend` * (0.86754)
RQ2$'2015 Spend_GDP' <-    RQ2$`2015 Spend` * (0.87292)
RQ2$'2016 Spend_GDP' <-    RQ2$`2016 Spend` * (0.89245)
RQ2$'2017 Spend_GDP' <-    RQ2$`2017 Spend` * (0.90782)
RQ2$'2018 Spend_GDP' <-    RQ2$`2018 Spend` * (0.92553)
RQ2$'2019 Spend_GDP' <-    RQ2$`2019 Spend` * (0.94656)
RQ2$'2020 Spend_GDP' <-    RQ2$`2020 Spend` * (1      )

##Add Spend Per Person
Pop <- RQ1
Pop <- Pop[ -c(2:12,21:29) ] ##removes columns not needed. 
RQ2 <-  merge(RQ2, Pop, by = "Code", all.x= F, all.y = F)  ##Adds Pop to RQ2
rm(Pop)

##Multiplied by 1,000 as data published in 1,000s
RQ2$'2013 Spend_GDP' <-    (RQ2$'2013 Spend_GDP'  * 1000) 
RQ2$'2014 Spend_GDP' <-    (RQ2$'2014 Spend_GDP'  * 1000)
RQ2$'2015 Spend_GDP' <-    (RQ2$'2015 Spend_GDP'  * 1000)
RQ2$'2016 Spend_GDP' <-    (RQ2$'2016 Spend_GDP'  * 1000)
RQ2$'2017 Spend_GDP' <-    (RQ2$'2017 Spend_GDP'  * 1000)
RQ2$'2018 Spend_GDP' <-    (RQ2$'2018 Spend_GDP'  * 1000)
RQ2$'2019 Spend_GDP' <-    (RQ2$'2019 Spend_GDP'  * 1000)
RQ2$'2020 Spend_GDP' <-    (RQ2$'2020 Spend_GDP'  * 1000)

##Spend Per Pop
RQ2$`2013 Spend_GDP_Pop` <- (RQ2$'2013 Spend_GDP' / RQ2$`Mid-2013`)
RQ2$`2014 Spend_GDP_Pop` <- (RQ2$'2014 Spend_GDP' / RQ2$`Mid-2014`)
RQ2$`2015 Spend_GDP_Pop` <- (RQ2$'2015 Spend_GDP' / RQ2$`Mid-2015`)
RQ2$`2016 Spend_GDP_Pop` <- (RQ2$'2016 Spend_GDP' / RQ2$`Mid-2016`)
RQ2$`2017 Spend_GDP_Pop` <- (RQ2$'2017 Spend_GDP' / RQ2$`Mid-2017`)
RQ2$`2018 Spend_GDP_Pop` <- (RQ2$'2018 Spend_GDP' / RQ2$`Mid-2018`)
RQ2$`2019 Spend_GDP_Pop` <- (RQ2$'2019 Spend_GDP' / RQ2$`Mid-2019`)
RQ2$`2020 Spend_GDP_Pop` <- (RQ2$'2020 Spend_GDP' / RQ2$`Mid-2020`)

##Changes in Spend
RQ2$SpendDiff13_14 <- RQ2$`2014 Spend_GDP_Pop` - RQ2$`2013 Spend_GDP_Pop`
RQ2$SpendDiff14_15 <- RQ2$`2015 Spend_GDP_Pop` - RQ2$`2014 Spend_GDP_Pop`
RQ2$SpendDiff15_16 <- RQ2$`2016 Spend_GDP_Pop` - RQ2$`2015 Spend_GDP_Pop`
RQ2$SpendDiff16_17 <- RQ2$`2017 Spend_GDP_Pop` - RQ2$`2016 Spend_GDP_Pop`
RQ2$SpendDiff17_18 <- RQ2$`2018 Spend_GDP_Pop` - RQ2$`2017 Spend_GDP_Pop`
RQ2$SpendDiff18_19 <- RQ2$`2019 Spend_GDP_Pop` - RQ2$`2018 Spend_GDP_Pop`
RQ2$SpendDiff19_20 <- RQ2$`2020 Spend_GDP_Pop` - RQ2$`2019 Spend_GDP_Pop`

##DRD Differences At LA Level
RQ1$DRD13_14 <- RQ1$DRP_2014 - RQ1$DRP_2013
RQ1$DRD14_15 <- RQ1$DRP_2015 - RQ1$DRP_2014
RQ1$DRD15_16 <- RQ1$DRP_2016 - RQ1$DRP_2015
RQ1$DRD16_17 <- RQ1$DRP_2017 - RQ1$DRP_2016
RQ1$DRD17_18 <- RQ1$DRP_2018 - RQ1$DRP_2017
RQ1$DRD18_19 <- RQ1$DRP_2019 - RQ1$DRP_2018
RQ1$DRD19_20 <- RQ1$DRP_2020 - RQ1$DRP_2019

##### *2. Data Processing / Data Set Preparation END* #####

##### *3. Table 2 & Figure 1 Confidence Interval of Rates* ####

##2013
CIs <- aggregate(RQ1$`2013`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2013`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"                                               ##rename to value

CI2013  <- pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
CI2013A <-pois.approx(sum(CIs$value), pt=sum(CIs$population), conf.level = 0.95)

CI2013$Q <- seq.int(nrow(CI2013))
CI2013$Year <- 2013

##2014
CIs <- aggregate(RQ1$`2014`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2014`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"                                               ##rename to value

CI2014  <- pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
CI2014A  <- pois.approx(sum(CIs$value), pt=sum(CIs$population), conf.level = 0.95)

CI2014$Q <- seq.int(nrow(CI2014))
CI2014$Year <- 2014

##2015
CIs <- aggregate(RQ1$`2015`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2015`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"                                               ##rename to value

CI2015  <- pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
CI2015A  <- pois.approx(sum(CIs$value), pt=sum(CIs$population), conf.level = 0.95)

CI2015$Q <- seq.int(nrow(CI2015))
CI2015$Year <- 2015

##2016
CIs <- aggregate(RQ1$`2016`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2016`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"                                               ##rename to value

CI2016  <- pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
CI2016A  <- pois.approx(sum(CIs$value), pt=sum(CIs$population), conf.level = 0.95)

CI2016$Q <- seq.int(nrow(CI2016))
CI2016$Year <- 2016

##2017
CIs <- aggregate(RQ1$`2017`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2017`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"                                               ##rename to value

CI2017  <- pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
CI2017A  <- pois.approx(sum(CIs$value), pt=sum(CIs$population), conf.level = 0.95)

CI2017$Q <- seq.int(nrow(CI2017))
CI2017$Year <- 2017

##2018
CIs <- aggregate(RQ1$`2018`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2018`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"                                               ##rename to value

CI2018  <- pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
CI2018A  <- pois.approx(sum(CIs$value), pt=sum(CIs$population), conf.level = 0.95)

CI2018$Q <- seq.int(nrow(CI2018))
CI2018$Year <- 2018

##2019
CIs <- aggregate(RQ1$`2019`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2019`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"                                               ##rename to value

CI2019  <- pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
CI2019A  <- pois.approx(sum(CIs$value), pt=sum(CIs$population), conf.level = 0.95)

CI2019$Q <- seq.int(nrow(CI2019))
CI2019$Year <- 2019

CIQ <- rbind(CI2013, CI2014, CI2015, CI2016, CI2017, CI2018, CI2019)                          
CIQA <- rbind(CI2013A, CI2014A, CI2015A, CI2016A, CI2017A, CI2018A, CI2019A)

rm(CI2013, CI2014, CI2015, CI2016, CI2017, CI2018, CI2019,
   CI2013A, CI2014A, CI2015A, CI2016A, CI2017A, CI2018A, CI2019A, CIs)

##All Years
pois.approx(sum(CIQA$`x`)/7, sum(pt=CIQA$`pt`)/7, conf.level = 0.95)
rm(CIQA)

##2013-2019
CIs <- aggregate(CIQ$`x`, list(CIQ$Q), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(CIQ$`pt`, list(CIQ$Q), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"       

pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)

Tab2 <- CIQ[ -c(1,2,6) ]
Tab2$rate  <- Tab2$rate *100000
Tab2$lower <- Tab2$lower *100000
Tab2$upper <- Tab2$upper * 100000
colnames(Tab2)[colnames(Tab2) == "Q"] <- "Quintile"   
rm(CIs, CIQ)

Tab2$Quintile <- as.factor(Tab2$Quintile)

Tab2$Quintile <- ifelse(Tab2$Quintile == "1", "Q1",Tab2$Quintile)
Tab2$Quintile <- ifelse(Tab2$Quintile == "2", "Q2",Tab2$Quintile)
Tab2$Quintile <- ifelse(Tab2$Quintile == "3", "Q3",Tab2$Quintile)
Tab2$Quintile <- ifelse(Tab2$Quintile == "4", "Q4",Tab2$Quintile)
Tab2$Quintile <- ifelse(Tab2$Quintile == "5", "Q5",Tab2$Quintile)

##Figure 1 - DRD
ggplot(Tab2, aes(x=Year, y=rate, group=Quintile, color=Quintile, cex.names =1)) +
  geom_line()  +
  ylab("Drug Mortality Rate Per 100,000 Population") +
  theme_classic() +
  xlab("Year") +
  scale_color_manual(values = c("green", "orange","red", "blue","black")) +
 geom_errorbar(aes(ymin=Tab2$`lower`, ymax=Tab2$`upper`), width = 0.1, size = 0.5) +
  geom_point(aes(x=Year, y=rate), size = 2)
##Export as 900 x 500


##Sensitivity Analysis: 2020

##2020
CIs <- aggregate(RQ1$`2020`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2020`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"                                               ##rename to value

CI2020 <- pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
CI2020A  <- pois.approx(sum(CIs$value), pt=sum(CIs$population), conf.level = 0.95)

CI2020$Q <- seq.int(nrow(CI2020))
CI2020$Year <- 2020


CIQ <- rbind(CI2013, CI2014, CI2015, CI2016, CI2017, CI2018, CI2019, CI2020)                          
CIQA <- rbind(CI2013A, CI2014A, CI2015A, CI2016A, CI2017A, CI2018A, CI2019A, CI2020A)

rm(CI2013, CI2014, CI2015, CI2016, CI2017, CI2018, CI2019, CI2020,
   CI2013A, CI2014A, CI2015A, CI2016A, CI2017A, CI2018A, CI2019A, CI2020A)

##All Years
pois.approx(sum(CIQA$`x`)/8, sum(pt=CIQA$`pt`)/8, conf.level = 0.95)
rm(CIQA)

##2013-2020
CIs <- aggregate(CIQ$`x`, list(CIQ$Q), FUN=sum)                                         ##YEAR
colnames(CIs)[colnames(CIs) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(CIs)[colnames(CIs) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(CIQ$`pt`, list(CIQ$Q), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
CIs <- merge(x=CIs, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(CIs)[colnames(CIs) == "x"] <- "population"       

pois.approx(CIs$value, pt=CIs$population, conf.level = 0.95)
rm(CIs, CIQA)

##### *3. Table 2 & Figure 1 Confidence Interval of Rates End*#####

##### *4. Figure 4 Spend Per Quintile* ####

##2020
Fig2 <- aggregate(RQ2$`2020 Spend_GDP`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(Fig2)[colnames(Fig2) == "Group.1"] <- "quintile"                                         ##rename to quintile
colnames(Fig2)[colnames(Fig2) == "x"] <- "value"                                                  ##rename to value

population <- aggregate(RQ2$`Mid-2020`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
Fig2 <- merge(x=Fig2, y=population, by="quintile", all.x=F, all.y=F)                              ##link data sets
rm(population)
colnames(Fig2)[colnames(Fig2) == "x"] <- "population"        
Fig2$Year <- 2020
Fig2$Spend <- (Fig2$value / Fig2$population)
Fig2.2020 <- Fig2

##2019
Fig2 <- aggregate(RQ2$`2019 Spend_GDP`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(Fig2)[colnames(Fig2) == "Group.1"] <- "quintile"                                         ##rename to quintile
colnames(Fig2)[colnames(Fig2) == "x"] <- "value"                                                  ##rename to value

population <- aggregate(RQ2$`Mid-2019`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
Fig2 <- merge(x=Fig2, y=population, by="quintile", all.x=F, all.y=F)                              ##link data sets
rm(population)
colnames(Fig2)[colnames(Fig2) == "x"] <- "population"        
Fig2$Year <- 2019
Fig2$Spend <- (Fig2$value / Fig2$population)
Fig2.2019 <- Fig2

##2018
Fig2 <- aggregate(RQ2$`2018 Spend_GDP`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(Fig2)[colnames(Fig2) == "Group.1"] <- "quintile"                                         ##rename to quintile
colnames(Fig2)[colnames(Fig2) == "x"] <- "value"                                                  ##rename to value

population <- aggregate(RQ2$`Mid-2018`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
Fig2 <- merge(x=Fig2, y=population, by="quintile", all.x=F, all.y=F)                              ##link data sets
rm(population)
colnames(Fig2)[colnames(Fig2) == "x"] <- "population"        
Fig2$Year <- 2018
Fig2$Spend <- (Fig2$value / Fig2$population)
Fig2.2018 <- Fig2

##2017
Fig2 <- aggregate(RQ2$`2017 Spend_GDP`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(Fig2)[colnames(Fig2) == "Group.1"] <- "quintile"                                         ##rename to quintile
colnames(Fig2)[colnames(Fig2) == "x"] <- "value"                                                  ##rename to value

population <- aggregate(RQ2$`Mid-2017`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
Fig2 <- merge(x=Fig2, y=population, by="quintile", all.x=F, all.y=F)                              ##link data sets
rm(population)
colnames(Fig2)[colnames(Fig2) == "x"] <- "population"        
Fig2$Year <- 2017
Fig2$Spend <- (Fig2$value / Fig2$population)
Fig2.2017 <- Fig2

##2016
Fig2 <- aggregate(RQ2$`2016 Spend_GDP`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(Fig2)[colnames(Fig2) == "Group.1"] <- "quintile"                                         ##rename to quintile
colnames(Fig2)[colnames(Fig2) == "x"] <- "value"                                                  ##rename to value

population <- aggregate(RQ2$`Mid-2016`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
Fig2 <- merge(x=Fig2, y=population, by="quintile", all.x=F, all.y=F)                              ##link data sets
rm(population)
colnames(Fig2)[colnames(Fig2) == "x"] <- "population"        
Fig2$Year <- 2016
Fig2$Spend <- (Fig2$value / Fig2$population)
Fig2.2016 <- Fig2

##2015
Fig2 <- aggregate(RQ2$`2015 Spend_GDP`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(Fig2)[colnames(Fig2) == "Group.1"] <- "quintile"                                         ##rename to quintile
colnames(Fig2)[colnames(Fig2) == "x"] <- "value"                                                  ##rename to value

population <- aggregate(RQ2$`Mid-2015`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
Fig2 <- merge(x=Fig2, y=population, by="quintile", all.x=F, all.y=F)                              ##link data sets
rm(population)
colnames(Fig2)[colnames(Fig2) == "x"] <- "population"        
Fig2$Year <- 2015
Fig2$Spend <- (Fig2$value / Fig2$population)
Fig2.2015 <- Fig2

##2014
Fig2 <- aggregate(RQ2$`2014 Spend_GDP`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(Fig2)[colnames(Fig2) == "Group.1"] <- "quintile"                                         ##rename to quintile
colnames(Fig2)[colnames(Fig2) == "x"] <- "value"                                                  ##rename to value

population <- aggregate(RQ2$`Mid-2014`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
Fig2 <- merge(x=Fig2, y=population, by="quintile", all.x=F, all.y=F)                              ##link data sets
rm(population)
colnames(Fig2)[colnames(Fig2) == "x"] <- "population"        
Fig2$Year <- 2014
Fig2$Spend <- (Fig2$value / Fig2$population)
Fig2.2014 <- Fig2

##2013
Fig2 <- aggregate(RQ2$`2013 Spend_GDP`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(Fig2)[colnames(Fig2) == "Group.1"] <- "quintile"                                         ##rename to quintile
colnames(Fig2)[colnames(Fig2) == "x"] <- "value"                                                  ##rename to value

population <- aggregate(RQ2$`Mid-2013`, list(RQ2$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
Fig2 <- merge(x=Fig2, y=population, by="quintile", all.x=F, all.y=F)                              ##link data sets
rm(population)
colnames(Fig2)[colnames(Fig2) == "x"] <- "population"        
Fig2$Year <- 2013
Fig2$Spend <- (Fig2$value / Fig2$population)
Fig2.2013 <- Fig2

##Join all years:
Fig2 <- rbind(Fig2.2013, Fig2.2014, Fig2.2015, Fig2.2016, Fig2.2017, Fig2.2018, Fig2.2019)##Add Fig2.2020 back here to add 2020 in.
colnames(Fig2)[colnames(Fig2)== "quintile"]<- "Quintile"
rm(Fig2.2013, Fig2.2014, Fig2.2015, Fig2.2016, Fig2.2017, Fig2.2018, Fig2.2019, Fig2.2020)

##Plot Chart
ggplot(Fig2, aes(x=Year, y=Spend, group=Quintile, color=Quintile, cex.names =1)) +
  geom_line()  +
  ylab("Drug Treatment Spend (�) Per Person") +
  theme_classic() +
  xlab("Year") +
  scale_color_manual(values = c("green", "orange","red", "blue","black")) +
  geom_point(aes(x=Year, y=Spend), size = 2)
##Export as 900 x 500

##### *4. Figure 4 Spend Per Quintile END *####

##### *5. Assumption Checks - Normality Tests By Quintile* ####

NormalityTest <-RQ1[, c(21:29)]
NormalityTest$Quintile <- ifelse(NormalityTest$Quintile == "Q1", "1",NormalityTest$Quintile)
NormalityTest$Quintile <- ifelse(NormalityTest$Quintile == "Q2", "2",NormalityTest$Quintile)
NormalityTest$Quintile <- ifelse(NormalityTest$Quintile == "Q3", "3",NormalityTest$Quintile)
NormalityTest$Quintile <- ifelse(NormalityTest$Quintile == "Q4", "4",NormalityTest$Quintile)
NormalityTest$Quintile <- ifelse(NormalityTest$Quintile == "Q5", "5",NormalityTest$Quintile)
NormalityTest$Quintile <- as.integer(NormalityTest$Quintile)

Norm13 <-lm(NormalityTest$DRP_2013 ~ NormalityTest$Quintile)
hist(Norm13$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm13$residuals), sd = sd(Norm13$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm13$residuals)
plot(Norm13)

Norm14 <-lm(NormalityTest$DRP_2014 ~ NormalityTest$Quintile)
hist(Norm14$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm14$residuals), sd = sd(Norm14$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm14$residuals)
plot(Norm14)

Norm15 <-lm(NormalityTest$DRP_2015 ~ NormalityTest$Quintile)
hist(Norm15$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm15$residuals), sd = sd(Norm15$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm15$residuals)
#plot(Norm15)

Norm16 <-lm(NormalityTest$DRP_2016 ~ NormalityTest$Quintile)
hist(Norm16$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm16$residuals), sd = sd(Norm16$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm16$residuals)
#plot(Norm16)

Norm17 <-lm(NormalityTest$DRP_2017 ~ NormalityTest$Quintile)
hist(Norm17$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm17$residuals), sd = sd(Norm17$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm17$residuals)
#plot(Norm17)

Norm18 <-lm(NormalityTest$DRP_2018 ~ NormalityTest$Quintile)
hist(Norm18$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm18$residuals), sd = sd(Norm18$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm18$residuals)
#plot(Norm18)

Norm19 <-lm(NormalityTest$DRP_2019 ~ NormalityTest$Quintile)
hist(Norm19$residuals, freq=FALSE, main = "", ymax = 0.1)
curve(dnorm(x, mean = mean(Norm19$residuals), sd = sd(Norm19$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm19$residuals)
plot(Norm19)

Norm20 <-lm(NormalityTest$DRP_2020 ~ NormalityTest$Quintile)
hist(Norm20$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm20$residuals), sd = sd(Norm20$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm20$residuals)
#plot(Norm20)

rm(NormalityTest, Norm13, Norm14, Norm15, Norm16, Norm17, Norm18, Norm19, Norm20)
##### *5. Assumption Checks - Normality Tests By Quintile End* ####

##### *6. Slope Index of Inequality and Relative Index of Inequality* ####

##2013
SII <- aggregate(RQ1$`2013`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2013`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

SIIall2013 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2013"                                                                                ##YEAR
SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SII2013 <- SII                                                                                    ##YEAR

##2014
SII <- aggregate(RQ1$`2014`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2014`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

SIIall2014 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2014"                                                                                ##YEAR
SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SII2014 <- SII                                                                                    ##YEAR

##2015
SII <- aggregate(RQ1$`2015`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2015`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

SIIall2015 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2015"                                                                                ##YEAR
SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SII2015 <- SII                                                                                    ##YEAR

##2016
SII <- aggregate(RQ1$`2016`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2016`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

SIIall2016 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2016"                                                                                ##YEAR

SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SII2016 <- SII                                                                                    ##YEAR

##2017
SII <- aggregate(RQ1$`2017`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2017`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

SIIall2017 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2017"                                                                                ##YEAR

SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SII2017 <- SII                                                                                    ##YEAR

##2018
SII <- aggregate(RQ1$`2018`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2018`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

SIIall2018 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2018"                                                                                ##YEAR

SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SII2018 <- SII                                                                                    ##YEAR

##2019
SII <- aggregate(RQ1$`2019`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2019`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

SIIall2019 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2019"                                                                                ##YEAR

SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]    

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SII2019 <- SII                                                                                    ##YEAR

##2020
SII <- aggregate(RQ1$`2020`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2020`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

SIIall2020 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2020"                                                                                ##YEAR

SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SII2020 <- SII                                                                                    ##YEAR

##ALL ACROSS STUDY PERIOD
 
SIIall <- rbind(SIIall2013, SIIall2014, SIIall2015, SIIall2016, SIIall2017, SIIall2018, SIIall2019) ###(!) Include SIIall2020 for SA (!)###
rm(SIIall2013, SIIall2014, SIIall2015, SIIall2016, SIIall2017, SIIall2018, SIIall2019, SIIall2020)

SII <- aggregate(SIIall$value, list(SIIall$quintile), FUN=sum)              #select all
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                     ##rename
SII$x <- SII$x /7                                                                                  ###(!) CHANGE 7 to 8 FOR SA (!)###

population <- aggregate(SIIall$population, list(SIIall$quintile), FUN=sum)  ##create population
population$x <- population$x /7                                                               ###(!) CHANGE 7 to 8 FOR SA (!)###

colnames(population)[colnames(population) == "Group.1"] <- "quintile"       ##rename to link
colnames(population)[colnames(population) == "x"] <- "population"           ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)          ##link data sets

colnames(SII)[colnames(SII) == "x"] <- "value"                        
rm(population)

SII$overall_value <- ((sum(SII$value) / sum(SII$population)) * 100000)     ##create overall rate
SII$value <- ((SII$value / SII$population) * 100000)

SII$area <- "2013_19"                                                                               ###(!) CHANGE 2013_19 to 2013_20 FOR SA (!)###

SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SIIall <- SII
##

SII <- rbind(SII2013, SII2014, SII2015, SII2016, SII2017, SII2018, SII2019, SIIall)                 ###(!) Include SII2020 for SA (!)###
rm(SII2013, SII2014, SII2015, SII2016, SII2017, SII2018, SII2019, SIIall, SII2020)

SII$quintile <- ifelse(SII$quintile == "Q1", "1",SII$quintile)
SII$quintile <- ifelse(SII$quintile == "Q2", "2",SII$quintile)
SII$quintile <- ifelse(SII$quintile == "Q3", "3",SII$quintile)
SII$quintile <- ifelse(SII$quintile == "Q4", "4",SII$quintile)
SII$quintile <- ifelse(SII$quintile == "Q5", "5",SII$quintile)
SII$quintile <- as.integer(SII$quintile)


SII <- SII %>% 
  mutate(abs_range = most_depr_value - least_depr_value,
         rel_range = most_depr_value / least_depr_value) %>% ungroup()

## Slope of index of inequality (SII) -- LINEAR
sii_model <- SII %>%  group_by(area) %>% 
  mutate(cumulative_pro = cumsum(proportion_pop),  # cumulative proportion population for each area
         relative_rank = case_when(
           quintile == 1 ~ 0.5*proportion_pop,
           quintile != 1 ~ lag(cumulative_pro) + 0.5*proportion_pop),
         sqr_proportion_pop = sqrt(proportion_pop), #square root of the proportion of the population in each SIMD
         
         relrank_sqr_proppop = relative_rank * sqr_proportion_pop,
         
         value_sqr_proppop = sqr_proportion_pop * value) %>% #value based on population weights
  
  nest() %>% #creating one column called data with all the variables not in the grouping
  
  # Calculating linear regression for all the groups, then formatting the results
  # and calculating the confidence intervals
  mutate(model = map(data, ~ lm(value_sqr_proppop ~ sqr_proportion_pop + relrank_sqr_proppop + 0, data = .)),
         #extracting sii from model, a bit fiddly but it works
         sii = -1 * as.numeric(map(map(model, "coefficients"), "relrank_sqr_proppop")),
         
         cis = map(model, confint_tidy  )) %>% #calculating confidence intervals
  
  ungroup() %>% unnest(cis) %>% #Unnesting the CIs 
  
  #selecting only even row numbers which are the ones that have the sii cis
  filter(row_number() %% 2 == 0) %>% 
  mutate(lowci_sii = -1 * conf.high, #fixing interpretation
         upci_sii = -1 * conf.low) %>% 
  select(-conf.low, -conf.high) #non-needed variables

sii_model

Fig3<-  subset(sii_model, select=-c(data,model))
Fig3<-Fig3[!(Fig3$area=="2013_19"),]                                     ###(!) CHANGE TO 2013_20 For SA (!)###

#Merging sii results with main data set
SII <- left_join(SII, sii_model, by = "area")
rm(sii_model)

## Relative index on inequality (RII) - LINEAR
SII <- SII %>% mutate(rii = sii / overall_value,
                      lowci_rii = lowci_sii / overall_value,
                      upci_rii = upci_sii / overall_value,
                      rii_int = rii * 0.5 *100,
                      lowci_rii_int = lowci_rii * 0.5 *100,
                      upci_rii_int = upci_rii * 0.5 *100)
SII
##### *6. Slope Index of Inequality and Relative Index of Inequality END* ####

##### *7. Figure 2, 3, 7 & 8* ####

Fig1 <-SII[, c(1,2,5)]
Fig1 <- Fig1[Fig1$area != "2013_19",]                                               ###(!) CHANGE TO 2013_20 FOR SA (!)###
colnames(Fig1)[colnames(Fig1) == "quintile"] <- "Quintile"       ##rename to link
Fig1$Quintile <- as.character(Fig1$Quintile)

Fig1$Quintile <- ifelse(Fig1$Quintile == "1", "Q1",Fig1$Quintile)
Fig1$Quintile <- ifelse(Fig1$Quintile == "2", "Q2",Fig1$Quintile)
Fig1$Quintile <- ifelse(Fig1$Quintile == "3", "Q3",Fig1$Quintile)
Fig1$Quintile <- ifelse(Fig1$Quintile == "4", "Q4",Fig1$Quintile)
Fig1$Quintile <- ifelse(Fig1$Quintile == "5", "Q5",Fig1$Quintile)

##Figure 2 - SII
ggplot(Fig3) + 
  geom_point(aes(x=area, y=sii), size =3) +
  geom_errorbar(aes(x=area, ymin=lowci_sii, ymax=upci_sii), width = 0.4, size = 1) +
  theme_classic() +
  ylab("SII") +
  xlab("Year") +
  geom_line(aes(x=area,  y=sii), group=1) +
  ylim(0,12)
##Export as 900 x 500  

##Figure 3 - RII
Fig4<-  subset(SII, select=-c(value, overall_value, population, most_depr_value, least_depr_value, total_pop, proportion_pop, abs_range, rel_range, data, model, sii, lowci_sii, upci_sii, rii_int,  lowci_rii_int, upci_rii_int  ))
Fig4<-Fig4[!(Fig4$area=="2013_19"),]                                                              ###(!) CHANGE TO 2013_20 FOR SA (!)###
Fig4<- Fig4[Fig4$quintile==1,] ##all quintiles the same, so pick any to remove duplicate rows. 
Fig4 <- subset(Fig4, select=-c(quintile ))

ggplot(Fig4) + 
  geom_point(aes(x=area, y=rii), size =3) +
  geom_errorbar(aes(x=area, ymin=lowci_rii, ymax=upci_rii), width = 0.4, size = 1) +
  theme_classic() +
  ylab("RII") +
  xlab("Year") +
  geom_line(aes(x=area,  y=rii), group=1)+
  ylim(0,1.6)
##Export as 900 x 500  

rm(SII)

##Figure 5
Fig5.2013 <- Fig1[Fig1$area=="2013",]
Fig5.2014 <- Fig1[Fig1$area=="2014",]
Fig5.2015 <- Fig1[Fig1$area=="2015",]
Fig5.2016 <- Fig1[Fig1$area=="2016",]
Fig5.2017 <- Fig1[Fig1$area=="2017",]
Fig5.2018 <- Fig1[Fig1$area=="2018",]
Fig5.2019 <- Fig1[Fig1$area=="2019",]
#Fig5.2020 <- Fig1[Fig1$area=="2020",]

Fig5 <- merge(x=Fig5.2013, y=Fig5.2014, by="Quintile", all.x=F, all.y=F)            ##link data sets
colnames(Fig5)[colnames(Fig5) == "value.x"] <- "2013"                               ##rename
colnames(Fig5)[colnames(Fig5) == "value.y"] <- "2014"                               ##rename

Fig5 <- merge(x=Fig5, y=Fig5.2015, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5)[colnames(Fig5) == "value"] <- "2015"                                 ##rename

Fig5 <- merge(x=Fig5, y=Fig5.2016, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5)[colnames(Fig5) == "value"] <- "2016"                                 ##rename

Fig5 <- merge(x=Fig5, y=Fig5.2017, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5)[colnames(Fig5) == "value"] <- "2017"                                 ##rename

Fig5 <- merge(x=Fig5, y=Fig5.2018, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5)[colnames(Fig5) == "value"] <- "2018"                                 ##rename

Fig5 <- merge(x=Fig5, y=Fig5.2019, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5)[colnames(Fig5) == "value"] <- "2019"                                 ##rename

Fig5 <- merge(x=Fig5, y=Fig5.2020, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5)[colnames(Fig5) == "value"] <- "2020"                                 ##rename

Fig5 <- Fig5[ -c(3,5,7,9,11,13,15) ] ##removes columns not needed. ##Add 17 if including 2020.
rm(Fig5.2013, Fig5.2014, Fig5.2015, Fig5.2016, Fig5.2017, Fig5.2018, Fig5.2019, Fig5.2020)

Fig5.2013 <- Fig2[Fig2$Year=="2013", -c(2:4)]
Fig5.2014 <- Fig2[Fig2$Year=="2014", -c(2:4)]
Fig5.2015 <- Fig2[Fig2$Year=="2015", -c(2:4)]
Fig5.2016 <- Fig2[Fig2$Year=="2016", -c(2:4)]
Fig5.2017 <- Fig2[Fig2$Year=="2017", -c(2:4)]
Fig5.2018 <- Fig2[Fig2$Year=="2018", -c(2:4)]
Fig5.2019 <- Fig2[Fig2$Year=="2019", -c(2:4)]
#Fig5.2020 <- Fig2[Fig2$Year=="2020", -c(2:4)]

Fig5.1 <- merge(x=Fig5.2013, y=Fig5.2014, by="Quintile", all.x=F, all.y=F)          ##link data sets
colnames(Fig5.1)[colnames(Fig5.1) == "Spend.x"] <- "Spend2013"                               ##rename
colnames(Fig5.1)[colnames(Fig5.1) == "Spend.y"] <- "Spend2014"                               ##rename

Fig5.1 <- merge(x=Fig5.1, y=Fig5.2015, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5.1)[colnames(Fig5.1) == "Spend"]   <- "Spend2015"                                 ##rename

Fig5.1 <- merge(x=Fig5.1, y=Fig5.2016, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5.1)[colnames(Fig5.1) == "Spend"]   <- "Spend2016"                                 ##rename

Fig5.1 <- merge(x=Fig5.1, y=Fig5.2017, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5.1)[colnames(Fig5.1) == "Spend"]   <- "Spend2017"                                 ##rename

Fig5.1 <- merge(x=Fig5.1, y=Fig5.2018, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5.1)[colnames(Fig5.1) == "Spend"]   <- "Spend2018"                                 ##rename

Fig5.1 <- merge(x=Fig5.1, y=Fig5.2019, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5.1)[colnames(Fig5.1) == "Spend"]   <- "Spend2019"                                 ##rename

Fig5.1 <- merge(x=Fig5.1, y=Fig5.2020, by="Quintile", all.x=F, all.y=F)                 ##link data sets
colnames(Fig5.1)[colnames(Fig5.1) == "Spend"]   <- "Spend2020"                                 ##rename

##This is the overall rates for grouped quintile
Fig5 <- merge(x=Fig5, y=Fig5.1, by="Quintile", all.x=F, all.y=F)          ##link data sets
rm(Fig5.2013, Fig5.2014, Fig5.2015, Fig5.2016, Fig5.2017, Fig5.2018, Fig5.2019, Fig5.2020, Fig5.1)

##Calculate differences
Fig5$`2013_14` <- Fig5$`2014` - Fig5$`2013`
Fig5$`2014_15` <- Fig5$`2015` - Fig5$`2014`
Fig5$`2015_16` <- Fig5$`2016` - Fig5$`2015`
Fig5$`2016_17` <- Fig5$`2017` - Fig5$`2016`
Fig5$`2017_18` <- Fig5$`2018` - Fig5$`2017`
Fig5$`2018_19` <- Fig5$`2019` - Fig5$`2018`
Fig5$`2019_20` <- Fig5$`2020` - Fig5$`2019`

Fig5$`Spend2013_14` <- Fig5$`Spend2014` - Fig5$`Spend2013`
Fig5$`Spend2014_15` <- Fig5$`Spend2015` - Fig5$`Spend2014`
Fig5$`Spend2015_16` <- Fig5$`Spend2016` - Fig5$`Spend2015`
Fig5$`Spend2016_17` <- Fig5$`Spend2017` - Fig5$`Spend2016`
Fig5$`Spend2017_18` <- Fig5$`Spend2018` - Fig5$`Spend2017`
Fig5$`Spend2018_19` <- Fig5$`Spend2019` - Fig5$`Spend2018`
Fig5$`Spend2019_20` <- Fig5$`Spend2020` - Fig5$`Spend2019`

Fig5 <- Fig5[ -c(2:15) ]    ##removes columns                                   ###(!) CHANGE TO 17 FOR SA (!)###

##Long formatting:
Fig5.201314 <- subset(Fig5, select=-c(`2014_15`,    `2015_16`,     `2016_17`,   `2017_18`,     `2018_19`, 
                                      `Spend2014_15`, `Spend2015_16`, `Spend2016_17`,`Spend2017_18`, `Spend2018_19`)) ## add `2019_20`,`Spend2019_20` for SA

Fig5.201415 <- subset(Fig5, select=-c(  `2013_14`,      `2015_16`,     `2016_17`,   `2017_18`,     `2018_19`,
                                      `Spend2013_14`,  `Spend2015_16`, `Spend2016_17`,`Spend2017_18`, `Spend2018_19`)) ## add `2019_20`,`Spend2019_20` for SA
  
Fig5.201516 <- subset(Fig5, select=-c(`2013_14`,  `2014_15`,      `2016_17`,   `2017_18`,     `2018_19`,  
                                      `Spend2013_14`, `Spend2014_15`,  `Spend2016_17`,`Spend2017_18`, `Spend2018_19`)) ## add `2019_20`,`Spend2019_20` for SA
  
Fig5.201617 <- subset(Fig5, select=-c(`2013_14`,  `2014_15`,    `2015_16`,     `2017_18`,     `2018_19`,  
                                      `Spend2013_14`, `Spend2014_15`, `Spend2015_16`, `Spend2017_18`, `Spend2018_19`)) ## add `2019_20`,`Spend2019_20` for SA
  
Fig5.201718 <- subset(Fig5, select=-c(`2013_14`,  `2014_15`,    `2015_16`,     `2016_17`,    `2018_19`,
                                      `Spend2013_14`, `Spend2014_15`, `Spend2015_16`, `Spend2016_17`, `Spend2018_19`) ## add `2019_20`,`Spend2019_20` for SA
  
Fig5.201819 <- subset(Fig5, select=-c(`2013_14`,  `2014_15`,    `2015_16`,     `2016_17`,   `2017_18`, 
                                      `Spend2013_14`, `Spend2014_15`, `Spend2015_16`, `Spend2016_17`,`Spend2017_18`)) ## add `2019_20`,`Spend2019_20` for SA

#Fig5.201920 <- subset(Fig5, select=-c(`2013_14`,  `2014_15`,    `2015_16`,     `2016_17`,   `2017_18`, `2018_19`,    
#                                      `Spend2013_14`, `Spend2014_15`, `Spend2015_16`, `Spend2016_17`,`Spend2017_18`, `Spend2018_19`))

Fig5.201314$Year <- "2013_14"
Fig5.201415$Year <- "2014_15"
Fig5.201516$Year <- "2015_16"
Fig5.201617$Year <- "2016_17"
Fig5.201718$Year <- "2017_18"
Fig5.201819$Year <- "2018_19"
#Fig5.201920$Year <- "2019_20"

colnames(Fig5.201314)[colnames(Fig5.201314) == "Spend2013_14"] <- "Spend"                     ##rename
colnames(Fig5.201415)[colnames(Fig5.201415) == "Spend2014_15"] <- "Spend"                     ##rename
colnames(Fig5.201516)[colnames(Fig5.201516) == "Spend2015_16"] <- "Spend"                     ##rename
colnames(Fig5.201617)[colnames(Fig5.201617) == "Spend2016_17"] <- "Spend"                     ##rename
colnames(Fig5.201718)[colnames(Fig5.201718) == "Spend2017_18"] <- "Spend"                     ##rename
colnames(Fig5.201819)[colnames(Fig5.201819) == "Spend2018_19"] <- "Spend"                     ##rename
#colnames(Fig5.201920)[colnames(Fig5.201920) == "Spend2019_20"] <- "Spend"                    ##rename

colnames(Fig5.201314)[colnames(Fig5.201314) == "2013_14"] <- "DRD"                     ##rename
colnames(Fig5.201415)[colnames(Fig5.201415) == "2014_15"] <- "DRD"                     ##rename
colnames(Fig5.201516)[colnames(Fig5.201516) == "2015_16"] <- "DRD"                     ##rename
colnames(Fig5.201617)[colnames(Fig5.201617) == "2016_17"] <- "DRD"                     ##rename
colnames(Fig5.201718)[colnames(Fig5.201718) == "2017_18"] <- "DRD"                     ##rename
colnames(Fig5.201819)[colnames(Fig5.201819) == "2018_19"] <- "DRD"                     ##rename
#colnames(Fig5.201920)[colnames(Fig5.201920) == "2019_20"] <- "DRD"                    ##rename

Fig5 <- rbind(Fig5.201314, Fig5.201415, Fig5.201516, Fig5.201617, Fig5.201718, Fig5.201819, Fig5.201920) ##add for SA Fig5.201920
rm(Fig5.201314, Fig5.201415, Fig5.201516, Fig5.201617, Fig5.201718, Fig5.201819, Fig5.201920) 

##Fig 5 now in long format for overall quintile change rates between years. 

Fig6 <- subset(Fig5, select=-c(`Spend`))
Fig7 <- subset(Fig5, select=-c(`DRD`))
rm(Fig5)

##Figure 7 - DRD Changes By Quintile Group
ggplot(Fig6, aes(x=Year, y=DRD, group=Quintile, color=Quintile)) +
  geom_line()  +
  ylab("DRD Change Per 100,000 Population") +
  theme_classic() +
  scale_color_manual(values = c("green", "orange","red", "blue","black")) +
  geom_point(aes(x=Year, y=DRD), size = 2)

##Figure 8 - Spend Changes By Quintile Group
ggplot(Fig7, aes(x=Year, y=Spend, group=Quintile, color=Quintile)) +
  geom_line()  +
  ylab("Spend Per Person (�) Annual Change") +
  theme_classic() +
  xlab("Years") +
  scale_color_manual(values = c("green", "orange","red", "blue","black")) +
  geom_point(aes(x=Year, y=Spend), size = 2)

##### *7. Figure 2, 3, 7 & 8 END* #####

##### *8. Figure 5* ####

##Multiple Linear Regression model *Utilising EACH LA VALUE - UNGROUPED*:

#Spend Data needed in long format, not wide
Fig5.2014 <- subset(RQ2, select=-c( `2013 Spend`, `2014 Spend`, `2015 Spend`, `2016 Spend`, `2017 Spend`, `2018 Spend`, `2019 Spend`, `2020 Spend`, 
                                    `2013 Spend_GDP`, `2014 Spend_GDP`, `2015 Spend_GDP`, `2016 Spend_GDP`, `2017 Spend_GDP`, `2018 Spend_GDP`, `2019 Spend_GDP`, `2020 Spend_GDP`,
                                    `2013 Spend_GDP_Pop`,`2014 Spend_GDP_Pop`, `2015 Spend_GDP_Pop`, `2016 Spend_GDP_Pop`, `2017 Spend_GDP_Pop`, `2018 Spend_GDP_Pop`, `2019 Spend_GDP_Pop`, 
                                    `2020 Spend_GDP_Pop`, `SpendDiff14_15`, `SpendDiff15_16`, `SpendDiff16_17`, `SpendDiff17_18`, `SpendDiff18_19`, `SpendDiff19_20`,
                                    `Mid-2020`, `Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`, `Mid-2015`, `Mid-2014`, `Mid-2013`))


Fig5.2015 <- subset(RQ2, select=-c( `2013 Spend`, `2014 Spend`, `2015 Spend`, `2016 Spend`, `2017 Spend`, `2018 Spend`, `2019 Spend`, `2020 Spend`, 
                                    `2013 Spend_GDP`, `2014 Spend_GDP`, `2015 Spend_GDP`, `2016 Spend_GDP`, `2017 Spend_GDP`, `2018 Spend_GDP`, `2019 Spend_GDP`, `2020 Spend_GDP`,
                                    `2013 Spend_GDP_Pop`,`2014 Spend_GDP_Pop`, `2015 Spend_GDP_Pop`, `2016 Spend_GDP_Pop`, `2017 Spend_GDP_Pop`, `2018 Spend_GDP_Pop`, `2019 Spend_GDP_Pop`, 
                                    `2020 Spend_GDP_Pop`, `SpendDiff13_14`,  `SpendDiff15_16`, `SpendDiff16_17`, `SpendDiff17_18`, `SpendDiff18_19`, `SpendDiff19_20`,
                                    `Mid-2020`, `Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`, `Mid-2015`, `Mid-2014`, `Mid-2013`))

Fig5.2016 <- subset(RQ2, select=-c( `2013 Spend`, `2014 Spend`, `2015 Spend`, `2016 Spend`, `2017 Spend`, `2018 Spend`, `2019 Spend`, `2020 Spend`, 
                                    `2013 Spend_GDP`, `2014 Spend_GDP`, `2015 Spend_GDP`, `2016 Spend_GDP`, `2017 Spend_GDP`, `2018 Spend_GDP`, `2019 Spend_GDP`, `2020 Spend_GDP`,
                                    `2013 Spend_GDP_Pop`,`2014 Spend_GDP_Pop`, `2015 Spend_GDP_Pop`, `2016 Spend_GDP_Pop`, `2017 Spend_GDP_Pop`, `2018 Spend_GDP_Pop`, `2019 Spend_GDP_Pop`, 
                                    `2020 Spend_GDP_Pop`, `SpendDiff13_14`, `SpendDiff14_15`, `SpendDiff16_17`, `SpendDiff17_18`, `SpendDiff18_19`, `SpendDiff19_20`,
                                    `Mid-2020`, `Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`, `Mid-2015`, `Mid-2014`, `Mid-2013`))

Fig5.2017 <- subset(RQ2, select=-c( `2013 Spend`, `2014 Spend`, `2015 Spend`, `2016 Spend`, `2017 Spend`, `2018 Spend`, `2019 Spend`, `2020 Spend`, 
                                    `2013 Spend_GDP`, `2014 Spend_GDP`, `2015 Spend_GDP`, `2016 Spend_GDP`, `2017 Spend_GDP`, `2018 Spend_GDP`, `2019 Spend_GDP`, `2020 Spend_GDP`,
                                    `2013 Spend_GDP_Pop`,`2014 Spend_GDP_Pop`, `2015 Spend_GDP_Pop`, `2016 Spend_GDP_Pop`, `2017 Spend_GDP_Pop`, `2018 Spend_GDP_Pop`, `2019 Spend_GDP_Pop`, 
                                    `2020 Spend_GDP_Pop`, `SpendDiff13_14`, `SpendDiff14_15`, `SpendDiff15_16`, `SpendDiff17_18`, `SpendDiff18_19`, `SpendDiff19_20`,
                                    `Mid-2020`, `Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`, `Mid-2015`, `Mid-2014`, `Mid-2013`))


Fig5.2018 <- subset(RQ2, select=-c( `2013 Spend`, `2014 Spend`, `2015 Spend`, `2016 Spend`, `2017 Spend`, `2018 Spend`, `2019 Spend`, `2020 Spend`, 
                                    `2013 Spend_GDP`, `2014 Spend_GDP`, `2015 Spend_GDP`, `2016 Spend_GDP`, `2017 Spend_GDP`, `2018 Spend_GDP`, `2019 Spend_GDP`, `2020 Spend_GDP`,
                                    `2013 Spend_GDP_Pop`,`2014 Spend_GDP_Pop`, `2015 Spend_GDP_Pop`, `2016 Spend_GDP_Pop`, `2017 Spend_GDP_Pop`, `2018 Spend_GDP_Pop`, `2019 Spend_GDP_Pop`, 
                                    `2020 Spend_GDP_Pop`, `SpendDiff13_14`, `SpendDiff14_15`, `SpendDiff15_16`, `SpendDiff16_17`,  `SpendDiff18_19`, `SpendDiff19_20`,
                                    `Mid-2020`, `Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`, `Mid-2015`, `Mid-2014`, `Mid-2013`))

Fig5.2019 <- subset(RQ2, select=-c( `2013 Spend`, `2014 Spend`, `2015 Spend`, `2016 Spend`, `2017 Spend`, `2018 Spend`, `2019 Spend`, `2020 Spend`, 
                                    `2013 Spend_GDP`, `2014 Spend_GDP`, `2015 Spend_GDP`, `2016 Spend_GDP`, `2017 Spend_GDP`, `2018 Spend_GDP`, `2019 Spend_GDP`, `2020 Spend_GDP`,
                                    `2013 Spend_GDP_Pop`,`2014 Spend_GDP_Pop`, `2015 Spend_GDP_Pop`, `2016 Spend_GDP_Pop`, `2017 Spend_GDP_Pop`, `2018 Spend_GDP_Pop`, `2019 Spend_GDP_Pop`, 
                                    `2020 Spend_GDP_Pop`, `SpendDiff13_14`, `SpendDiff14_15`, `SpendDiff15_16`, `SpendDiff16_17`, `SpendDiff17_18`,  `SpendDiff19_20`,
                                    `Mid-2020`, `Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`, `Mid-2015`, `Mid-2014`, `Mid-2013`))

Fig5.2020 <- subset(RQ2, select=-c( `2013 Spend`, `2014 Spend`, `2015 Spend`, `2016 Spend`, `2017 Spend`, `2018 Spend`, `2019 Spend`, `2020 Spend`, 
                                    `2013 Spend_GDP`, `2014 Spend_GDP`, `2015 Spend_GDP`, `2016 Spend_GDP`, `2017 Spend_GDP`, `2018 Spend_GDP`, `2019 Spend_GDP`, `2020 Spend_GDP`,
                                    `2013 Spend_GDP_Pop`,`2014 Spend_GDP_Pop`, `2015 Spend_GDP_Pop`, `2016 Spend_GDP_Pop`, `2017 Spend_GDP_Pop`, `2018 Spend_GDP_Pop`, `2019 Spend_GDP_Pop`, 
                                    `2020 Spend_GDP_Pop`, `SpendDiff13_14`, `SpendDiff14_15`, `SpendDiff15_16`, `SpendDiff16_17`, `SpendDiff17_18`, `SpendDiff18_19`,
                                    `Mid-2020`, `Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`, `Mid-2015`, `Mid-2014`, `Mid-2013`))

Fig5.2014$Year <- "2013_14"
Fig5.2015$Year <- "2014_15"
Fig5.2016$Year <- "2015_16"
Fig5.2017$Year <- "2016_17"
Fig5.2018$Year <- "2017_18"
Fig5.2019$Year <- "2018_19"
Fig5.2020$Year <- "2019_20"

colnames(Fig5.2014)[colnames(Fig5.2014) == "SpendDiff13_14"] <- "Spend"                     ##rename
colnames(Fig5.2015)[colnames(Fig5.2015) == "SpendDiff14_15"] <- "Spend"                     ##rename
colnames(Fig5.2016)[colnames(Fig5.2016) == "SpendDiff15_16"] <- "Spend"                     ##rename
colnames(Fig5.2017)[colnames(Fig5.2017) == "SpendDiff16_17"] <- "Spend"                     ##rename
colnames(Fig5.2018)[colnames(Fig5.2018) == "SpendDiff17_18"] <- "Spend"                     ##rename
colnames(Fig5.2019)[colnames(Fig5.2019) == "SpendDiff18_19"] <- "Spend"                     ##rename
colnames(Fig5.2020)[colnames(Fig5.2020) == "SpendDiff19_20"] <- "Spend"                     ##rename

#DRD Data needed in long format, not wide
Fig5.1.2014 <- subset(RQ1, select=-c(`Income - Average score.x`, `Income - Rank of average score`, `2020`, `2019`, `2018`, `2017`, `2016`, `2015`, `2014`, `2013`, `Mid-2020`,`Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`,`Mid-2015`, `Mid-2014`, `Mid-2013`, `Quintile`,  
                                     `DRP_2020`,  `DRP_2019`,  `DRP_2018`, `DRP_2017`,  `DRP_2016`,  `DRP_2015`, `DRP_2014`,`DRP_2013`,  
                                     `Upper Tier Local Authority District name (2013).x`,
                                     `DRD14_15`, `DRD15_16`, `DRD16_17`, `DRD17_18`, `DRD18_19`, `DRD19_20`))

Fig5.1.2015 <- subset(RQ1, select=-c(`Income - Average score.x`, `Income - Rank of average score`, `2020`, `2019`, `2018`, `2017`, `2016`, `2015`, `2014`, `2013`, `Mid-2020`,`Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`,`Mid-2015`, `Mid-2014`, `Mid-2013`, `Quintile`,  
                                     `DRP_2020`,  `DRP_2019`,  `DRP_2018`, `DRP_2017`,  `DRP_2016`,  `DRP_2015`, `DRP_2014`,`DRP_2013`,  
                                     `Upper Tier Local Authority District name (2013).x`,
                                     `DRD13_14`, `DRD15_16`, `DRD16_17`, `DRD17_18`, `DRD18_19`, `DRD19_20`))

Fig5.1.2016 <- subset(RQ1, select=-c(`Income - Average score.x`, `Income - Rank of average score`, `2020`, `2019`, `2018`, `2017`, `2016`, `2015`, `2014`, `2013`, `Mid-2020`,`Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`,`Mid-2015`, `Mid-2014`, `Mid-2013`, `Quintile`,  
                                     `DRP_2020`,  `DRP_2019`,  `DRP_2018`, `DRP_2017`,  `DRP_2016`,  `DRP_2015`, `DRP_2014`,`DRP_2013`,  
                                     `Upper Tier Local Authority District name (2013).x`,
                                     `DRD13_14`, `DRD14_15`,  `DRD16_17`, `DRD17_18`, `DRD18_19`, `DRD19_20`))

Fig5.1.2017 <- subset(RQ1, select=-c(`Income - Average score.x`, `Income - Rank of average score`, `2020`, `2019`, `2018`, `2017`, `2016`, `2015`, `2014`, `2013`, `Mid-2020`,`Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`,`Mid-2015`, `Mid-2014`, `Mid-2013`, `Quintile`,  
                                     `DRP_2020`,  `DRP_2019`,  `DRP_2018`, `DRP_2017`,  `DRP_2016`,  `DRP_2015`, `DRP_2014`,`DRP_2013`,  
                                     `Upper Tier Local Authority District name (2013).x`,
                                     `DRD13_14`, `DRD14_15`, `DRD15_16`,  `DRD17_18`, `DRD18_19`, `DRD19_20`))

Fig5.1.2018 <- subset(RQ1, select=-c(`Income - Average score.x`, `Income - Rank of average score`, `2020`, `2019`, `2018`, `2017`, `2016`, `2015`, `2014`, `2013`, `Mid-2020`,`Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`,`Mid-2015`, `Mid-2014`, `Mid-2013`, `Quintile`,  
                                     `DRP_2020`,  `DRP_2019`,  `DRP_2018`, `DRP_2017`,  `DRP_2016`,  `DRP_2015`, `DRP_2014`,`DRP_2013`,  
                                     `Upper Tier Local Authority District name (2013).x`,
                                     `DRD13_14`, `DRD14_15`, `DRD15_16`, `DRD16_17`,  `DRD18_19`, `DRD19_20`))

Fig5.1.2019 <- subset(RQ1, select=-c(`Income - Average score.x`, `Income - Rank of average score`, `2020`, `2019`, `2018`, `2017`, `2016`, `2015`, `2014`, `2013`, `Mid-2020`,`Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`,`Mid-2015`, `Mid-2014`, `Mid-2013`, `Quintile`,  
                                     `DRP_2020`,  `DRP_2019`,  `DRP_2018`, `DRP_2017`,  `DRP_2016`,  `DRP_2015`, `DRP_2014`,`DRP_2013`,  
                                     `Upper Tier Local Authority District name (2013).x`,
                                     `DRD13_14`, `DRD14_15`, `DRD15_16`, `DRD16_17`, `DRD17_18`,  `DRD19_20`))

Fig5.1.2020 <- subset(RQ1, select=-c(`Income - Average score.x`, `Income - Rank of average score`, `2020`, `2019`, `2018`, `2017`, `2016`, `2015`, `2014`, `2013`, `Mid-2020`,`Mid-2019`, `Mid-2018`, `Mid-2017`, `Mid-2016`,`Mid-2015`, `Mid-2014`, `Mid-2013`, `Quintile`,  
                                     `DRP_2020`,  `DRP_2019`,  `DRP_2018`, `DRP_2017`,  `DRP_2016`,  `DRP_2015`, `DRP_2014`,`DRP_2013`,  
                                     `Upper Tier Local Authority District name (2013).x`,
                                     `DRD13_14`, `DRD14_15`, `DRD15_16`, `DRD16_17`, `DRD17_18`, `DRD18_19`))

colnames(Fig5.1.2014)[colnames(Fig5.1.2014) == "DRD13_14"] <- "DRD"                     ##rename
colnames(Fig5.1.2015)[colnames(Fig5.1.2015) == "DRD14_15"] <- "DRD"                     ##rename
colnames(Fig5.1.2016)[colnames(Fig5.1.2016) == "DRD15_16"] <- "DRD"                     ##rename
colnames(Fig5.1.2017)[colnames(Fig5.1.2017) == "DRD16_17"] <- "DRD"                     ##rename
colnames(Fig5.1.2018)[colnames(Fig5.1.2018) == "DRD17_18"] <- "DRD"                     ##rename
colnames(Fig5.1.2019)[colnames(Fig5.1.2019) == "DRD18_19"] <- "DRD"                     ##rename
colnames(Fig5.1.2020)[colnames(Fig5.1.2020) == "DRD19_20"] <- "DRD"                     ##rename

Fig5.2.2014  <-  merge(x=Fig5.2014, y=Fig5.1.2014, by="Code", all.x=F, all.y=F)         ##link data sets          
Fig5.2.2015  <-  merge(x=Fig5.2015, y=Fig5.1.2015, by="Code", all.x=F, all.y=F)         ##link data sets          
Fig5.2.2016  <-  merge(x=Fig5.2016, y=Fig5.1.2016, by="Code", all.x=F, all.y=F)         ##link data sets          
Fig5.2.2017  <-  merge(x=Fig5.2017, y=Fig5.1.2017, by="Code", all.x=F, all.y=F)         ##link data sets          
Fig5.2.2018  <-  merge(x=Fig5.2018, y=Fig5.1.2018, by="Code", all.x=F, all.y=F)         ##link data sets          
Fig5.2.2019  <-  merge(x=Fig5.2019, y=Fig5.1.2019, by="Code", all.x=F, all.y=F)         ##link data sets          
Fig5.2.2020  <-  merge(x=Fig5.2020, y=Fig5.1.2020, by="Code", all.x=F, all.y=F)         ##link data sets          

rm(Fig5.2014, Fig5.2015, Fig5.2016, Fig5.2017, Fig5.2018, Fig5.2019, Fig5.2020,  Fig5.1.2014, Fig5.1.2015, Fig5.1.2016, Fig5.1.2017, Fig5.1.2018, Fig5.1.2019, Fig5.1.2020)

Fig5 <- rbind(Fig5.2.2014, Fig5.2.2015, Fig5.2.2016, Fig5.2.2017, Fig5.2.2018, Fig5.2.2019)    ###(!) ADD Fig5.2.2020 FOR SA (!)###

rm(Fig5.2.2014, Fig5.2.2015, Fig5.2.2016, Fig5.2.2017, Fig5.2.2018, Fig5.2.2019, Fig5.2.2020) 
colnames(Fig5)[colnames(Fig5) == "Local authority.x"] <- "LA"    ##rename
#Fig5$Quintile <- ifelse(Fig5$Quintile == "Q1", "1",Fig5$Quintile)
#Fig5$Quintile <- ifelse(Fig5$Quintile == "Q2", "2",Fig5$Quintile)
#Fig5$Quintile <- ifelse(Fig5$Quintile == "Q3", "3",Fig5$Quintile)
#Fig5$Quintile <- ifelse(Fig5$Quintile == "Q4", "4",Fig5$Quintile)
#Fig5$Quintile <- ifelse(Fig5$Quintile == "Q5", "5",Fig5$Quintile)
##Data now in long format. 

##Figure 5 - LA *Ungrouped* Rates Vs Spend Change Trend
ggplot(data = Fig5, aes(x=Spend, y=DRD, colour = Quintile)) +
  geom_point() +
  geom_smooth(method="lm", level = 0.95) +
  theme_classic() + 
  ylab("Annual Change In DRD Rate Per 100,000") +
  xlab("Annual Change In LA Spend Per Person") +
  facet_wrap(~Quintile) +
  scale_color_manual(values = c("green", "orange","red", "blue","black"))
##900 x 500 export

##### *8. Figure 5 END* #####

##### *9. Fixed Effects Models* ####

##Uses same assumptions as simple linear regression: Constant variance, Linearity, Independence of observations, Normality of residuals, error free measurement of X values.
##Fixed-effects regression will be employed which removes unobserved confounders between authorities which remain for each authority. 
##Furthermore, calendar year will be adjusted for as a fixed effect, to account for unobserved confounders between the years.

##Dummy variable
model4 <- lm(DRD ~ Spend + Quintile + factor(Year) + factor(LA), data = Fig5)
summary(model4)
##this allows to look at LA level fixed effects. though it doesn't contribute to spend / IMD discussion. 

##fixed effects accounts for variables not accounted for *if they are fixed over time or fixed over LA* 
  #there could be other omited variables out there.

##not accounting for deprivation or difference within LAs
model5 <- plm(DRD ~ Spend, 
              data= Fig5, 
              index=c("Year"), 
              model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 
summary(model5)
confint(model5)
##Estimate -0.060975 - more spent, less DRD
##95% CI: -0.1478483 0.02589824 - p 0.16928


##within quintiles controlled for, but not differences between LAs 
model6 <- plm(DRD ~ Spend + Quintile, 
              data= Fig5, 
              index=c("Year"), 
              model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 
summary(model6)
#Estimate: Spend -0.059117  
confint(model6) 
#Spend -0.1461402 0.02790585 - the more spent, the less DRD. P= 0.44642


##within Year & LA controlled for 
model7 <- plm(DRD ~ Spend + Quintile, 
              data= Fig5, 
              index=c("LA","Year"), 
              model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 
summary(model7)
#Estimate: Spend -0.048579
confint(model7) 
#Spend -0.1460745 0.04891577 - the more spent, the less DRD. P= 0.32909


##Regression Assumptions: 

#1. Linearity
plot(Fig5$Spend, Fig5$DRD)
abline(Fig5$Spend, Fig5$DRD)

#2. Constant Variance of Residuals
summary(model7, vcovHC) ##makes the standard error robust

#The null hypothesis for the Breusch-Pagan test is homoskedasticity
bptest(model7)
## P<0.05, presence of heteroskedasticity

summary(model7, vcovHC) ##makes the standard error robust
confint(coeftest(model7, vcovHC)) ##makes the standard error robust

summary(model7, vcovHC(model7, method ="arellano")) ##makes the standard error robust - same results as above. 

#3. Normality of Residuals
hist(model7$residuals, freq=F)
curve(dnorm(x, mean = mean(model7$residuals), sd = sd(model7$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(model7$residuals)
##Residuals are approximately normally distributed. The model results appear trustworthy. 


hist(Fig5$Spend, freq=F) ##No signs of non-normality
curve(dnorm(x, mean = mean(Fig5$Spend), sd = sd(Fig5$Spend)), col="red", lwd = 1, add=TRUE)

hist(Fig5$DRD, freq = F)   ##No signs of non-normality
curve(dnorm(x, mean = mean(Fig5$DRD), sd = sd(Fig5$DRD)), col="red", lwd = 1, add=TRUE)

##Models By Quintile
Fig5.Q1 <- Fig5[Fig5$Quintile=="Q1", ]
Fig5.Q2 <- Fig5[Fig5$Quintile=="Q2", ]
Fig5.Q3 <- Fig5[Fig5$Quintile=="Q3", ]
Fig5.Q4 <- Fig5[Fig5$Quintile=="Q4", ]
Fig5.Q5 <- Fig5[Fig5$Quintile=="Q5", ]

##Qintile fixed effects models
model7.Q1 <- plm(DRD ~ Spend, data= Fig5.Q1, index=c("LA","Year"), model="within")
model7.Q2 <- plm(DRD ~ Spend, data= Fig5.Q2, index=c("LA","Year"), model="within")
model7.Q3 <- plm(DRD ~ Spend, data= Fig5.Q3, index=c("LA","Year"), model="within")
model7.Q4 <- plm(DRD ~ Spend, data= Fig5.Q4, index=c("LA","Year"), model="within")
model7.Q5 <- plm(DRD ~ Spend, data= Fig5.Q5, index=c("LA","Year"), model="within")

summary(model7.Q1, vcovHC) ##makes the standard error robust
confint(coeftest(model7.Q1, vcovHC)) ##makes the standard error robust

summary(model7.Q2, vcovHC) ##makes the standard error robust
confint(coeftest(model7.Q2, vcovHC)) ##makes the standard error robust

summary(model7.Q3, vcovHC) ##makes the standard error robust
confint(coeftest(model7.Q3, vcovHC)) ##makes the standard error robust

summary(model7.Q4, vcovHC) ##makes the standard error robust
confint(coeftest(model7.Q4, vcovHC)) ##makes the standard error robust

summary(model7.Q5, vcovHC) ##makes the standard error robust
confint(coeftest(model7.Q5, vcovHC)) ##makes the standard error robust

rm(model4, model5, model6, model7, model7.Q1, model7.Q2, model7.Q3, model7.Q4, model7.Q5,
   Fig5.Q1, Fig5.Q2, Fig5.Q3, Fig5.Q4, Fig5.Q5)

##### *9. Fixed Effects Models END* #####

##### *10. Assumption Checks - Normality Tests By LA Deprivation* ####

NormalityTest <-RQ1[, c(4,22:29)]

Norm13 <-lm(NormalityTest$DRP_2013 ~ NormalityTest$`Income - Rank of average score`)
hist(Norm13$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm13$residuals), sd = sd(Norm13$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm13$residuals)
#plot(Norm13)

Norm14 <-lm(NormalityTest$DRP_2014 ~ NormalityTest$`Income - Rank of average score`)
hist(Norm14$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm14$residuals), sd = sd(Norm14$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm14$residuals)
#plot(Norm14)

Norm15 <-lm(NormalityTest$DRP_2015 ~ NormalityTest$`Income - Rank of average score`)
hist(Norm15$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm15$residuals), sd = sd(Norm15$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm15$residuals)
#plot(Norm15)

Norm16 <-lm(NormalityTest$DRP_2016 ~ NormalityTest$`Income - Rank of average score`)
hist(Norm16$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm16$residuals), sd = sd(Norm16$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm16$residuals)
#plot(Norm16)

Norm17 <-lm(NormalityTest$DRP_2017 ~ NormalityTest$`Income - Rank of average score`)
hist(Norm17$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm17$residuals), sd = sd(Norm17$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm17$residuals)
#plot(Norm17)

Norm18 <-lm(NormalityTest$DRP_2018 ~ NormalityTest$`Income - Rank of average score`)
hist(Norm18$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm18$residuals), sd = sd(Norm18$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm18$residuals)
#plot(Norm18)

Norm19 <-lm(NormalityTest$DRP_2019 ~ NormalityTest$`Income - Rank of average score`)
hist(Norm19$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm19$residuals), sd = sd(Norm19$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm19$residuals)
#plot(Norm19)

Norm20 <-lm(NormalityTest$DRP_2020 ~ NormalityTest$`Income - Rank of average score`)
hist(Norm20$residuals, freq=FALSE)
curve(dnorm(x, mean = mean(Norm20$residuals), sd = sd(Norm20$residuals)), col="red", lwd = 1, add=TRUE)
boxplot(Norm20$residuals)
#plot(Norm20)
rm(NormalityTest, Norm13, Norm14, Norm15, Norm16, Norm17, Norm18, Norm19, Norm20)

##### *10. Assumption Checks - Normality Tests By LA Deprivation END* #####

##### *11. Sensitivity Analysis - SII/RII By LA Deprivation (Continuous Variable)* ####

#Create New Ranking of Deprivation
RQ1 <- RQ1[order(RQ1$`Income - Rank of average score`),]                                        ##order by average income score to rank
RQ1$Rank <- 1:nrow(RQ1)                                                                         ##assign rank 1-147

##2013
SII <- RQ1[, c(37,20,29)]    #-1 to each. 
colnames(SII)[colnames(SII) == "Rank"] <- "quintile"                                            ##rename
colnames(SII)[colnames(SII) == "Mid-2013"] <- "population"                                      ##rename
colnames(SII)[colnames(SII) == "DRP_2013"] <- "value"                                           ##rename

SIIall2013 <- SII                                                                                 

SII$overall_value <- (sum(RQ1$`2013`) / sum(RQ1$`Mid-2013`)*100000)
SII$area <- "2013" 
SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop
SII2013 <- SII                                                                                   

##2014
SII <- RQ1[, c(37,19,28)]    #-1 to each. 
colnames(SII)[colnames(SII) == "Rank"] <- "quintile"                                            ##rename
colnames(SII)[colnames(SII) == "Mid-2014"] <- "population"                                      ##rename
colnames(SII)[colnames(SII) == "DRP_2014"] <- "value"                                           ##rename

SIIall2014 <- SII                                                                                 

SII$overall_value <- (sum(RQ1$`2014`) / sum(RQ1$`Mid-2014`)*100000)
SII$area <- "2014"                                                                                
SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop
SII2014 <- SII       

##2015
SII <- RQ1[, c(37,18,27)]    #-1 to each. 
colnames(SII)[colnames(SII) == "Rank"] <- "quintile"                                            ##rename
colnames(SII)[colnames(SII) == "Mid-2015"] <- "population"                                      ##rename
colnames(SII)[colnames(SII) == "DRP_2015"] <- "value"                                           ##rename

SIIall2015 <- SII                                                                                 

SII$overall_value <- (sum(RQ1$`2015`) / sum(RQ1$`Mid-2015`)*100000)
SII$area <- "2015"                                                                                
SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop
SII2015 <- SII     

##2016
SII <- RQ1[, c(37,17,26)]    #-1 to each. 
colnames(SII)[colnames(SII) == "Rank"] <- "quintile"                                            ##rename
colnames(SII)[colnames(SII) == "Mid-2016"] <- "population"                                      ##rename
colnames(SII)[colnames(SII) == "DRP_2016"] <- "value"                                           ##rename

SIIall2016 <- SII                                                                                 

SII$overall_value <- (sum(RQ1$`2016`) / sum(RQ1$`Mid-2016`)*100000)
SII$area <- "2016"                                                                                
SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop
SII2016 <- SII   

##2017
SII <- RQ1[, c(37,16,25)]    #-1 to each. 
colnames(SII)[colnames(SII) == "Rank"] <- "quintile"                                            ##rename
colnames(SII)[colnames(SII) == "Mid-2017"] <- "population"                                      ##rename
colnames(SII)[colnames(SII) == "DRP_2017"] <- "value"                                           ##rename

SIIall2017 <- SII                                                                                 

SII$overall_value <- (sum(RQ1$`2017`) / sum(RQ1$`Mid-2017`)*100000)
SII$area <- "2017"                                                                                
SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop
SII2017 <- SII    

##2018
SII <- RQ1[, c(37,15,24)]    #-1 to each. 
colnames(SII)[colnames(SII) == "Rank"] <- "quintile"                                            ##rename
colnames(SII)[colnames(SII) == "Mid-2018"] <- "population"                                      ##rename
colnames(SII)[colnames(SII) == "DRP_2018"] <- "value"                                           ##rename

SIIall2018 <- SII                                                                                 

SII$overall_value <- (sum(RQ1$`2018`) / sum(RQ1$`Mid-2018`)*100000)
SII$area <- "2018"                                                                                
SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop
SII2018 <- SII    

##2019
SII <- RQ1[, c(37,14,23)]    #-1 to each. 
colnames(SII)[colnames(SII) == "Rank"] <- "quintile"                                            ##rename
colnames(SII)[colnames(SII) == "Mid-2019"] <- "population"                                      ##rename
colnames(SII)[colnames(SII) == "DRP_2019"] <- "value"                                           ##rename

SIIall2019 <- SII                                                                                 

SII$overall_value <- (sum(RQ1$`2019`) / sum(RQ1$`Mid-2019`)*100000)
SII$area <- "2019"                                                                                
SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop
SII2019 <- SII    

##2020
SII <- RQ1[, c(37,13,22)]    #-1 to each. 
colnames(SII)[colnames(SII) == "Rank"] <- "quintile"                                            ##rename
colnames(SII)[colnames(SII) == "Mid-2020"] <- "population"                                      ##rename
colnames(SII)[colnames(SII) == "DRP_2020"] <- "value"                                           ##rename

SIIall2020 <- SII                                                                                 

SII$overall_value <- (sum(RQ1$`2020`) / sum(RQ1$`Mid-2020`)*100000)
SII$area <- "2020"                                                                                
SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   
SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop
SII2020 <- SII    

##ALL ACROSS STUDY PERIOD
SIIall <- rbind(SIIall2013, SIIall2014, SIIall2015, SIIall2016, SIIall2017, SIIall2018, SIIall2019) #Include SIIall2020 for SA
rm(SIIall2013, SIIall2014, SIIall2015, SIIall2016, SIIall2017, SIIall2018, SIIall2019, SIIall2020)

SII <- aggregate(SIIall$value, list(SIIall$quintile), FUN=mean)             ##select all
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                     ##rename
colnames(SII)[colnames(SII) == "x"] <- "value"                              ##rename

population <- aggregate(SIIall$population, list(SIIall$quintile), FUN=mean) ##create population
colnames(population)[colnames(population) == "Group.1"] <- "quintile"       ##rename to link
colnames(population)[colnames(population) == "x"] <- "population"           ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)          ##link data sets
rm(population)

SII$overall_value <- 6.292717
SII$area <- "2013_19"

SII$most_depr_value <- SII$value[SII$quintile == 1]   
SII$least_depr_value <- SII$value[SII$quintile == 147]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

SIIall <- SII
##

SII <- rbind(SII2013, SII2014, SII2015, SII2016, SII2017, SII2018, SII2019, SIIall)
rm(SII2013, SII2014, SII2015, SII2016, SII2017, SII2018, SII2019, SIIall, SII2020)

#SII$quintile <- as.integer(SII$quintile)

SII <- SII %>% 
  mutate(abs_range = most_depr_value - least_depr_value,
         rel_range = most_depr_value / least_depr_value) %>% ungroup()

## Slope of index on inequality (SII) -- LINEAR
sii_model <- SII %>%  group_by(area) %>% 
  mutate(cumulative_pro = cumsum(proportion_pop),  # cumulative proportion population for each area
         relative_rank = case_when(
           quintile == 1 ~ 0.5*proportion_pop,
           quintile != 1 ~ lag(cumulative_pro) + 0.5*proportion_pop),
         sqr_proportion_pop = sqrt(proportion_pop), #square root of the proportion of the population in each SIMD
         
         relrank_sqr_proppop = relative_rank * sqr_proportion_pop,
         
         value_sqr_proppop = sqr_proportion_pop * value) %>% #value based on population weights
  
  nest() %>% #creating one column called data with all the variables not in the grouping
  
  # Calculating linear regression for all the groups, then formatting the results
  # and calculating the confidence intervals
  mutate(model = map(data, ~ lm(value_sqr_proppop ~ sqr_proportion_pop + relrank_sqr_proppop + 0, data = .)),
         #extracting sii from model, a bit fiddly but it works
         sii = -1 * as.numeric(map(map(model, "coefficients"), "relrank_sqr_proppop")),
         
         cis = map(model, confint_tidy  )) %>% #calculating confidence intervals
  
  ungroup() %>% unnest(cis) %>% #Unnesting the CIs 
  
  #selecting only even row numbers which are the ones that have the sii cis
  filter(row_number() %% 2 == 0) %>% 
  mutate(lowci_sii = -1 * conf.high, #fixing interpretation
         upci_sii = -1 * conf.low) %>% 
  select(-conf.low, -conf.high) #non-needed variables

sii_model

Fig3SA<-  subset(sii_model, select=-c(data,model))
Fig3SA<-Fig3SA[!(Fig3SA$area=="2013_19"),] 

#Merging sii results with main data set
SII <- left_join(SII, sii_model, by = "area")
rm(sii_model)

## Relative index on inequality (RII) - LINEAR
SII <- SII %>% mutate(rii = sii / overall_value,
                      lowci_rii = lowci_sii / overall_value,
                      upci_rii = upci_sii / overall_value,
                      rii_int = rii * 0.5 *100,
                      lowci_rii_int = lowci_rii * 0.5 *100,
                      upci_rii_int = upci_rii * 0.5 *100)

##SA Figure 3 - SII
ggplot(Fig3SA) + 
  geom_point(aes(x=area, y=sii), size =3) +
  geom_errorbar(aes(x=area, ymin=lowci_sii, ymax=upci_sii), width = 0.4, size = 1) +
  theme_classic() +
  ylab("SII") +
  xlab("Year") +
  geom_line(aes(x=area,  y=sii), group=1) +
  ylim(0,12)

##Figure 4 - RII
Fig4SA<- subset(SII, select=-c(value, overall_value, population, most_depr_value, least_depr_value, total_pop, proportion_pop, abs_range, rel_range, data, model, sii, lowci_sii, upci_sii, rii_int,  lowci_rii_int, upci_rii_int  ))
Fig4SA<- Fig4SA[!(Fig4SA$area=="2013_19"),] 
Fig4SA<- Fig4SA[Fig4SA$quintile==1,] ##all quintiles the same, so pick any to remove duplicate rows. 
Fig4SA<- subset(Fig4SA, select=-c(quintile ))
Fig4SA

ggplot(Fig4SA) + 
  geom_point(aes(x=area, y=rii), size =3) +
  geom_errorbar(aes(x=area, ymin=lowci_rii, ymax=upci_rii), width = 0.4, size = 1) +
  theme_classic() +
  ylab("RII") +
  xlab("Year") +
  geom_line(aes(x=area,  y=rii), group=1)+
  ylim(0,1.6)

rm(Fig3SA, Fig4SA)

##### *11. Sensitivity Analysis - SII/RII By LA Deprivation (Continuous Variable) END* #####

##### *12. Sensitivity Analysis - RQ2 Fixed Effects By LA Deprivation (Continuous Variable)* ####

#Create New Ranking of Deprivation
RQ1 <- RQ1[order(RQ1$`Income - Rank of average score`),]                                        ##order by average income score to rank
RQ1$Rank <- 1:nrow(RQ1)                                                                         ##assign rank 1-147

LArank <- RQ1[, c(1,37)] 
Fig5SA <- merge(x=Fig5, y=LArank, by="Code", all.x=F, all.y=F)                                  ##link data sets
Fig5SA <- Fig5SA[, -c(3)] 
rm(LArank)

##within quintiles controlled for, but not differences between LAs 
model6SA <- plm(DRD ~ Spend + Rank, 
              data= Fig5SA, 
              index=c("Year"), 
              model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 
summary(model6SA)
#Estimate: Spend -0.0579557  
confint(model6SA) 
#Spend -0.144815445 0.0289039709 - the more spent, the less DRD. p-value: 0.098456 (similar est and CI, lower P than model6)

##within quintiles & LA controlled for --SA IS THE SAME AS FIXED BETWEEN LAs(!)
model7SA <- plm(DRD ~ Spend + Rank, 
              data= Fig5SA, 
              index=c("LA","Year"), 
              model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 


summary(model7SA, vcovHC)
#Estimate: Spend -0.048579
confint(coeftest(model7SA,vcovHC)) 
#Spend -0.1423328 0.04517408 - the more spent, the less DRD. P= 0.32909


##within quintiles & LA controlled for --SA IS THE SAME AS FIXED BETWEEN LAs(!)
model8SA <- plm(DRD ~ Spend * Rank, 
                data= Fig5SA, 
                index=c("LA","Year"), 
                model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 


summary(model8SA, vcovHC)
#Estimate: Spend -0.048579
confint(coeftest(model8SA,vcovHC)) 
#Spend -0.1423328 0.04517408 - the more spent, the less DRD. P= 0.32909

##within quintiles & LA controlled for --SA IS THE SAME AS FIXED BETWEEN LAs(!)
model9SA <- plm(DRD ~ Spend + Spend * Rank, 
                data= Fig5SA, 
                index=c("LA","Year"), 
                model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 

summary(model9SA, vcovHC)
confint(coeftest(model9SA,vcovHC)) 

##within quintiles & LA controlled for --SA IS THE SAME AS FIXED BETWEEN LAs(!)
model10SA <- plm(DRD ~ Spend + Spend * Rank, 
                data= Fig5SA, 
                index=c("Year"), 
                model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 

summary(model10SA, vcovHC)
confint(coeftest(model10SA,vcovHC)) 

rm(model6SA, model7SA, model8SA, model9SA)

##### *12. Sensitivity Analysis - RQ2 Fixed Effects By LA Deprivation (Continuous Variable) END* #####

##### *13. Sensitivity Analysis - RQ2 Random Effects* ####

##within quintiles & LA controlled for - RANDOM EFFECTS
model7RE <- plm(DRD ~ Spend + Quintile, 
              data= Fig5, 
              index=c("LA","Year"), 
              model="random")         ##RANDOM EFFECTS
summary(model7RE)
#Estimate: Spend FIXED =  -0.048579, RANDOM = -0.053156
confint(model7RE) 
#Spend WAS (FE): -0.1460745 0.04891577 NOW(RE): -0.1389663 0.0326537 WAS: P= 0.32909 NOW: 0.48939
??phtest
#Hausman rest compares random and fixed effects models:
phtest(model7, model7RE)
##P>0.05 so reject that they are different. 

bptest(model7RE)
## P<0.05, presence of heteroskedasticity

#Breusch-Godfrey/Wooldridge test for serial correlation in panel models:
pbgtest(model7RE) ##presence of serial correlation, therefore white2 is appropriate. 

summary(model7RE, vcovHC(model7RE, method ="white2")) ##makes the standard error robust
confint(coeftest(model7RE, vcovHC(model7RE, method ="white2"))) ##makes the standard error robust

##Models By Quintile
Fig5.Q1 <- Fig5[Fig5$Quintile=="Q1", ]
Fig5.Q2 <- Fig5[Fig5$Quintile=="Q2", ]
Fig5.Q3 <- Fig5[Fig5$Quintile=="Q3", ]
Fig5.Q4 <- Fig5[Fig5$Quintile=="Q4", ]
Fig5.Q5 <- Fig5[Fig5$Quintile=="Q5", ]

##Qintile fixed effects models
model7.Q1RE <- plm(DRD ~ Spend, data= Fig5.Q1, index=c("LA","Year"), model="random")
model7.Q2RE <- plm(DRD ~ Spend, data= Fig5.Q2, index=c("LA","Year"), model="random")
model7.Q3RE <- plm(DRD ~ Spend, data= Fig5.Q3, index=c("LA","Year"), model="random")
model7.Q4RE <- plm(DRD ~ Spend, data= Fig5.Q4, index=c("LA","Year"), model="random")
model7.Q5RE <- plm(DRD ~ Spend, data= Fig5.Q5, index=c("LA","Year"), model="random")

summary(model7.Q1RE) #WAS(FE) - Estimate: Spend -0.084857               NOW:(RE) -0.08989
confint(model7.Q1RE) #WAS(FE) - Spend -0.3176591 0.1479448  P= 0.47609  NOW:(RE) -0.29385114 0.1140719 P= 0.3877

summary(model7.Q2RE) #WAS - Estimate: Spend -0.050293               NOW: -0.053211
confint(model7.Q2RE) #WAS - Spend -0.2609926 0.160406  P= 0.64061   NOW: -0.2317933 0.1253706

summary(model7.Q3RE) #WAS - Estimate: Spend 0.0042879               NOW: -0.01610
confint(model7.Q3RE) #WAS - Spend -0.2560109 0.2645867 P= 0.97429   NOW: -0.25181747 0.2196182
 
summary(model7.Q4RE) #WAS - Estimate: 0.0012619                     NOW: 0.011696
confint(model7.Q4RE) #WAS - Spend -0.1452045 0.1477284 P= 0.98655   NOW: -0.11852602 0.1419173

summary(model7.Q5RE) #WAS - Estimate: -0.104683                     NOW: -0.100162
confint(model7.Q5RE) #WAS - Spend -0.2981294 0.08876264 P= 0.29057  NOW: -0.27423715 0.07391228

rm(model7RE, model7.Q1RE, model7.Q2RE, model7.Q3RE, model7.Q4RE, model7.Q5RE, Fig5.Q1, Fig5.Q2, Fig5.Q3, Fig5.Q4, Fig5.Q5)

##### *13. Sensitivity Analysis - RQ2 Random Effects END* #####

##### *14. Sensitivity Analysis - Inclusion of 2020 SII/RII* ####

##2020 SII
SII <- aggregate(RQ1$`2020`, list(RQ1$Quintile), FUN=sum)                                         ##YEAR
colnames(SII)[colnames(SII) == "Group.1"] <- "quintile"                                           ##rename to quintile
colnames(SII)[colnames(SII) == "x"] <- "value"                                                    ##rename to value

population <- aggregate(RQ1$`Mid-2020`, list(RQ1$Quintile), FUN=sum)                              ##YEAR
colnames(population)[colnames(population) == "Group.1"] <- "quintile"                             ##rename to link
SII <- merge(x=SII, y=population, by="quintile", all.x=F, all.y=F)                                ##link data sets
rm(population)
colnames(SII)[colnames(SII) == "x"] <- "population"                                               ##rename to value

#SIIall2020 <- SII                                                                                 ##Year

SII$overall_value <- (sum(SII$value) / sum(SII$population)*100000)
SII$value <- ((SII$value / SII$population)*100000)

SII$area <- "2020"                                                                                ##YEAR

SII$most_depr_value <- SII$value[SII$quintile == "Q1"]   
SII$least_depr_value <- SII$value[SII$quintile == "Q5"]   

SII$total_pop <- sum(SII$population)
SII$proportion_pop <- SII$population/SII$total_pop

#SII2020 <- SII   
SII$quintile <- ifelse(SII$quintile == "Q1", "1",SII$quintile)
SII$quintile <- ifelse(SII$quintile == "Q2", "2",SII$quintile)
SII$quintile <- ifelse(SII$quintile == "Q3", "3",SII$quintile)
SII$quintile <- ifelse(SII$quintile == "Q4", "4",SII$quintile)
SII$quintile <- ifelse(SII$quintile == "Q5", "5",SII$quintile)
SII$quintile <- as.integer(SII$quintile)

SII <- SII %>% 
  mutate(abs_range = most_depr_value - least_depr_value,
         rel_range = most_depr_value / least_depr_value) %>% ungroup()

## Slope of index of inequality (SII) -- LINEAR
sii_model <- SII %>%  group_by(area) %>% 
  mutate(cumulative_pro = cumsum(proportion_pop),  # cumulative proportion population for each area
         relative_rank = case_when(
           quintile == 1 ~ 0.5*proportion_pop,
           quintile != 1 ~ lag(cumulative_pro) + 0.5*proportion_pop),
         sqr_proportion_pop = sqrt(proportion_pop), #square root of the proportion of the population in each SIMD
         
         relrank_sqr_proppop = relative_rank * sqr_proportion_pop,
         
         value_sqr_proppop = sqr_proportion_pop * value) %>% #value based on population weights
  
  nest() %>% #creating one column called data with all the variables not in the grouping
  
  # Calculating linear regression for all the groups, then formatting the results
  # and calculating the confidence intervals
  mutate(model = map(data, ~ lm(value_sqr_proppop ~ sqr_proportion_pop + relrank_sqr_proppop + 0, data = .)),
         #extracting sii from model, a bit fiddly but it works
         sii = -1 * as.numeric(map(map(model, "coefficients"), "relrank_sqr_proppop")),
         
         cis = map(model, confint_tidy  )) %>% #calculating confidence intervals
  
  ungroup() %>% unnest(cis) %>% #Unnesting the CIs 
  
  #selecting only even row numbers which are the ones that have the sii cis
  filter(row_number() %% 2 == 0) %>% 
  mutate(lowci_sii = -1 * conf.high, #fixing interpretation
         upci_sii = -1 * conf.low) %>% 
  select(-conf.low, -conf.high) #non-needed variables

sii_model

#Merging sii results with main data set
SII <- left_join(SII, sii_model, by = "area")
rm(sii_model)

## Relative index on inequality (RII) - LINEAR
SII <- SII %>% mutate(rii = sii / overall_value,
                      lowci_rii = lowci_sii / overall_value,
                      upci_rii = upci_sii / overall_value,
                      rii_int = rii * 0.5 *100,
                      lowci_rii_int = lowci_rii * 0.5 *100,
                      upci_rii_int = upci_rii * 0.5 *100)

##### *14. Sensitivity Analysis - Inclusion of 2020 SII/RII END* #####

##### *15. Sensitivity Analysis - Inclusion of 2020 FE Models* ####

##2013 - 2020 FIXED EFFECT MODELS: 
model7_SA20 <- plm(DRD ~ Spend + Quintile, 
              data= Fig5, 
              index=c("LA","Year"), 
              model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 
summary(model7_SA20)
#Estimate: WAS: Spend -0.048579 NOW: -0.063953
confint(model7_SA20) 
#Spend WAS: -0.1460745 0.04891577 P= 0.32909. NOW: -0.1585936 0.03068697 P= 0.1857

bptest(model7_SA20)
## P<0.05, presence of heteroskedasticity

summary(model7_SA20, vcovHC) ##makes the standard error robust
confint(coeftest(model7_SA20, vcovHC)) ##makes the standard error robust

##Models By Quintile
Fig5.Q1_SA20 <- Fig5[Fig5$Quintile=="Q1", ]
Fig5.Q2_SA20 <- Fig5[Fig5$Quintile=="Q2", ]
Fig5.Q3_SA20 <- Fig5[Fig5$Quintile=="Q3", ]
Fig5.Q4_SA20 <- Fig5[Fig5$Quintile=="Q4", ]
Fig5.Q5_SA20 <- Fig5[Fig5$Quintile=="Q5", ]

##Qintile fixed effects models
model7.Q1_SA20 <- plm(DRD ~ Spend, data= Fig5.Q1_SA20, index=c("LA","Year"), model="within")
model7.Q2_SA20 <- plm(DRD ~ Spend, data= Fig5.Q2_SA20, index=c("LA","Year"), model="within")
model7.Q3_SA20 <- plm(DRD ~ Spend, data= Fig5.Q3_SA20, index=c("LA","Year"), model="within")
model7.Q4_SA20 <- plm(DRD ~ Spend, data= Fig5.Q4_SA20, index=c("LA","Year"), model="within")
model7.Q5_SA20 <- plm(DRD ~ Spend, data= Fig5.Q5_SA20, index=c("LA","Year"), model="within")

summary(model7.Q1_SA20, vcovHC)
confint(model7.Q1_SA20, vcovHC() 

confint_robust(model7.Q1_SA20, level=0.95, HC_type="HC3")

##Long \& Ervin (2000) conduct a simulation study of HC estimators (HC0 to HC3) in the linear regression model, 
##recommending to use HC3 which is thus the default in vcovHC

summary(model7.Q2_SA20, vcovHC) 
confint_robust(model7.Q2_SA20, level=0.95, HC_type="HC3")

summary(model7.Q3_SA20, vcovHC) 
confint_robust(model7.Q3_SA20, level=0.95, HC_type="HC3")

summary(model7.Q4_SA20, vcovHC)
confint_robust(model7.Q4_SA20, level=0.95, HC_type="HC3")

summary(model7.Q5_SA20, vcovHC) 
confint_robust(model7.Q5_SA20, level=0.95, HC_type="HC3")

rm(model7_SA20, model7.Q1_SA20, model7.Q2_SA20, model7.Q3_SA20, model7.Q4_SA20, model7.Q5_SA20, Fig5.Q1_SA20,
   Fig5.Q2_SA20, Fig5.Q3_SA20, Fig5.Q4_SA20, Fig5.Q5_SA20)

##### *15. Sensitivity Analysis - Inclusion of 2020 FE Models END* #####

##### *16. Sensitivity Analysis - Mortality Lagged By A Year* ####

##Run Fig5 Code
Fig5SA_lag <- Fig5[,(1:5)]  ##THIS IS SPEND DIFF, Remains constant. 
Fig5SA_lag2 <- Fig5[,c(1,2,3,5,6)]

##Fig5SA_lag = SPEND CHANGES
##Fig5SA_lag2 = DRD CHANGES

##Lag DRD
Fig5SA_lag2$Year[Fig5SA_lag2$Year == "2013_14"] <- "2012_13"
Fig5SA_lag2$Year[Fig5SA_lag2$Year == "2014_15"] <- "2013_14"
Fig5SA_lag2$Year[Fig5SA_lag2$Year == "2015_16"] <- "2014_15"
Fig5SA_lag2$Year[Fig5SA_lag2$Year == "2016_17"] <- "2015_16"
Fig5SA_lag2$Year[Fig5SA_lag2$Year == "2017_18"] <- "2016_17"
Fig5SA_lag2$Year[Fig5SA_lag2$Year == "2018_19"] <- "2017_18"

##remove 1819 from lag
Fig5SA_lag<-Fig5SA_lag[!(Fig5SA_lag$Year =="2018_19"),]
##remove 13_14 from lag2
Fig5SA_lag2<-Fig5SA_lag2[!(Fig5SA_lag2$Year =="2012_13"),]

##Join back with DRD lagged
Fig5SA_lag3 <- merge(Fig5SA_lag, Fig5SA_lag2, by.x=c("Code","LA","Quintile","Year"), by.y=c("Code","LA","Quintile","Year") )
rm(Fig5SA_lag, Fig5SA_lag2)

##Figure 5 - LA *Ungrouped* Rates Vs Spend Change Trend
ggplot(data = Fig5SA_lag3, aes(x=Spend, y=DRD, colour = Quintile)) +
  geom_point() +
  geom_smooth(method="lm", level = 0.95) +
  theme_classic() + 
  ylab("Annual Change In DRD Rate Per 100,000") +
  xlab("Annual Change In LA Spend Per Person") +
  facet_wrap(~Quintile) +
  scale_color_manual(values = c("green", "orange","red", "blue","black"))
##900 x 500 export

##within quintiles & LA controlled for 
model7_lag <- plm(DRD ~ Spend + Quintile, 
              data= Fig5SA_lag3, 
              index=c("LA","Year"), 
              model="within")         ##this means within the LA and within the Year   ##fixed effects year  -within can do more effects, e.g. random. specifiy this in the model = part. 

bptest(model7_lag)

summary(model7_lag, vcovHC)
confint_robust(model7_lag, level=0.95, HC_type="HC3")

##Models By Quintile
Fig5SA_lag3.Q1 <- Fig5SA_lag3[Fig5SA_lag3$Quintile=="Q1", ]
Fig5SA_lag3.Q2 <- Fig5SA_lag3[Fig5SA_lag3$Quintile=="Q2", ]
Fig5SA_lag3.Q3 <- Fig5SA_lag3[Fig5SA_lag3$Quintile=="Q3", ]
Fig5SA_lag3.Q4 <- Fig5SA_lag3[Fig5SA_lag3$Quintile=="Q4", ]
Fig5SA_lag3.Q5 <- Fig5SA_lag3[Fig5SA_lag3$Quintile=="Q5", ]

##Qintile fixed effects models
model7_lag.Q1 <- plm(DRD ~ Spend, data= Fig5SA_lag3.Q1, index=c("LA","Year"), model="within")
model7_lag.Q2 <- plm(DRD ~ Spend, data= Fig5SA_lag3.Q2, index=c("LA","Year"), model="within")
model7_lag.Q3 <- plm(DRD ~ Spend, data= Fig5SA_lag3.Q3, index=c("LA","Year"), model="within")
model7_lag.Q4 <- plm(DRD ~ Spend, data= Fig5SA_lag3.Q4, index=c("LA","Year"), model="within")
model7_lag.Q5 <- plm(DRD ~ Spend, data= Fig5SA_lag3.Q5, index=c("LA","Year"), model="within")

summary(model7_lag.Q1, vcovHC) 
confint_robust(model7_lag.Q1, level=0.95, HC_type="HC3")

summary(model7_lag.Q2, vcovHC) 
confint_robust(model7_lag.Q2, level=0.95, HC_type="HC3")

summary(model7_lag.Q3, vcovHC) 
confint_robust(model7_lag.Q3, level=0.95, HC_type="HC3")

summary(model7_lag.Q4, vcovHC) 
confint_robust(model7_lag.Q4, level=0.95, HC_type="HC3")

summary(model7_lag.Q5, vcovHC) 
confint_robust(model7_lag.Q5, level=0.95, HC_type="HC3")

rm(model7_lag, model7_lag.Q1, model7_lag.Q2, model7_lag.Q3, model7_lag.Q4, model7_lag.Q5, Fig5SA_lag3, Fig5SA_lag3.Q1, Fig5SA_lag3.Q2,
   Fig5SA_lag3.Q3, Fig5SA_lag3.Q4, Fig5SA_lag3.Q5) 

##### *16. Sensitivity Analysis - Mortality Lagged By A Year END* #####

##-->-- End of Code --<--##