### IMPORTANT: For micronutrients other than "B1", please edit the variable name in line 35 ### # Last updated: October 4 2021 # Ben Nichols, University of Glasgow # This R code will output z-scores for patient and control data # It will also output centile plots for patients, controls and a combination of both # Model selection is described in the paper: # "Development of an age-dependent micronutrient centile charts and their utility in children with chronic gastrointestinal conditions at risk of deficiencies: A proof-of-concept study", Al Fify et al. 2021 # Requires two csv files named "cases.csv" and "controls.csv" # A copy of "controls.csv" using our data is available # Both files must have columns named "ID", "Age", and at least one of these micronutrients: # "B1", "B2", "B6", "ZnRBC", "CuRBC", "SeRBC", "MgRBC", "Zn", "Cu", "Se", "Mg" # Spelling and capitalisation must be exactly as above # Both files must be saved in the current working directory # For micronutrients other than "B1", please edit the variable name in line 35 # No further editing should be necessary but users are welcome to adapt and redistribute (with credit) # Install required packages # If there are errors then please consult documentation for 'gamlss', 'reshape', or 'ggplot2' if(!require('gamlss')) install.packages("gamlss") if(!require('reshape')) install.packages("reshape") if(!require('ggplot2')) install.packages("ggplot2") rm(list=ls()) oldw <- getOption("warn") options(warn = -1) library(gamlss) library(reshape) library(ggplot2) # Specify the names of the variables of interest control_file <- "controls.csv" case_file <- "cases.csv" x_variable <- "Age" y_variable <- "B1" cent_vals <- c(2,9,25,50,75,91,98) max_centile <- 99.99999 # Read in model data and case data dfHealthy <- read.csv(control_file) dfCase <- read.csv(case_file) dfHealthy[,y_variable] <- as.numeric(as.character(dfHealthy[,y_variable])) dfCase[,y_variable] <- as.numeric(as.character(dfCase[,y_variable])) dfHealthy[,x_variable] <- as.numeric(as.character(dfHealthy[,x_variable])) dfCase[,x_variable] <- as.numeric(as.character(dfCase[,x_variable])) Var <- dfHealthy[,y_variable] Var_x <- dfHealthy[,x_variable] dfHealthy <- cbind(dfHealthy,Var,Var_x) Var <- dfCase[,y_variable] Var_x <- dfCase[,x_variable] dfCase <- cbind(dfCase,Var,Var_x) myvals <- c("ID","Var","Var_x") # Drop NA records dfHealthy <- dfHealthy[myvals] dfHealthy_output <- dfHealthy dfHealthy <- subset(dfHealthy , (!is.na(Var)), drop=T) dfHealthy <- subset(dfHealthy , (!is.na(Var_x)), drop=T) dfCase <- dfCase[myvals] dfCase_output <- dfCase dfCase <- subset(dfCase , (!is.na(Var)), drop=T) dfCase <- subset(dfCase , (!is.na(Var_x)), drop=T) # Define models for M/F data if(y_variable == "B1") mod <- gamlss(Var ~ Var_x, data = dfHealthy, family = "IG") if(y_variable == "B2") mod <- gamlss(Var ~ Var_x, data = dfHealthy, family = "IG") if(y_variable == "B6") mod <- gamlss(Var ~ Var_x, data = dfHealthy, family = "IG") if(y_variable == "ZnRBC") mod <- gamlss(Var ~ pb(Var_x, df = 1), data = dfHealthy, family = "BCCG") if(y_variable == "CuRBC") mod <- gamlss(Var ~ pb(Var_x, df = 1), data = dfHealthy, family = "BCCG") if(y_variable == "SeRBC") mod <- gamlss(Var ~ cs(Var_x, df = 1), data = dfHealthy, family = "BCCG") if(y_variable == "MgRBC") mod <- gamlss(Var ~ cs(Var_x, df = 1), data = dfHealthy, family = "BCCG") if(y_variable == "Zn") mod <- gamlss(Var ~ cs(Var_x, df = 0), data = dfHealthy, family = "BCCG") if(y_variable == "Cu") mod <- gamlss(Var ~ Var_x, data = dfHealthy, family = "GA") if(y_variable == "Se") mod <- gamlss(Var ~ pb(Var_x, df = 1), data = dfHealthy, family = "BCCG") if(y_variable == "Mg") mod <- gamlss(Var ~ pb(Var_x, df = 1), data = dfHealthy, family = "BCCG") # z-scores for model dfHealthy$VarZScores <- resid(mod, what=c("z-scores")) dfHealthy$Cent <- pnorm(dfHealthy$VarZScores)*100 Centiles <- centiles.pred(mod, xname="Var_x",xvalues=dfHealthy$Var_x,yval=dfHealthy$Var, type="centiles",cent=cent_vals) colnames(Centiles)[1] <- x_variable cent.melted <- melt(Centiles,id.vars = x_variable) cent.melted$Centile <- cent.melted$variable colnames(cent.melted)[1] <- "Var_x" cols <- c("red","blue","green","black","green","blue","red") # Predict z-scores for sick patient data dfCase$VarZScores <- centiles.pred(mod, xname="Var_x",xvalues=dfCase$Var_x,yval=dfCase$Var, type="z-scores") summary(dfCase$VarZScores) # Get centile information for sick patients dfCase$Cent <- pnorm(dfCase$VarZScores)*100 dfHealthy$Cent[dfHealthy$Cent > max_centile] <- max_centile dfHealthy$Cent[dfHealthy$Cent < (100-max_centile)] <- (100-max_centile) dfHealthy$VarZScores[dfHealthy$VarZScores > qnorm(max_centile/100)] <- qnorm(max_centile/100) dfHealthy$VarZScores[dfHealthy$VarZScores < qnorm(1-max_centile/100)] <- qnorm(1-max_centile/100) dfCase$Cent[dfCase$Cent > max_centile] <- max_centile dfCase$Cent[dfCase$Cent < (100-max_centile)] <- (100-max_centile) dfCase$VarZScores[dfCase$VarZScores > qnorm(max_centile/100)] <- qnorm(max_centile/100) dfCase$VarZScores[dfCase$VarZScores < qnorm(1-max_centile/100)] <- qnorm(1-max_centile/100) dfHealthy_output$VarZScores <- NA dfHealthy_output[rownames(dfHealthy),]$VarZScores <- dfHealthy$VarZScores dfHealthy_output$Cent <- NA dfHealthy_output[rownames(dfHealthy),]$Cent <- dfHealthy$Cent dfCase_output$VarZScores <- NA dfCase_output[rownames(dfCase),]$VarZScores <- dfCase$VarZScores dfCase_output$Cent <- NA dfCase_output[rownames(dfCase),]$Cent <- dfCase$Cent # Output z-scores colnames(dfHealthy_output) <- c("ID",y_variable,x_variable,"ZScore","Cent") colnames(dfCase_output) <- c("ID",y_variable,x_variable,"ZScore","Cent") write.csv(dfHealthy_output, file = paste0(y_variable, "_ZScores_Healthy.csv"), row.names=F, na="*") write.csv(dfCase_output, file = paste0(y_variable, "_ZScores_Sick.csv"), row.names=F, na="*") # Plots p <- ggplot(cent.melted,aes(Var_x)) + theme_bw() p <- p + geom_line(aes(Var_x,value,color=Centile)) p <- p + scale_color_manual(values = cols) p <- p + xlab(x_variable) + ylab(y_variable) p1 <- p + geom_point(data = dfCase, aes(Var_x,Var),size=1,alpha=0.75) p2 <- p + geom_point(data = dfHealthy, aes(Var_x,Var),size=1,alpha=0.75) dfAll <- rbind(dfCase,dfHealthy) dfAll$Type <- "Healthy" dfAll$Type[1:nrow(dfCase)] <- "Sick" p3 <- p + geom_point(data = dfAll, aes(Var_x,Var,fill=Type),shape=21,size=1.5,alpha=0.75) p3 <- p3 + scale_fill_manual(values=c("white","red")) pdf(paste0(y_variable,"_Plot_Sick.pdf")) plot(p1) dev.off() pdf(paste0(y_variable,"_Plot_Healthy.pdf")) plot(p2) dev.off() pdf(paste0(y_variable,"_Plot_All.pdf")) plot(p3) dev.off()