# Last updated: March 29 2021 # Ben Nichols, University of Glasgow # This R code will output z-scores for patient and control data split into M and F # It will also output centile plots for patients, controls and a combination of both # Model selection is described in the paper: # "Handgrip strength as a surrogate marker of lean mass and risk of malnutrition in paediatric patients", Mckirdy et al. 2021 # Requires two csv files named "cases.csv" and "controls.csv" # A copy of "controls.csv" using our data is supplied # Both files must have columns named "ID","Gender","Max Grip Strength", and "Height" # Both files must be saved in the current working directory # This script will run as is, without editing, but users are welcome to adapt and redistribute (with credit) # Install required packages # If there are errors then please consult documentation for 'gamlss', 'reshape', or 'ggplot2 if(!require('gamlss')) install.packages("gamlss") if(!require('reshape')) install.packages("reshape") if(!require('ggplot2')) install.packages("ggplot2") rm(list=ls()) oldw <- getOption("warn") options(warn = -1) library(gamlss) library(reshape) library(ggplot2) # Specify the names of the variables of interest control_file <- "controls.csv" case_file <- "cases.csv" x_variable <- "Height" y_variable <- "Max.Grip.Strength" cent_vals <- c(2,9,25,50,75,91,98) max_centile <- 99.99999 # Read in model data and case data dfHealthy <- read.csv(control_file) dfCase <- read.csv(case_file) dfHealthy$Gender <- as.factor(trimws(as.character(dfHealthy$Gender))) dfCase$Gender <- as.factor(trimws(as.character(dfCase$Gender))) dfHealthy[,y_variable] <- as.numeric(as.character(dfHealthy[,y_variable])) dfCase[,y_variable] <- as.numeric(as.character(dfCase[,y_variable])) dfHealthy[,x_variable] <- as.numeric(as.character(dfHealthy[,x_variable])) dfCase[,x_variable] <- as.numeric(as.character(dfCase[,x_variable])) Var <- dfHealthy[,y_variable] Var_x <- dfHealthy[,x_variable] dfHealthy <- cbind(dfHealthy,Var,Var_x) Var <- dfCase[,y_variable] Var_x <- dfCase[,x_variable] dfCase <- cbind(dfCase,Var,Var_x) myvals <- c("ID","Gender","Var","Var_x") # Drop NA records dfHealthy <- dfHealthy[myvals] dfHealthy_output <- dfHealthy dfHealthy <- subset(dfHealthy , (!is.na(Var)), drop=T) dfHealthy <- subset(dfHealthy , (!is.na(Var_x)), drop=T) dfCase <- dfCase[myvals] dfCase_output <- dfCase dfCase <- subset(dfCase , (!is.na(Var)), drop=T) dfCase <- subset(dfCase , (!is.na(Var_x)), drop=T) # Split into M/F dfHealthy.M <- subset(dfHealthy, (Gender %in% c("M", "Male", "m", "male", "MALE")), drop=T) dfHealthy.F <- subset(dfHealthy, (Gender %in% c("F", "Female", "f", "female", "FEMALE")), drop=T) dfCase.M <- subset(dfCase, (Gender %in% c("M", "Male", "m", "male", "MALE")), drop=T) dfCase.F <- subset(dfCase, (Gender %in% c("F", "Female", "f", "female", "FEMALE")), drop=T) # Define models for M/F data mod.M <- gamlss(Var ~ pb(Var_x, df = 3), data = dfHealthy.M, family = "BCT") mod.F <- gamlss(Var ~ pb(Var_x, df = 1), data = dfHealthy.F, family = "BCT") # z-scores for model dfHealthy.M$VarZScores <- resid(mod.M, what=c("z-scores")) dfHealthy.M$Cent <- pnorm(dfHealthy.M$VarZScores)*100 Centiles.M <- centiles.pred(mod.M, xname="Var_x",xvalues=dfHealthy.M$Var_x,yval=dfHealthy.M$Var, type="centiles",cent=cent_vals) colnames(Centiles.M)[1] <- x_variable cent.melted <- melt(Centiles.M,id.vars = x_variable) cent.melted$Centile <- cent.melted$variable colnames(cent.melted)[1] <- "Var_x" cols <- c("red","blue","green","black","green","blue","red") # Predict z-scores for sick patient data dfCase.M$VarZScores <- centiles.pred(mod.M, xname="Var_x",xvalues=dfCase.M$Var_x,yval=dfCase.M$Var, type="z-scores") summary(dfCase.M$VarZScores) # Get centile information for sick patients dfCase.M$Cent <- pnorm(dfCase.M$VarZScores)*100 dfHealthy.M$Cent[dfHealthy.M$Cent > max_centile] <- max_centile dfHealthy.M$Cent[dfHealthy.M$Cent < (100-max_centile)] <- (100-max_centile) dfHealthy.M$VarZScores[dfHealthy.M$VarZScores > qnorm(max_centile/100)] <- qnorm(max_centile/100) dfHealthy.M$VarZScores[dfHealthy.M$VarZScores < qnorm(1-max_centile/100)] <- qnorm(1-max_centile/100) dfCase.M$Cent[dfCase.M$Cent > max_centile] <- max_centile dfCase.M$Cent[dfCase.M$Cent < (100-max_centile)] <- (100-max_centile) dfCase.M$VarZScores[dfCase.M$VarZScores > qnorm(max_centile/100)] <- qnorm(max_centile/100) dfCase.M$VarZScores[dfCase.M$VarZScores < qnorm(1-max_centile/100)] <- qnorm(1-max_centile/100) dfHealthy.M_output <- dfHealthy_output[as.character(dfHealthy_output$Gender) %in% c("M", "Male", "m", "male", "MALE"), ] dfHealthy.M_output$VarZScores <- NA dfHealthy.M_output[rownames(dfHealthy.M),]$VarZScores <- dfHealthy.M$VarZScores dfHealthy.M_output$Cent <- NA dfHealthy.M_output[rownames(dfHealthy.M),]$Cent <- dfHealthy.M$Cent dfCase.M_output <- dfCase_output[dfCase_output$Gender %in% c("M", "Male", "m", "male", "MALE"), ] dfCase.M_output$VarZScores <- NA dfCase.M_output[rownames(dfCase.M),]$VarZScores <- dfCase.M$VarZScores dfCase.M_output$Cent <- NA dfCase.M_output[rownames(dfCase.M),]$Cent <- dfCase.M$Cent # Output z-scores colnames(dfHealthy.M_output) <- c("ID","Gender",y_variable,x_variable,"ZScore","Cent") colnames(dfCase.M_output) <- c("ID","Gender",y_variable,x_variable,"ZScore","Cent") write.csv(dfHealthy.M_output, file = "ZScoresM_Healthy.csv", row.names=F, na="*") write.csv(dfCase.M_output, file = "ZScoresM_Sick.csv", row.names=F, na="*") # Plots p <- ggplot(cent.melted,aes(Var_x)) + theme_bw() p <- p + geom_line(aes(Var_x,value,color=Centile)) p <- p + scale_color_manual(values = cols) p <- p + xlab("Height (cm)") + ylab("Grip Strength (kg) - Boys") p1 <- p + geom_point(data = dfCase.M, aes(Var_x,Var),size=1,alpha=0.75) p2 <- p + geom_point(data = dfHealthy.M, aes(Var_x,Var),size=1,alpha=0.75) dfAll.M <- rbind(dfCase.M,dfHealthy.M) dfAll.M$Type <- "Healthy" dfAll.M$Type[1:nrow(dfCase.M)] <- "Sick" p3 <- p + geom_point(data = dfAll.M, aes(Var_x,Var,fill=Type),shape=21,size=1.5,alpha=0.75) p3 <- p3 + scale_fill_manual(values=c("white","red")) pdf("PlotM_Sick.pdf") plot(p1) dev.off() pdf("PlotM_Healthy.pdf") plot(p2) dev.off() pdf("PlotM_All.pdf") plot(p3) dev.off() ### Repeat using female only data ### # z-scores and centiles for model dfHealthy.F$VarZScores <- resid(mod.F, what=c("z-scores")) dfHealthy.F$Cent <- pnorm(dfHealthy.F$VarZScores)*100 Centiles.F <- centiles.pred(mod.F, xname="Var_x",xvalues=dfHealthy.F$Var_x,yval=dfHealthy.F$Var, type="centiles",cent=cent_vals) summary(dfHealthy.F$VarZScores) colnames(Centiles.F)[1] <- x_variable cent.melted <- melt(Centiles.F,id.vars = x_variable) cent.melted$Centile <- cent.melted$variable colnames(cent.melted)[1] <- "Var_x" cols <- c("red","blue","green","black","green","blue","red") Centiles.F.SD <- centiles.pred(mod.F, xname="Var_x",xvalues=dfHealthy.F$Var_x,yval=dfHealthy.F$Var, type="centiles",cent=c(0.135,2.275,15.865,50,84.135,97.725,99.865)) # Predict z-scores for sick patient data dfCase.F$VarZScores <- centiles.pred(mod.F, xname="Var_x",xvalues=dfCase.F$Var_x,yval=dfCase.F$Var, type="z-scores") summary(dfCase.F$VarZScores) # Get centile information for sick patients dfCase.F$Cent <- pnorm(dfCase.F$VarZScores)*100 dfHealthy.F$Cent[dfHealthy.F$Cent > max_centile] <- max_centile dfHealthy.F$Cent[dfHealthy.F$Cent < (100-max_centile)] <- (100-max_centile) dfHealthy.F$VarZScores[dfHealthy.F$VarZScores > qnorm(max_centile/100)] <- qnorm(max_centile/100) dfHealthy.F$VarZScores[dfHealthy.F$VarZScores < qnorm(1-max_centile/100)] <- qnorm(1-max_centile/100) dfCase.F$Cent[dfCase.F$Cent > max_centile] <- max_centile dfCase.F$Cent[dfCase.F$Cent < (100-max_centile)] <- (100-max_centile) dfCase.F$VarZScores[dfCase.F$VarZScores > qnorm(max_centile/100)] <- qnorm(max_centile/100) dfCase.F$VarZScores[dfCase.F$VarZScores < qnorm(1-max_centile/100)] <- qnorm(1-max_centile/100) dfHealthy.F_output <- dfHealthy_output[as.character(dfHealthy_output$Gender) %in% c("F", "Female", "f", "female", "FEMALE"), ] dfHealthy.F_output$VarZScores <- NA dfHealthy.F_output[rownames(dfHealthy.F),]$VarZScores <- dfHealthy.F$VarZScores dfHealthy.F_output$Cent <- NA dfHealthy.F_output[rownames(dfHealthy.F),]$Cent <- dfHealthy.F$Cent dfCase.F_output <- dfCase_output[dfCase_output$Gender %in% c("F", "Female", "f", "female", "FEMALE"), ] dfCase.F_output$VarZScores <- NA dfCase.F_output[rownames(dfCase.F),]$VarZScores <- dfCase.F$VarZScores dfCase.F_output$Cent <- NA dfCase.F_output[rownames(dfCase.F),]$Cent <- dfCase.F$Cent # Output z-scores colnames(dfHealthy.F_output) <- c("ID","Gender",y_variable,x_variable,"ZScore","Cent") colnames(dfCase.F_output) <- c("ID","Gender",y_variable,x_variable,"ZScore","Cent") write.csv(dfHealthy.F_output, file = "ZScoresF_Healthy.csv", row.names=F, na="*") write.csv(dfCase.F_output, file = "ZScoresF_Sick.csv", row.names=F, na="*") # Plots p <- ggplot(cent.melted,aes(Var_x)) + theme_bw() p <- p + geom_line(aes(Var_x,value,color=Centile)) p <- p + scale_color_manual(values = cols) p <- p + xlab("Height (cm)") + ylab("Grip Strength (kg) - Girls") p1 <- p + geom_point(data = dfCase.F, aes(Var_x,Var),size=1,alpha=0.75) p2 <- p + geom_point(data = dfHealthy.F, aes(Var_x,Var),size=1,alpha=0.75) dfAll.F <- rbind(dfCase.F,dfHealthy.F) dfAll.F$Type <- "Healthy" dfAll.F$Type[1:nrow(dfCase.F)] <- "Sick" p3 <- p + geom_point(data = dfAll.F, aes(Var_x,Var,fill=Type),shape=21,size=1.5,alpha=0.75) p3 <- p3 + scale_fill_manual(values=c("white","red")) pdf("PlotF_Sick.pdf") plot(p1) dev.off() pdf("PlotF_Healthy.pdf") plot(p2) dev.off() pdf("PlotF_All.pdf") plot(p3) dev.off() options(warn = oldw)