# gravity-hurdle model of preliminary pink slip data # started 25th July 2017 # clear objects from memory, reset graphics devices rm(list = ls()) graphics.off() Sys.setenv(TZ="Europe/London") library(pscl) library(splines) library(piecewiseSEM) library(parallel) library(GLMMmisc) library(scales) library(ggmap) library(MASS) library(glmmTMB) # functions # show 20 random rows of a table sample.rows <- function(x, n = 20) x[sort(sample(nrow(x), n)), ] # adapted from http://www.r-bloggers.com/great-circle-distance-calculations-in-r/ # Calculates the geodesic distance between two points specified by radian latitude/longitude using the # Spherical Law of Cosines (slc) gcd.slc <- function(long, lat) { R <- 6371 # Earth mean radius [km] latr <- lat*pi/180 longr <- long*pi/180 x <- prod(sin(latr)) + prod(cos(latr)) * cos(diff(longr)) x <- min(x, 1) x <- max(x, -1) d <- acos(x) * R return(d) # Distance in km } # wald z-test function # (adapted from http://stijnruiter.nl/blog/?p=309 - broken link) # useful for GLMMs when you want to test for inclusion in the model of a # categorical variable that has more than 2 levels. in a linear regression model # you could use a likelihood ratio test, but this is not recommended for GLMMs. # However this Wald test method is also dubious at low n because it doesn't take account of the # degrees of freedom. # (see Bolker et al. Trends in Ecology & Evolution, Volume 24, Issue 3, 127-135, 29 January 2009). # 2017-07-02: adapted to deal with glmmTMB fits Wald<- function(object, R, q = NULL, gmmTMB.model = "cond") { require(lme4) if (!is.matrix(R)) stop("Restrictions must be a matrix") if(is.null(q)) q <- rep(0, nrow(R)) b <- fixef(object) if(class(b) == "fixef.glmmTMB") b <- b[[gmmTMB.model]] vc <- vcov(object) if("vcov.glmmTMB" %in% class(vc)) vc <- vc[[gmmTMB.model]] w <- t(R %*% b - q) %*% solve(R %*% vc %*% t(R)) %*% (R %*% b - q) pw <- pchisq(w[1], length(q), lower.tail = FALSE) cat("*************\n* Wald Test *\n*************\n") cat("lme4 fixed effects:\n") print(fixef(object)) cat("\nRestrictions:\n") print(R) cat("\nq = ",q) cat("\nChi-square:", round(w[1],3), " df = ", length(q)) cat("\nProb x>chisq:", round(pw, 5), "\n") return(pw) } # funtion to adjust binomial GLMM for jensen's inequality jensen.adjust <- function(p, V, method = "mcculloch", inverse = FALSE) { stopifnot(!(method == "mcculloch" & inverse)) Beta <- qlogis(p) if(method == "mcculloch") { return(plogis(Beta - 0.5 * V * tanh(Beta * (1 + 2 * exp(-0.5 * V))/6))) } if(method == "zeger") { if(inverse) return(plogis(Beta * sqrt(256 * V / (75 * pi^2) + 1))) else return(plogis(Beta/sqrt(1 + ((16 * sqrt(3))/(15 * pi))^2 * V))) } } # function to capitalise initial letters of words. # this function is adapted from the examples in the R Help for toupper() capwords<- function(s,strict=FALSE,first.only=FALSE) { cap<- function(s) paste( toupper(substring(s,1,1)), {s <- substring(s,2); if(strict) tolower(s) else s}, sep = "", collapse = " " ) if(first.only) cap(s) else sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s))) } # read in ward data wards <- read.csv("~/OneDrive - University of Glasgow/Projects/SEEDZ/mapping/WardSpatialData.csv", stringsAsFactors = FALSE, quote = "") dim(wards) dim(unique(wards)) head(wards) sum(is.na(wards)) for(n in c("name", "district", "region")) { wards[, n] <- gsub(" ", "", gsub("[^[:alpha:] ]", "", tolower(wards[, n]))) wards[!grepl('^[A-Za-z]+$', wards[, n]), n] <- NA } ; rm(n) sum(is.na(wards)) wards$name <- paste(wards$region, wards$district, wards$name, sep = "/") head(wards) table(wards$region) # select study regions include.regions <- c("arusha", "manyara", "kilimanjaro") wards <- droplevels( wards[wards$region %in% include.regions, ]) length(wards$name) length(unique(wards$name)) # load movement data file.info("movt.analysis.data.csv") dat.all <- read.csv("movt.analysis.data.csv", stringsAsFactors = FALSE) dim(dat.all) head(dat.all) apply(is.na(dat.all), 2, sum) dat.all[is.na(dat.all$date_yyyy), ] # year dat.all$year <- factor(dat.all$date_yyyy) # year and month dat.all$year.month <- factor(paste(dat.all$date_yyyy, formatC(dat.all$date_mm, flag = "0", width = 2), sep = "-")) grep("NA", dat.all$year.month) dat.all$month <- factor(dat.all$date_mm) # half-year and year dat.all$half <- factor(dat.all$date_mm < 6.5, c(TRUE, FALSE), c("1st", "2nd")) table(dat.all$half, exclude = NULL) dat.all$year.half <- factor(paste(dat.all$year, dat.all$half, sep = "-")) table(dat.all$year.half, exclude = NULL) # quarter-year and year dat.all$quarter <- cut(dat.all$date_mm, c(0, 3.5, 6.5, 9.5, 12.5), labels = c("1st", "2nd", "3rd", "4th")) table(dat.all$quarter) plot(table(dat.all$date_mm)) table(dat.all$quarter, exclude = NULL) dat.all$year.quarter <- factor(paste(dat.all$year, dat.all$quarter, sep = "-")) table(dat.all$year.quarter, exclude = NULL) # clear up NAs dat.all$year[grep("NA", dat.all$year)] <- NA dat.all$year <- droplevels(dat.all$year) levels(dat.all$year) grep("NA", dat.all$year.month) dat.all$year.month[grep("NA", dat.all$year.month)] <- NA dat.all$year.month <- droplevels(dat.all$year.month) grep("NA", dat.all$year.month) levels(dat.all$year.month) dat.all$year.half[grep("NA", dat.all$year.half)] <- NA dat.all$year.half <- droplevels(dat.all$year.half) levels(dat.all$year.half) dat.all$year <- droplevels(dat.all$year) grep("NA", dat.all$year.half) table(dat.all$year.quarter, exclude = NULL) dat.all$year.quarter[grep("NA", dat.all$year.quarter)] <- NA dat.all$year.quarter <- droplevels(dat.all$year.quarter) levels(dat.all$year.quarter) grep("NA", dat.all$year.quarter) table(dat.all$year.quarter, exclude = NULL) head(dat.all) # choose time period to aggregate over dat.all$period <- dat.all$year.month plot(table(dat.all$year.month), las = 3, cex.axis = 0.7) dim(dat.all) dat <- droplevels( dat.all[dat.all$fr.match.type == "exact" & dat.all$to.match.type == "exact" & !is.na(dat.all$period), ]) dat[sample(nrow(dat), 10), ] rm(dat.all) dim(dat) apply(is.na(dat), 2, sum) apply(is.na(dat), 2, mean) levels(dat$year.month) # type of transport table(dat$to.match.type) dat$transport <- factor(dat$transport, exclude = NULL, labels = c("Foot", "Motor", "Not rec.")) dat$species <- factor(dat$species, c("Cattle", "Sheep/goats", "Mixed")) dat[sample(nrow(dat), 10), ] table(is.na(dat$date)) table(is.na(dat$period)) # impute zero distances as 0.5 km sort(unique(dat$distance))[1:10] dat[dat$distance == 0, ] sort(dat$distance)[1:200] dat$distance[dat$distance == 0] <- 0.5 # create data set with all possible movements from the origin wards (complete list) # to the destinations fr.wards <- unique(dat$fr.ward) to.wards <- unique(wards$name) setdiff(fr.wards, to.wards) # should return character(0) # how large (max) will the data set be? length(fr.wards) * length(to.wards) * nlevels(dat$period) # make template data set of all possible ward movements in each period all(fr.wards %in% to.wards) datw.full <- expand.grid(fr.ward = to.wards, to.ward = to.wards, period = levels(dat$period)) datw.full$month <- as.numeric(substr(datw.full$period, 6, 7)) - 1 # -1 so that Jan is ref datw.full$year <- factor(substr(datw.full$period, 1, 4)) str(datw.full) dim(datw.full) # remove rows where origin = destination; we're not interested in within ward movement datw.full <- droplevels(datw.full[as.character(datw.full$fr.ward) != as.character(datw.full$to.ward), ]) dim(datw.full) # give rows unique names including origin, destination and period datw.full$fr.to.date <- factor(paste(datw.full$fr.ward, datw.full$to.ward, datw.full$period, sep = ".")) rownames(datw.full) <- datw.full$fr.to.date # make a factor for market-period too datw.full$fr.date <- factor(paste(datw.full$fr.ward, datw.full$period, sep = ".")) # create the same variable in the source data set to allow matching dat$fr.to.date <- paste(dat$fr.ward, dat$to.ward, dat$period, sep = ".") # add origin and destination labels (these are the names that are presented in tables and figures, # e.g. "kilimanjaroairport" rather than "kilimanjarointernationalairport") #datw.full[1, ] #match.label <- # unique(cbind(c(dat$fr.ward, dat$to.ward), c(dat$fr.match.label, dat$to.match.label))) #dim(match.label) #head(match.label) #rownames(match.label) <- match.label[, 1] #datw.full$fr.match.label <- dat$fr.match.label[match(as.character(datw.full$fr.to.date), dat$fr.to.date)] #table(is.na(datw.full$fr.match.label)) #dat$fr.match[dat$fr.match.label == "kilimanjaroairport"] #dat$to.match.label # number of animals moved of each species dat$n.permits <- 1 spp <- c("animals", "cattle", "caprine") animals.sum.tab <- do.call("rbind", by(dat[dat$fr.to.date %in% datw.full$fr.to.date, c(spp, "n.permits")], dat$fr.to.date[dat$fr.to.date %in% datw.full$fr.to.date], colSums)) head(animals.sum.tab) datw.full[, c(spp, "n.permits")] <- 0 datw.full[rownames(animals.sum.tab), c(spp, "n.permits")] <- animals.sum.tab[, c(spp, "n.permits")] rm(animals.sum.tab) # note that now not all fr.wards produce movemnents in datw.full, because datw.full # excludes origins that only send animals out of the 3 study regions # e.g. dat[dat$fr.ward == "manyara/hanang/simbay", ] sum(datw.full$animals[datw.full$fr.ward == "manyara/hanang/simbay"]) # examine animals variable in datw.full summary(datw.full$animals) sum(is.na(datw.full$animals)) sum(dat$animals) sum(datw.full$animals) # fewer, as movements into and out of the 3 regions excluded plot(tapply(datw.full$animals, datw.full$period, sum)) mean(datw.full$animals) mean(datw.full$animals[datw.full$animals > 0.5]) mean(datw.full$cattle[datw.full$cattle > 0.5]) sum(datw.full$cattle)/sum(datw.full$animals) table(datw.full$n.permits) sum(datw.full$n.permits == 1) / sum(datw.full$n.permits > 0) # ...so if we break down movements into months, 58% of movements are single permits # add human and livestock population size, population density, area, status # define continuous variables cont.vars <- c("area.km2", "pop2012", "popdens2012", paste0("pop.", spp), paste0("popdens.", spp)) wards[1:3, cont.vars] # add ward status from the census datw.full$fr.status <- factor(wards$status[match(datw.full$fr.ward, wards$name)], c("Rural Ward", "Mixed Ward", "Urban Ward")) datw.full$to.status <- factor(wards$status[match(datw.full$to.ward, wards$name)], c("Rural Ward", "Mixed Ward", "Urban Ward")) table(datw.full$fr.status, datw.full$to.status, exclude = NULL) # add Will de Glanville's production system classification datw.full$fr.prodsys <- factor(wards$prodsys[match(datw.full$fr.ward, wards$name)], c("Pastoral", "Agropastoral", "Smallholder", "Urban")) datw.full$to.prodsys <- factor(wards$prodsys[match(datw.full$to.ward, wards$name)], c("Pastoral", "Agropastoral", "Smallholder", "Urban")) table(datw.full$fr.prodsys, datw.full$to.prodsys, exclude = NULL) table(datw.full$fr.status, datw.full$fr.prodsys, exclude = NULL) table(datw.full$to.status, datw.full$to.prodsys, exclude = NULL) table(wards$prodsys, wards$status) # identify wards containing primary and secondary markets # binary indicators for presence of 1ary and 2ary markets in origin and destination wards datw.full$fr.primary <- as.integer(wards$market[match(datw.full$fr.ward, wards$name)] == "Primary") datw.full$fr.secondary <- as.integer(wards$market[match(datw.full$fr.ward, wards$name)] == "Secondary") table(datw.full$fr.primary, datw.full$fr.secondary) datw.full$to.primary <- as.integer(wards$market[match(datw.full$to.ward, wards$name)] == "Primary") datw.full$to.secondary <- as.integer(wards$market[match(datw.full$to.ward, wards$name)] == "Secondary") table(datw.full$to.primary, datw.full$to.secondary) # factors for presence of 1ary and 2ary markets in origin and destination wards datw.full$to.market.f <- wards$market[match(datw.full$to.ward, wards$name)] datw.full$fr.market.f <- wards$market[match(datw.full$fr.ward, wards$name)] table(datw.full$fr.market.f) table(datw.full$to.market.f) table(datw.full$fr.market.f, datw.full$to.market.f) # factor for type of journey: primary to primary, primary to secondary, etc datw.full$market.f <- factor(paste(datw.full$fr.market.f, datw.full$to.market.f, sep = "-")) levels(datw.full$market.f) table(datw.full$market.f) # factors for presence of 1ary and 2ary markets in origin and destination wards tapply(datw.full$animals, datw.full$market.f, mean) tapply(datw.full$animals > 0, datw.full$market.f, sum) round(100 * tapply(datw.full$animals > 0, datw.full$market.f, mean), 2) # add variable with distance to primary and secondary market datw.full$fr.primary5 <- as.integer(wards$mindist.primary[match(datw.full$fr.ward, wards$name)] < 5) mean(datw.full$fr.primary5) datw.full$fr.primary5.f <- factor(datw.full$fr.primary5, 0:1, c("<5km", ">5km")) datw.full$to.primary5 <- as.integer(wards$mindist.primary[match(datw.full$to.ward, wards$name)] < 5) mean(datw.full$to.primary5) datw.full$to.primary5.f <- factor(datw.full$to.primary5, 0:1, c("<5km", ">5km")) datw.full$fr.secondary10 <- as.integer(wards$mindist.secondary[match(datw.full$fr.ward, wards$name)] < 10) mean(datw.full$fr.secondary10) datw.full$fr.secondary10.f <- factor(datw.full$fr.secondary10, 0:1, c("<10km", ">10km")) datw.full$to.secondary10 <- as.integer(wards$mindist.secondary[match(datw.full$to.ward, wards$name)] < 10) mean(datw.full$to.secondary10) datw.full$to.secondary10.f <- factor(datw.full$to.secondary10, 0:1, c("<10km", ">10km")) sum(datw.full$to.secondary10) table(datw.full$market.f, datw.full$fr.primary5.f) table(datw.full$market.f, datw.full$to.primary5.f) table(datw.full$market.f, datw.full$fr.secondary10.f) table(datw.full$market.f, datw.full$to.secondary10.f) # how do total cattle numbers compare with LINKS total volume? # quick check of secondary markets (meserani, themi, weruweru) from LINKS (http://www.lmistz.net). # also plot distribution of data over the period to identify missing chunks of time # make a secondary markets plot comparing LINKS and pink slip data over years secmkt <- expand.grid( year = levels(dat$year), market = c("meserani", "themi", "weruweru", "dosidosi", "mgagao")) secmkt$cattle.permits.est <- sapply(1:nrow(secmkt), function(i) { sum(dat$cattle[dat$fr.match.label == secmkt$market[i] & dat$year == secmkt$year[i]]) }) * 20 / length(unique(dat$batch)) secmkt$permits.months <- sapply(1:nrow(secmkt), function(i) { length(unique(na.omit(dat$date_mm[dat$fr.match.label == secmkt$market[i] & dat$year == secmkt$year[i]]))) }) secmkt$cattle.links[secmkt$year == "2009" & secmkt$market == "meserani"] <- 16271 # 10 secmkt$cattle.links[secmkt$year == "2011" & secmkt$market == "meserani"] <- 26905 # 12 secmkt$cattle.links[secmkt$year == "2013" & secmkt$market == "meserani"] <- 27748 # 12 secmkt$cattle.links[secmkt$year == "2015" & secmkt$market == "meserani"] <- 18635 # 11 secmkt$cattle.links[secmkt$year == "2009" & secmkt$market == "themi"] <- NA secmkt$cattle.links[secmkt$year == "2011" & secmkt$market == "themi"] <- 6949 # 12 secmkt$cattle.links[secmkt$year == "2013" & secmkt$market == "themi"] <- 6870 # 12 secmkt$cattle.links[secmkt$year == "2015" & secmkt$market == "themi"] <- 3449 # 10 secmkt$cattle.links[secmkt$year == "2009" & secmkt$market == "weruweru"] <- 1209 # 3 months secmkt$cattle.links[secmkt$year == "2011" & secmkt$market == "weruweru"] <- 2206 # 4 months secmkt$cattle.links[secmkt$year == "2013" & secmkt$market == "weruweru"] <- NA secmkt$cattle.links[secmkt$year == "2015" & secmkt$market == "weruweru"] <- NA secmkt$cattle.links[secmkt$year == "2009" & secmkt$market == "dosidosi"] <- 8436 # 9 secmkt$cattle.links[secmkt$year == "2011" & secmkt$market == "dosidosi"] <- 3681 # 4 secmkt$cattle.links[secmkt$year == "2013" & secmkt$market == "dosidosi"] <- 748 # 1 month secmkt$cattle.links[secmkt$year == "2015" & secmkt$market == "dosidosi"] <- 8510 # 6 months secmkt$cattle.links[secmkt$year == "2009" & secmkt$market == "mgagao"] <- 5120 # 12 secmkt$cattle.links[secmkt$year == "2011" & secmkt$market == "mgagao"] <- 5080 # 12 secmkt$cattle.links[secmkt$year == "2013" & secmkt$market == "mgagao"] <- 3775 # 11 months secmkt$cattle.links[secmkt$year == "2015" & secmkt$market == "mgagao"] <- 4734 # 12 months of records # missingness in links -- how many months have any total cattle numbers recorded? secmkt$links.months <- NA secmkt$links.months[secmkt$year == "2009" & secmkt$market == "meserani"] <- 10 secmkt$links.months[secmkt$year == "2011" & secmkt$market == "meserani"] <- 12 secmkt$links.months[secmkt$year == "2013" & secmkt$market == "meserani"] <- 12 secmkt$links.months[secmkt$year == "2015" & secmkt$market == "meserani"] <- 11 secmkt$links.months[secmkt$year == "2009" & secmkt$market == "themi"] <- NA secmkt$links.months[secmkt$year == "2011" & secmkt$market == "themi"] <- 12 secmkt$links.months[secmkt$year == "2013" & secmkt$market == "themi"] <- 12 secmkt$links.months[secmkt$year == "2015" & secmkt$market == "themi"] <- 10 secmkt$links.months[secmkt$year == "2009" & secmkt$market == "weruweru"] <- 3 secmkt$links.months[secmkt$year == "2011" & secmkt$market == "weruweru"] <- 4 secmkt$links.months[secmkt$year == "2013" & secmkt$market == "weruweru"] <- NA secmkt$links.months[secmkt$year == "2015" & secmkt$market == "weruweru"] <- NA secmkt$links.months[secmkt$year == "2009" & secmkt$market == "dosidosi"] <- 9 secmkt$links.months[secmkt$year == "2011" & secmkt$market == "dosidosi"] <- 4 secmkt$links.months[secmkt$year == "2013" & secmkt$market == "dosidosi"] <- 1 secmkt$links.months[secmkt$year == "2015" & secmkt$market == "dosidosi"] <- 6 secmkt$links.months[secmkt$year == "2009" & secmkt$market == "mgagao"] <- 12 secmkt$links.months[secmkt$year == "2011" & secmkt$market == "mgagao"] <- 12 secmkt$links.months[secmkt$year == "2013" & secmkt$market == "mgagao"] <- 11 secmkt$links.months[secmkt$year == "2015" & secmkt$market == "mgagao"] <- 12 secmkt$permits.months plot(jitter(secmkt$links.months, factor = 0.5), secmkt$cattle.links/secmkt$cattle.permits.est) plot(jitter(secmkt$links.months, factor = 0.5), jitter(secmkt$permits.months, factor = 0.5)) cor.test(secmkt$links.months, secmkt$permits.months, method = "spearman", use = "pairwise.complete.obs") secmkt <- droplevels(na.omit(secmkt[secmkt$permits.months >= 8 & secmkt$links.months >= 8, ])) hist(secmkt$cattle.links/secmkt$cattle.permits.est, nclass = 50) plot(cattle.permits.est ~ cattle.links, data = secmkt, pch = as.numeric(year), col = as.numeric(market) * 2)#, cex = sqrt(links.months/permits.months) + 0.2) abline(0, 1) abline(0, 0.5, lty = 2) with(secmkt, legend("topleft", paste0(market, " ", year, " (",links.months, "/", permits.months, ")"), pch = as.numeric(year), col = as.numeric(market) * 2)) text(x = 20000, y = 20000, expression(permits==links)) text(x = 20000, y = 20000/2, expression(permits==links/2)) # primary markets, 2015 # dosidosi, LINKS total volume: 8510 sum(dat$cattle[dat$fr.match.label == "dosidosi" & dat$year == "2015"])/8510 sum(dat$cattle[dat$to.match.label == "dosidosi" & dat$year == "2015"])/8510 hist(dat$date.dec[dat$fr.match.label == "dosidosi" & dat$year == "2015"], xlim = c(2015, 2016)) # NB permit data for dosidosi covers whole of 2015, LINKS only 2015-06-15 to 2015-11-16 # mgagao, LINKS total volume: 4734 sum(dat$cattle[dat$fr.match.label == "mgagao" & dat$year == "2015"])/4734 sum(dat$cattle[dat$to.match.label == "mgagao" & dat$year == "2015"])/4734 hist(dat$date.dec[dat$fr.match.label == "dosidosi" & dat$year == "2015"], xlim = c(2015, 2016)) # mbulu - no LINKS data for 2015 sort(table(dat$fr.match.label)) sort(table(dat$to.match.label)) round(c(0.01, 0.02, 0.5) * c(70, 5, 15)) # back to the permit data... # add coordinates datw.full$fr.lat <- wards$lat[match(datw.full$fr.ward, wards$name)] datw.full$fr.long <- wards$long[match(datw.full$fr.ward, wards$name)] datw.full$to.lat <- wards$lat[match(datw.full$to.ward, wards$name)] datw.full$to.long <- wards$long[match(datw.full$to.ward, wards$name)] # calculate distance in units of km datw.full$dist.km <- round( apply(datw.full[, c("fr.long", "to.long", "fr.lat", "to.lat")], 1, function(x) gcd.slc(long = x[1:2], lat = x[3:4])), 2) summary(datw.full$dist.km) sort(unique(datw.full$dist.km))[1:10] # the shortest distances # are all the zero distances within-ward journeys and vice versa? table(as.character(datw.full$fr.ward) == as.character(datw.full$to.ward), datw.full$dist.km < 0.00001) # yes any(is.na(datw.full$dist.km)) # impute zero distances as the minimum between ward distance datw.full$dist.km[datw.full$dist.km < 0.0000001] <- NA datw.full$dist.km[is.na(datw.full$dist.km)] <- min(datw.full$dist.km, na.rm = TRUE) datw.full$log10.dist.km <- log10(datw.full$dist.km) datw.full$dist.km.cat <- cut(datw.full$dist.km, c(-Inf, quantile(datw.full$dist.km, seq(0.1, 0.9, by = 0.1)), Inf)) table(datw.full$dist.km.cat, exclude = NULL) head(datw.full) hist(datw.full$dist.km) hist(datw.full$animals, nclass = 1000, ylim = c(0, 100)) datw.full[sample(nrow(datw.full), 10), ] # a way to deal with missing data # fit a zero-inflated model where the response is the number of permits produced by # an origin in a given month. significant zero-inflation suggests that the zeroes are not # due to low and fluctuating numbers of outward movements, but some other process # such as loss of permit books. # first, assume that origins that never produced a permit are obligate zeroes and remove them fr.wards.study <- as.character(sort(unique(datw.full$fr.ward[datw.full$animals > 0]))) origins <- data.frame( fr.date = as.character(unique(datw.full$fr.date[datw.full$fr.ward %in% fr.wards.study])), stringsAsFactors = FALSE) origins$year <- factor(substr(origins$fr.date, nchar(origins$fr.date) - 6, nchar(origins$fr.date) - 3)) origins$month <- factor(substr(origins$fr.date, nchar(origins$fr.date) - 6, nchar(origins$fr.date))) origins$ward <- factor(substr(origins$fr.date, 1, nchar(origins$fr.date) - 8)) dim(origins) origins$n.permits <- with(droplevels(datw.full[datw.full$fr.ward %in% fr.wards.study, ]), tapply(n.permits, fr.date, sum))[origins$fr.date] table(origins$n.permits) origins <- origins[order(origins$ward, origins$month), ] # find wards that have any months with no permits fr.wards.study.z <- levels(origins$ward)[!tapply(origins$n.permits != 0, origins$ward, all)] # fit negative binomial GLMs with and without zero-inflation, and test for zero-inflation zi.res <- t(sapply(fr.wards.study.z, function(w) { # w <- fr.wards.study.z[15] # w <- "arusha/meru/kikatiti" # w <- "manyara/babati/mwada" print(w) y <- as.vector(droplevels(origins[origins$ward == w, ])$n.permits) print(table(y)) zero.prop <- mean(y == 0) if(sum(y != 0) <= 4) return(c(zero.prop = zero.prop, zi.prop = NA, p.val = 1, mean.y = mean(y), n.nonzero = sum(y != 0), exp.true.zeroes = NA)) fit.zi <- zeroinfl(y ~ 1, dist = ifelse(all(y %in% 0:1), "poisson", "negbin")) zi.prop <- as.vector(plogis(coef(fit.zi)["zero_(Intercept)"])) fit.nb <- if(all(y %in% 0:1)) glm(y ~ 1, family = "poisson") else fit.nb <- glm.nb(y ~ 1, init.theta = 1) # control = glm.control(maxit = 25, trace = 3), p.val <- pchisq(2 * (logLik(fit.zi) - logLik(fit.nb)), 1, lower.tail = FALSE) c(zero.prop = zero.prop, zi.prop = zi.prop, p.val = p.val, mean.y = mean(y), n.nonzero = sum(y != 0), exp.true.zeroes = round((zero.prop - zi.prop) * length(y), 1)) })) zi.res <- data.frame(zi.res) # which wards are significantly zero-inflated (using P < 0.05)? table(zi.res$p.val < 0.05) zi.res[zi.res$p.val < 0.05, ] sort(zi.res$zi.prop[zi.res$p.val < 0.05] / zi.res$zero.prop[zi.res$p.val < 0.05]) range(zi.res$exp.true.zeroes[zi.res$p.val < 0.05]) # for the 16 significantly ZI wards, the proportion of zeroes that are "FALSE" # zeroes (suggesting that these zeroes are due to, e.g., missing forms rather than fewer # movements) the vast majority of zeroes are false (87-100%), so that the number of the 48 months with # true zeroes is never >2. # proceed by treating all zeroes from these 16 wards as false (so delete these rows). drop.zero.fr.date <- origins$fr.date[origins$ward %in% rownames(zi.res)[zi.res$p.val < 0.05] & origins$n.permits == 0] datw <- droplevels( datw.full[ !(datw.full$fr.date %in% drop.zero.fr.date) & (datw.full$fr.ward %in% fr.wards.study), ]) dim(datw.full) dim(datw) table(datw$fr.ward) # OPTIONALLY temporarily subsample data to allow multiple models to be fitted using laptop #set.seed(1234) #datw <- droplevels(datw[sample(nrow(datw), sort(ceiling(nrow(datw)/10))), ]) #table(datw$cattle > 0, datw$fr.prodsys) # centre log10.dist.km and other continuous variables on mean. # this needs to be done on the reduced data set that will be used for modelling # then used to centre the full data set on the mean of the reduced data set for(cv in cont.vars) { datw[, paste0("fr.", cv)] <- wards[match(datw$fr.ward, wards$name), cv] datw[, paste0("to.", cv)] <- wards[match(datw$to.ward, wards$name), cv] datw.full[, paste0("fr.", cv)] <- wards[match(datw.full$fr.ward, wards$name), cv] datw.full[, paste0("to.", cv)] <- wards[match(datw.full$to.ward, wards$name), cv] datw[, paste0("log10.fr.", cv, ".cent")] <- scale(log10(datw[, paste0("fr.", cv)]), scale = FALSE) datw[, paste0("log10.to.", cv, ".cent")] <- scale(log10(datw[, paste0("to.", cv)]), scale = FALSE) datw.full[, paste0("log10.fr.", cv, ".cent")] <- log10(datw.full[, paste0("fr.", cv)]) - attr(datw[, paste0("log10.fr.", cv, ".cent")], "scaled:center") datw.full[, paste0("log10.to.", cv, ".cent")] <- log10(datw.full[, paste0("to.", cv)]) - attr(datw[, paste0("log10.to.", cv, ".cent")], "scaled:center") }; rm(cv) datw$log10.dist.km.cent <- scale(datw$log10.dist.km, scale = FALSE) datw.full$log10.dist.km.cent <- datw.full$log10.dist.km - attr(datw$log10.dist.km.cent, "scaled:center") ####### MODEL FITTING ######## # select species to model (sp <- spp[2]) any.sp <- paste0("any.", sp) # create a binary indicator of "any movement" datw[, any.sp] <- as.integer(datw[, sp] > 0.5) # create data set for zero-truncated count model by dropping pairs of wards with no traffic datw1 <- datw[datw[, any.sp] == 1, ] nrow(datw1) hist(datw1[, sp], nclass = 1000, xlim = c(1, max(datw1$animals))) datw1[datw1$animals == max(datw1$animals), ] sum(datw1$cattle) # select the fixed effects for both models # list the effects # these will be used to fit drop1 models, so care must be taken # with interactions # the main effect can't be dropped without also dropping the interaction # create spline fixed effects cont.fe <- c("log10.dist.km.cent", "log10.fr.pop2012.cent", "log10.to.pop2012.cent", paste0("log10.fr.pop.", sp,".cent"), paste0("log10.to.pop.", sp,".cent"), "log10.fr.area.km2.cent", "log10.to.area.km2.cent", "month") # get knots and boundary knots for both parts of hurdle model, z and c spline.z <- lapply(datw[, cont.fe], ns, df = 3) spline.c <- lapply(datw1[, cont.fe], ns, df = 3) spline.z.attr <- lapply(spline.z, function(x) attributes(x)[c("knots", "Boundary.knots")]) spline.c.attr <- lapply(spline.c, function(x) attributes(x)[c("knots", "Boundary.knots")]) spline.fe.z <- paste0("ns(", cont.fe, ", knots = c(", apply(round(sapply(spline.z.attr[cont.fe], "[[", 1), 5), 2, paste, collapse = ", "), "), Boundary.knots = c(", apply(round(sapply(spline.z.attr[cont.fe], "[[", 2), 3), 2, paste, collapse = ", "), "))") spline.fe.c <- paste0("ns(", cont.fe, ", knots = c(", apply(round(sapply(spline.c.attr[cont.fe], "[[", 1), 5), 2, paste, collapse = ", "), "), Boundary.knots = c(", apply(round(sapply(spline.c.attr[cont.fe], "[[", 2), 3), 2, paste, collapse = ", "), "))") fac.fe <- c("year", "market.f", "fr.prodsys", "to.prodsys") fixed.eff.z <- c(spline.fe.z, fac.fe) fixed.eff.c <- c(spline.fe.c, fac.fe) fixed.eff.z.fac <- fixed.eff.z[fixed.eff.z %in% names(datw[, sapply(datw, is.factor)])] fixed.eff.c.fac <- fixed.eff.c[fixed.eff.c %in% names(datw1[, sapply(datw1, is.factor)])] # make up model formulae for the full model and all "drop1" models form.rhs.z <- sapply((length(fixed.eff.z) + 1):1, function(i) { if(i > length(fixed.eff.z)) return(paste(fixed.eff.z[-i], collapse = " + ")) drop.fe <- fixed.eff.z[i] keep.fe <- fixed.eff.z[-grep(gsub("\\)", "\\\\)", gsub("\\(", "\\\\(", drop.fe)), fixed.eff.z)] paste(keep.fe, collapse = " + ") }) length(form.rhs.z) form.rhs.c <- sapply((length(fixed.eff.c) + 1):1, function(i) { if(i > length(fixed.eff.c)) return(paste(fixed.eff.c[-i], collapse = " + ")) drop.fe <- fixed.eff.c[i] keep.fe <- fixed.eff.c[-grep(gsub("\\)", "\\\\)", gsub("\\(", "\\\\(", drop.fe)), fixed.eff.c)] paste(keep.fe, collapse = " + ") }) length(form.rhs.c) (form.z <- paste(any.sp, "~ ", form.rhs.z)) (form.c <- paste(sp, "~ ", form.rhs.c)) names(form.rhs.z) <- names(form.z) <- c("full", rev(fixed.eff.z)) names(form.rhs.c) <- names(form.c) <- c("full", rev(fixed.eff.c)) # any factor levels that have no variation in the response? invar.lev.list.z <- lapply(fixed.eff.z.fac, function(fac) { out <- by(datw[, sp], datw[, fac], var) invar.lev <- names(out)[out == 0] which(datw[, fac] %in% invar.lev) }) names(invar.lev.list.z) <- fixed.eff.z.fac invar.lev.list.z invar.lev.list.c <- lapply(fixed.eff.c.fac, function(fac) { out <- by(datw1[, sp], datw1[, fac], var) invar.lev <- names(out)[out == 0] which(datw1[, fac] %in% invar.lev) }) names(invar.lev.list.c) <- fixed.eff.c.fac invar.lev.list.c nlevels(datw$period) # fit the "zero" (binary) part of the hurdle model # fit full ZI model start.time <- Sys.time() fit.z <- glmmTMB(formula(paste(form.z["full"], "+ (1 | fr.ward) + (1 | to.ward) + (1 | period)")), family = binomial, data = datw) print(Sys.time() - start.time) # fit all drop1 ZI models dim(datw) start.time <- Sys.time() fit.z.stats <- t(sapply(form.z[fixed.eff.z], function(form) { print(paste("Fitting model", match(form, form.z), "of", length(form.z))) fit <- glmmTMB(formula(paste(form, "+ (1 | fr.ward) + (1 | to.ward) + (1 | period)")), family = binomial, data = datw) n <- nrow(model.matrix(fit.z)) fe.var.full <- var(model.matrix(fit.z) %*% fixef(fit.z)$cond) * (n - 1) / n fe.var.red <- var(model.matrix(fit) %*% fixef(fit)$cond) * (n - 1) / n print(Sys.time() - start.time) c(p = anova(fit.z, fit)[2, "Pr(>Chisq)"], fe.var.red = fe.var.red, re.var.red = sum(sapply(VarCorr(fit)$cond, sum)), eff.size = 1 - fe.var.red/fe.var.full, aic.d1 = AIC(fit) - AIC(fit.z), bic.d1 = BIC(fit) - BIC(fit.z)) })) # fit full count model start.time <- Sys.time() fit.c <- glmmTMB(formula(paste(form.c["full"], "+ (1 | fr.ward) + (1 | to.ward) + (1 | period)")), family = "truncated_nbinom2", data = datw1) print(Sys.time() - start.time) # fit all drop1 count models start.time <- Sys.time() fit.c.stats <- t(sapply(form.c[fixed.eff.c], function(form) { print(paste("Fitting model", match(form, form.c), "of", length(form.c))) fit <- glmmTMB(formula(paste(form, "+ (1 | fr.ward) + (1 | to.ward) + (1 | period)")), family = "truncated_nbinom2", data = datw1) n <- nrow(model.matrix(fit.c)) fe.var.full <- var(model.matrix(fit.c) %*% fixef(fit.c)$cond) * (n - 1) / n fe.var.red <- var(model.matrix(fit) %*% fixef(fit)$cond) * (n - 1) / n print(Sys.time() - start.time) c(p = anova(fit.c, fit)[2, "Pr(>Chisq)"], fe.var.red = fe.var.red, re.var.red = sum(sapply(VarCorr(fit)$cond, sum)), eff.size = 1 - fe.var.red/fe.var.full, aic.d1 = AIC(fit) - AIC(fit.c), bic.d1 = BIC(fit) - BIC(fit.c)) })) # make table reporting p-values and summary stats for both zero and count models fit.z.stats[fixed.eff.z, ] names(fixed.eff.z) <- sapply(strsplit(gsub("ns\\(", "", fixed.eff.z), ","), "[", 1) names(fixed.eff.c) <- sapply(strsplit(gsub("ns\\(", "", fixed.eff.c), ","), "[", 1) res.tab <- data.frame( predictor.z = names(fixed.eff.z), eff.size.z = paste0(as.vector(round(100 * fit.z.stats[fixed.eff.z, "eff.size"])), "%"), p.z = as.vector(round(fit.z.stats[fixed.eff.z, "p"], 3)), predictor.c = names(fixed.eff.c), eff.size.c = paste0(as.vector(round(100 * fit.c.stats[fixed.eff.c, "eff.size"])), "%"), p.c = as.vector(round(fit.c.stats[fixed.eff.c, "p"], 3)) ) all(res.tab$predictor.z == res.tab$predictor.c) res.tab[, -4] table(datw$caprine > 0, datw$cattle > 0) by(datw$dist.km, datw$caprine > 0, mean) by(datw$dist.km, datw$cattle > 0, mean) sum(datw$dist.km * datw$cattle) / sum(datw$cattle) sum(datw$dist.km * datw$caprine) / sum(datw$caprine) # check correlations between predictor variables #cormat <- abs(cor(model.matrix(fit.z)[, -1])) #cormat <- abs(cor(model.matrix(fit.c)[, -1])) #diag(cormat) <- 0 #summary(c(cormat)) #hist(cormat[lower.tri(cormat)], nclass = 200) #which(cormat > 0.8, arr.ind = TRUE) #which(cormat > 0.5, arr.ind = TRUE) # ...there are a few over 0.5, but nothing concerning, spline sections and # productions system levels -- we expect strongish negative correlations among # factor levels # wald test for the fixed effects eff.names.z <- names(fixef(fit.z)$cond) p.vals.z <- sapply(fixed.eff.z, function(fe) { print(fe) p.names <- grep(fe, eff.names.z, value = TRUE, fixed = TRUE) R <- t(sapply(p.names, function(en) grep(en, eff.names.z, value = TRUE, fixed = TRUE) == eff.names.z)) + 0 Wald(fit.z, R) }) p.vals.z round(p.vals.z, 3) # wald test for the fixed effects eff.names.c <- names(fixef(fit.c)$cond) p.vals.c <- sapply(fixed.eff.c, function(fe) { print(fe) p.names <- grep(fe, eff.names.c, value = TRUE, fixed = TRUE) R <- t(sapply(p.names, function(en) grep(en, eff.names.c, value = TRUE, fixed = TRUE) == eff.names.c)) + 0 Wald(fit.c, R) }) p.vals.c round(p.vals.c, 3) round(cbind(p.vals.z, p.vals.c), 3) # how do the Wald and LRT p-values compare? plot(fit.z.stats[, "p"], p.vals.z) abline(0, 1) plot(fit.c.stats[, "p"], p.vals.c) abline(0, 1) # where sp == "animals" # model proportion that are sheep and goats if(sp == "animals" & FALSE) { library(lme4) datw1$more.caprine <- as.integer(datw1$caprine/datw1$animals > 0.5) nrow(datw1) length(unique(datw1$fr.to.date)) fit.p <- glmmTMB(cbind(caprine, cattle) ~ year + (1 | fr.ward) + (1 | to.ward), family = binomial, data = datw1) summary(fit.p) fit.p <- glmer(more.caprine ~ year + (1 | fr.ward) + (1 | to.ward), family = binomial, data = datw1) summary(fit.p) fit.p <- glmer(paste("more.caprine ~", form.rhs.c["full"], "+ (1 | fr.ward) + (1 | to.ward)"), family = binomial, data = datw1, control = glmerControl(optimizer = "bobyqa")) summary(fit.p) } # save point -- saved workspace here # save.image(file = paste0("SEEDZ_pinkslip_gravitymodel_v03DRAFT_", sp, "_", Sys.Date(), ".RData")) # save and load here # load("SEEDZ_pinkslip_gravitymodel_v03DRAFT_cattle_2018-09-15.RData") ### load("SEEDZ_pinkslip_gravitymodel_v03DRAFT_cattle_2018-06-14.RData") ### load("SEEDZ_pinkslip_gravitymodel_v03DRAFT_caprine_2018-06-16.RData") #ranef(fit.z) summary(fit.z) ranef.z.fr.ward <- ranef(fit.z)$cond$fr.ward[, "(Intercept)"] names(ranef.z.fr.ward) <- rownames(ranef(fit.z)$cond$fr.ward) qqnorm(ranef.z.fr.ward) qqline(ranef.z.fr.ward) quiet.origins <- names(sort(ranef.z.fr.ward)[1:3]) wards[wards$fullname %in% quiet.origins, c("fullname", "lat", "long", "area.km2", "status", "popdens2012", "popdensglw.cattle", "prodsys", "market")] busy.origins <- names(sort(ranef.z.fr.ward)[length(ranef.z.fr.ward) - (0:2)]) wards[wards$fullname %in% busy.origins, c("fullname", "lat", "long", "area.km2", "status", "popdens2012", "popdensglw.cattle", "prodsys", "market")] sapply(quiet.origins, function(qo) mean(datw[datw$fr.ward == qo, any.sp])) sapply(busy.origins, function(qo) mean(datw[datw$fr.ward == qo, any.sp])) ranef.z.to.ward <- ranef(fit.z)$cond$to.ward[, "(Intercept)"] names(ranef.z.to.ward) <- rownames(ranef(fit.z)$cond$to.ward) qqnorm(ranef.z.to.ward) qqline(ranef.z.to.ward) quiet.destinations <- names(sort(ranef.z.to.ward)[1:3]) wards[wards$fullname %in% quiet.destinations, c("fullname", "area.km2", "status", "popdens2012", "popdensglw.cattle", "prodsys", "market")] sapply(quiet.destinations, function(qo) mean(datw[datw$to.ward == qo, any.sp])) busy.destinations <- names(sort(ranef.z.to.ward)[length(ranef.z.to.ward) - (0:2)]) wards[wards$fullname %in% busy.destinations, c("fullname", "long", "area.km2", "status", "popdens2012", "popdensglw.cattle", "prodsys", "market")] sapply(busy.destinations, function(qo) mean(datw[datw$to.ward == qo, any.sp])) # calculate an R-squared on the liability scale for the zero model # (latent-scale marginal R2_GLMM) n <- nrow(model.matrix(fit.z)) re.var.full <- sum(sapply(VarCorr(fit.z)$cond, sum)) fe.var.full <- var(model.matrix(fit.z) %*% fixef(fit.z)$cond) * (n - 1) / n fe.var.full / (re.var.full + re.var.full) # cattle: 40% # calculate an R-squared on the latent scale for the count model # (latent-scale marginal R2_GLMM) n <- nrow(model.matrix(fit.c)) re.var.full <- sum(sapply(VarCorr(fit.c)$cond, sum)) fe.var.full <- var(model.matrix(fit.c) %*% fixef(fit.c)$cond) * (n - 1) / n fe.var.full / (re.var.full + re.var.full) # cattle: 24% datw.full[ intersect( intersect(grep("kikatiti", datw.full$fr.ward), grep("kiusa", datw.full$to.ward)), grep("2015-01", datw.full$period)), "cattle"] datw.full[ intersect( intersect(grep("poli", datw.full$fr.ward), grep("kiusa", datw.full$to.ward)), grep("2015-01", datw.full$period)), "cattle"] datw.full["arusha/arusha/bwawani.arusha/arushaurban/elerai.2009-01", "cattle"] datw.full["arusha/monduli/meserani.arusha/arushaurban/elerai.2009-01", "cattle"] datw.full["manyara/simanjiro/naberera.arusha/arushaurban/elerai.2009-01", "cattle"] datw.full["arusha/arusha/bwawani.arusha/meru/kikatiti.2009-01", "cattle"] datw.full["arusha/monduli/meserani.arusha/meru/kikatiti.2009-01", "cattle"] datw.full["manyara/simanjiro/naberera.arusha/meru/kikatiti.2009-01", "cattle"] datw.full["arusha/arusha/bwawani.arusha/arusha/kisongo.2009-01", "cattle"] datw.full["arusha/monduli/meserani.arusha/arusha/kisongo.2009-01", "cattle"] datw.full["manyara/simanjiro/naberera.arusha/arusha/kisongo.2009-01", "cattle"] head(datw1) # plot predictions from the models pred.plot <- function(fit, data, newdata, cent, minmax = min, pos = -minmax(-2, -4), offset = 0, xcat.col = NULL, xcat.lty = NULL, x.at = sort(c(10^(-6:6), 5 * 10^(-6:6))), jit.fac = 50, rug.lwd = 0.5, x.fn = log10, ...) { nlev <- sapply(newdata, nlevels) xcat <- names(nlev)[nlev > 1.5] xvar <- sapply(newdata[, names(nlev)[nlev < 0.5]], var) xcont <- names(xvar)[xvar > 0] Var <- sum(unlist(VarCorr(fit)$cond)) X <- model.matrix(lm(terms(fit), data = data), data = newdata) newdata$eta <- X %*% fixef(fit)$cond if(family(fit)$link == "logit" & family(fit)$family == "binomial") { newdata$pred.resp <- plogis(newdata$eta - 0.5 * Var * tanh(newdata$eta * (1 + 2 * exp(-0.5 * Var))/6)) } if(family(fit)$link == "log" & family(fit)$family == "truncated_nbinom2") { lambda <- exp(newdata$eta + 0.5 * Var) newdata$pred.resp <- lambda / (1 - dnbinom(0, mu = lambda, size = sigma(fit))) } ylim <- c(0, max(newdata$pred.resp)) * 1.1 print(ylim) xlim <- range(data[, xcont]) + c(-1, 1) * diff(range(data[, xcont])) / 40 plot(formula(paste("pred.resp ~", xcont)), data = newdata, type = "n", axes = FALSE, ylim = ylim, xlim = xlim, ...) rug(jitter(data[fit$frame[[1]] > 0.5, xcont], factor = jit.fac), col = alpha("black", 0.1), side = 3, lwd = rug.lwd) rug(jitter(data[fit$frame[[1]] < 0.5, xcont], factor = jit.fac), col = alpha("black", 0.1), side = 1, lwd = rug.lwd) axis(2) axis(1, labels = x.at, at = x.fn(x.at) - cent) box() lapply(1:nlevels(newdata[, xcat]), function(i) { xcat.lev <- levels(newdata[, xcat])[i] points(formula(paste("pred.resp ~", xcont)), data = newdata[newdata[, xcat] == xcat.lev, ], type = "l", lty = ifelse(is.null(xcat.lty), i, xcat.lty[i]), col = ifelse(is.null(xcat.col), "black", xcat.col[i]) ) text(x = minmax(newdata[, xcont]), y = newdata$pred.resp[newdata[, xcont] == minmax(newdata[, xcont]) & newdata[, xcat] == xcat.lev], labels = xcat.lev, pos = pos, offset = offset) }) } # distance & market.f newdata <- expand.grid( log10.dist.km.cent = seq(min(datw$log10.dist.km.cent), max(datw$log10.dist.km.cent), length.out = 100), log10.fr.pop2012.cent = 0, log10.to.pop2012.cent = 0, log10.fr.pop.animals.cent = 0, log10.to.pop.animals.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, month = 0, year = "2015", fr.prodsys = "Agropastoral", # 57% of origin wards are agropastoral to.prodsys = "Agropastoral", # 40% of destination wards are agropastoral market.f = levels(datw$market.f), fr.ward = fr.wards[1], to.ward = to.wards[1], period = levels(datw$period)[1]) newdata[, any.sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) fr.to.mkt <- do.call("rbind", strsplit(as.character(newdata$market.f), "-")) xcat.col <- c("black", "black", "black", "red", "red", "red", "blue", "blue", "blue") xcat.lty <- rep(1:3, 3) levels(newdata$market.f) pred.plot(fit = fit.z, data = datw, cent = attr(datw$log10.dist.km.cent, "scaled:center"), ylab = "Probability of livestock movement", xlab = expression(Distance ~ (km)), rug.lwd = 0.05, minmax = min, newdata = newdata, xcat.col = xcat.col, xcat.lty = xcat.lty, col = match(fr.to.mkt[, 1], unique(fr.to.mkt[, 1])), offset = 1000) title("Gravity-model-predicted probability of cattle\nmovement, by straight-line distance and market type") legend("topright", legend = c("", levels(newdata$market.f)), lty = c(0, xcat.lty), col = c("white", xcat.col), bty = "n", cex = 0.8) rm(newdata) # distance & fr.prodsys newdata <- expand.grid( log10.dist.km.cent = seq(min(datw$log10.dist.km.cent), max(datw$log10.dist.km.cent), length.out = 100), log10.fr.pop2012.cent = 0, log10.to.pop2012.cent = 0, log10.fr.pop.animals.cent = 0, log10.to.pop.animals.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, month = 0, year = "2015", fr.prodsys = levels(datw$to.prodsys), # 57% of origin wards are agropastoral to.prodsys = "Agropastoral", # 40% of destination wards are agropastoral market.f = levels(datw$market.f)[4], fr.ward = fr.wards[1], to.ward = to.wards[1], period = levels(datw$period)[1]) newdata[, any.sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.z, data = datw, cent = attr(datw$log10.dist.km.cent, "scaled:center"), ylab = "Probability of livestock movement", xlab = expression(Distance ~ (km)), minmax = min, newdata = newdata) title("Gravity-model-predicted probability of cattle\nmovement, by straight-line distance and origin production system") rm(newdata) # distance & to.prodsys newdata <- expand.grid( log10.dist.km.cent = seq(min(datw$log10.dist.km.cent), max(datw$log10.dist.km.cent), length.out = 100), log10.fr.pop2012.cent = 0, log10.to.pop2012.cent = 0, log10.fr.pop.animals.cent = 0, log10.to.pop.animals.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, month = 0, year = "2015", fr.prodsys = "Agropastoral", # 57% of origin wards are agropastoral to.prodsys = levels(datw$to.prodsys), # 40% of destination wards are agropastoral market.f = levels(datw$market.f)[4], fr.ward = fr.wards[1], to.ward = to.wards[1], period = levels(datw$period)[1]) newdata[, any.sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.z, data = datw, cent = attr(datw$log10.dist.km.cent, "scaled:center"), ylab = "Probability of livestock movement", xlab = expression(Distance ~ (km)), minmax = min, newdata = newdata) title("Gravity-model-predicted probability of cattle\nmovement, by straight-line distance and destination production system") rm(newdata) # destination population density & market type newdata <- expand.grid( log10.dist.km.cent = 0, log10.fr.pop2012.cent = 0, log10.to.pop2012.cent = seq(min(datw$log10.to.pop2012.cent), max(datw$log10.to.pop2012.cent), length.out = 100), log10.fr.pop.animals.cent = 0, log10.to.pop.animals.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, month = 0, year = "2015", secondary.f = levels(datw$secondary.f), # fr.status = "Rural Ward", # to.status = "Urban Ward", fr.ward = fr.wards[1], to.ward = to.wards[1]) newdata[, any.sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.z, data = datw, cent = attr(datw$log10.to.pop2012.cent, "scaled:center"), ylab = "Probability of livestock movement", xlab = expression(Ward ~ population ~ density ~ (km^-2)), minmax = max, x.at = c(500, 1000, 2000, 5000, 10000, 20000, 50000, 100000), newdata = newdata) title("Gravity-model-predicted probability of livestock movement,\nby destination population density and origin market type") rm(newdata) # month and year newdata <- expand.grid( log10.dist.km.cent = -0.391, # -0.391 equates to 50 km log10.fr.pop2012.cent = 0, log10.to.pop2012.cent = 0, log10.fr.pop.animals.cent = 0, log10.to.pop.animals.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, market.f = levels(datw$market.f)[5], month = 0:11, year = levels(datw$year), fr.prodsys = "Agropastoral", # 57% of origin wards are agropastoral to.prodsys = "Agropastoral", # 40% of destination wards are agropastoral fr.ward = fr.wards[1], to.ward = to.wards[1], period = levels(datw$period)[1]) newdata[, any.sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.z, data = datw, cent = 0, ylab = "Probability of livestock movement", xlab = "Month", minmax = max, x.at = 0:11, x.fn = I, jit.fac = 2, rug.lwd = 0.05, newdata = newdata) title("Gravity-model-predicted probability of livestock movement,\nby calendar month and year") rm(newdata) # plot predictions from count model # fr.prodsys and distance newdata <- expand.grid( log10.dist.km.cent = seq(min(datw1$log10.dist.km.cent), max(datw1$log10.dist.km.cent), length.out = 100), log10.fr.pop2012.cent = 0, log10.to.pop2012.cent = 0, log10.fr.pop.animals.cent = 0, log10.to.pop.animals.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, month = 0, year = "2015", fr.prodsys = "Agropastoral", # 57% of origin wards are agropastoral to.prodsys = levels(datw$to.prodsys), # 40% of destination wards are agropastoral market.f = levels(datw$market.f)[4], fr.ward = fr.wards[1], to.ward = to.wards[1], period = levels(datw$period)[1]) newdata[, sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.c, data = datw1, cent = attr(datw$log10.dist.km.cent, "scaled:center"), ylab = "Number of livestock moved", xlab = expression(Distance ~ (km)), minmax = max, pos = 2, newdata = newdata) title("Gravity-model-predicted number of livestock moved,\nby straight-line distance and destination production system") rm(newdata) # to.prodsys and distance newdata <- expand.grid( log10.dist.km.cent = seq(min(datw1$log10.dist.km.cent), max(datw1$log10.dist.km.cent), length.out = 100), log10.fr.pop2012.cent = 0, log10.to.pop2012.cent = 0, log10.fr.pop.animals.cent = 0, log10.to.pop.animals.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, month = 0, year = "2015", fr.prodsys = levels(datw$to.prodsys), # 57% of origin wards are agropastoral to.prodsys = "Agropastoral", # 40% of destination wards are agropastoral market.f = levels(datw$market.f)[4], fr.ward = fr.wards[1], to.ward = to.wards[1], period = levels(datw$period)[1]) newdata[, sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.c, data = datw1, cent = attr(datw$log10.dist.km.cent, "scaled:center"), ylab = "Number of livestock moved", xlab = expression(Distance ~ (km)), minmax = max, pos = 2, newdata = newdata) title("Gravity-model-predicted number of livestock moved,\nby straight-line distance and origin production system") rm(newdata) # fr.status & distance newdata <- expand.grid( log10.dist.km.cent = seq(min(datw1$log10.dist.km.cent), max(datw1$log10.dist.km.cent), length.out = 100), log10.fr.popdens2012.cent = 0, log10.to.popdens2012.cent = 0, log10.fr.pop.cattle.cent = 0, log10.to.pop.cattle.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, month = 0, year = "2015", secondary.f = levels(datw1$secondary.f)[1], fr.status = levels(datw1$fr.status), to.status = "Urban Ward", period = rev(levels(datw1$period))[1], fr.ward = fr.wards[1], to.ward = to.wards[1]) newdata[, sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.c, data = datw1, cent = attr(datw$log10.fr.area.km2.cent, "scaled:center"), ylab = "Number of livestock moved", xlab = expression(Distance ~ (km)), minmax = max, pos = 2, newdata = newdata) title("Gravity-model-predicted number of livestock moved,\nby straight-line distance and type of origin") rm(newdata) # to.status & distance newdata <- expand.grid( log10.dist.km.cent = seq(min(datw1$log10.dist.km.cent), max(datw1$log10.dist.km.cent), length.out = 100), log10.fr.popdens2012.cent = 0, log10.to.popdens2012.cent = 0, log10.fr.pop.cattle.cent = 0, log10.to.pop.cattle.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, year = "2015", month = 0, secondary.f = levels(datw$secondary.f)[1], fr.status = "Rural Ward", to.status = levels(datw1$to.status), period = rev(levels(datw1$period))[1], fr.ward = fr.wards[1], to.ward = to.wards[1]) newdata[, sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.c, data = datw1, cent = attr(datw$log10.fr.area.km2.cent, "scaled:center"), ylab = "Number of livestock moved", xlab = expression(Distance ~ (km)), minmax = max, pos = 2, newdata = newdata) title("Gravity-model-predicted number of livestock moved,\nby straight-line distance and type of destination") rm(newdata) # month and year newdata <- expand.grid( log10.dist.km.cent = -0.391, # -0.391 equates to 50 km log10.fr.pop2012.cent = 0, log10.to.pop2012.cent = 0, log10.fr.pop.animals.cent = 0, log10.to.pop.animals.cent = 0, log10.fr.area.km2.cent = 0, log10.to.area.km2.cent = 0, market.f = levels(datw1$market.f)[5], month = 0:11, year = levels(datw1$year), fr.prodsys = "Agropastoral", # 57% of origin wards are agropastoral to.prodsys = "Agropastoral", # 40% of destination wards are agropastoral fr.ward = fr.wards[1], to.ward = to.wards[1], period = levels(datw1$period)[1]) newdata[, sp] <- 1 names(newdata) <- gsub("animals", sp, names(newdata)) pred.plot(fit = fit.c, data = datw1, cent = 0, ylab = "Number of livestock moved", xlab = "Month", minmax = max, x.at = 0:11, x.fn = I, pos = 1, jit.fac = 1, newdata = newdata) title("Gravity-model-predicted number of livestock moved,\nby calendar month and year") rm(newdata) #boxplot(popdens2012 ~ factor(status, levels(datw$fr.status)), data = wards, log = "y") #boxplot(pop2012 ~ factor(status, levels(datw$fr.status)), data = wards, log = "y") #boxplot(area.km2 ~ factor(status, levels(datw$fr.status)), data = wards, log = "y") #boxplot(fr.popdens2012 ~ fr.status, data = datw1, log = "y") #boxplot(to.popdens2012 ~ to.status, data = datw1, log = "y") #boxplot(fr.pop2012 ~ fr.status, data = datw1, log = "y") #boxplot(to.pop2012 ~ to.status, data = datw1, log = "y") cor(datw1$fr.popdens2012, as.numeric(datw1$fr.status), method = "spearman") cor(datw1$to.popdens2012, as.numeric(datw1$to.status), method = "spearman") cor(wards$popdens2012, as.numeric(factor(wards$status, levels(datw$fr.status))), method = "spearman") cor(wards$pop2012, as.numeric(factor(wards$status, levels(datw$fr.status))), method = "spearman") cor(wards$area.km2, as.numeric(factor(wards$status, levels(datw$fr.status))), method = "spearman") cor(datw1$fr.pop2012, as.numeric(datw1$fr.status), method = "spearman") cor(datw1$to.pop2012, as.numeric(datw1$to.status), method = "spearman") #plot(animals ~ to.popdens2012, data = datw1, log = "xy") #plot(animals ~ factor(round(log10.to.popdens2012.cent)), data = datw1, log = "y") table(factor(round(datw1$log10.to.popdens2012.cent))) ############### # simulations # ############### # random effects and dispersion parameter of negative binomial Sigma2.z <- VarCorr(fit.z)$cond Sigma2.c <- VarCorr(fit.c)$cond theta <- sigma(fit.c) # date-hour for output files time.stamp <- gsub(" ", "-", substr(Sys.time(), 1, 13)) #set.seed(round(as.numeric(Sys.time()) * 1000 - 1528300000000)) # output movement network parameters from hurdle model for 12 months in 2015 sim.monthly2015 <- lapply(sort(unique(datw$month)), function(m) { # m <- 0 # make subset of full data set datw.p <- (datw.full[datw.full$month == m & datw.full$year == "2015", ]) dim(datw.p) # condition on the origin and destination wards and the period ranef.cond <- c("fr.ward", "to.ward", "period") # define response variables (any animal and number of animals given >= 1 animal) any.sp.sim <- paste0(any.sp, ".sim") sp.sim <- paste0(sp, ".sim") sum(datw.p[, sp]) datw.p[, any.sp] <- as.integer(datw.p[, sp] > 0.5) table(datw.p[, any.sp]) # binomial part # fixed effects part MM.z <- model.matrix(formula(form.z), data = datw.p) rownames(MM.z) <- rownames(datw.p) B.z <- fixef(fit.z)$cond #[-grep("period", names(fixef(fit.z)$cond))] eta.f <- c(MM.z %*% B.z) names(eta.f) <- rownames(MM.z) sum(plogis(eta.f)) # random effects part re.tab <- sapply(names(Sigma2.z), function(re) { if(!re %in% ranef.cond) { out <- rnorm(nlevels(datw.p[, re]), sd = sqrt(Sigma2.z[[re]])) names(out) <- levels(datw.p[, re]) } else { out <- ranef(fit.z)$cond[[re]][, "(Intercept)"] names(out) <- levels(fit.z$frame[, re]) } out[as.character(datw.p[, re])] }) rownames(re.tab) <- rownames(datw.p) dim(re.tab) colSums(is.na(re.tab)) head(re.tab) re.tab[is.na(re.tab[, "fr.ward"]), "fr.ward"] <- -Inf # because plogis(-Inf) returns 0 eta <- rowSums(cbind(eta.f, re.tab)) summary(eta) #eta.adj <- sum(jensen.adjust(plogis(eta), sum(unlist(Sigma2.z[-match(ranef.cond, names(Sigma2.z))]))) ) # sum(plogis(eta[rownames(datw)[rownames(datw) %in% names(eta)]])) # sum(rbinom(length(eta[rownames(datw)[rownames(datw) %in% names(eta)]]), 1, # plogis(eta[rownames(datw)[rownames(datw) %in% names(eta)]]))) # sum(rbinom(length(eta.f[rownames(datw)[rownames(datw) %in% names(eta.f)]]), 1, # plogis(eta.f[rownames(datw)[rownames(datw) %in% names(eta.f)]]))) # sum(datw.p[, any.sp]) datw.p$pred.prob.mvt <- plogis(eta) rm(eta, eta.f, re.tab) # negative binomial part # simulate animal numbers moved for all rows MM.c <- model.matrix(formula(form.c), data = datw.p)#[1:30,] eta.f <- c(MM.c %*% fixef(fit.c)$cond) # random part of model re.tab <- sapply(names(Sigma2.c), function(re) { if(!re %in% ranef.cond) { out <- rnorm(nlevels(datw.p[, re]), sd = sqrt(Sigma2.c[[re]])) names(out) <- levels(datw.p[, re]) } else { out <- ranef(fit.c)$cond[[re]][, "(Intercept)"] names(out) <- rownames(ranef(fit.c)$cond[[re]]) } out <- out[as.character(datw.p[, re])] if(any(is.na(out))) { names(out) <- as.character(datw.p[, re]) imputed.re.names <- unique(names(out[is.na(out)])) imputed.re <- rnorm(length(imputed.re.names), sd = sqrt(Sigma2.c[[re]])) names(imputed.re) <- imputed.re.names out[names(out) %in% imputed.re.names] <- imputed.re[names(out[names(out) %in% imputed.re.names])] } out }) rownames(re.tab) <- NULL dim(re.tab) colSums(is.na(re.tab)) head(re.tab) re.tab[rowSums(is.na(re.tab)) > 0, ] eta <- rowSums(cbind(eta.f, re.tab)) datw.p$pred.n.mvt <- exp(eta) # scale up the probilities and rates to account for the length(unique(dat$batch)) / 20 # subsample (infl.fac <- 20 / length(unique(dat$batch))) # how would increasing the number of permits affect # (a) the probability of seeing any permits, p? # (b) the number of permits found, given any permits are found? # at the extremes, all the effect will be in either # (a) infl.fac times as many directed edges will have permits (assuming p is very small) # (b) the same number of edges will have permits but the number of animals will be times infl.fac. # if probability of detecting movement with in t time is p, the probability of # detecting it in k*t time is: # probability of not detecting mvt in t is 1 - p # probability of not detecting mvt in k*t is (1 - p)^k # probability of detecting mvt in k*t time is 1 - (1 - p)^k # start by inflating the probability of finding any permit datw.p$pred.prob.mvt.adj <- 1 - (1 - datw.p$pred.prob.mvt)^infl.fac # how much has the number of permits increased by? mean(datw.p$pred.prob.mvt.adj) / mean(datw.p$pred.prob.mvt) # make up the difference infl.fac.c <- infl.fac / (mean(datw.p$pred.prob.mvt.adj) / mean(datw.p$pred.prob.mvt)) datw.p$pred.n.mvt.adj <- datw.p$pred.n.mvt * infl.fac.c # overall inflation: mean(datw.p$pred.prob.mvt.adj) / mean(datw.p$pred.prob.mvt) * infl.fac.c # simulate any movement from binomial datw.p[, any.sp.sim] <- rbinom(nrow(datw.p), 1, datw.p$pred.prob.mvt.adj) table(datw.p[, any.sp.sim], exclude = NULL) mean(datw.p[, any.sp.sim]) mean(datw.p[, any.sp]) mean(datw.p[rownames(datw)[rownames(datw) %in% rownames(datw.p)], any.sp.sim]) mean(datw.p[rownames(datw)[rownames(datw) %in% rownames(datw.p)], any.sp]) table(datw.p[, any.sp.sim], datw.p[, any.sp]) cor(datw.p[, any.sp.sim], datw.p[, any.sp]) # simulate from zero-truncated negative binomial datw.p$temp.count <- rnbinom(n = nrow(datw.p), size = sigma(fit.c), mu = datw.p$pred.n.mvt.adj) while(any(datw.p$temp.count == 0)) { zero.counts <- datw.p$temp.count == 0 n.zero.counts <- sum(zero.counts) print(n.zero.counts) datw.p$temp.count[zero.counts] <- rnbinom(n = n.zero.counts, size = sigma(fit.c), mu = datw.p$pred.n.mvt[zero.counts]) } datw.p[, sp.sim] <- datw.p[, any.sp.sim] * datw.p$temp.count cor(datw.p[, sp], datw.p[, sp.sim], method = "spearman") #abline(0, 1) mean(datw.p[, sp.sim]) #boxplot(list(sim = datw$animals.sim, real = datw$animals)) if(FALSE) { out.degree <- tapply(datw.p[, any.sp], datw.p$fr.ward, sum) in.degree <- tapply(datw.p[, any.sp], datw.p$to.ward, sum) out.degree.sim <- tapply(datw.p[, any.sp.sim], datw.p$fr.ward, sum) in.degree.sim <- tapply(datw.p[, any.sp.sim], datw.p$to.ward, sum) par(mfrow = c(3, 2)) xlim.out <- c(0, max(out.degree,out.degree.sim)) xlim.in <- c(0, max(in.degree, in.degree.sim)) hist(out.degree, nclass = 50, xlim = xlim.out) hist(in.degree, nclass = 50, xlim = xlim.in) hist(out.degree.sim, nclass = 50, xlim = xlim.out) hist(in.degree.sim, nclass = 50, xlim = xlim.in) cor(out.degree.sim, out.degree, method = "spearman") plot(out.degree, out.degree.sim) cor(in.degree.sim, in.degree, method = "spearman") plot(in.degree, in.degree.sim) summary(in.degree) summary(in.degree.sim) summary(out.degree) summary(out.degree.sim) } # create empty matrix of wards nlevels(datw.p$fr.ward) nlevels(datw.p$to.ward) length(unique(c(levels(datw.p$fr.ward), levels(datw.p$to.ward)))) mu1.z <- structure( rep(NA, nlevels(datw.p$to.ward)^2), dim = c(nlevels(datw.p$to.ward), nlevels(datw.p$to.ward)), dimnames = list(outmove = levels(datw.p$to.ward), inmove = levels(datw.p$to.ward))) mu1.z[1:10, 1:10] dim(mu1.z) sim.moves <- mu1.c <- mu1.z # fill probabilities, movement rates and simulated moves # (takes < 30 s) for(i in 1:nrow(datw.p)) { mu1.z[as.character(datw.p$fr.ward[i]), as.character(datw.p$to.ward[i])] <- datw.p$pred.prob.mvt.adj[i] mu1.c[as.character(datw.p$fr.ward[i]), as.character(datw.p$to.ward[i])] <- datw.p$pred.n.mvt.adj[i] # sim.moves[as.character(datw.p$fr.ward[i]), as.character(datw.p$to.ward[i])] <- # datw.p[i, sp.sim] }; rm(i) mu1.z[is.na(mu1.z)] <- 0 mu1.c[is.na(mu1.c)] <- 0 #sim.moves[is.na(sim.moves)] <- 0 # write the parameter matrices to disc mtxt <- formatC(m + 1, flag = "0", width = 2) write.csv(round(mu1.z, 6), paste0("networks/simmoves.monthly.2015/", sp, "/", sp, ".month", mtxt, ".mvt.matrix.prob.", time.stamp,".csv")) write.csv(round(mu1.c, 4), paste0("networks/simmoves.monthly.2015/", sp, "/", sp, ".month", mtxt, ".mvt.matrix.rate.", time.stamp,".csv")) write.csv(cbind(theta = round(theta, 4)), paste0("networks/simmoves.monthly.2015/", sp, "/", sp, ".month", mtxt, ".mvt.matrix.rate.theta.", time.stamp,".csv"), row.names = FALSE) # write the simulated movements to disc #write.csv(sim.moves, paste0("networks/simmoves.monthly.2015/", sp, "/", sp, ".month", mtxt, ".mvt.matrix.simmoves.", time.stamp,".csv")) list( sp = sp, year = "2015", calendar.month = mtxt, time.stamp = time.stamp, mu.z = mu1.z, mu.c = mu1.c, theta = theta) #, sim.moves = sim.moves) }) # calculate expected number of animals for each row sp.pred <- paste0(sp, ".pred") datw$pred.z <- predict(fit.z) datw$pred.c <- predict(fit.c, newdata = datw) datw[, sp.pred] <- datw$pred.z * datw$pred.c boxplot(formula(paste(sp.pred, "~ factor(fr.ward)")) , data = datw[datw$fr.secondary == 1 & (datw[, any.sp] == 1), ], log = "y", ylim = c(min(datw[datw[, any.sp] == 1, sp.pred]), max(datw[, sp]))) boxplot(formula(paste(sp, "~ factor(fr.ward)")) , data = datw[datw$fr.secondary == 1 & (datw[, any.sp] == 1), ], log = "y", ylim = c(min(datw[datw[, any.sp] == 1, sp.pred]), max(datw[, sp]))) cor(datw[, sp.pred], datw[, sp], method = "spearman") sum(datw[, sp.pred]) sum(datw[, sp]) tapply(datw[datw$fr.secondary == 1, sp.pred], factor(datw$fr.ward[datw$fr.secondary == 1]), sum) tapply(datw[datw$fr.secondary == 1, sp], factor(datw$fr.ward[datw$fr.secondary == 1]), sum) # draw a couple of maps of the simulated data map.fr <- get_googlemap( center = apply(datw[datw[, any.sp] == 1, c("fr.long", "fr.lat")], 2, function(x) mean(range(x, na.rm = TRUE))), zoom = 7, size = c(350, 410), maptype = "terrain", color = "bw", scale = 2) sim.or.real <- sample(c(any.sp, rep(any.sp.sim, 1))) lapply(1:length(sim.or.real), function(i) { movemapzoom <- ggmap(map.fr) + geom_segment(data = datw[datw[, sim.or.real[i]] == 1, ], mapping = aes(x = jitter(fr.long, factor = 5), y = jitter(fr.lat, factor = 5), xend = jitter(to.long, factor = 5), yend = jitter(to.lat, factor = 5), colour = period), arrow = arrow(length = unit(0.01, "npc")), size = 0.1) ggsave(paste0("network.map.", sim.or.real[i], i, ".jpg"), movemapzoom) }) table(datw[, any.sp]) table(datw[, any.sp.sim]) # fit models to simulated data, compare estimates and residuals to real data fit.z.sim <- update(fit.z, paste0(any.sp.sim, "~ ."), data = datw) summary(fit.z.sim) plot(fixef(fit.z)$cond, fixef(fit.z.sim)$cond, cex = 2 * (1 - coef(summary(fit.z.sim))$cond[, "Pr(>|z|)"])) abline(0, 1) # residuals plot, slow (< 1 min) plot(fitted(fit.z), resid(fit.z)) points(fitted(fit.z.sim), resid(fit.z.sim), col = "red", pch = 3, cex = 0.6) fit.c.sim <- update(fit.c, paste0(sp.sim, "~ ."), data = datw[datw$any.animals.sim == 1, ]) summary(fit.c.sim) plot(fixef(fit.c)$cond, fixef(fit.c.sim)$cond, cex = 2 * (1 - coef(summary(fit.c.sim))$cond[, "Pr(>|z|)"])) abline(0, 1) # residuals plot plot(fitted(fit.c), resid(fit.c)) points(fitted(fit.c.sim), resid(fit.c.sim), col = "red", pch = 3, cex = 0.6) ###################################################################### # plots, maps, other non-essential code below this point # ###################################################################### if(F) { fit.hurdle <- hurdle(form.c, data = datw, dist = "negbin") AIC(fit.hurdle) summary(fit.hurdle) # compute an Rsq for the count part beta.c <- coef(fit.hurdle, model = "count") X.c <- model.matrix(fit.hurdle, model = "count") S2f.c <- var(X.c %*% beta.c) lambda <- mean(X.c %*% beta.c) theta <- fit.hurdle$theta S2e.c <- trigamma(1/(1/lambda + 1/theta)) (Rsq.c <- S2f.c / (S2e.c + S2f.c)) # compute an Rsq for the zero part beta.z <- coef(fit.hurdle, model = "zero") X.z <- model.matrix(fit.hurdle, model = "zero") S2f.z <- var(X.z %*% beta.z) p <- plogis(mean(X.c %*% beta.c)) # compare with mean(datw$any.animals) S2e.z <- 1/(p*(1-p)) (Rsq.z <- S2f.z / (S2e.z + S2f.z)) mean(predict(fit.hurdle, type = "prob")) # try fitting the binomial part in MCMCglmm prior <- list( R = list(V = 1, fix = 1), G = list( G1 = list(V = 1, nu = 0.002), G2 = list(V = 1, nu = 0.002))) library(MCMCglmm) fit.mcmc.z <- MCMCglmm(form.z, random = ~ fr.ward + to.ward, family = "categorical", prior = prior, data = datw, burnin = 30000, nitt = 130000, thin = 100) plot(fit.mcmc.z) est <- apply(fit.mcmc$Sol, 2, median) ci95 <- HPDinterval(fit.mcmc$Sol) data.frame(ss = i, b.name = names(unlist(B))[-2], b = unlist(B)[-2], est = est, ci95) prior <- list( R = list(V = 1, nu = 0.002)) fit.mcmc <- MCMCglmm( animals ~ log10.dist.km + factor(period), # random = ~ fr.ward + to.ward, family = "poisson", prior = prior, data = datw, burnin = 30000, nitt = 130000, thin = 100) plot(fit.mcmc) est <- apply(fit.mcmc$Sol, 2, median) ci95 <- HPDinterval(fit.mcmc$Sol) data.frame(ss = i, b.name = names(unlist(B))[-2], b = unlist(B)[-2], est = est, ci95) } # slow, so skip for now, but might be useful for partial R^2: if(FALSE) { } if(FALSE) { AIC(fit.glmmadmb.c) start.time <- Sys.time() fit.glmmadmb.c.ixn <- glmmadmb(animals ~ ns(log10.dist.km.cent, 3) + ns(log10.fr.popdens2012.cent, 3) * fr.status + ns(log10.to.popdens2012.cent, 3) * to.status + period + (1 | fr.ward) + (1 | to.ward), family = "truncnbinom", data = datw1) Sys.time() - start.time AIC(fit.glmmadmb.c.ixn) AIC(fit.glmmadmb.c.ixn) fit.glmmadmb.c <- fit.glmmadmb.c.ixn } summary(fit.glmmTMB.c) plot(animals ~ to.status, data = datw1, log = "y") plot(animals ~ period, data = datw1, log = "y") table(datw1$to.status) if(FALSE) { # check results against ZI-poisson in MCMCglmm prior <- list( R = list(V = 1, nu = 0.002), G = list( G1 = list(V = 1, nu = 0.002), G2 = list(V = 1, nu = 0.002))) library(MCMCglmm) fit.mcmc <- MCMCglmm(formula(form.c["full"]), random = ~ fr.ward + to.ward, family = "ztpoisson", prior = prior, data = datw1, burnin = 3000, nitt = 13000, thin = 10) plot(fit.mcmc) est <- apply(fit.mcmc$Sol, 2, median) plot(est, coef(fit.glmmadmb.c)) # pretty close abline(0, 1) #ci95 <- HPDinterval(fit.mcmc$Sol) #data.frame(ss = i, b.name = names(unlist(B))[-2], b = unlist(B)[-2], est = est, ci95) var(as.matrix(fit.mcmc$X) %*% est) var(as.matrix(fit.mcmc$X)[ , -11:-12] %*% est[-11:-12]) var(as.matrix(fit.mcmc$X)[ , c(-5:-7, -11:-12)] %*% est[c(-5:-7, -11:-12)]) } # slow, so skip for now, but might be useful for partial R^2 if(FALSE) { # fit full model and all drop1 models start.time <- Sys.time() fit.glmmadmb.c.list <- lapply(form.c, function(form) { print(form) glmmadmb(formula(paste(form, "+ (1 | fr.ward) + (1 | to.ward)")), family = "truncnbinom", data = datw1) })#, mc.cores = detectCores()) Sys.time() - start.time summary(fit.glmmadmb.c.list$full) # compare models to full model mod.comp.c <- sapply(fit.glmmadmb.c.list[fixed.eff], function(fit) { n <- fit.glmmadmb.c.list$full$n mss.full <- var(model.matrix(fit.glmmadmb.c.list$full) %*% fixef(fit.glmmadmb.c.list$full)) * (n - 1) mss.red <- var(model.matrix(fit) %*% fixef(fit)) * (n - 1) c(p = anova(fit, fit.glmmadmb.c.list$full)[2, "Pr(>Chi)"], eff.size = 1 - mss.red/mss.full) }) } plot(density(datw$dist.km[datw$caprine > 0], adjust = adjust), lty = 2, xlim = c(0, 350), main = "Distribution of movement distances", xlab = "kilometres") lines(density(datw$dist.km[datw$cattle > 0], adjust = adjust), lty = 1) legend("topright", legend = c("Cattle", "Sheep & goats"), lty = 1:2)