# Analysis script, Danks et al 2020, "Modelling mycorrhizal fungi dispersal by the mycophagous swamp wallaby (Wallabia bicolor)"
# Last modified: 2020 09 16
#
# C. E. Timothy Paine
# cetpaine@gmail.com
#
# This script predicts at what distance swamp wallabies defecate ingested fungal spores, based on excretion rates from Danks 2012, Fungal Ecology, and on their movement patterns.
# Approach: We extract all 69-hour chunks from each movement history. We calculate distances from start for each. Multiply those distances by the probability that spore defecation occurred at those times. Take the mean over the entire track, yielding a single distribution per individual. Then, take the mean (% CI) over the individual wallabies.
# Load some key libraries #
library(RColorBrewer)
library(rgdal)
#### Define a function used in preparing the probability distribution of defecation times. ####
# This function is modified from Paine et al. (2012), Methods in Ecology & Evolution.
# Two parameters:
# fit: a model fitted using nls, using a logistic form
# times: the times at which you want the predictions to be made
output.logis.nls <- function(fit, times){
  # Growth-analysis quantities for a logistic curve fitted with nls(... SSlogis ...).
  # Modified from Paine et al. (2012), Methods in Ecology & Evolution.
  #
  # fit:   an nls fit with coefficients (Asym, xmid, scal), in that order
  # times: the times at which predictions are required
  #
  # Returns a data.frame with one row per time:
  #   M    - predicted cumulative value (mass/proportion) at each time
  #   AGR  - absolute growth rate, dM/dt
  #   RGRt - relative growth rate as a function of time, AGR/M
  #   RGRm - relative growth rate as a function of mass, r*(1 - M/K)
  cf   <- coef(fit)
  Asym <- cf[1]                          # K: the upper asymptote
  rate <- 1/(cf[3])                      # r: intrinsic rate, 1/scal
  M0   <- Asym/(1 + exp(cf[2]/cf[3]))    # predicted value at time zero
  decay <- exp(-rate*times)
  denom <- M0 + (Asym - M0)*decay
  rates <- data.frame(
    times = times,
    M = (M0*Asym)/denom,
    AGR = (rate*M0*Asym*(Asym - M0)*decay)/denom^2)
  rates$RGRt <- rates$AGR/rates$M
  rates$RGRm <- rate*(1 - rates$M/Asym)
  rates
}
#### Map the movement patterns ####
# Map each movement track to verify that it looks appropriate and ready for analysis.
tracks <- list.files('data/ready', full.names = TRUE) # this folder contains the track data csv files
par(mfrow = c(3, 4), bty = 'o', mar = c(2, 5, 0, 0), oma = c(1, 1, 1, 1), xpd = NA)
for(i in seq_along(tracks)){ # seq_along() is safe when no files are found (1:length() would yield 1:0)
  dat.i <- read.csv(tracks[i])
  names(dat.i)[names(dat.i) == 'E'] <- 'Easting' # correct some column names
  names(dat.i)[names(dat.i) == 'N'] <- 'Northing'# correct some column names
  # Distance between consecutive fixes; NA for the first fix, which has no predecessor.
  dist_travelled <- c(NA, sapply(1:(nrow(dat.i)-1), function(x){c(dist(dat.i[x:(x+1), c('Easting', 'Northing')]))}))
  # NOTE(review): this divides each step's distance by the absolute 'time' value of the fix,
  # not by the interval since the previous fix (diff of time) -- confirm that is intended
  # before interpreting 'speed' quantitatively. It is only used here to choose plot symbols.
  speed <- dist_travelled/dat.i$time
  plot(Northing ~ Easting, data = dat.i, type = "p", pch = ifelse(speed > 0.1, 4, 1), col = 'black', asp = 1, axes = TRUE, xlab = "", ylab = "", cex = 0.5)
  mtext(gsub(".*/|\\.csv", "", tracks[i]), line = -2, adj = 0.05, cex = 0.8) # panel label: track file name
  box()
}
#### Defecation ####
# Read in the defecation data, and predict the likelihood of spore defecation as a function
# of time. Do this separately for the two individuals for which defecation was monitored.
poopdat <- read.csv("data/Extracted cumulative excretion from Danks 2012.csv")
nlsA <- nls(Proportion.excreted ~ SSlogis(Time..h., Asym, xmid, scal), data = poopdat[poopdat$Animal == 'A',])
nlsA.gomp <- nls(Proportion.excreted ~ SSgompertz(Time..h., Asym, xmid, scal), data = poopdat[poopdat$Animal == 'A',])
AIC(nlsA, nlsA.gomp) # gompertz is ever so slightly better
nlsB <- nls(Proportion.excreted ~ SSlogis(Time..h., Asym, xmid, scal), data = poopdat[poopdat$Animal == 'B',])
# NOTE(review): the comment below says the Gompertz fit fails for animal B; if nls() errors
# here the script will stop when sourced -- presumably this line is run interactively. Confirm.
nlsB.gomp <- nls(Proportion.excreted ~ SSgompertz(Time..h., Asym, xmid, scal), data = poopdat[poopdat$Animal == 'B',]) # gompertz fails to fit.
# The logistic function provides a fine fit to the data. Use it going forward.
# Make predictions from the defecation distribution over 0-100 hours.
t <- 0:100
outA <- output.logis.nls(nlsA, times = t)
outB <- output.logis.nls(nlsB, times = t)
probdefA <- outA$RGRt/sum(outA$RGRt) # this is the derivative of the fitted curve with respect to time. It's scaled to an area under the curve of 1. In other words, it's an empirical probability distribution.
probdefB <- outB$RGRt/sum(outB$RGRt)
# Plot up the defecation distribution: instantaneous (left) and cumulative (right).
par(mfrow = c(1, 2), bty = "L", mar = c(5, 5, 1, 1))
plot(t, probdefA, type = 'l', ylim = range(probdefA, probdefB), ylab = 'Instantaneous P(defecation)')
lines(t, probdefB, type = 'l', col = 'red')
legend('topright', inset = 0.05, legend = c('A, 21 kg male', 'B, 10 kg male'), col = 1:2, lwd = 1, title = 'Animal')
plot(Proportion.excreted ~ Time..h., data = poopdat, col = factor(Animal), ylab = 'Cumulative P(defecation)')
lines(t, outA$M, col = 'black') # fitted logistic, animal A (spurious 'data =' argument removed: lines() has no data argument)
lines(t, outB$M, col = 'red')   # fitted logistic, animal B
#### Tracking data ####
# The longest time until 100% defecation was 69 hours; therefore, we need to pull out all
# 69-hour-long windows of time out of the tracking data.
# Pull out all 69-hour periods out of each sample.
tracks <- list.files('data/ready', full.names = T) # this folder contains the data
max_time <- 69 # hours: longest observed time to complete spore excretion (see above)
output <- list() # one matrix per track: rows = distance bins, columns = 69-h windows
dist_bin <- 5 # into how wide a class will dispersal distance be binned? 5m. previously was 10m. 2m is too noisy, it makes the output too jaggedy.
for(i in 1:length(tracks)){
print(tracks[i]) # progress: which track is being processed
track <- read.csv(tracks[i])
# This next block re-formats the times, if they are present in a strange format.
if("HHMISS" %in% names(track)){
track$time <- track$HHMISS
# NOTE(review): paste0(0, ...) left-pads YYMODD with a literal zero before parsing with
# %y%m%d -- presumably the leading year digit was dropped on import; confirm against the raw files.
track$date <- as.Date(paste0(0, as.character(track$YYMODD)), format = "%y%m%d")
# Decimal hours since the first day of the track. NOTE(review): HHMISS/10000 treats minutes
# and seconds as base-100 fractions of an hour (e.g. 123000 -> 12.30 h, not 12.5 h) --
# confirm this approximation is acceptable for window extraction.
track$hour <- unclass(track$time/(100*100) + 24 * (track$date - min (track$date)))
track <- track[track$Error == 0 & track$Modeb == 3,] # drop observations considered to be errors, or using a 2-D, rather than a 3-D position fix
} else {
track$hour <- track$time/60 # time was in minutes. express it in decimal hours
}
track <- track[order(track$hour),] # ensure fixes are in chronological order
track <- track[track$hour>1,] # drop off the first hour, to give the animal time to stop quivering and relax after being trapped.
starts.i <- track$hour[track$hour <= (max(track$hour) - max_time)] # candidate window starts: every fix with >= 69 h of track remaining
if(length(starts.i) > 0){
output.i <- matrix(NA, ncol = length(starts.i), nrow = 1000) # 1000 distance bins; at dist_bin = 5 m this allows dispersal distances up to 5 km. More than enough! (The earlier "2 km" figure dates from dist_bin = 2.)
for(j in 1:length(starts.i)){
cat(j, " ") # progress indicator within this track
track.j <- track[track$hour>=starts.i[j] & track$hour < (max_time+starts.i[j]),] # the 69-h window beginning at starts.i[j]
# Straight-line distance of every fix from the window's first fix.
track.j$dist <-c(0, sapply(2:(nrow(track.j)), function(x){c(dist(track.j[c(1,x), c('Easting', 'Northing')]))}))
track.j$hour <- track.j$hour - min(track.j$hour)# reset the minimum to 0, to make it match the nls outputs
outA.j <- output.logis.nls(nlsA, times = track.j$hour)
outB.j <- output.logis.nls(nlsB, times = track.j$hour)
track.j$probdefA <- outA.j$RGRt/sum(outA.j$RGRt) # P(defecation) at each fix time, animal A, scaled to sum to 1 within the window
track.j$probdefB <- outB.j$RGRt/sum(outB.j$RGRt) # same, animal B
# Sum the defecation probabilities within each dist_bin-wide distance class, then average the two animals.
track_agg.j <- aggregate(track.j[,c("probdefA", "probdefB")], list(dist = dist_bin*round(track.j$dist/dist_bin)), sum, na.rm = T)
track_agg.j$probdefmean <- rowMeans(track_agg.j[,c("probdefA", 'probdefB')], na.rm = T)
output.i[match(track_agg.j$dist, 0:999*dist_bin),j] <- track_agg.j$probdefmean # slot each bin's probability into the row for its distance (row k = distance (k-1)*dist_bin)
}
output[[i]] <- output.i
} else {
output[[i]] <- NA # track shorter than 69 h: no usable windows
}
cat("\n")
}
# Label each track's output matrix with its wallaby ID and sampling period.
track_ids <- c("WB5Jul16", "WB6Aug16", "WB6Oct16", "WB1Feb08", "WB2Apr08", "WB2Aug08", "WB2Jun08", "WB3Jun08", "WB4Aug08")
names(output) <- track_ids
length(output) # 9 tracks
# Trim each matrix to its furthest occupied distance bin: rows beyond the last
# bin with any probability mass are unused.
trim_to_used <- function(mat){
  last_used <- max(which(rowSums(mat, na.rm = TRUE) > 0))
  mat[1:last_used, ]
}
output3 <- lapply(output, trim_to_used)
output3 <- output3[order(names(output3))] # alphabetical order by track ID
str(output3)
# Common x-axis limit (metres) for the per-track plots below.
XLIM <- dist_bin * c(0, max(vapply(output3, nrow, integer(1))))
#### Summarise the predictions #####
# Summarise the predictions: one dispersal kernel per track (averaged over its 69-h
# windows), then the across-track mean, median, and 50% interval per distance bin.
per_track_kernel <- function(mat){rowSums(mat, na.rm = TRUE)/(ncol(mat))} # NA bins count as zero probability
preds <- sapply(output, per_track_kernel)
dim(preds)
mean_pred <- rowMeans(preds) # the mean prediction over the 9 tracks
preds <- preds[mean_pred > 0, ] # drop distance bins with no probability mass in any track
dim(preds)
mean_pred <- rowMeans(preds) # recompute the mean on the retained bins
median_pred <- apply(preds, 1, median) # the median prediction over the 9 tracks
CI <- apply(preds, 1, quantile, c(0.25, 0.75)) # the 50% CI around that estimate
# NOTE(review): row k of the untrimmed matrices corresponds to distance (k-1)*dist_bin, and
# bins were dropped just above, so these relabelled distances assume the dropped bins all
# lie at the far tail -- confirm no interior bins were removed.
preds <- data.frame(dist = dist_bin*1:nrow(preds), preds)
head(preds)
#### Summary statistics for the paper ####
# Number of location fixes, hours, and times between fixes in the combined tracking dataset ####
# (The time re-formatting below duplicates the main analysis loop above.)
n_points <- numeric() # fixes retained per track
n_hours <- numeric() # tracking duration (h) per track
all_fix_times <- numeric() # intervals (h) between consecutive fixes, pooled over tracks
for(i in 1:length(tracks)){
track <- read.csv(tracks[i])
if("HHMISS" %in% names(track)){
track$time <- track$HHMISS
track$date <- as.Date(paste0(0, as.character(track$YYMODD)), format = "%y%m%d")
track$hour <- unclass(track$time/(100*100) + 24 * (track$date - min (track$date)))
track <- track[track$Error == 0 & track$Modeb == 3,] # drop observations considered to be errors, or using a 2-D, rather than a 3-D position fix
} else {
track$hour <- track$time/60 # time was in minutes. express it in decimal hours
}
track <- track[track$hour>1,] # drop off the first hour, to give the animal time to recover after being trapped.
starts.i <- track$hour[track$hour <= (max(track$hour) - max_time)]
# NOTE(review): if any track has no 69-h window, its n_points/n_hours entry is left NA,
# so the sum()/mean()/sd() calls below would return NA -- presumably every track qualifies;
# confirm, or add na.rm = TRUE.
if(length(starts.i) > 0){
n_points[i] <- nrow(track)
n_hours[i] <- max(track$hour) - min(track$hour)
all_fix_times <- c(all_fix_times, diff(sort(track$hour)))
}}
sum(n_points)
sum(n_hours)
mean(n_hours)
sd(n_hours)
summary(all_fix_times)*60 # fix intervals in minutes
hist(all_fix_times, breaks = 100)
# Distance travelled in each track: distance of every fix from the track's first retained
# fix, plotted against time. (Time re-formatting duplicates the main analysis loop above.)
par(mfrow = c(3, 3))
for(i in 1:length(tracks)){
track <- read.csv(tracks[i])
if("HHMISS" %in% names(track)){
track$time <- track$HHMISS
track$date <- as.Date(paste0(0, as.character(track$YYMODD)), format = "%y%m%d")
track$hour <- unclass(track$time/(100*100) + 24 * (track$date - min (track$date)))
track <- track[track$Error == 0 & track$Modeb == 3,] # drop observations considered to be errors, or using a 2-D, rather than a 3-D position fix
} else {
track$hour <- track$time/60 # time was in minutes. express it in decimal hours
}
track <- track[track$hour>1,] # drop off the first hour
# NOTE(review): unlike the main analysis loop, rows are not sorted by hour here, so for the
# HHMISS-format tracks the "first" row may not be the earliest fix -- confirm the raw files
# are already time-ordered.
track$dist <-c(0, sapply(2:(nrow(track)), function(x){c(dist(track[c(1,x), c('Easting', 'Northing')]))}))
plot(dist ~ hour, data = track, type = "l")
print(range(track$dist)) # min/max displacement, for the text
}
#### Dispersal distances by track ####
# One panel per track: total defecation probability in each 5-m distance bin,
# summed over all of that track's 69-h windows.
par(mfrow = c(3, 3), mar = c(3, 3, 0.5, 0.5), pty = 'm', bty = 'L')
for(i in seq_along(output3)){ # seq_along() is safe even if output3 were empty
  output.i <- output3[[i]]
  sums.i <- rowSums(output.i, na.rm = TRUE) # total probability per distance bin
  # NOTE(review): row k of output.i was filled for distance (k-1)*dist_bin, so these x values
  # are shifted one bin (5 m) high -- confirm whether that offset matters for the figure.
  plot(dist_bin*(1:nrow(output.i)), sums.i, xlab = 'Dispersal distance', ylab = 'Probability', xlim = c(0, 600), type = 'b')
  mtext(names(output3)[i], line = -2, adj = 0.95, cex = 0.8) # track label
}
#### Figure 1 ####
# pdf("figs/Figure 1 - dispersal probability 2020 06 10.pdf", paper = 'a4', width = 6, height = 6, useDingbats = TRUE)
COL <- brewer.pal(8, "Accent")
par(bty = 'L', las = 1, xpd = FALSE, mfrow = c(1, 1)) # (duplicate las = 1 removed)
# Per-track kernels (coloured), across-track median (black), and 50% interval (grey band),
# plotted on a log(0.01 + p) axis so small probabilities remain visible.
matplot(x = preds$dist, log(0.01+preds[,-1]), type = 'l', xlim= c(0, 600), lty = 1, xlab = 'Dispersal distance (m)', ylab = 'Dispersal probability', yaxt = 'n', col = COL)
lines(x = preds$dist, log(0.01+median_pred), lwd = 2, col = 'black')
polygon(c(preds$dist, rev(preds$dist)), log(0.01+c(CI[1,], rev(CI[2,]))), col = '#00000020', border = NA)
legend('topright', col = COL, legend = names(output3), lwd = 1, title = 'Track', ncol = 2, cex = 0.8, inset = 0.1)
axis(2, at = log(0.01+ c(0, 0.02, 0.05, 0.1, 0.2, 0.4)), labels = c(0, 0.02, 0.05, 0.1, 0.2, 0.4)) # ticks back-transformed to probabilities
mtext('Figure 1', line = 3, family = 'Times', adj = 0)
# dev.off() # only run this when the pdf() call above is active: with pdf() commented out,
#           # dev.off() would close the interactive graphics device instead.
# Modal dispersal distance
preds$dist[which.max(median_pred)] # 25m
# maximal dispersal distance
max(preds$dist[median_pred > 0]) # 1265m
#### Figure 2 ####
# pdf("figs/Figure 2 - cumulative dispersal probability 2020 06 10.pdf", paper = 'a4', width = 6, height = 4, useDingbats = TRUE)
par(bty = 'L', las = 1, xpd = FALSE, mfrow = c(1, 1))
cum_pred <- cumsum(mean_pred) # cumulative dispersal probability; computed once and reused below
# Distance by which a given fraction of spores has been deposited (first bin exceeding p).
dist_at <- function(p){preds$dist[which(cum_pred > p)[1]]}
plot(preds$dist, cum_pred, ylab = 'Cumulative dispersal probability', xlab = 'Dispersal distance (m)', type = 'l', lwd = 2, xlim = c(0, 600))
abline(v = dist_at(0.25), col = 'red', lty = 2) # 25 % of spores go less than 45m
abline(v = dist_at(0.50), col = 'red')          # 50 % of spores go less than 110m
abline(v = dist_at(0.75), col = 'red', lty = 2) # 75 % of spores go less than 200m
abline(v = dist_at(0.95), col = 'red', lty = 3) # 95 % of spores go less than 450m. (ie, 5% go further)
mtext('25%', at = dist_at(0.25))
mtext('50%', at = dist_at(0.50))
mtext('75%', at = dist_at(0.75))
mtext('95%', at = dist_at(0.95))
mtext('Figure 2', line = 3, family = 'Times', adj = 0)
# dev.off() # only run this when the pdf() call above is active: with pdf() commented out,
#           # dev.off() would close the interactive graphics device instead.