The code immediately below imports the data, manipulates the data, and adds some labels to some of the variables. Here, we’ll also use the AICcmodavg package in the model-selection work.
library(RMark) # for converting the input file
library(AICcmodavg) # for model-selection
library(ggplot2) # for plotting
library(visreg) # an alternative plotting package
# bring in the data file
fawns <- convert.inp("http://www.montana.edu/rotella/documents/502/lab02-fawns.inp",
covariates = c("area", "sex", "mass", "length"))
# add "length.sq" variable for use in quadratic models
fawns$length.sq <- (fawns$length)^2
head(fawns)
## ch freq area sex mass length length.sq
## 1 11 1 1 0 34.0 126.0 15876.00
## 2 11 1 1 0 33.4 128.5 16512.25
## 3 11 1 1 0 29.9 130.0 16900.00
## 4 11 1 1 0 31.0 113.0 12769.00
## 5 11 1 1 0 26.7 116.0 13456.00
## 6 11 1 1 0 23.9 124.0 15376.00
# convert area & sex variables to factors so that we can treat them as
# grouping variables in the analysis to ease working with output later
fawns$area <- factor(fawns$area,
levels = c(0, 1),
labels = c("control", "treatment"))
fawns$sex <- factor(fawns$sex,
levels = c(0, 1),
labels = c("female", "male"))
# Store versions of fate for use in final plotting
# first a factor version that has nice labels
fawns$fate <- factor(as.numeric(fawns$ch),
levels = c(11, 10),
labels = c("died", "lived"))
# second, a numeric version that's 0 = dead, 1 = lived
fawns$Fate <- 2 - as.numeric(fawns$fate)
summary(fawns)
## ch freq area sex mass
## Length:115 Min. :1 control :59 female:57 Min. :22.80
## Class :character 1st Qu.:1 treatment:56 male :58 1st Qu.:31.90
## Mode :character Median :1 Median :33.60
## Mean :1 Mean :34.38
## 3rd Qu.:1 3rd Qu.:36.60
## Max. :1 Max. :72.00
## length length.sq fate Fate
## Min. :108.0 Min. :11664 died :68 Min. :0.0000
## 1st Qu.:120.0 1st Qu.:14400 lived:47 1st Qu.:0.0000
## Median :124.0 Median :15376 Median :1.0000
## Mean :123.2 Mean :15217 Mean :0.5913
## 3rd Qu.:127.0 3rd Qu.:16129 3rd Qu.:1.0000
## Max. :135.5 Max. :18360 Max. :1.0000
# we know from other work on this dataset that there is 1 record with
# a very high value for mass, which we want to drop
fawns2 = fawns[fawns$mass < 70, ]
summary(fawns2)
## ch freq area sex mass
## Length:114 Min. :1 control :58 female:57 Min. :22.80
## Class :character 1st Qu.:1 treatment:56 male :57 1st Qu.:31.85
## Mode :character Median :1 Median :33.55
## Mean :1 Mean :34.05
## 3rd Qu.:1 3rd Qu.:36.58
## Max. :1 Max. :45.60
## length length.sq fate Fate
## Min. :108.0 Min. :11664 died :67 Min. :0.0000
## 1st Qu.:120.0 1st Qu.:14400 lived:47 1st Qu.:0.0000
## Median :124.0 Median :15376 Median :1.0000
## Mean :123.3 Mean :15232 Mean :0.5877
## 3rd Qu.:127.0 3rd Qu.:16129 3rd Qu.:1.0000
## Max. :135.5 Max. :18360 Max. :1.0000
The dataset now contains information on 114 fawns. Because we worked on the summarizing and evaluating the covariate information in an earlier exercise, we won’t do that work here and will get right to the analyses.
Here, we run the same set of models that were run in lab in MARK. The code below shows how to do that in glm in R.
S.dot <- glm(fate ~ 1, data = fawns2, family = binomial)
S.area <- glm(fate ~ area, data = fawns2, family = binomial)
S.mass <- glm(fate ~ mass, data = fawns2, family = binomial)
S.length <- glm(fate ~ length, data = fawns2, family = binomial)
S.length.sq <- glm(fate ~ length + length.sq, data = fawns2, family = binomial)
S.sex <- glm(fate ~ sex, data = fawns2, family = binomial)
S.area.length <- glm(fate ~ area + length, data = fawns2, family = binomial)
S.length.mass <- glm(fate ~ length + mass, data = fawns2, family = binomial)
# sex-specific intercepts & common slope
S.sex.length <- glm(fate ~ sex + length, data = fawns2, family = binomial)
# 1 intercept & sex-specific slopes
S.sex.by.length <- glm(fate ~ sex:length, data = fawns2, family = binomial)
# sex-specific intercepts & slopes
S.sex.x.length <- glm(fate ~ sex * length, data = fawns2, family = binomial)
##set up named list
Cand.mods <- list("S.dot" = S.dot,
"S.area" = S.area,
"S.mass" = S.mass,
"S.length" = S.length,
"S.length.sq" = S.length.sq,
"S.sex" = S.sex,
"S.area.length" = S.area.length,
"S.length.mass" = S.length.mass,
"S.sex.length" = S.sex.length,
"S.sex.by.length" = S.sex.by.length,
"S.sex.x.length" = S.sex.x.length)
##compute table
AICc.table <- aictab(cand.set = Cand.mods, second.ord = TRUE)
AICc.table
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## S.sex.x.length 4 148.53 0.00 0.36 0.36 -70.08
## S.sex.by.length 3 149.12 0.58 0.27 0.63 -71.45
## S.sex.length 3 149.47 0.93 0.23 0.86 -71.62
## S.length.sq 3 152.11 3.57 0.06 0.92 -72.94
## S.length 2 154.08 5.55 0.02 0.95 -74.99
## S.sex 2 154.21 5.67 0.02 0.97 -75.05
## S.area.length 3 156.16 7.62 0.01 0.98 -74.97
## S.length.mass 3 156.19 7.66 0.01 0.98 -74.99
## S.mass 2 156.39 7.86 0.01 0.99 -76.14
## S.dot 1 156.55 8.01 0.01 1.00 -77.26
## S.area 2 158.50 9.97 0.00 1.00 -77.19
# make prediction data frame
dat.pr = expand.grid(sex = c("male", "female"),
length = seq(min(fawns2$length), max(fawns2$length), by = 1))
head(dat.pr)
## sex length
## 1 male 108
## 2 female 108
## 3 male 109
## 4 female 109
## 5 male 110
## 6 female 110
tail(dat.pr)
## sex length
## 51 male 133
## 52 female 133
## 53 male 134
## 54 female 134
## 55 male 135
## 56 female 135
# make predictions on log-odds scale
pred = predict(S.sex.x.length, newdata = dat.pr,
type = c("link"), se.fit = TRUE)
# calculate predicted values and 95% CIS on real parameter scale
dat.pr$fit = plogis(pred$fit)
dat.pr$lci.fit = plogis(pred$fit - 1.96 * pred$se.fit)
dat.pr$uci.fit = plogis(pred$fit + 1.96 * pred$se.fit)
# print table of covariate values and associated predicted values
head(dat.pr)
## sex length fit lci.fit uci.fit
## 1 male 108 0.20587023 0.037829586 0.6309043
## 2 female 108 0.06609841 0.009125851 0.3522928
## 3 male 109 0.21167362 0.043203689 0.6148940
## 4 female 109 0.07854093 0.012462779 0.3653520
## 5 male 110 0.21759578 0.049271442 0.5987884
## 6 female 110 0.09309219 0.016984997 0.3788089
tail(dat.pr)
## sex length fit lci.fit uci.fit
## 51 male 133 0.3842270 0.1762942 0.6452832
## 52 female 133 0.8806891 0.6178723 0.9711793
## 53 male 134 0.3925727 0.1679352 0.6742148
## 54 female 134 0.8988816 0.6319434 0.9787341
## 55 male 135 0.4009817 0.1595789 0.7023705
## 56 female 135 0.9145693 0.6454935 0.9843607
# make a plot
p <- ggplot(dat.pr, aes(x = length, y = fit, group = sex)) +
geom_line(size = 1.5, aes(colour = sex)) +
geom_ribbon(aes(ymin = lci.fit, ymax = uci.fit), alpha = 0.2) +
scale_colour_brewer(palette = "Set1") +
theme(legend.position = c(0.25, 0.75),
legend.justification = c(1, 0)) +
xlab("Body length (cm)") +
ylab("Estimated Survival Rate") +
geom_jitter(data = fawns2, aes(y = Fate, color = sex),
height = 0.025)
# print the plot
p
You can also use the ‘visreg’ package to get a plot quite simply. With this method, you don’t have to go to the trouble of making a prediction data.frame. This can be good (it’s fast) and bad (you might not be as sure of what happened!). I do like turning the ‘rug’ on: this shows you the observed values of the covariate. Here you see where you have mass observations for each sex of fawn and you see what the fates for each sex. The first plot presents estimated log-odds and the second plot presents estimated survival rates.
visreg(S.sex.x.length, "length", by = "sex", rug = 2)
visreg(S.sex.x.length, "length", by = "sex", scale = "response", rug = 2)