Lab 02 in R and R Markdown

Bring in the dataset and manipulate it slightly

The code immediately below imports the data, manipulates the data, and adds some labels to some of the variables. Here, we’ll also use the AICcmodavg package in the model-selection work.

library(RMark) # for converting the input file
library(AICcmodavg) # for model-selection
library(ggplot2) # for plotting
library(visreg) # an alternative plotting package

# bring in the data file
fawns <- convert.inp("http://www.montana.edu/rotella/documents/502/lab02-fawns.inp",
                     covariates = c("area", "sex", "mass", "length"))

# add "length.sq" variable for use in quadratic models
fawns$length.sq <- (fawns$length)^2
head(fawns)

##   ch freq area sex mass length length.sq
## 1 11    1    1   0 34.0  126.0  15876.00
## 2 11    1    1   0 33.4  128.5  16512.25
## 3 11    1    1   0 29.9  130.0  16900.00
## 4 11    1    1   0 31.0  113.0  12769.00
## 5 11    1    1   0 26.7  116.0  13456.00
## 6 11    1    1   0 23.9  124.0  15376.00

# convert area & sex variables to factors so that we can treat them as 
# grouping variables in the analysis to ease working with output later
fawns$area <- factor(fawns$area, 
                     levels = c(0, 1), 
                     labels = c("control", "treatment"))

fawns$sex  <- factor(fawns$sex,
                     levels = c(0, 1),
                     labels = c("female", "male"))

# Store versions of fate for use in final plotting
# first a factor version that has nice labels
fawns$fate <- factor(as.numeric(fawns$ch), 
                     levels = c(11, 10),
                     labels = c("died", "lived"))
# second, a numeric version that's 0 = dead, 1 = lived
fawns$Fate <- 2 - as.numeric(fawns$fate)

summary(fawns)

##       ch                 freq          area        sex          mass      
##  Length:115         Min.   :1   control  :59   female:57   Min.   :22.80  
##  Class :character   1st Qu.:1   treatment:56   male  :58   1st Qu.:31.90  
##  Mode  :character   Median :1                              Median :33.60  
##                     Mean   :1                              Mean   :34.38  
##                     3rd Qu.:1                              3rd Qu.:36.60  
##                     Max.   :1                              Max.   :72.00  
##      length        length.sq        fate         Fate       
##  Min.   :108.0   Min.   :11664   died :68   Min.   :0.0000  
##  1st Qu.:120.0   1st Qu.:14400   lived:47   1st Qu.:0.0000  
##  Median :124.0   Median :15376              Median :1.0000  
##  Mean   :123.2   Mean   :15217              Mean   :0.5913  
##  3rd Qu.:127.0   3rd Qu.:16129              3rd Qu.:1.0000  
##  Max.   :135.5   Max.   :18360              Max.   :1.0000

# we know from other work on this dataset that there is 1 record with 
#  a very high value for mass, which we want to drop
fawns2 = fawns[fawns$mass < 70, ]
summary(fawns2)

##       ch                 freq          area        sex          mass      
##  Length:114         Min.   :1   control  :58   female:57   Min.   :22.80  
##  Class :character   1st Qu.:1   treatment:56   male  :57   1st Qu.:31.85  
##  Mode  :character   Median :1                              Median :33.55  
##                     Mean   :1                              Mean   :34.05  
##                     3rd Qu.:1                              3rd Qu.:36.58  
##                     Max.   :1                              Max.   :45.60  
##      length        length.sq        fate         Fate       
##  Min.   :108.0   Min.   :11664   died :67   Min.   :0.0000  
##  1st Qu.:120.0   1st Qu.:14400   lived:47   1st Qu.:0.0000  
##  Median :124.0   Median :15376              Median :1.0000  
##  Mean   :123.3   Mean   :15232              Mean   :0.5877  
##  3rd Qu.:127.0   3rd Qu.:16129              3rd Qu.:1.0000  
##  Max.   :135.5   Max.   :18360              Max.   :1.0000

The dataset now contains information on 114 fawns. Because we worked on the summarizing and evaluating the covariate information in an earlier exercise, we won’t do that work here and will get right to the analyses.

Analyze the data

Here, we run the same set of models that were run in lab in MARK. The code below shows how to do that in glm in R.

S.dot <- glm(fate ~ 1, data = fawns2, family = binomial)
S.area <- glm(fate ~ area, data = fawns2, family = binomial)
S.mass <- glm(fate ~ mass, data = fawns2, family = binomial)
S.length <- glm(fate ~ length, data = fawns2, family = binomial)
S.length.sq <- glm(fate ~ length + length.sq, data = fawns2, family = binomial)
S.sex <- glm(fate ~ sex, data = fawns2, family = binomial)
S.area.length <- glm(fate ~ area + length, data = fawns2, family = binomial)
S.length.mass <- glm(fate ~ length + mass, data = fawns2, family = binomial)
# sex-specific intercepts & common slope
S.sex.length <- glm(fate ~ sex + length, data = fawns2, family = binomial)
# 1 intercept & sex-specific slopes
S.sex.by.length <- glm(fate ~ sex:length, data = fawns2, family = binomial)
# sex-specific intercepts & slopes
S.sex.x.length <- glm(fate ~ sex * length, data = fawns2, family = binomial)

##set up named list
Cand.mods <- list("S.dot" = S.dot, 
                  "S.area" = S.area, 
                  "S.mass" = S.mass, 
                  "S.length" = S.length,
                  "S.length.sq" = S.length.sq, 
                  "S.sex" = S.sex,
                  "S.area.length" = S.area.length,
                  "S.length.mass" = S.length.mass, 
                  "S.sex.length" = S.sex.length,
                  "S.sex.by.length" = S.sex.by.length,
                  "S.sex.x.length" = S.sex.x.length)

##compute table
AICc.table <- aictab(cand.set = Cand.mods, second.ord = TRUE)
AICc.table

## 
## Model selection based on AICc:
## 
##                 K   AICc Delta_AICc AICcWt Cum.Wt     LL
## S.sex.x.length  4 148.53       0.00   0.36   0.36 -70.08
## S.sex.by.length 3 149.12       0.58   0.27   0.63 -71.45
## S.sex.length    3 149.47       0.93   0.23   0.86 -71.62
## S.length.sq     3 152.11       3.57   0.06   0.92 -72.94
## S.length        2 154.08       5.55   0.02   0.95 -74.99
## S.sex           2 154.21       5.67   0.02   0.97 -75.05
## S.area.length   3 156.16       7.62   0.01   0.98 -74.97
## S.length.mass   3 156.19       7.66   0.01   0.98 -74.99
## S.mass          2 156.39       7.86   0.01   0.99 -76.14
## S.dot           1 156.55       8.01   0.01   1.00 -77.26
## S.area          2 158.50       9.97   0.00   1.00 -77.19

Plot predictions from top model

# make prediction data frame
dat.pr = expand.grid(sex = c("male", "female"),
                     length = seq(min(fawns2$length), max(fawns2$length), by = 1))
head(dat.pr)

##      sex length
## 1   male    108
## 2 female    108
## 3   male    109
## 4 female    109
## 5   male    110
## 6 female    110

tail(dat.pr)

##       sex length
## 51   male    133
## 52 female    133
## 53   male    134
## 54 female    134
## 55   male    135
## 56 female    135

# make predictions on log-odds scale
pred = predict(S.sex.x.length, newdata = dat.pr,
               type = c("link"), se.fit = TRUE)

# calculate predicted values and 95% CIS on real parameter scale
dat.pr$fit = plogis(pred$fit)
dat.pr$lci.fit = plogis(pred$fit - 1.96 * pred$se.fit)
dat.pr$uci.fit = plogis(pred$fit + 1.96 * pred$se.fit)

#  print table of covariate values and associated predicted values
head(dat.pr)

##      sex length        fit     lci.fit   uci.fit
## 1   male    108 0.20587023 0.037829586 0.6309043
## 2 female    108 0.06609841 0.009125851 0.3522928
## 3   male    109 0.21167362 0.043203689 0.6148940
## 4 female    109 0.07854093 0.012462779 0.3653520
## 5   male    110 0.21759578 0.049271442 0.5987884
## 6 female    110 0.09309219 0.016984997 0.3788089

tail(dat.pr)

##       sex length       fit   lci.fit   uci.fit
## 51   male    133 0.3842270 0.1762942 0.6452832
## 52 female    133 0.8806891 0.6178723 0.9711793
## 53   male    134 0.3925727 0.1679352 0.6742148
## 54 female    134 0.8988816 0.6319434 0.9787341
## 55   male    135 0.4009817 0.1595789 0.7023705
## 56 female    135 0.9145693 0.6454935 0.9843607

# make a plot
p <- ggplot(dat.pr, aes(x = length, y = fit, group = sex)) +
  geom_line(size = 1.5, aes(colour = sex)) +
  geom_ribbon(aes(ymin = lci.fit, ymax = uci.fit), alpha = 0.2) +
  scale_colour_brewer(palette = "Set1") +
  theme(legend.position = c(0.25, 0.75),
  legend.justification = c(1, 0)) +
  xlab("Body length (cm)") +
  ylab("Estimated Survival Rate") + 
  geom_jitter(data = fawns2, aes(y = Fate, color = sex),
              height = 0.025)
  # print the plot
p

You can also use the ‘visreg’ package to get a plot quite simply. With this method, you don’t have to go to the trouble of making a prediction data.frame. This can be good (it’s fast) and bad (you might not be as sure of what happened!). I do like turning the ‘rug’ on: this shows you the observed values of the covariate. Here you see where you have mass observations for each sex of fawn and you see what the fates for each sex. The first plot presents estimated log-odds and the second plot presents estimated survival rates.

visreg(S.sex.x.length, "length", by = "sex", rug = 2)

visreg(S.sex.x.length, "length", by = "sex", scale = "response", rug = 2)

Lab 02 in R and R Markdown

WILD 502 - Jay Rotella

Bring in the dataset and manipulate it slightly

Analyze the data

Plot predictions from top model