#StackBounty: #r #correlation #statistical-significance #biostatistics #lags Interpreting differing results from correlation plots, cor…

Bounty: 50

I’m trying to get some advice on the results of the following statistical tests. I’m including the data and the code for these steps for clarity. I’m trying to analyze whether a strong YOY (young of the year) class translates to a higher adult abundance and, if so, whether the effect is lagged by only one year or by some other amount. I have the data below, and I believe the code that follows it sets the data up for a one-year lag analysis.

My questions are:
Did I set this up correctly?
If yes, then why do all the results differ?

Here is the data …

> dput(as.data.frame(wi.age.count2))
structure(list(Year = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 
9L, 10L, 11L, 12L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("2007", 
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
"2016", "2017", "2018"), class = "factor"), Age = c(0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 5, 1, 2, 3, 0, 1, 2, 3, 4, 5, 
6, 1, 2, 3, 4, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 
6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 
2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 9, 0, 1, 2, 3, 4, 5, 6, 
0, 1, 2, 3, 4, 5, 6, 7), n = c(130L, 28L, 34L, 77L, 170L, 18L, 
3L, 22L, 43L, 50L, 151L, 1L, 8L, 17L, 1L, 4L, 19L, 1L, 1L, 46L, 
37L, 52L, 5L, 1L, 1L, 19L, 41L, 15L, 16L, 1L, 1L, 13L, 4L, 26L, 
12L, 11L, 1L, 1L, 1L, 1L, 87L, 15L, 13L, 27L, 13L, 17L, 1L, 1L, 
32L, 30L, 3L, 4L, 1L, 1L, 1L, 1L, 24L, 15L, 23L, 6L, 2L, 1L, 
2L, 2L, 4L, 18L, 13L, 31L, 28L, 3L, 3L, 6L, 1L, 4L, 6L, 1L, 5L, 
9L, 1L, 1L, 1L, 16L, 16L, 8L, 1L, 1L, 4L, 1L, 12L, 4L, 7L, 2L, 
1L, 2L, 1L), id = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Adult", "YOY"), class = "factor"), 
    Cohort = c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
    2016, 2017, 2018, 2007, 2006, 2005, 2002, 2007, 2006, 2005, 
    2009, 2008, 2007, 2006, 2005, 2004, 2003, 2009, 2008, 2007, 
    2006, 2001, 2011, 2010, 2009, 2008, 2007, 2006, 2005, 2004, 
    2003, 2012, 2011, 2010, 2009, 2008, 2007, 2006, 2005, 2013, 
    2012, 2011, 2010, 2009, 2008, 2007, 2006, 2014, 2013, 2012, 
    2011, 2010, 2009, 2008, 2007, 2006, 2015, 2014, 2013, 2012, 
    2011, 2010, 2009, 2008, 2006, 2015, 2014, 2013, 2012, 2011, 
    2010, 2007, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2018, 
    2017, 2016, 2015, 2014, 2013, 2012, 2011)), row.names = c(NA, 
-95L), class = "data.frame")

Here is the code to transform it into a matrix lagged by one year (so 2008 juveniles are compared to 2009 one-year-old adults, and so on). This code also fits the linear model and computes the correlation matrix.

# Build a cohort-by-lag table from wi.age.count2 (columns Year, Age, n, id).
# Each row is a starting survey year Years[i]; column n.k holds the count
# recorded in survey year Years[i + k] for fish whose Age equals the number
# of years elapsed since Years[i]. The YOY counts are then regressed on the
# first three lag columns and a Spearman correlation matrix is printed.

# Year may be stored as a factor. as.numeric() on a factor returns the
# internal level codes (1, 2, ...) instead of the labels (2007, 2008, ...),
# which makes every Year comparison below fail, so convert through character.
year_num <- as.numeric(as.character(wi.age.count2$Year))
Years <- sort(unique(year_num))
n_years <- length(Years)

# Template row that fixes the column layout: Year, n.1, ..., n.(n_years - 1).
# seq_len() avoids the backwards 1:0 sequence when there is a single year.
ivs <- data.frame(Year = NA)
for (i in seq_len(max(n_years - 1, 0))) {
  ivs[[paste0("n.", i)]] <- NA
}

# One single-row data frame per starting year, collected in a list and bound
# once at the end instead of growing ivs with rbind() on every iteration.
rows <- vector("list", n_years)
for (i in seq_along(Years)) {
  tmp <- data.frame(Year = Years[i])

  # Lags that fall inside the surveyed period.
  for (lag in seq_len(n_years - i)) {
    j <- i + lag
    hit <- which(
      year_num == Years[j] &
        wi.age.count2$Age == Years[j] - Years[i]
    )
    # No matching record is stored as a count of 0 (sum of nothing is 0);
    # several matching records are added together.
    tmp[[paste0("n.", lag)]] <- sum(wi.age.count2$n[hit])
  }

  # Lags that would fall after the last survey year: pad with NA so that
  # every row has the same columns as the template.
  for (k in seq_len(i - 1)) {
    tmp[[paste0("n.", n_years - i + k)]] <- NA
  }

  rows[[i]] <- tmp
}
ivs <- do.call(rbind, c(list(ivs), rows))

ivs <- ivs[-1, , drop = FALSE]          # drop the all-NA template row
ivs <- ivs[-nrow(ivs), , drop = FALSE]  # last year has no later survey years

# NOTE(review): the remaining NAs are the padding for lags beyond the last
# survey year. Replacing them with 0 makes the model and the correlations
# treat "not yet observed" as "none counted"; it also leaves nothing for
# use = "pairwise.complete.obs" below to exclude. Confirm this is intended.
ivs[is.na(ivs)] <- 0

# Response: the YOY count for each survey year.
dv <- wi.age.count2[which(wi.age.count2$id == "YOY"), c("Year", "n")]

# Model string "n ~ n.1 + n.2 + n.3" built from the first three lag columns.
formula <- paste("n ~", paste(names(ivs)[2:4], collapse = " + "))

# Merge on Year as character so a factor Year and a numeric Year line up.
ivs$Year <- as.character(ivs$Year)
dv$Year <- as.character(dv$Year)
mat.age <- merge(dv, ivs, by = "Year")

l.fit <- lm(as.formula(formula), data = mat.age)
AIC.l.fit <- signif(AIC(l.fit), digits = 3)
summary(l.fit)
#plot(l.fit)

# Columns 2:9 of mat.age are n and n.1 to n.7.
cor(mat.age[, 2:9], method = "spearman", use = "pairwise.complete.obs")

The summary from the linear model seems to disagree with the correlation matrix. Furthermore, neither one seems to agree with simply plotting the number of adults in each year against the number of juveniles to visualize the correlation, as seen below.

enter image description here


Get this bounty!!!

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.