#StackBounty: #hierarchical-bayesian #beta-distribution #jags #dirichlet-distribution Trouble specifying a hierarchical dirichlet model…

Bounty: 50

I have a sampling design where samples (cores) are taken within plots. Those plots are then nested within sites. There are multiple sites. I would like to get a hierarchical site-level estimate of observation y, observed at the core level. Furthermore, my y observation is a multivariate relative abundance. It is the relative abundance of 3 species, and the total of those 3 relative abundances always sums to 1. Here is R code to simulate data, dat like this:

#1. generate pseudo data.----
#3 species relative abundances across 4 sites.
spp1 <- c(0.2,0.8,0.5,0.1)
spp2 <- c(0.3,0.1,0.4,0.1)
spp3 <- 1 - (spp1 + spp2)
y <- data.frame(spp1,spp2,spp3)
y <- as.matrix(y)
site_sd <- 0.02
truth <- y

#get some numbevr of pltos and cores within plots.
n.site <- nrow(y)
n.plot <- 15
n.core <- 10

#simulate from dirichlet distribution.
site.list <- list()
for(i in 1:n.site){
  plot.list <- list()
  for(j in 1:n.plot){
    cores <- DirichletReg::rdirichlet(n.core,y[i,])
    cores <- data.frame(cbind(rep(LETTERS[i],n.core),rep(LETTERS[j],n.core),LETTERS[1:n.core],cores))
    colnames(cores) <- c('siteID','plotID','coreID','spp1','spp2','spp3')
    plot.list[[j]] <- cores
  }
  site.list[[i]] <- do.call(rbind,plot.list)
}
dat <- do.call(rbind,site.list)
dat[,4:6] <- sapply(dat[4:6],as.character)
dat[,4:6] <- sapply(dat[4:6],as.numeric)
y <- dat[,4:6]

#core_plot, plot_site factors.
dat$plotID <- paste0(dat$siteID,'_',dat$plotID)
core_site = dat$siteID
core_plot = dat$plotID
plot_site = dat[seq(1, nrow(dat), 3), ]$siteID

Ignoring plots, I can get site level means using the following JAGS model:

#JAGS model for site only case.----
jags.model1 = "
model {
#get site level means.
for(i in 1:N.core){
  for(j in 1:N.spp){
    log(core.hat[i,j]) <- site_mu[core_site[i],j]
  }
  y[i,1:N.spp] ~ ddirch(core.hat[i,1:N.spp]) 
}
#prior
for(i in 1:N.site){
  for(j in 1:N.spp){
    site_mu[i,j] ~ dnorm(0,1E-3)
  }
}
}" #end jags model.

#JAGS data for site only case.----
jd1 <- list(y=as.matrix(y), N.site = n.site, N.core = nrow(y), N.spp = ncol(y), 
           core_site = as.factor(core_site))

#4. Run JAGS model.----
test <- run.jags(model = jags.model1,
                 data = jd1,
                 n.chains = 3,
                 monitor = c('site_mu'),
                 adapt = 200,
                 burnin = 1000,
                 sample = 1000)

This model fits well, as evidenced by the following plot of predicted vs. observed:

out <- summary(test)
spp.sum <- matrix(out[,4], nrow = n.site, ncol = ncol(y))
spp.sum <- boot::inv.logit(spp.sum) / rowSums(boot::inv.logit(spp.sum))
plot(as.vector(truth) ~ as.vector(spp.sum));mod<-lm(as.vector(truth) ~ as.vector(spp.sum));abline(mod)
mtext(paste0('R2=',round(summary(mod)$r.squared,3)), side = 3, adj = 0.05, line = -2)

enter image description here

This begins to fail when I add an intermediate plot-level hierarchy. Here is the JAGS model with a plot-level hierarchy as well as code to specify a JAGS data object and fit the model:

# JAGS model for hierarchical plot-site case.----
jags.model2 = "
model {
#get plot level means.
for(i in 1:N.core){
  for(j in 1:N.spp){
    log(core.hat[i,j]) <- plot_mu[core_plot[i],j]
  }
  y[i,1:N.spp] ~ ddirch(core.hat[i,1:N.spp]) 
}
#get site level means.
for(i in 1:N.plot){
  for(j in 1:N.spp){
    log(plot.hat[i,j]) <- site_mu[plot_site[i],j]
  }
  plot_mu[i,1:N.spp] ~ ddirch(plot.hat[i,1:N.spp])
}

#prior
for(i in 1:N.site){
  for(j in 1:N.spp){
    site_mu[i,j] ~ dnorm(0,1E-3)
  }
}
}" #end jags model.

#JAGS data for hierarchical plot-site case.----
jd2 <- list(y=as.matrix(y), N.site = n.site, N.plot =  length(plot_site), N.core = nrow(y), N.spp = ncol(y), 
            core_plot = as.factor(core_plot), plot_site=as.factor(plot_site))

#Run hierarchical plot-site model.----
test <- run.jags(model = jags.model2,
                 data = jd2,
                 n.chains = 3,
                 monitor = c('site_mu'),
                 adapt = 200,
                 burnin = 1000,
                 sample = 1000)

This model does not converge, and does not capture the site-level means. The correlation between predicted vs. observed is actually negative:

out <- summary(test)
spp.sum <- matrix(out[,4], nrow = n.site, ncol = ncol(y))
spp.sum <- boot::inv.logit(spp.sum) / rowSums(boot::inv.logit(spp.sum))
spp.sum;truth
plot(as.vector(truth) ~ as.vector(spp.sum));mod<-lm(as.vector(truth) ~ as.vector(spp.sum));abline(mod)
mtext(paste0('R2=',round(summary(mod)$r.squared,3)), side = 3, adj = 0.05, line = -2)

enter image description here

My question is: Where am I going wrong in the hierarchical model? How can I capture thee true site-level means while accounting for the hierarchical sampling design?


Get this bounty!!!

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.