#StackBounty: #javascript #r #web-scraping #rvest #rselenium scraping asp javascript paginated tables behind search with R

Bounty: 500

I’m trying to pull the content on https://www.askebsa.dol.gov/epds/default.asp with either rvest or RSelenium, but I’m not finding guidance for the case where the JavaScript page begins with a search box. It’d be great to just get all of this content into a simple CSV file.

After that, pulling the data from individual filings like https://www.askebsa.dol.gov/mewaview/View/Index/6219 seems possible, but I’d also appreciate a clean recommendation for doing that. Thanks.
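
To make the goal concrete, here is the kind of RSelenium + rvest sketch I have in mind (untested against this site; the CSS selector for the search button is a placeholder that would need to be read off the actual page source):

library(RSelenium)
library(rvest)

# Start a selenium server plus a real browser.
rd    <- rsDriver(browser = "firefox")
remDr <- rd$client
remDr$navigate("https://www.askebsa.dol.gov/epds/default.asp")

# Submit the search form (selector is an assumption -- inspect the page).
btn <- remDr$findElement(using = "css selector", "input[type='submit']")
btn$clickElement()
Sys.sleep(2)  # crude wait for the javascript results to render

# Parse whatever tables the rendered page contains; a loop over the
# pagination links (clicking, waiting, re-parsing) would go here, with
# the per-page tables bound together and written out via write.csv().
page   <- read_html(remDr$getPageSource()[[1]])
tables <- html_table(page, fill = TRUE)

# Individual filings look like static pages, so rvest alone may suffice.
filing <- read_html("https://www.askebsa.dol.gov/mewaview/View/Index/6219")
filing_tables <- html_table(filing, fill = TRUE)

remDr$close()
rd$server$stop()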


Get this bounty!!!

#StackBounty: #r #ggplot2 #heatmap How can I plot two geom_tile next to each other so they align as in a Heatmap, using ggplot2?

Bounty: 50

I would like to plot a Heatmap including coloured annotation bars. A bit of background to the data.

I have simplified the example data below.

I have patient ids and a numerical measurement (value_mean) that I would like to plot per patient per “emm_type” in the form of a Heatmap. Each “emm_type” falls into a “cluster” and a “pattern”, so I would like the Heatmap to include coloured panels delineating these variables, aligned with their respective emm_type.

Here is a sample of my data:

> dput(example)
structure(list(id = c("RF0475", "RF0504", "RF0475", "RF0504", 
"RF0475", "RF0504", "RF0475", "RF0504", "RF0475", "RF0504", "RF0475", 
"RF0475", "RF0504", "RF0504", "RF0475", "RF0504", "RF0475", "RF0475", 
"RF0475", "RF0475", "RF0475", "RF0475", "RF0504", "RF0504", "RF0504", 
"RF0504", "RF0504", "RF0504", "RF0475", "RF0504", "RF0475", "RF0475", 
"RF0504", "RF0504", "RF0475", "RF0475", "RF0475", "RF0475", "RF0475", 
"RF0504", "RF0504", "RF0504", "RF0504", "RF0504", "RF0475", "RF0475", 
"RF0475", "RF0475", "RF0475", "RF0504", "RF0504", "RF0504", "RF0504", 
"RF0504", "RF0475", "RF0475", "RF0475", "RF0475", "RF0504", "RF0504", 
"RF0504", "RF0504", "RF0475", "RF0504", "RF0475", "RF0504", "RF0475", 
"RF0504", "RF0475", "RF0504", "RF0475", "RF0504", "RF0475", "RF0504"
), cluster = c("a-c2", "a-c2", "a-c3", "a-c3", "a-c4", "a-c4", 
"a-c5", "a-c5", "d1", "d1", "d2", "d2", "d2", "d2", "d3", "d3", 
"d4", "d4", "d4", "d4", "d4", "d4", "d4", "d4", "d4", "d4", "d4", 
"d4", "e1", "e1", "e2", "e2", "e2", "e2", "e3", "e3", "e3", "e3", 
"e3", "e3", "e3", "e3", "e3", "e3", "e4", "e4", "e4", "e4", "e4", 
"e4", "e4", "e4", "e4", "e4", "e6", "e6", "e6", "e6", "e6", "e6", 
"e6", "e6", "m19", "m19", "m218", "m218", "m233", "m233", "m6", 
"m6", "m74", "m74", "m95", "m95"), pattern = c("a-c", "a-c", 
"a-c", "a-c", "a-c", "a-c", "a-c", "a-c", "d", "d/a-c", "d", 
"e", "d", "e", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d", 
"d", "d", "d", "d", "e", "e", "e", "e", "e", "e", "e", "e", "e", 
"e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", 
"e", "e", "e", "e", "e", "d", "e", "d", "e", "d", "e", "d", "a-c", 
"a-c", "d/a-c", "d/a-c", "a-c", "a-c", "a-c", "a-c", "d", "d", 
"d", "d"), value_mean = c(1.82898259773807, 2.74970378862732, 
2.31836858483114, 1.76297558336274, 6.99379366342489, 2.15775104765085, 
9.81401417902465, 5.94493622813449, 6.42938334280903, 4.93258400244736, 
4.42293379133012, 35.7119300124525, 85.8843942732351, 6.11004188703959, 
4.46626647704635, 5.06748534630747, 2.34493589810343, 3.67864160152857, 
3.49413303648271, 4.54325723822265, 11.6241914407818, 6.52797483395025, 
2.29277958694861, 7.80004526681732, 2.69910122940354, 3.51802243804242, 
6.70909678383865, 4.99681912787639, 5.54367727879201, 9.26383310897086, 
4.57249586682161, 4.47787503848692, 12.3177425173967, 15.4240417229311, 
4.14187570530094, 32.2447795214283, 2.8171424279428, 3.62644580807153, 
79.8173447817745, 2.86868514917333, 4.13675844930625, 2.89891922608397, 
120, 5.07500759868863, 3.31961544500323, 9.76557528920087, 4.93060063573198, 
4.65192299498109, 66.3579869162384, 2.22596680234449, 5.70995502095345, 
4.26850758713846, 120, 25.6383266263976, 2.90543208425715, 8.40935809851042, 
2.31807635931822, 8.49055234623605, 3.29831448162297, 3.65068984963035, 
1.93567603146573, 2.49808722814557, 3.14095440681389, 2.08508075133288, 
3.08360524948663, 1.74613534854807, 1.91624362373354, 3.797786602908, 
3.06755845905157, 3.11530841942899, 2.06455239407449, 1.71396244231883, 
5.7985222607316, 3.74822367820585), group = c("case", "control", 
"case", "control", "case", "control", "case", "control", "case", 
"control", "case", "case", "control", "control", "case", "control", 
"case", "case", "case", "case", "case", "case", "control", "control", 
"control", "control", "control", "control", "case", "control", 
"case", "case", "control", "control", "case", "case", "case", 
"case", "case", "control", "control", "control", "control", "control", 
"case", "case", "case", "case", "case", "control", "control", 
"control", "control", "control", "case", "case", "case", "case", 
"control", "control", "control", "control", "case", "control", 
"case", "control", "case", "control", "case", "control", "case", 
"control", "case", "control"), emm_type = structure(c(1L, 1L, 
2L, 3L, 4L, 5L, 6L, 6L, 7L, 8L, 9L, 11L, 9L, 11L, 12L, 12L, 13L, 
15L, 17L, 19L, 21L, 23L, 13L, 15L, 17L, 19L, 21L, 23L, 24L, 24L, 
25L, 27L, 26L, 28L, 29L, 31L, 33L, 35L, 37L, 29L, 31L, 33L, 35L, 
37L, 38L, 40L, 42L, 44L, 46L, 38L, 40L, 42L, 44L, 46L, 47L, 49L, 
51L, 53L, 47L, 49L, 51L, 53L, 54L, 54L, 55L, 55L, 56L, 56L, 57L, 
57L, 58L, 58L, 59L, 59L), .Label = c("197", "1", "238.1", "12", 
"39.4", "3.1", "36.2", "54.1", "71", "100", "104", "123", "33", 
"41.2", "52", "53", "86", "91", "93.4", "101", "108.1", "116.1", 
"225", "4", "68", "76", "90.5", "92", "25", "44", "49", "58", 
"82", "87", "103", "113", "118", "2", "8", "22", "28", "77", 
"88", "89", "114", "232.1", "11", "42", "59.1", "65", "75", "81", 
"85", "19.4", "218.1", "233", "6", "74", "95"), class = "factor", scores = structure(c(`1` = 2, 
`2` = 12, `3.1` = 4, `4` = 9, `6` = 17, `8` = 12, `11` = 13, 
`12` = 3, `19.4` = 14, `22` = 12, `25` = 11, `28` = 12, `33` = 8, 
`36.2` = 5, `39.4` = 3, `41.2` = 8, `42` = 13, `44` = 11, `49` = 11, 
`52` = 8, `53` = 8, `54.1` = 5, `58` = 11, `59.1` = 13, `65` = 13, 
`68` = 10, `71` = 6, `74` = 18, `75` = 13, `76` = 10, `77` = 12, 
`81` = 13, `82` = 11, `85` = 13, `86` = 8, `87` = 11, `88` = 12, 
`89` = 12, `90.5` = 10, `91` = 8, `92` = 10, `93.4` = 8, `95` = 19, 
`100` = 6, `101` = 8, `103` = 11, `104` = 6, `108.1` = 8, `113` = 11, 
`114` = 12, `116.1` = 8, `118` = 11, `123` = 7, `197` = 1, `218.1` = 15, 
`225` = 8, `232.1` = 12, `233` = 16, `238.1` = 2), .Dim = 59L, .Dimnames = list(
    c("1", "2", "3.1", "4", "6", "8", "11", "12", "19.4", "22", 
    "25", "28", "33", "36.2", "39.4", "41.2", "42", "44", "49", 
    "52", "53", "54.1", "58", "59.1", "65", "68", "71", "74", 
    "75", "76", "77", "81", "82", "85", "86", "87", "88", "89", 
    "90.5", "91", "92", "93.4", "95", "100", "101", "103", "104", 
    "108.1", "113", "114", "116.1", "118", "123", "197", "218.1", 
    "225", "232.1", "233", "238.1"))))), row.names = c(NA, -74L
), class = c("tbl_df", "tbl", "data.frame"))

I have plotted a Heatmap for both cases and controls with the following code:

(cases_heatmap <- ggplot(filter(example, group == "case"), aes(id, factor(emm_type)))+geom_tile(aes(fill=value_mean), colour="white")+
    scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 60,limits=c(0,max(example$value_mean)))+
    scale_y_discrete(expand = c(0, 0)) +
    theme(axis.ticks=element_blank(),
          axis.text.x=element_text(angle = 90, vjust = 0.6),legend.position = "none")+
    coord_equal())

(controls_heatmap <- ggplot(filter(example, group == "control"), aes(id, factor(emm_type)))+geom_tile(aes(fill=value_mean), colour="white")+
    scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 60,limits=c(0,max(example$value_mean)))+
    scale_y_discrete(expand = c(0, 0)) +
    theme(axis.ticks=element_blank(),
          axis.text.x=element_text(angle = 90, vjust = 0.6),legend.position = "none")+
    coord_equal())

Which gives me something like this (one for cases and one for controls):

[image: the case and control heatmaps produced by the code above]

In order to plot the cluster and pattern alongside it, I adapt the data a bit to get columns I can plot (the “cluster_text” and “pattern_text” columns), as well as a number to sort by (num_cluster):

example <- example%>%
  mutate(num_cluster = as.numeric(factor(example$cluster))) %>%
  mutate(num_pattern = as.numeric(factor(example$pattern))) %>%
  mutate(cluster_text = "Cluster") %>%
  mutate(pattern_text = "Pattern")

As I want the clusters to be grouped together I reorder the levels:

example$emm_type <- reorder(example$emm_type, example$cluster)

Then, in order to get the coloured annotation bars (for Cluster and Pattern) that I would like to plot alongside the Heatmap, I plot another geom_tile from the newly created “cluster_text” and “pattern_text” columns:

cluster_annotation <- ggplot(filter(example, group == "case"), aes(cluster_text, factor(emm_type)))+geom_tile(aes(fill=cluster), colour="white")+
  coord_equal()+
  theme(axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank())


pattern_annotation <- ggplot(filter(example, group == "case"), aes(pattern_text, factor(emm_type)))+geom_tile(aes(fill=pattern), colour="white")+
  coord_equal()+
  theme(axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank())

Which gives me the desired annotation tiles (this one is for cluster; I get the same for pattern):

[image: the cluster annotation bar]

Now I would like all the tiles next to each other, or even plotted in the same geom_tile, so that the emm_types align with their respective pattern and cluster, but I cannot for the life of me figure out how to do it.
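
Since cowplot is already attached (see the session info below), my best guess is something along these lines – a sketch only, and coord_equal() may have to be dropped from each plot first, because cowplot seems unable to align plots with a fixed aspect ratio:

# Sketch: line the annotation bars up against the heatmap so the
# emm_type rows match; rel_widths is a guess to be tuned by eye.
library(cowplot)
plot_grid(cluster_annotation + theme(legend.position = "none"),
          pattern_annotation + theme(legend.position = "none"),
          cases_heatmap,
          nrow = 1, align = "h", axis = "tb",
          rel_widths = c(1, 1, 4))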

Here is a picture of the final graphs, built from more of my data, that I would like aligned next to each other:

[image: the full heatmap and its annotation bars, not yet aligned]

> sessionInfo()
R version 3.5.0 (2018-04-23)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib

locale:
[1] en_NZ.UTF-8/en_NZ.UTF-8/en_NZ.UTF-8/C/en_NZ.UTF-8/en_NZ.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] bindrcpp_0.2.2  cowplot_0.9.3   scales_0.5.0    forcats_0.3.0   stringr_1.3.1   dplyr_0.7.6     purrr_0.2.5     readr_1.1.1     tidyr_0.8.1     tibble_1.4.2   
[11] ggplot2_3.0.0   tidyverse_1.2.1 readxl_1.1.0   

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.18     cellranger_1.1.0 pillar_1.3.0     compiler_3.5.0   plyr_1.8.4       bindr_0.1.1      tools_3.5.0      digest_0.6.15    lubridate_1.7.4 
[10] jsonlite_1.5     nlme_3.1-137     gtable_0.2.0     lattice_0.20-35  pkgconfig_2.0.1  rlang_0.2.1      cli_1.0.0        rstudioapi_0.7   yaml_2.2.0      
[19] haven_1.1.2      withr_2.1.2      xml2_1.2.0       httr_1.3.1       hms_0.4.2        tidyselect_0.2.4 glue_1.3.0       R6_2.2.2         fansi_0.2.3     
[28] reshape2_1.4.3   modelr_0.1.2     magrittr_1.5     backports_1.1.2  rvest_0.3.2      assertthat_0.2.0 colorspace_1.3-2 labeling_0.3     utf8_1.1.4      
[37] stringi_1.2.4    lazyeval_0.2.1   munsell_0.5.0    broom_0.5.0      crayon_1.3.4  


Get this bounty!!!

#StackBounty: #r #mixed-model #lme4-nlme Mixed effects model: Compare random variance component across levels of a grouping variable

Bounty: 100

Suppose I have $N$ participants, each of whom gives a response $Y$ 20 times, 10 in one condition and 10 in another. I fit a linear mixed effects model comparing $Y$ in each condition. Here’s a reproducible example simulating this situation using the lme4 package in R:

library(lme4)
fml <- "~ condition + (condition | participant_id)"
d <- expand.grid(participant_id=1:40, trial_num=1:10)
d <- rbind(cbind(d, condition="control"), cbind(d, condition="experimental"))

set.seed(23432)
d <- cbind(d, simulate(formula(fml), 
                       newparams=list(beta=c(0, .5), 
                                      theta=c(.5, 0, 0), 
                                      sigma=1), 
                       family=gaussian, 
                       newdata=d))

m <- lmer(paste("sim_1 ", fml), data=d)
summary(m)

The model m yields two fixed effects (an intercept and slope for condition), and three random effects (a by-participant random intercept, a by-participant random slope for condition, and an intercept-slope correlation).

I would like to statistically compare the size of the by-participant random slope variance in the groups defined by condition (i.e., the variance component highlighted in red, evaluated at condition=”control” vs. at condition=”experimental”). How would I do this (preferably in R)?

[image: the model summary with the by-participant random slope variance highlighted in red]
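
One re-parameterization I have considered (a sketch of a partial approach, not a full answer): dropping the intercept from the random term makes lmer estimate one by-participant variance per condition level directly, and profile intervals then give at least an informal comparison:

# Sketch: (0 + condition | participant_id) parameterizes the random
# effects as one variance per condition level rather than an intercept
# variance plus a slope variance.
m2 <- lmer(sim_1 ~ condition + (0 + condition | participant_id), data=d)
VarCorr(m2)

# Profile confidence intervals for the two standard deviations --
# eyeballing whether these overlap is informal, not a formal test.
confint(m2, parm = "theta_", oldNames = FALSE)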


BONUS

Let’s say the model is slightly more complicated: The participants each experience 10 stimuli 20 times each, 10 in one condition and 10 in another. Thus, there are two sets of crossed random effects: random effects for participant and random effects for stimulus. Here’s a reproducible example:

library(lme4)
fml <- "~ condition + (condition | participant_id) + (condition | stimulus_id)"
d <- expand.grid(participant_id=1:40, stimulus_id=1:10, trial_num=1:10)
d <- rbind(cbind(d, condition="control"), cbind(d, condition="experimental"))

set.seed(23432)
d <- cbind(d, simulate(formula(fml), 
                       newparams=list(beta=c(0, .5), 
                                      theta=c(.5, 0, 0, .5, 0, 0), 
                                      sigma=1), 
                       family=gaussian, 
                       newdata=d))

m <- lmer(paste("sim_1 ", fml), data=d)
summary(m)

I would like to statistically compare the magnitude of the random by-participant intercept variance in the groups defined by condition. How would I do that, and is the process any different from the one in the situation described above?


EDIT

To be a bit more specific about what I’m looking for, I want to know:

  1. Is the question, “are the random intercepts substantially different from each other, beyond what we would expect due to sampling error” a well-defined question (i.e., is this question even theoretically answerable)? If not, why not?
  2. If the answer to question (1) is yes, how would I answer it? I would prefer an R implementation, but I’m not tied to the lme4 package — for example, it seems as though the OpenMx package has the capability to accommodate multi-group and multi-level analyses (https://openmx.ssri.psu.edu/openmx-features), and this seems like the sort of question that ought to be answerable in an SEM framework.


Get this bounty!!!

#StackBounty: #r #c++11 #gdal #rgdal updating Rgdal in R.3.5.1 C++11 dependency… although C++11 is available

Bounty: 50

When I am updating (or at least trying to update) the rgdal package by compiling from source, after updating R from 3.4.4 to 3.5.1, I run into the odd issue that the build goes well but the namespace load fails due to an “undefined symbol” error:

** installing vignettes
** testing if installed package can be loaded
Error: package or namespace load failed for ‘rgdal’ in dyn.load(file, DLLpath = DLLpath, ...):
 unable to load shared object '/usr/local/lib/R/site-library/rgdal/libs/rgdal.so':
  /usr/local/lib/R/site-library/rgdal/libs/rgdal.so: undefined symbol: _ZNK10OGRFeature16GetFieldAsDoubleEi
Error: loading failed
Execution halted
ERROR: loading failed
* removing ‘/usr/local/lib/R/site-library/rgdal’
* restoring previous ‘/usr/local/lib/R/site-library/rgdal’

The downloaded source packages are in
        ‘/tmp/RtmpHu6D0N/downloaded_packages’
Warning message:
In install.packages("rgdal") :
  installation of package ‘rgdal’ had non-zero exit status

When running echo _ZNK10OGRFeature16GetFieldAsDoubleEi | c++filt I get OGRFeature::GetFieldAsDouble(int) const as output, which doesn’t tell me much. I have installed GDAL from https://trac.osgeo.org/gdal/wiki/DownloadSource, as suggested on the CRAN package site; the configure output below shows that I have GDAL 2.3.1 installed.

It is odd that the update fails, as the package was already installed, so its dependencies should have been met. Any pointers as to how to resolve this issue are warmly welcome.

As I got no suggestions in the past few weeks, I decided to uninstall rgdal and reinstall it. Interestingly, I now get a claim that my system (Ubuntu 16.04.5 LTS) does not support C++11, even though the configure check says it is there (“C++11 support available”):

* installing *source* package ‘rgdal’ ...
** package ‘rgdal’ successfully unpacked and MD5 sums checked
configure: R_HOME: /usr/lib/R
configure: CC: gcc -std=gnu99
configure: CXX: g++
configure: C++11 support available
configure: rgdal: 1.3-4
checking for /usr/bin/svnversion... yes
configure: svn revision: 766
checking for gdal-config... /usr/local/bin/gdal-config
checking gdal-config usability... yes
configure: GDAL: 2.3.1
checking C++11 support for GDAL >= 2.3.0... yes
checking GDAL version >= 1.11.4... yes
checking gdal: linking with --libs only... no
checking gdal: linking with --libs and --dep-libs... no
In file included from /usr/local/include/gdal.h:45:0,
                 from gdal_test.cc:1:
/usr/local/include/cpl_port.h:187:6: error: #error Must have C++11 or newer.
 #    error Must have C++11 or newer.
      ^
In file included from /usr/local/include/gdal.h:49:0,
                 from gdal_test.cc:1:
/usr/local/include/cpl_minixml.h:202:47: error: expected template-name before '<' token
 class CPLXMLTreeCloser: public std::unique_ptr<CPLXMLNode, CPLXMLTreeCloserDeleter>
                                               ^
/usr/local/include/cpl_minixml.h:202:47: error: expected '{' before '<' token
/usr/local/include/cpl_minixml.h:202:47: error: expected unqualified-id before '<' token
In file included from /usr/local/include/ogr_api.h:45:0,
                 from /usr/local/include/gdal.h:50,
                 from gdal_test.cc:1:
/usr/local/include/ogr_core.h:79:28: error: expected '}' before end of line
/usr/local/include/ogr_core.h:79:28: error: expected declaration before end of line
In file included from /usr/local/include/gdal.h:45:0,
                 from gdal_test.cc:1:
/usr/local/include/cpl_port.h:187:6: error: #error Must have C++11 or newer.
 #    error Must have C++11 or newer.
      ^
In file included from /usr/local/include/gdal.h:49:0,
                 from gdal_test.cc:1:
/usr/local/include/cpl_minixml.h:202:47: error: expected template-name before '<' token
 class CPLXMLTreeCloser: public std::unique_ptr<CPLXMLNode, CPLXMLTreeCloserDeleter>
                                               ^
/usr/local/include/cpl_minixml.h:202:47: error: expected '{' before '<' token
/usr/local/include/cpl_minixml.h:202:47: error: expected unqualified-id before '<' token
In file included from /usr/local/include/ogr_api.h:45:0,
                 from /usr/local/include/gdal.h:50,
                 from gdal_test.cc:1:
/usr/local/include/ogr_core.h:79:28: error: expected '}' before end of line
/usr/local/include/ogr_core.h:79:28: error: expected declaration before end of line
configure: Install failure: compilation and/or linkage problems.
configure: error: GDALAllRegister not found in libgdal.
ERROR: configuration failed for package ‘rgdal’
* removing ‘/usr/local/lib/R/site-library/rgdal’
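
One workaround I have not yet verified (an assumption, not a known fix): forcing the C++11 dialect in ~/.R/Makevars, in the hope that rgdal’s configure test then compiles gdal_test.cc with the right standard:

# Run once in R; back up any existing ~/.R/Makevars first.
# Whether rgdal's configure honors CXX here is an assumption.
dir.create("~/.R", showWarnings = FALSE)
cat("CXX = g++ -std=c++11\n", file = "~/.R/Makevars", append = TRUE)
install.packages("rgdal")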

My devtools::session_info() is the following:

Session info ------------------------------------------------------------------
 setting  value
 version  R version 3.5.1 (2018-07-02)
 system   x86_64, linux-gnu
 ui       X11
 language en_US:en
 collate  en_US.UTF-8
 tz       Europe/Brussels
 date     2018-07-28

Packages ----------------------------------------------------------------------
 package   * version date       source
 base      * 3.5.1   2018-07-03 local
 compiler    3.5.1   2018-07-03 local
 datasets  * 3.5.1   2018-07-03 local
 devtools  * 1.13.6  2018-06-27 CRAN (R 3.5.1)
 digest      0.6.15  2018-01-28 CRAN (R 3.5.1)
 graphics  * 3.5.1   2018-07-03 local
 grDevices * 3.5.1   2018-07-03 local
 memoise     1.1.0   2017-04-21 CRAN (R 3.5.1)
 methods   * 3.5.1   2018-07-03 local
 stats     * 3.5.1   2018-07-03 local
 tools       3.5.1   2018-07-03 local
 utils     * 3.5.1   2018-07-03 local
 withr       2.1.2   2018-03-15 CRAN (R 3.5.1)


Get this bounty!!!

#StackBounty: #r #mixed-model #mlogit mlogit package fails to recover synthetic mixed logit model

Bounty: 100

I am generating data from the following synthetic mixed effects model for the utility of agent $h$ choosing transportation mode $j$:

$U_{hj} = \beta_{\text{price}} X_{h,\text{price}_j} + \beta_{\text{time}} X_{h,\text{time}_j} + \gamma_h X_{h,\text{bus}_j} + \epsilon_{hj}, \qquad \gamma_h \sim \text{N}(0, 10)$

where $X_{h,\text{price}_j}$ and $X_{h,\text{time}_j}$ are agent $h$’s price and time for transportation mode $j$, and $X_{h,\text{bus}_j}$ is an indicator function equal to 1 if mode $j$ is a bus.

The true model has $\beta_{\text{price}} = \beta_{\text{time}} = -1$.

For each agent, I generate the price and time covariates ($X_{h,\text{price}_j}$, $X_{h,\text{time}_j}$) for each mode of transportation (red bus, train, car), I sample the random effect $\gamma_h$ for each agent from $\text{N}(0, 10)$, and then I sample the transportation choice from the corresponding logit model.

Here’s the R code that generates the choice data:

library(mlogit)
set.seed(1987)
n.samples <- 10000
re.sd     <- 10 # random effect standard deviation

## generate price, time covariates
mode       <- c('red.bus', 'train', 'car')
is.bus     <- c(1, 0, 0)
price.data <- rnorm(3*n.samples) + c(2,5,10)
time.data  <- rnorm(3*n.samples) + c(10, 5, 2)
obs          <- data.frame(mode=mode,
                           price=price.data,
                           time=time.data,
                           bus=is.bus)


## generate random effect + choice data
obs$agent.id <- rep(1:n.samples, each=3)
obs$choice   <- NA
obs$util     <- NA
for( a.id in unique(obs$agent.id) ) {
    xx.sub <- obs[obs$agent.id == a.id,]
    obs$util[obs$agent.id == a.id]   <- xx.sub$price*-1 + xx.sub$time*-1 + xx.sub$bus*rnorm(1,sd=re.sd)
    uu <- obs$util[obs$agent.id == a.id]
    p.vec <- exp(uu)/sum(exp(uu))
    obs$choice[obs$agent.id == a.id] <- rmultinom(1, 1, p.vec)==1
}
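
(As an aside, the loop above can be vectorized – one draw of $\gamma_h$ per agent, repeated across that agent’s three rows – which is much faster for large n.samples; this sketch should match the loop up to the order in which the random numbers are drawn:)

# Vectorized equivalent of the loop above, assuming rows come in
# consecutive triples per agent, as constructed.
gamma.h  <- rep(rnorm(n.samples, sd=re.sd), each=3)
obs$util <- -obs$price - obs$time + obs$bus * gamma.h
p.mat    <- matrix(exp(obs$util), nrow=3)        # one column per agent
p.mat    <- sweep(p.mat, 2, colSums(p.mat), "/") # choice probabilities
obs$choice <- as.logical(apply(p.mat, 2, function(p) rmultinom(1, 1, p)))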


logit.data <- mlogit.data(obs,
                          shape   = "long",
                          choice  = "choice",
                          varying = which( colnames(obs) %in% c('price', 'time', 'bus') ),
                          alt.var = 'mode')

Here are the first few rows of the generated dataset:

> head(logit.data)
             mode      price     time bus agent.id choice       util
1.red.bus red.bus  0.7113747 9.700200   1        1  FALSE -21.306406
1.train     train  5.2730015 5.908244   0        1   TRUE -11.181246
1.car         car 12.8485015 2.256558   0        1  FALSE -15.105060
2.red.bus red.bus  1.7021472 9.642485   1        2   TRUE  -7.677141
2.train     train  5.2443204 5.671528   0        2  FALSE -10.915848
2.car         car 10.2686018 2.250377   0        2  FALSE -12.518979

I attempt to fit the correctly specified model with the mlogit package, but I find that the estimates are wrong (the standard deviation of the random effect, in particular):

m.mixed <- mlogit(choice ~ price + time + bus | 0,
                  data=logit.data,
                  rpar= c(bus = 'n'),
                  R = 300, halton = NA)

summary(m.mixed)
> summary(m.mixed)

Call:
mlogit(formula = choice ~ price + time + bus | 0, data = logit.data, 
    rpar = c(bus = "n"), R = 300, halton = NA)

Frequencies of alternatives:
    car red.bus   train 
 0.1317  0.4084  0.4599 

bfgs method
8 iterations, 0h:1m:55s 
g'(-H)^-1g =   594 
last step couldn't find higher value 

Coefficients :
        Estimate Std. Error  t-value  Pr(>|t|)    
price  -1.253720   0.026698 -46.9592 < 2.2e-16 ***
time   -1.329888   0.032389 -41.0603 < 2.2e-16 ***
bus     1.402522   0.318802   4.3993 1.086e-05 ***
sd.bus 19.455180   2.107521   9.2313 < 2.2e-16 ***

The true sd.bus is 10, but mlogit estimates it at almost 20.

Why isn’t mlogit recovering the true model?


Get this bounty!!!

#StackBounty: #r #mixed-model #repeated-measures #lme4-nlme #multilevel-analysis Compute partial $\eta^2$ for all fixed effects anovas …

Bounty: 50

Disclaimer: I wasn’t sure where to post this question, CV or SO, but eventually decided to try here first.

I’ve been asked by one of the reviewers to add effect sizes (preferably $\eta^2_p$, which is standard in my field) for all the $F$ and $t$ tests reported in the auxiliary analyses of my paper.

To be specific, I’m mentioning a significant 4-way interaction (A:B:C:response) between fixed effects of an anova(lmer.model) call and explaining it with simple interactions and estimated marginal means using the emmeans package and the pairs() function (reproducible exemplary code is attached below).

I’m aware that there is at least a debate on whether or not computing effect sizes for mixed effects models makes sense, but I’d like to satisfy the reviewer without any further discussion.

I’ve come across some sources mentioning effect sizes in the context of LMEMs, but I don’t feel confident enough with the math to follow them.

My question here is twofold:

  1. If there is any citable way to produce $\eta^2$, $\eta^2_p$ or
    $\omega^2$ for the ANOVA $F$ tests, how can I do it? An R package
    / function / script would be a great help. Alternatively, hints for
    retrieving the crucial values from a lmer() object in order to get
    the effect sizes by hand would be helpful as well.

    E.g. I know that $\eta^2 = SS_{\text{Effect}}/SS_{\text{Total}}$ and
    anova(lmer_obj) gives $SS_{\text{Effect}}$ but no $SS_{\text{Total}}$, and I
    have no clue how to get it with R (see the sketch just after this list).

  2. If there is no way to compute it – what paper (not blog post) can
    I cite when answering the reviewer as to why I insisted on skipping
    effect sizes?
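
Here is the kind of by-hand computation hinted at in point 1 – a sketch only, converting each $F$ in the anova table to a partial $\eta^2$ analogue via $\eta^2_p = F \cdot df_1/(F \cdot df_1 + df_2)$; whether this conversion is defensible with Satterthwaite denominator df is exactly what I am unsure about:

# Sketch: approximate partial eta-squared from the F table of the
# Satterthwaite anova; treat the numbers as descriptive only.
aov_tab <- as.data.frame(anova(m0))
aov_tab$eta_p2 <- with(aov_tab,
                       `F value` * NumDF / (`F value` * NumDF + DenDF))
round(aov_tab[, c("NumDF", "DenDF", "F value", "eta_p2")], 4)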

Note that I’m not interested in the random effects per se – I defined the maximal random-effects structure justified by the design only to control more error variance, and my only interest is the fixed effects – namely the Fs from the anova table and the corresponding marginal means.

Some exemplary results (corresponding to a real data structure but with random values) are as follows:

The A:B:C:response interaction is significant at $F(1, 12082.1) = 4.60$, $p = .032$ ($\eta^2_p = xxx$)

> anova(m0)
Type III Analysis of Variance Table with Satterthwaite's method
                Sum Sq Mean Sq NumDF   DenDF F value  Pr(>F)  
A               1.6717  1.6717     1   134.7  0.4210 0.51755  
B               0.1375  0.1375     1 11860.0  0.0346 0.85238  
C               7.1708  7.1708     1   133.5  1.8058 0.18129  
response        3.7775  3.7775     1 12070.3  0.9513 0.32940  
A:B             1.1291  1.1291     1 11872.8  0.2844 0.59387  
A:C             3.2427  3.2427     1   121.1  0.8166 0.36796  
B:C             4.1048  4.1048     1   121.8  1.0337 0.31130  
A:response      0.0000  0.0000     1 12080.8  0.0000 0.99828  
B:response      4.2350  4.2350     1 12078.9  1.0665 0.30176  
C:response      1.6567  1.6567     1 12082.3  0.4172 0.51834  
A:B:C           8.5249  8.5249     1   131.4  2.1469 0.14525  
A:B:response    0.8765  0.8765     1   132.0  0.2207 0.63926  
A:C:response    1.5150  1.5150     1   119.5  0.3815 0.53797  
B:C:response    0.5921  0.5921     1   122.5  0.1491 0.70005  
A:B:C:response 18.2803 18.2803     1 12082.1  4.6036 0.03192 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Follow-up comparisons show that the A:B:C interaction is only present for response = “no”: $F(1, 809.32) = 4.68$, $p = .031$ ($\eta^2_p = xxx$)

> joint_tests(m0, by="response")
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 12292) or larger,
but be warned that this may result in large computation time and memory use.
response = no:
 model term df1      df2 F.ratio p.value
 A            1   858.01   0.161  0.6884
 B            1 12177.54   0.530  0.4664
 C            1   837.27   0.212  0.6453
 A:B          1   785.54   0.365  0.5457
 A:C          1   507.23   0.872  0.3509
 B:C          1   476.41   0.145  0.7035
 A:B:C        1   809.32   4.680  0.0308

response = yes:
 model term df1      df2 F.ratio p.value
 A            1   184.95   0.361  0.5489
 B            1 11853.63   0.601  0.4381
 C            1   182.47   3.237  0.0736
 A:B          1   177.43   0.000  0.9874
 A:C          1   130.20   0.072  0.7882
 B:C          1   123.71   1.465  0.2285
 A:B:C        1   178.96   0.235  0.6281

In my real dataset there are more predicted effects, but the idea is still the same. I’m reporting $F$s and $t$s following APA 6th ed. rules, and I’m being asked to add some sort of effect size to them.

Any help with this matter will be greatly appreciated.

Full reproducible example

###############################################################
#Simulate data replicating the structure of a real life example
###############################################################

library(AlgDesign) # for generating all levels of a factorial design

set.seed(2)
df <- gen.factorial(c(16,2,2,2,100), factors = "all", 
                   varNames = c("rep", "A", "B", "C", "Subject"))
df$rep <- as.numeric(df$rep)
df$Subject <- as.numeric(df$Subject)
logRT <- rnorm(n=12800, m=7, sd=2)
trialno<- rep(1:128, times = 100)
response <- factor(sample(0:1, 12800, prob = c(0.3, 0.7), replace = T), 
                   labels= c("no", "yes"))

#Simulate some values to drop (set as missing) due to extremely low latency
missingRTs<- as.logical(sample(0:1, 12800, prob = c(0.96, 0.04), replace = T))
logRT[missingRTs==1] <- NA

df <- cbind(df, logRT, trialno, response)
df <- df[complete.cases(df),]

##########################
#Setup model with afex
########################## 

library(afex)
m0 <- mixed(logRT ~ A*B*C*response + (A*B*C*response||Subject), 
            data = df, return = 'merMod', method = "S", expand_re = TRUE)

##########################
#Get results for paper
########################## 

anova(m0)

emm_options(lmerTest.limit = 12292)
em0<-emmeans(m0, ~A*B*C*response)

joint_tests(m0, by="response")
joint_tests(m0, by=c("response", "B"))


Get this bounty!!!
