##################################################################################################################### # This file uses the functions defined in "Functions_Rcpp.cpp" or "Functions_Rbase.r" to perform analyses presented in the manuscript. # "Functions_Rcpp.cpp" are written in C++ language, which can be integrated with R by the 'Rcpp' package (our code also requires the "RcppArmadillo" package). # We recommend to source "Functions_Rcpp.cpp", wihch is at least 10 times faster than sourcing "Functions_Rbase.r". # The first part implements several simulation studies and saves the results for further use. # The second part reproduces figures and tables in the manuscript and the Supplemental Material. ########################################################################################## ### Part I: Conduct simulation studies and save the results for further use ########################################################################################## library(Rcpp) sourceCpp("Functions_Rcpp.cpp") # source("Functions_Rbase.r") ############################# ### An auxiliary function ### ############################# ## Introduction of the function ## # The 'fn_Simlt()' function simulates datasets and applies the general method, the GEI method and the RGEI method to the # generated datasets, for different combinations of parameter setting and sample size. ## Input Variables ## # Setting: A J*7 matrix that specifies J parameter settings, with each row being (\beta_{0},\beta_{e},\beta_{g},\beta_{eg},\pi_{e},\pi_{g},\rho). # Size: A K*2 matrix that specifies K sample sizes, with the first column being the number of controls and the second being the number of cases. # NREP: The number of datasets to be generated for each combination of parameter setting and sample size. # Hypers: The prior hyper-parameters, (\sigma_{e},\sigma_{g},\sigma_{eg},\sigma_{\rho}), for the GEI/RGEI method. Note the GEI method fixes \sigma_{\rho}=0. ## Output ## # A list 'ResList', with ResList[[j]][[k]] being the simulation result for the j-th parameter setting and k-th sample size # ResList[[j]][[k]] is a list itself, containing the following elements: # GEN: a 3-column matrix for the general estimates. The first column is the point estimate, and the last two columns are the 95% CI # GEI: a 3-column matrix for the GEI estimates. The first column is the point estimate, and the last two columns are the 95% CI # RGEI: a 3-column matrix for the RGEI estimates. The first column is the point estimate, and the last two columns are the 95% CI # NumTheta: the number (0,1,or 2) of \theta that 'fit' the generated dataset under the GEI assumption # PropNonGEI: the proportion of mass that the general posterior distribution puts outside the GEI space fn_Simlt <- function(Setting, Size, NREP, Hypers) { J <- nrow(Setting) K <- nrow(Size) ResList <- NULL for(j in 1:J) { Betas <- Setting[j,1:4] prvle <- Setting[j,5] prvlg <- Setting[j,6] rho <- Setting[j,7] Cellprb <- fn_PhiToCellprb(Betas, prvle, prvlg, rho) rctr <- Cellprb$pctr/sum(Cellprb$pctr) rcas <- Cellprb$pcas/sum(Cellprb$pcas) tmpList <- NULL for(k in 1:K) { nctr <- Size[k,1] ncas <- Size[k,2] GEN <- matrix(NA, NREP, 3) GEI <- matrix(NA, NREP, 3) RGEI <- matrix(NA, NREP, 3) NumTheta <- rep(NA, NREP) PropNonGEI <- rep(NA, NREP) for(nlp in 1:NREP) { print(c(j,k,nlp)) dctr <- c(rmultinom(1, nctr, rctr)) dcas <- c(rmultinom(1, ncas, rcas)) NumTheta[nlp] <- length(fn_LambdaToTheta(dctr, dcas, rho)) # General method resGEN <- fn_MainGeneral(dctr, dcas, 10000) PropNonGEI[nlp] <- resGEN$propNonGEI beta3GEN <- resGEN$beta3GEN GEN[nlp, ] <- c(mean(beta3GEN), quantile(beta3GEN, probs=c(0.025, 0.975))) # GEI method resGEI <- fn_MainRGEI(dctr, dcas, 10000, Hypers[1], Hypers[2], Hypers[3], 0) beta3GEI <- with(resGEI, sample(Phi[,4], nrow(Phi), TRUE, Weight)) GEI[nlp, ] <- c(mean(beta3GEI), quantile(beta3GEI, probs=c(0.025, 0.975))) # RGEI method resRGEI <- fn_MainRGEI(dctr, dcas, 10000, Hypers[1], Hypers[2], Hypers[3], Hypers[4]) beta3RGEI <- with(resRGEI, sample(Phi[,4], nrow(Phi), TRUE, Weight)) RGEI[nlp, ] <- c(mean(beta3RGEI), quantile(beta3RGEI, probs=c(0.025, 0.975))) tmpList[[k]] <- list(GEN = GEN, GEI = GEI, RGEI = RGEI, NumTheta = NumTheta, PropNonGEI = PropNonGEI) } ResList[[j]] <- tmpList } } return(ResList) } ############################# ### Simulation Studies ### ############################# # This simulation study concerns seven parameter settings and there sample sizes. # Its results are presented in the Supplemental Material Setting <- rbind( c(log(0.05/0.95), log(1.2), 0 , log(1.5), 0.1, 0.1, 1), c(log(0.05/0.95), log(1.2), 0 , log(1.5), 0.1, 0.3, 1), c(log(0.05/0.95), log(1.2), 0 , log(1.5), 0.3, 0.1, 1), c(log(0.05/0.95), log(1.2), 0 , log(1.5), 0.3, 0.3, 1), c(log(0.05/0.95), log(1.2), log(1.2), log(1.5), 0.1, 0.1, 1), c(log(0.05/0.95), log(1.2), log(1.2), log(1.5), 0.1, 0.3, 1), c(log(0.05/0.95), log(1.2), log(1.2), log(1.5), 0.3, 0.3, 1)) Size <- matrix(rep(c(400,2000,4000), 2), 3, 2) ResList <- fn_Simlt(Setting, Size, NREP=2000, Hypers=c(2.5, 2.5, 3.5, 0.1)) save(ResList, file ="Simlt_AllSettings.RData") # This simulation study focuses on the fourth parameter setting of the previous simulation study, which is the one used in the manuscript. # It also considers three more larger sample sizes (20000/200000/2000000) to adress the asymptotic behavior of the GEI method. # For consistency, the result for the three smaller sample sizes (800/4000/8000) are directly extracted from the previous simulation. load("Simlt_AllSettings.RData") tmp1 <- ResList[[4]] rm(ResList) Setting <- rbind( c(log(0.05/(1-0.05)), log(1.2), 0, log(1.5), 0.3, 0.3, 1) ) Size <- matrix(rep(c(10000,100000,1000000), 2), 3, 2) tmp2 <- fn_Simlt(Setting, Size, NREP=2000, Hypers=c(2.5, 2.5, 3.5, 0.1))[[1]] ResList <- c(tmp1, tmp2) save(ResList, file="Simlt_MainSetting.RData") # This simulation study investigates the impact of the size of \beta_{eg} Setting <- rbind( c(log(0.05/0.95), log(1.2), 0, log(1.0), 0.3, 0.3, 1), c(log(0.05/0.95), log(1.2), 0, log(1.5), 0.3, 0.3, 1), c(log(0.05/0.95), log(1.2), 0, log(3.0), 0.3, 0.3, 1) ) Size <- matrix(rep(c(400,2000,4000), 2), 3, 2) ResList <- fn_Simlt(Setting, Size, NREP=2000, Hypers=c(2.5, 2.5, 3.5, 0.1)) save(ResList, file="Simlt_beta3Strength.RData") # This simulation study compares the general method, the GEI method and the RGEI method when the GEI assumption is violated. Setting <- rbind( c(log(0.05/0.95), log(1.2), 0, log(1.5), 0.3, 0.3, 1.1) ) Size <- matrix(rep(4000, 2), 1, 2) ResList <- fn_Simlt(Setting, Size, NREP=2000, Hypers=c(2.5, 2.5, 3.5, 0.1)) save(ResList, file="Simlt_nonGEI.RData") ########################################################################################## ### Part II: Reproduce figures and tables in the manuscript and the Supplemental Material ########################################################################################## ####################################### ### Tables in the main manuscript ### ####################################### ################ ### Table 2 ### ################ # setwd(Path4RData) load("Simlt_MainSetting.RData") MSE = CPB <- NULL for(k in 1:3) { res <- ResList[[k]] MSE <- with(res, rbind(MSE, c(mean((GEN[,1]-log(1.5))^2), mean((GEI[,1]-log(1.5))^2), mean((RGEI[,1]-log(1.5))^2) ) ) ) CPB <- with(res, rbind(CPB, c(mean(GEN[,2]