####################### # auxillary functions # ####################### expit <- function(x) {1/(1+exp(-x))} logit <- function(p) {log(p)-log(1-p)} ### equal-tailed interval from weighted MC sample eqtailed <- function(smp,wht,lvl=.95) { ndx <- order(smp) tmp.vl <- smp[ndx] tmp.wt <- wht[ndx] tmp.cm <- cumsum(tmp.wt) c(max(tmp.vl[tmp.cm<((1-lvl)/2)]), min(tmp.vl[tmp.cm>1-(1-lvl)/2])) } ################################ # tabular sensitivity analysis # # for unobserved confounding # ################################ sens.tsa <- function(dta, a.u=0, a.xu=0, a.yu=0) { ### MLE for fixed bias parameters ### dta is vector of cell counts, ### order (x,y)=(0,0),(0,1),(1,0),(1,1) ### bias parameters as per ### logit P(U=1|X,Y) = a.u + a.xu*X + a.yu*Y ### output is ML estimates of other 4 params in the log-linear ### model for the counts in the X,Y,U table (no 3-way term) ### in order beta0, betax, betay, betaxy ### expected counts i <- 1; edta <- NULL for (xvl in c(0,1)) { for (yvl in c(0,1)) { tmp <- expit(a.u + a.xu*xvl + a.yu*yvl) edta <- c(edta, dta[i]*c(1-tmp,tmp)) i <- i+1 }} ### maximization (i.e., m-step) x.des <- c(rep(0,4),rep(1,4)) y.des <- rep(c(0,0,1,1),2) u.des <- rep(c(0,1),4) tmp <- glm(edta~x.des+y.des+x.des:y.des, offset=a.u*u.des + a.xu*x.des*u.des + a.yu*y.des*u.des, family=poisson) ### return point estimate and 95% interval coef(tmp)[4]+ c(0,sqrt(summary(tmp)$cov.unscaled[4,4])*1.96*c(-1,1)) } #################################### # Monte Carlo sensitivity analysis # # for unobserved confounding # #################################### sens.mcsa <- function(dta, arng.u=c(0,0), arng.xu=c(0,0), arng.yu=c(0,0), PRISHP="unif", NREP=1000) { ### arng.? gives range of bias params a.u, a.xu, a.yu ### supports PRISHP="unif" on these ranges, or ### PRISHP="norm" - with 95% central interval matching unif ### dta is vector of cell counts, ### order (x,y)=(0,0),(0,1),(1,0),(1,1) ### bias parameters as per ### logit P(U=1|X,Y) = a.u + a.xu*X + a.yu*Y ### output is samsple of 7 parameters in the log-linear ### model for the counts in the X,Y,U table (no 3-way term) ### also gives weights to convert MCSA to Bayes ### cols will be 7 parameters plus weights ans <- matrix(NA, NREP, 8) for (mnlp in 1:NREP) { lwht <- 0 ### sample bias params if (PRISHP=="unif") { a.u <- runif(1,arng.u[1],arng.u[2]) a.xu <- runif(1,arng.xu[1],arng.xu[2]) a.yu <- runif(1,arng.yu[1],arng.yu[2]) } if (PRISHP=="norm") { mn <- sum(arng.u)/2 sd <- .95*(arng.u[2]-arng.u[1])/(2*1.96) a.u <- rnorm(1,mn,sd) mn <- sum(arng.xu)/2 sd <- .95*(arng.xu[2]-arng.xu[1])/(2*1.96) a.xu <- rnorm(1,mn,sd) mn <- sum(arng.yu)/2 sd <- .95*(arng.yu[2]-arng.yu[1])/(2*1.96) a.yu <- rnorm(1,mn,sd) } ### sample unobserved confounders i <- 1; edta <- NULL; lw.pro <- 0 for (xvl in c(0,1)) { for (yvl in c(0,1)) { tmp <- rbinom(1, size=dta[i], expit(a.u + a.xu*xvl + a.yu*yvl)) lw.pro <- lw.pro + log(dbinom(tmp,size=dta[i], expit(a.u + a.xu*xvl + a.yu*yvl))) edta <- c(edta, dta[i]-tmp, tmp) i <- i+1 }} ### sample beta parameters x.des <- c(rep(0,4),rep(1,4)) y.des <- rep(c(0,0,1,1),2) u.des <- rep(c(0,1),4) tmp <- glm(edta~x.des+y.des+x.des:y.des, offset=a.u*u.des+a.xu*x.des*u.des+a.yu*y.des*u.des, family=poisson) beta <- coef(tmp) + t(chol(summary(tmp)$cov.unscaled)) %*% rnorm(4) ### importance weights mn.bt <- coef(tmp) pr.bt <- solve(summary(tmp)$cov.unscaled) lw.pro <- lw.pro + sum(log(diag(chol(pr.bt)))) - .5*t(beta-mn.bt)%*%pr.bt%*%(beta-mn.bt) ### weight calculation for target tmp <- exp( cbind(1,x.des,y.des,x.des*y.des, u.des,x.des*u.des,y.des*u.des) %*% c(beta,a.u,a.xu,a.yu)) lw.trg <- sum(log(dpois(edta,tmp))) + sum(log(dnorm(beta, rep(0,4), rep(10^2,4)))) ### output for the present iteration ans[mnlp,] <- c(a.u, a.xu, a.yu, beta, lw.trg-lw.pro) } ### convert to importance weight scale ans[,8] <- exp(ans[,8]-max(ans[,8])) ans[,8] <- ans[,8]/sum(ans[,8]) ### output, rows correspond to replicates ans } # examples (commented out) implementing analysis in paper # dta <- c(88, 20, 555, 347) # ft <- sens.tsa(dta, a.u=-2, a.xu=-1, a.yu=-1) ### warnings about non-integer Poisson variable can be ignored, ### as discussed in paper # print(exp(ft)) # set.seed(13) # # ft <- sens.mcsa(dta, # arng.u=c(-2,-1), arng.xu=c(-1,1), arng.yu=c(-1,1), # NREP=50000) ### mcsa (ingores weights in 8th col) #print(exp(c(mean(ft[,7]),quantile(ft[,7],c(.025,.975))))) ### Bayes (uses weights) #print(exp(c(sum(ft[,7]*ft[,8]), eqtailed(ft[,7],ft[,8])))) #ft <- sens.mcsa(dta, # arng.u=c(-2,-1), arng.xu=c(-1,1), arng.yu=c(-1,1), # PRISHP="norm",NREP=50000) ### mcsa (ingores weights in 8th col) #print(exp(c(mean(ft[,7]),quantile(ft[,7],c(.025,.975))))) ### Bayes (uses weights) #print(exp(c(sum(ft[,7]*ft[,8]), eqtailed(ft[,7],ft[,8]))))