#######################
# auxillary functions #
#######################

expit <- function(x) {1/(1+exp(-x))}
logit <- function(p) {log(p)-log(1-p)}
### equal-tailed interval from weighted MC sample
eqtailed <- function(smp,wht,lvl=.95) {
  ndx <- order(smp)
  tmp.vl <- smp[ndx]
  tmp.wt <- wht[ndx]
  tmp.cm <- cumsum(tmp.wt)
  c(max(tmp.vl[tmp.cm<((1-lvl)/2)]),
    min(tmp.vl[tmp.cm>1-(1-lvl)/2]))
}    

################################
# tabular sensitivity analysis #
# for unobserved confounding   #
################################

sens.tsa <- function(dta, a.u=0, a.xu=0, a.yu=0) {

  ### MLE for fixed bias parameters
  ### dta is vector of cell counts,
  ### order (x,y)=(0,0),(0,1),(1,0),(1,1)
  ### bias parameters as per
  ### logit P(U=1|X,Y) = a.u + a.xu*X + a.yu*Y
  ### output is ML estimates of other 4 params in the log-linear
  ### model for the counts in the X,Y,U table (no 3-way term)
  ### in order beta0, betax, betay, betaxy

  ### expected counts
  i <- 1; edta <- NULL
  for (xvl in c(0,1)) { for (yvl in c(0,1)) {
    tmp <- expit(a.u + a.xu*xvl + a.yu*yvl)
    edta <- c(edta, dta[i]*c(1-tmp,tmp))
    i <- i+1
  }}

  ### maximization (i.e., m-step)
  x.des <- c(rep(0,4),rep(1,4))
  y.des <- rep(c(0,0,1,1),2)
  u.des <- rep(c(0,1),4)
  tmp <- glm(edta~x.des+y.des+x.des:y.des,
         offset=a.u*u.des + a.xu*x.des*u.des + a.yu*y.des*u.des,
         family=poisson)

  ### return point estimate and 95% interval
  coef(tmp)[4]+
  c(0,sqrt(summary(tmp)$cov.unscaled[4,4])*1.96*c(-1,1))

}

####################################
# Monte Carlo sensitivity analysis #
# for unobserved confounding       #
####################################
sens.mcsa <- function(dta, 
                  arng.u=c(0,0), arng.xu=c(0,0), arng.yu=c(0,0),
                  PRISHP="unif", NREP=1000) {

  ### arng.? gives range of bias params a.u, a.xu, a.yu
  ### supports PRISHP="unif" on these ranges, or 
  ### PRISHP="norm" - with 95% central interval matching unif
  ### dta is vector of cell counts,
  ### order (x,y)=(0,0),(0,1),(1,0),(1,1)
  ### bias parameters as per
  ### logit P(U=1|X,Y) = a.u + a.xu*X + a.yu*Y
  ### output is samsple of 7 parameters in the log-linear
  ### model for the counts in the X,Y,U table (no 3-way term)
  ### also gives weights to convert MCSA to Bayes

  ###  cols will be 7 parameters plus weights
  ans <- matrix(NA, NREP, 8)

  for (mnlp in 1:NREP) {
    lwht <- 0

    ### sample bias params 
    if (PRISHP=="unif") {
      a.u <- runif(1,arng.u[1],arng.u[2])
      a.xu <- runif(1,arng.xu[1],arng.xu[2])
      a.yu <- runif(1,arng.yu[1],arng.yu[2])
    }
    if (PRISHP=="norm") {
      mn <- sum(arng.u)/2
      sd <- .95*(arng.u[2]-arng.u[1])/(2*1.96)
      a.u <- rnorm(1,mn,sd)

      mn <- sum(arng.xu)/2
      sd <- .95*(arng.xu[2]-arng.xu[1])/(2*1.96)
      a.xu <- rnorm(1,mn,sd)

      mn <- sum(arng.yu)/2
      sd <- .95*(arng.yu[2]-arng.yu[1])/(2*1.96)
      a.yu <- rnorm(1,mn,sd)
    }

    ### sample unobserved confounders
    i <- 1; edta <- NULL; lw.pro <- 0
    for (xvl in c(0,1)) { for (yvl in c(0,1)) {
      tmp <- rbinom(1, size=dta[i], 
                    expit(a.u + a.xu*xvl + a.yu*yvl))
      lw.pro <- lw.pro + 
        log(dbinom(tmp,size=dta[i], 
                   expit(a.u + a.xu*xvl + a.yu*yvl)))
      edta <- c(edta, dta[i]-tmp, tmp)
      i <- i+1
    }}

    ### sample beta parameters 
    x.des <- c(rep(0,4),rep(1,4))
    y.des <- rep(c(0,0,1,1),2)
    u.des <- rep(c(0,1),4)
    tmp <- glm(edta~x.des+y.des+x.des:y.des,
           offset=a.u*u.des+a.xu*x.des*u.des+a.yu*y.des*u.des,
           family=poisson)
    beta <- coef(tmp) +
            t(chol(summary(tmp)$cov.unscaled)) %*% rnorm(4)
   
    ### importance weights
    mn.bt <- coef(tmp)
    pr.bt <- solve(summary(tmp)$cov.unscaled)
    lw.pro <- lw.pro + sum(log(diag(chol(pr.bt)))) -
              .5*t(beta-mn.bt)%*%pr.bt%*%(beta-mn.bt)

    ### weight calculation for target
    tmp <- exp(
      cbind(1,x.des,y.des,x.des*y.des,
            u.des,x.des*u.des,y.des*u.des) %*%
            c(beta,a.u,a.xu,a.yu))
   lw.trg <- sum(log(dpois(edta,tmp))) +
             sum(log(dnorm(beta, rep(0,4), rep(10^2,4))))

    ### output for the present iteration
    ans[mnlp,] <- c(a.u, a.xu, a.yu, beta, lw.trg-lw.pro)
  }

  ### convert to importance weight scale
  ans[,8] <- exp(ans[,8]-max(ans[,8]))
  ans[,8] <- ans[,8]/sum(ans[,8])

  ### output, rows correspond to replicates
  ans
}



# examples (commented out) implementing analysis in paper

# dta <- c(88, 20, 555, 347)

# ft <- sens.tsa(dta, a.u=-2, a.xu=-1, a.yu=-1)
### warnings about non-integer Poisson variable can be ignored,
### as discussed in paper
# print(exp(ft))

# set.seed(13)
#
# ft <- sens.mcsa(dta,
#            arng.u=c(-2,-1), arng.xu=c(-1,1), arng.yu=c(-1,1),
#            NREP=50000)

### mcsa (ingores weights in 8th col)
#print(exp(c(mean(ft[,7]),quantile(ft[,7],c(.025,.975)))))
### Bayes (uses weights)
#print(exp(c(sum(ft[,7]*ft[,8]), eqtailed(ft[,7],ft[,8]))))


#ft <- sens.mcsa(dta,
#            arng.u=c(-2,-1), arng.xu=c(-1,1), arng.yu=c(-1,1),
#            PRISHP="norm",NREP=50000)

### mcsa (ingores weights in 8th col)
#print(exp(c(mean(ft[,7]),quantile(ft[,7],c(.025,.975)))))
### Bayes (uses weights)
#print(exp(c(sum(ft[,7]*ft[,8]), eqtailed(ft[,7],ft[,8]))))