Create the Confounded Data Set and Save.
# eval=FALSE so the data is not over-written on re-running the script
# Simulate a two-arm DVT data set in which the lab test value confounds the
# treatment comparison, then save it to "DVT.covar.rda".
#   * "Long" arm  (group 1): lab values drawn uniformly from the observed pool.
#   * "Short" arm (group 2): lab values drawn with probability proportional to
#     the lab value itself, so this arm tends to have higher values.
#   * The event probability falls as the lab value rises (slope -1/8 on the
#     logit scale); the arms differ in intercept (2 for Long, 5 for Short).

# Fix the RNG seed so that re-running this chunk regenerates the same data set
# instead of silently replacing the saved file with a different random draw.
set.seed(550)

n_per_arm <- 200L  # number of simulated subjects in each treatment arm

df <- read.csv("~/teach/550/www/lab.csv")
# Entries that cannot be parsed as integers become NA here (R warns about the
# coercion); those rows are dropped on the next line.
df$LabTest <- as.integer(as.character(df$LabTest))
df <- df[!is.na(df$LabTest), ]

# One lab value per ID: keep the first row encountered for each ID.
labTest <- df[!duplicated(df$ID), "LabTest"]
# sample() treats a length-one numeric x as 1:x, so insist on a real pool of
# values before sampling from it.
stopifnot(length(labTest) > 1L)

# NOTE(review): wts is not used by the sampling below, which weights by
# labTest itself rather than by its square root -- confirm which was intended.
wts <- sqrt(labTest) / sum(sqrt(labTest))

# Draw order (lab2, then lab1, then the outcomes) matches the original script.
lab2 <- sample(labTest, n_per_arm, prob = labTest, replace = TRUE)
lab1 <- sample(labTest, n_per_arm, replace = TRUE)

# Linear predictors on the logit scale, then event probabilities.
l1 <- 2 - lab1 / 8
l2 <- 5 - lab2 / 8
p1 <- plogis(l1)  # inverse logit: 1 / (1 + exp(-l1))
p2 <- plogis(l2)

# Bernoulli outcomes: TRUE with probability p, Long arm first, then Short.
ys <- runif(2L * n_per_arm) < c(p1, p2)
gp <- rep(1:2, each = n_per_arm)

# yes.no() is defined outside this chunk; presumably it turns the logical
# outcome into a Yes/No factor -- verify against its definition.
df <- data.frame(
  dvt = yes.no(ys),
  Trt = factor(gp, labels = c("Long", "Short")),
  Test = c(lab1, lab2)
)
save(df, file = "DVT.covar.rda")
## Warning: coercing argument of type 'double' to logical
## Yes 95% CI
## Long 7/200( 3.5%) 1.4%-7.1%
## Short 14/200( 7.0%) 3.9%-11.5%
## Estimated difference (Short-Long): 3.5%
## 95% Limits for Difference: -0.858% to 7.86%
## Fisher's exact (2-sided): P=0.177
## Uncorrected Chi-square: P=0.117
## Difference S.E.(Diff) t-value P-value Pooled s.d. d.f.
## -6.630 1.920 -3.460 0.001 19.200 398.000
## C.I. Width CI. (lower) CI. (upper)
## 3.770 -10.400 -2.870
##
## Call:
## glm(formula = dvt ~ Trt + Test, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4135 -0.1099 -0.0259 -0.0133 2.8233
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.7376 1.2865 3.68 0.00023 ***
## TrtShort 2.0424 0.6705 3.05 0.00232 **
## Test -0.1679 0.0314 -5.35 9e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 164.649 on 399 degrees of freedom
## Residual deviance: 69.667 on 397 degrees of freedom
## AIC: 75.67
##
## Number of Fisher Scoring iterations: 8
## Estimate Std. Error Pr(>|z|) 2.5 % 97.5 %
## (Intercept) 114.15767 1.286516 2.3097e-04 9.17108 1420.98577
## TrtShort 7.70882 0.670477 2.3180e-03 2.07146 28.68790
## Test 0.84542 0.031409 8.9894e-08 0.79495 0.89911