##### DatSimult.r ##### ## This function simulates NREP datasets over NSCEN scenarios (NREP=400, NSCEN=4 by default) ## Input variables: ## #n_cn.cmp: size of validation data in the control group #n_ca.cmp: size of validatation data in the case group #n_cn.inc: size of incomplete main data in the control group #n_ca.inc: size of incomplete main data in the case group #r_cn: true exposure prevalence in the control group #Logodd: true log odds ratio #sn_cn: sensitivity in the control group given nondiff misclassification (i.e. in scenarios 1) #sp_cn: specificity in the control group given nondiff misclassification (i.e. in scenarios 1) #sn_ca: sensitivity in the case group given nondiff misclassification (i.e. in scenarios 1) #sp_ca: specificity in the case group given nondiff misclassification (i.e. in scenarios 1) #NSCEN: nubmer of scenarios #NREP: number of datasets to be simulated in each scenario #update: from which group is differential misclassification introduced (case or control) #step.n: change of SN_i from the immediate previous scenario in the 'updated' group #step.p: change of SP_i from the immediate previous scenario in the 'updated' group ## Output variable: ## #my.dat: a 3-dim array containing simulated datasets (NREP datasets over NSCEN scenarios) DatSimult<-function(n_cn.cmp=80,n_ca.cmp=80,n_cn.inc=400,n_ca.inc=400,r_cn=0.04,Logodd=log(1.8),sn_cn=0.8,sp_cn=0.9,sn_ca=0.8,sp_ca=0.9,NSCEN=4,NREP=400,step.n=-0.05,step.p=-0.05,update="ca") { if (update=="ca") { my.dat<-array(NA,dim=c(6,2*NREP,NSCEN),dimnames=list(c("cnexp1","cnexp0","caexp1","caexp0","cntr","case"), rep(c("appexp1","appexp0"),NREP),NULL)) for (SCEN in (1:NSCEN)) { n_cn.tot <- n_cn.cmp+n_cn.inc n_ca.tot <- n_ca.cmp+n_ca.inc r_ca <- 1 / ( 1 + (1/exp(Logodd)) * (1-r_cn) / r_cn ) sn_ca <- sn_cn + (SCEN-1) * step.n sp_ca <- sp_cn + (SCEN-1) * step.p for (i in 1:NREP) { exps0 <- rbinom(n_cn.cmp+n_cn.inc, size=1, prob=r_cn) appr0 <- exps0*rbinom(n_cn.tot, size=1, prob=sn_cn) + (1-exps0)*rbinom(n_cn.tot, size=1, prob=1-sp_cn) cntr.cmp <- table(1-exps0[1:n_cn.cmp], 1-appr0[1:n_cn.cmp],dnn=c("1-exps","1-appr")) #table(A,B):levels of factor A indicate rows while levels of factor B indicate columns #1st col vs 2nd col:# of appr. exposure=1 vs # of appr. exposure=0 #1st row vs 2nd row:# of true exposure=1 vs # of true exposure=0 if ((length(cntr.cmp)==2) & (sum(exps0[1:n_cn.cmp])==0)) {cntr.cmp<-rbind(c(0,0),cntr.cmp)} if ((length(cntr.cmp)==2) & (sum(exps0[1:n_cn.cmp])==n_cn.cmp)) {cntr.cmp<-rbind(cntr.cmp,c(0,0))} if (length(cntr.cmp)==2 & sum(appr0[1:n_cn.cmp])==0) {cntr.cmp <- cbind(c(0,0),cntr.cmp)} if (length(cntr.cmp)==2 & sum(appr0[1:n_cn.cmp])==n_cn.cmp) {cntr.cmp <-cbind(cntr.cmp,c(0,0))} ### should consider cases with length(cntr.cmp)==1 cntr.sum <- table(1-appr0[(n_cn.cmp+1):n_cn.tot],dnn=c("1-appr")) #1st vs 2nd element: # of appr. exposure=1 vs # of appr. exposure=0 if (length(cntr.sum)==1 & sum(appr0[(n_cn.cmp+1):n_cn.tot]==0)) {cntr.sum <- c(0,cntr.sum)} if (length(cntr.sum)==1 & sum(appr0[(n_cn.cmp+1):n_cn.tot]==n_cn.inc)) {cntr.sum <- c(cntr.sum,0)} # simulate the case table exps1 <- rbinom(n_ca.cmp+n_ca.inc, size=1, prob=r_ca) appr1 <- exps1*rbinom(n_ca.tot, size=1, prob=sn_ca) + (1-exps1)*rbinom(n_ca.tot, size=1, prob=1-sp_ca) case.cmp <- table(1-exps1[1:n_ca.cmp], 1-appr1[1:n_ca.cmp],dnn=c("1-exps","1-appr")) #table(A,B):levels of factor A indicate rows while levels of factor B indicate columns #1st col vs 2nd col:# of appr. exposure=1 vs # of appr. exposure=0 #1st row vs 2nd row:# of true exposure=1 vs # of true exposure=0 if (length(case.cmp)==2 & sum(exps1[1:n_ca.cmp])==0) {case.cmp<-rbind(c(0,0),case.cmp)} if (length(case.cmp)==2 & sum(exps1[1:n_ca.cmp])==n_ca.cmp) {case.cmp<-rbind(case.cmp,c(0,0))} if (length(case.cmp)==2 & sum(appr1[1:n_ca.cmp])==0) {case.cmp <- cbind(c(0,0),case.cmp)} if (length(case.cmp)==2 & sum(appr1[1:n_ca.cmp])==n_ca.cmp) {case.cmp <-cbind(case.cmp,c(0,0))} ### should consider cases with length(casmp)==1 case.sum <- table(1-appr1[(n_ca.cmp+1):n_ca.tot],dnn=c("1-appr")) #1st vs 2nd element: # of appr. exposure=1 vs # of appr. exposure=0 if (length(case.sum)==1 & sum(appr1[(n_ca.cmp+1):n_ca.tot]==0)) {case.sum <- c(0,case.sum)} if (length(case.sum)==1 & sum(appr1[(n_ca.cmp+1):n_ca.tot]==n_ca.inc)) {case.sum <- c(case.sum,0)} my.dat[,(2*i-1):(2*i),SCEN]<-rbind(cntr.cmp,case.cmp,cntr.sum,case.sum) } # end i } # end SCEN } # end update=ca ################################################################## if (update=="cntr") { my.dat<-array(NA,dim=c(6,2*NREP,NSCEN),dimnames=list(c("cnexp1","cnexp0","caexp1","caexp0","cntr","case"), rep(c("appexp1","appexp0"),NREP),NULL)) for (SCEN in (1:NSCEN)) { n_cn.tot <- n_cn.cmp+n_cn.inc n_ca.tot <- n_ca.cmp+n_ca.inc r_ca <- 1 / ( 1 + (1/exp(Logodd)) * (1-r_cn) / r_cn ) sn_cn <- sn_ca + (SCEN-1) * step.n sp_cn <- sp_ca + (SCEN-1) * step.p print(c(r_cn, r_ca, sn_cn, sp_cn, sn_ca, sp_ca)) for (i in 1:NREP) { exps0 <- rbinom(n_cn.cmp+n_cn.inc, size=1, prob=r_cn) appr0 <- exps0*rbinom(n_cn.tot, size=1, prob=sn_cn) + (1-exps0)*rbinom(n_cn.tot, size=1, prob=1-sp_cn) cntr.cmp <- table(1-exps0[1:n_cn.cmp], 1-appr0[1:n_cn.cmp],dnn=c("1-exps","1-appr")) #table(A,B):levels of factor A indicate rows while levels of factor B indicate columns #1st col vs 2nd col:# of appr. exposure=1 vs # of appr. exposure=0 #1st row vs 2nd row:# of true exposure=1 vs # of true exposure=0 if ((length(cntr.cmp)==2) & (sum(exps0[1:n_cn.cmp])==0)) {cntr.cmp<-rbind(c(0,0),cntr.cmp)} if ((length(cntr.cmp)==2) & (sum(exps0[1:n_cn.cmp])==n_cn.cmp)) {cntr.cmp<-rbind(cntr.cmp,c(0,0))} if (length(cntr.cmp)==2 & sum(appr0[1:n_cn.cmp])==0) {cntr.cmp <- cbind(c(0,0),cntr.cmp)} if (length(cntr.cmp)==2 & sum(appr0[1:n_cn.cmp])==n_cn.cmp) {cntr.cmp <-cbind(cntr.cmp,c(0,0))} ### should consider cases with length(cntr.cmp)==1 cntr.sum <- table(1-appr0[(n_cn.cmp+1):n_cn.tot],dnn=c("1-appr")) #1st vs 2nd element: # of appr. exposure=1 vs # of appr. exposure=0 if (length(cntr.sum)==1 & sum(appr0[(n_cn.cmp+1):n_cn.tot]==0)) {cntr.sum <- c(0,cntr.sum)} if (length(cntr.sum)==1 & sum(appr0[(n_cn.cmp+1):n_cn.tot]==n_cn.inc)) {cntr.sum <- c(cntr.sum,0)} # simulate the case table exps1 <- rbinom(n_ca.cmp+n_ca.inc, size=1, prob=r_ca) appr1 <- exps1*rbinom(n_ca.tot, size=1, prob=sn_ca) + (1-exps1)*rbinom(n_ca.tot, size=1, prob=1-sp_ca) case.cmp <- table(1-exps1[1:n_ca.cmp], 1-appr1[1:n_ca.cmp],dnn=c("1-exps","1-appr")) #table(A,B):levels of factor A indicate rows while levels of factor B indicate columns #1st col vs 2nd col:# of appr. exposure=1 vs # of appr. exposure=0 #1st row vs 2nd row:# of true exposure=1 vs # of true exposure=0 if (length(case.cmp)==2 & sum(exps1[1:n_ca.cmp])==0) {case.cmp<-rbind(c(0,0),case.cmp)} if (length(case.cmp)==2 & sum(exps1[1:n_ca.cmp])==n_ca.cmp) {case.cmp<-rbind(case.cmp,c(0,0))} if (length(case.cmp)==2 & sum(appr1[1:n_ca.cmp])==0) {case.cmp <- cbind(c(0,0),case.cmp)} if (length(case.cmp)==2 & sum(appr1[1:n_ca.cmp])==n_ca.cmp) {case.cmp <-cbind(case.cmp,c(0,0))} ### should consider cases with length(casmp)==1 case.sum <- table(1-appr1[(n_ca.cmp+1):n_ca.tot],dnn=c("1-appr")) #1st vs 2nd element: # of appr. exposure=1 vs # of appr. exposure=0 if (length(case.sum)==1 & sum(appr1[(n_ca.cmp+1):n_ca.tot]==0)) {case.sum <- c(0,case.sum)} if (length(case.sum)==1 & sum(appr1[(n_ca.cmp+1):n_ca.tot]==n_ca.inc)) {case.sum <- c(case.sum,0)} my.dat[,(2*i-1):(2*i),SCEN]<-rbind(cntr.cmp,case.cmp,cntr.sum,case.sum) } # end i } # end SCEN } #end update=cntr return(my.dat) }