################################################################ # # checking the calibration of the large-sample # and small-sample intervals in the Kaiser ICU case study # with a large number of simulation replications, # to get definitive calibration answers rm( list = ls( ) ) # this removes everything in the R workspace # so that you can start with a clean slate n <- 112 # sample size print( population.theta <- 306 / 8561 ) # 0.03574349 n.null <- 0 n.negative.large.sample <- 0 n.negative.small.sample <- 0 n.hit.large.sample <- 0 n.hit.small.sample <- 0 n.hit.bayesian.beta.1.1 <- 0 n.hit.bayesian.beta.0.5.0.5 <- 0 M.calibration.large.sample <- 1000000 seed <- 6428493 set.seed( seed ) # i ran the code twice, with seed = 1 and seed = 2; # results are given below alpha <- 0.05 # confidence level 100 ( 1 - alpha ) % system.time( for ( m in 1:M.calibration.large.sample ) { s.star <- rbinom( 1, n, population.theta ) if ( s.star == 0 ) { n.null <- n.null + 1 } theta.hat <- s.star / n se.theta.hat <- sqrt( theta.hat * ( 1 - theta.hat ) / n ) large.sample.confidence.interval <- c( theta.hat - qnorm( 1 - alpha / 2 ) * se.theta.hat, theta.hat + qnorm( 1 - alpha / 2 ) * se.theta.hat ) if ( large.sample.confidence.interval[ 1 ] < 0 ) { n.negative.large.sample <- n.negative.large.sample + 1 } if ( ( large.sample.confidence.interval[ 1 ] < population.theta ) & ( large.sample.confidence.interval[ 2 ] > population.theta ) ) { n.hit.large.sample <- n.hit.large.sample + 1 } theta.hat.prime <- ( s.star + 0.5 ) / ( n + 1 ) eta.hat <- log( theta.hat.prime / ( 1 - theta.hat.prime ) ) se.eta.hat <- sqrt( 1 / ( n * theta.hat.prime * ( 1 - theta.hat.prime ) ) ) eta.interval <- c( eta.hat - qnorm( 1 - alpha / 2 ) * se.eta.hat, eta.hat + qnorm( 1 - alpha / 2 ) * se.eta.hat ) small.sample.confidence.interval <- 1 / ( 1 + exp( - eta.interval ) ) if ( small.sample.confidence.interval[ 1 ] < 0 ) { n.negative.small.sample <- n.negative.small.sample + 1 } if ( ( small.sample.confidence.interval[ 1 ] < population.theta ) & ( small.sample.confidence.interval[ 2 ] > population.theta ) ) { n.hit.small.sample <- n.hit.small.sample + 1 } bayesian.interval.beta.1.1 <- c( qbeta( 0.025, 1 + s.star, 1 + n - s.star ), qbeta( 0.975, 1 + s.star, 1 + n - s.star ) ) if ( ( bayesian.interval.beta.1.1[ 1 ] < population.theta ) & ( bayesian.interval.beta.1.1[ 2 ] > population.theta ) ) { n.hit.bayesian.beta.1.1 <- n.hit.bayesian.beta.1.1 + 1 } bayesian.interval.beta.0.5.0.5 <- c( qbeta( 0.025, 0.5 + s.star, 0.5 + n - s.star ), qbeta( 0.975, 0.5 + s.star, 0.5 + n - s.star ) ) if ( ( bayesian.interval.beta.0.5.0.5[ 1 ] < population.theta ) & ( bayesian.interval.beta.0.5.0.5[ 2 ] > population.theta ) ) { n.hit.bayesian.beta.0.5.0.5 <- n.hit.bayesian.beta.0.5.0.5 + 1 } } ) # user system elapsed # 29.36 0.00 29.38 # on my laptop 1,000,000 simulation iterations takes # about 30 seconds of clock time print( paste( 'seed =', seed, '; proportion of all-0 data sets =', signif( n.null / M.calibration.large.sample, digits = 4 ) ) ) # "seed = 1 ; proportion of all-0 data sets = 0.01685" # "seed = 2 ; proportion of all-0 data sets = 0.01698" dbinom( 0, n, population.theta ) # 0.01696559 ( 1 - population.theta )^n # 0.01696559 print( paste( 'seed =', seed, '; proportion of large-sample negative intervals =', signif( n.negative.large.sample / M.calibration.large.sample, digits = 4 ) ) ) # "seed = 1 ; proportion of large-sample negative intervals = 0.412" # "seed = 2 ; proportion of large-sample negative intervals = 0.4118" print( paste( 'seed =', seed, '; large-sample interval hit rate =', signif( n.hit.large.sample / M.calibration.large.sample, digits = 4 ) ) ) # "seed = 1 ; large-sample interval hit rate = 0.9058" # "seed = 2 ; large-sample interval hit rate = 0.9056" print( paste( 'seed =', seed, '; proportion of small-sample negative intervals =', signif( n.negative.small.sample / M.calibration.large.sample, digits = 4 ) ) ) # "seed = 1 ; proportion of small-sample negative intervals = 0" # "seed = 2 ; proportion of small-sample negative intervals = 0" print( paste( 'seed =', seed, '; small-sample interval hit rate =', signif( n.hit.small.sample / M.calibration.large.sample, digits = 4 ) ) ) # "seed = 1 ; small-sample interval hit rate = 0.9521" # "seed = 2 ; small-sample interval hit rate = 0.9518" print( paste( 'seed =', seed, '; Bayesian interval ( 1, 1 ) hit rate =', signif( n.hit.bayesian.beta.1.1 / M.calibration.large.sample, digits = 4 ) ) ) # "seed = 1 ; Bayesian interval ( 1, 1 ) hit rate = 0.9348" # "seed = 2 ; Bayesian interval ( 1, 1 ) hit rate = 0.9349" print( paste( 'seed =', seed, '; Bayesian interval ( 0.5, 0.5 ) hit rate =', signif( n.hit.bayesian.beta.0.5.0.5 / M.calibration.large.sample, digits = 4 ) ) ) # "seed = 1 ; Bayesian interval ( 0.5, 0.5 ) hit rate = 0.9637" # "seed = 2 ; Bayesian interval ( 0.5, 0.5 ) hit rate = 0.9637"