################################################################
#
# checking the calibration of the large-sample
# and small-sample intervals in the Kaiser ICU case study
# with a large number of simulation replications,
# to get definitive calibration answers

rm( list = ls( ) ) # this removes everything in the R workspace
                   # so that you can start with a clean slate

n <- 112           # sample size

print( population.theta <- 306 / 8561 )

# 0.03574349

n.null <- 0

n.negative.large.sample <- 0

n.negative.small.sample <- 0

n.hit.large.sample <- 0

n.hit.small.sample <- 0

n.hit.bayesian.beta.1.1 <- 0

n.hit.bayesian.beta.0.5.0.5 <- 0

M.calibration.large.sample <- 1000000

seed <- 6428493

set.seed( seed )

# i ran the code twice, with seed = 1 and seed = 2;
# results are given below

alpha <- 0.05        # confidence level 100 ( 1 - alpha ) %

system.time(

  for ( m in 1:M.calibration.large.sample ) {

    s.star <- rbinom( 1, n, population.theta )

    if ( s.star == 0 ) { 

      n.null <- n.null + 1

    }

    theta.hat <- s.star / n

    se.theta.hat <- sqrt( theta.hat * ( 1 - theta.hat ) / n )

    large.sample.confidence.interval <- 
      c( theta.hat - qnorm( 1 - alpha / 2 ) * se.theta.hat,
      theta.hat + qnorm( 1 - alpha / 2 ) * se.theta.hat )

    if ( large.sample.confidence.interval[ 1 ] < 0 ) {

      n.negative.large.sample <- n.negative.large.sample + 1

    }

    if ( ( large.sample.confidence.interval[ 1 ] <
      population.theta ) & ( large.sample.confidence.interval[ 2 ] >
      population.theta ) ) {

      n.hit.large.sample <- n.hit.large.sample + 1

    }

    theta.hat.prime <- ( s.star + 0.5 ) / ( n + 1 )

    eta.hat <- log( theta.hat.prime / ( 1 - theta.hat.prime ) )

    se.eta.hat <- sqrt( 1 / ( n * theta.hat.prime * 
      ( 1 - theta.hat.prime ) ) )

    eta.interval <- c( eta.hat - qnorm( 1 - alpha / 2 ) * se.eta.hat,
      eta.hat + qnorm( 1 - alpha / 2 ) * se.eta.hat )

    small.sample.confidence.interval <- 1 / 
      ( 1 + exp( - eta.interval ) )

    if ( small.sample.confidence.interval[ 1 ] < 0 ) {

      n.negative.small.sample <- n.negative.small.sample + 1

    }

    if ( ( small.sample.confidence.interval[ 1 ] <
      population.theta ) & ( small.sample.confidence.interval[ 2 ] >
      population.theta ) ) {

      n.hit.small.sample <- n.hit.small.sample + 1

    }

    bayesian.interval.beta.1.1 <- c( qbeta( 0.025, 1 + s.star,
      1 + n - s.star ), qbeta( 0.975, 1 + s.star, 1 + n - s.star ) )

    if ( ( bayesian.interval.beta.1.1[ 1 ] <
      population.theta ) & ( bayesian.interval.beta.1.1[ 2 ] >
      population.theta ) ) {

      n.hit.bayesian.beta.1.1 <- n.hit.bayesian.beta.1.1 + 1

    }

    bayesian.interval.beta.0.5.0.5 <- c( qbeta( 0.025, 0.5 + s.star,
      0.5 + n - s.star ), qbeta( 0.975, 0.5 + s.star, 0.5 + n - s.star ) )

    if ( ( bayesian.interval.beta.0.5.0.5[ 1 ] <
      population.theta ) & ( bayesian.interval.beta.0.5.0.5[ 2 ] >
      population.theta ) ) {

      n.hit.bayesian.beta.0.5.0.5 <- n.hit.bayesian.beta.0.5.0.5 + 1

    }

  }

)

#  user  system elapsed 
# 29.36    0.00   29.38 

# on my laptop 1,000,000 simulation iterations takes
# about 30 seconds of clock time

print( paste( 'seed =', seed, '; proportion of all-0 data sets =', 
  signif( n.null / M.calibration.large.sample, digits = 4 ) ) )

# "seed = 1 ; proportion of all-0 data sets = 0.01685"
# "seed = 2 ; proportion of all-0 data sets = 0.01698"

dbinom( 0, n, population.theta )

# 0.01696559

( 1 - population.theta )^n

# 0.01696559

print( paste( 'seed =', seed, 
  '; proportion of large-sample negative intervals =', 
  signif( n.negative.large.sample / M.calibration.large.sample, 
  digits = 4 ) ) )

# "seed = 1 ; proportion of large-sample negative intervals = 0.412"
# "seed = 2 ; proportion of large-sample negative intervals = 0.4118"

print( paste( 'seed =', seed, '; large-sample interval hit rate =', 
  signif( n.hit.large.sample / M.calibration.large.sample, 
  digits = 4 ) ) )

# "seed = 1 ; large-sample interval hit rate = 0.9058"
# "seed = 2 ; large-sample interval hit rate = 0.9056"

print( paste( 'seed =', seed, 
  '; proportion of small-sample negative intervals =', 
  signif( n.negative.small.sample / M.calibration.large.sample, 
  digits = 4 ) ) )

# "seed = 1 ; proportion of small-sample negative intervals = 0"
# "seed = 2 ; proportion of small-sample negative intervals = 0"

print( paste( 'seed =', seed, '; small-sample interval hit rate =', 
  signif( n.hit.small.sample / M.calibration.large.sample, 
  digits = 4 ) ) )

# "seed = 1 ; small-sample interval hit rate = 0.9521"
# "seed = 2 ; small-sample interval hit rate = 0.9518"

print( paste( 'seed =', seed, '; Bayesian interval ( 1, 1 ) hit rate =', 
  signif( n.hit.bayesian.beta.1.1 / M.calibration.large.sample, 
  digits = 4 ) ) )

# "seed = 1 ; Bayesian interval ( 1, 1 ) hit rate = 0.9348"
# "seed = 2 ; Bayesian interval ( 1, 1 ) hit rate = 0.9349"

print( paste( 'seed =', seed, '; Bayesian interval ( 0.5, 0.5 ) hit rate =', 
  signif( n.hit.bayesian.beta.0.5.0.5 / M.calibration.large.sample, 
  digits = 4 ) ) )

# "seed = 1 ; Bayesian interval ( 0.5, 0.5 ) hit rate = 0.9637"
# "seed = 2 ; Bayesian interval ( 0.5, 0.5 ) hit rate = 0.9637"