# gaussian analysis of nb10 data

scaled.inverse.chisq.density <- 
  function( nu.0, sigma.0.squared, sigma.squared ) {

  log.density <- ( nu.0 / 2 ) * log( nu.0 / 2 ) - lgamma( nu.0 / 2 ) +
    ( nu.0 / 2 ) * log( sigma.0.squared ) - ( 1 + nu.0 / 2 ) *
    log( sigma.squared ) - nu.0 * sigma.0.squared / 
    ( 2 * sigma.squared )

  return( exp( log.density ) )

}

y <- c( 409., 400., 406., 399., 402., 406., 401., 403., 401., 403.,
        398., 403., 407., 402., 401., 399., 400., 401., 405., 402., 408., 
        399., 399., 402., 399., 397., 407., 401., 399., 401., 403., 400., 
        410., 401., 407., 423., 406., 406., 402., 405., 405., 409., 399., 
        402., 407., 406., 413., 409., 404., 402., 404., 406., 407., 405., 
        411., 410., 410., 410., 401., 402., 404., 405., 392., 407., 406., 
        404., 403., 408., 404., 407., 412., 406., 409., 400., 408., 404., 
        401., 404., 408., 406., 408., 406., 401., 412., 393., 437., 418., 
        415., 404., 401., 401., 407., 412., 375., 409., 406., 398., 406., 
        403., 404. )

print( n <- length( y ) )

100

print( y.bar <- mean( y ) )

404.59

print( s <- sd( y ) )

6.466846

# reminder of small-sample-exact inference for sigma

# 100 ( 1 - alpha )% small-sample exact
# confidence interval for sigma in the Gaussian sampling model
# with unknown mu and unknown sigma ( alpha = 0.05 ):

# 5.677935 7.512375

# so 95% confidence interval for sigma.squared runs from
# 5.677935^2 = 32.23895 to 7.512375^2 = 56.43578

# and the MLE for sigma.squared is ( n - 1 ) * s^2 / n

( n - 1 ) * s^2 / n

41.4019

# bayesian analysis with low-information-content prior
# should be similar to small-sample-exact analysis

nu.0 <- 0.01             # small <--> low-information-content prior

sigma.0.squared <- 1     # 'default' choice for positive quantity

nu.n <- nu.0 + n

mu.0 <- 0                # 'default' choice for real-valued quantity

kappa.0 <- 0.01          # small <--> low-information-content prior

print( sigma.n.squared <- ( nu.0 * sigma.0.squared + ( n - 1 ) * s^2 +
  ( y.bar - mu.0 )^2 / ( 1 / kappa.0 + 1 / n ) ) / nu.n )

57.76389   

print( posterior.mean.sigma.squared <- 
  nu.n * sigma.n.squared / ( nu.n - 2 ) )

58.94263   # this is way too high, given the frequentist results

# frequentist interval runs from 32 to 56, so
# make the posterior plotting interval run from (say) 30 to 60

sigma.squared.grid <- seq( 30, 60, length = 500 )

plot( sigma.squared.grid, scaled.inverse.chisq.density( nu.n,   sigma.n.squared, sigma.squared.grid ), type = 'l', lwd = 2,
  xlab = 'sigma.squared', ylab = 'Posterior Density' )

# woof

sigma.squared.grid <- seq( 30, 100, length = 500 )

plot( sigma.squared.grid, scaled.inverse.chisq.density( nu.n,
  sigma.n.squared, sigma.squared.grid ), type = 'l', lwd = 2,
  xlab = 'sigma.squared', ylab = 'Posterior Density' )

# ok, somehow the 'low-information' prior is not a
# low-information prior after all

# notice that the sigma.n.squared expression is the sum of 3 terms;
# let's look at them one by one

print( first.term <- nu.0 * sigma.0.squared / nu.n )

9.999e-05      # this is negligible

print( second.term <- ( n - 1 ) * s^2 / nu.n )

41.39776       # this is almost exactly the MLE for sigma.squared

print( third.term <- ( ( y.bar - mu.0 )^2 / 
  ( 1 / kappa.0 + 1 / n ) ) / nu.n )

16.36603       # this is an enormous contribution to sigma.n.squared

# why is this term so big, when we tried to make it small 
# with kappa.0 = 0.01 and a 'default' choice of mu.0 = 0?

# well, first of all, with mu.0 = 0 and y.bar = 404.59,
# the numerator ( y.bar - mu.0 )^2 is really big

( y.bar - mu.0 )^2

# 163693.1

# the denominator of this term is driven by ( 1 / kappa.0 + 1 / n ),
# which needs to be really big to make the third term small

1 / kappa.0

100

1 / n

0.01

# so, secondly, kappa.0 = 0.01 is not small enough to make
# the third term negligible, given the 'default' choice of mu.0 = 0

# if we want to make the third term tiny, evidently there are two ways
# to do so: pick a value for mu.0 that's a lot closer to y.bar
# (this would involve using the data to specify the prior,
# which in general can be dangerous), or make kappa.0 smaller
# (this seems preferable)

kappa.0 <- 0.001

print( sigma.n.squared <- ( nu.0 * sigma.0.squared + ( n - 1 ) * s^2 +
  ( y.bar - mu.0 )^2 / ( 1 / kappa.0 + 1 / n ) ) / nu.n )

43.03461

# now this is quite a bit more like the frequentist answer (MLE 41.4),
# which is equivalent to a bayesian answer with prior information
# content 0

kappa.0 <- 0.0001

print( sigma.n.squared <- ( nu.0 * sigma.0.squared + ( n - 1 ) * s^2 +
  ( y.bar - mu.0 )^2 / ( 1 / kappa.0 + 1 / n ) ) / nu.n )

41.56154

# the moral of this story is that, if problem context implies
# that your prior should have low information content,
# attention to detail can sometimes be required to ensure
# that your 'low-information-content' prior really has
# low information content

# successful bayesian low-information-prior analysis 
# for sigma.squared:

nu.0 <- 0.01             # small <--> low-information-content prior

sigma.0.squared <- 1     # 'default' choice for positive quantity

nu.n <- nu.0 + n

mu.0 <- 0                # 'default' choice for real-valued quantity

kappa.0 <- 0.0001          # small <--> low-information-content prior

sigma.squared.grid <- seq( 25, 65, length = 500 )

plot( sigma.squared.grid, scaled.inverse.chisq.density( nu.n,   sigma.n.squared, sigma.squared.grid ), type = 'l', lwd = 2,
  xlab = 'sigma.squared', ylab = 'Posterior Density' )

print( posterior.mean.sigma.squared <- 
  nu.n * sigma.n.squared / ( nu.n - 2 ) )

# 42.40964

# recall that the MLE of sigma.squared is 41.4019

print( posterior.sd.sigma.squared <- 
  sqrt( 2 * nu.n^2 * sigma.n.squared^2 / 
  ( ( nu.n - 2 )^2 * (  nu.n - 4 ) ) ) )

6.120986

# so, roughly speaking, the posterior is concentrated between
# 42.40964 - 2 * 6.120986 =. 30.2 and 
# 42.40964 + 2 * 6.120986 =. 54.7

# the 95% small-sample interval ran from 32.2 to 56.4

# install CRAN package actuar

library( actuar )

plot( sigma.squared.grid, dinvgamma( sigma.squared.grid, nu.n / 2,
  scale = nu.n * sigma.n.squared / 2 ), type = 'l', lwd = 2,
  xlab = 'sigma.squared', ylab = 'Posterior Density' )

# identical to my home-grown density function

alpha <- 0.05

qinvgamma( alpha / 2, nu.n / 2, scale = nu.n * sigma.n.squared / 2 )
  
32.07908

qinvgamma( 1 - alpha / 2, nu.n / 2, scale = nu.n * sigma.n.squared / 2 )
  
55.99541

#      summary of results about sigma.squared
# 
#         frequentist        bayesian with diffuse prior
#
#   point                       point   
#  estimate   95% interval     estimate  95% interval
#
#    41.4    ( 32.2, 56.4 )      42.4   ( 32.1, 56.0 )

# now let's get the bayesian analysis for mu

scaled.t.density <- function( mu, sigma, nu, theta ) {

  return( dt( ( theta - mu ) / sigma, nu ) / sigma )

}

kappa.n <- kappa.0 + n

print( mu.n <- ( kappa.0 * mu.0 + n * y.bar ) / ( kappa.0 + n ) )

404.5896

mu.grid <- seq( 402, 407, length = 500 )

plot( mu.grid, scaled.t.density( mu.n, 
  sqrt( sigma.n.squared / kappa.n ), nu.n, mu.grid ),
  type = 'l', lwd = 2, xlab = 'mu', ylab = 'Posterior Density' )

library( mnormt )

lines( mu.grid, dmt( mu.grid, mean = mu.n, 
  sigma.n.squared / kappa.n, df = nu.n ),
  lty = 2, lwd = 2, col = 'red' )

# identical to my homegrown function

# but there is no qmt function, whereas we can use
# the homegrown approach to get the quantiles
# for the interval for mu:

mu.n + sqrt( sigma.n.squared / kappa.n ) * qt( alpha / 2, nu.n )

403.3106

mu.n + sqrt( sigma.n.squared / kappa.n ) * qt( 1 - alpha / 2, nu.n )

405.8686

#                 summary of results about mu
# 
#           frequentist            bayesian with diffuse prior
#
#   point                           point   
#  estimate      95% interval     estimate      95% interval
#
#   404.59   ( 403.31, 405.87 )    404.59   ( 403.31, 405.87 )