##### NB10 case study: 100 repeated weighings of the NB10 reference weight.
##### Part 1: preliminary descriptive analysis and normal qqplots.
##### Part 2: large-sample likelihood analysis in the Gaussian sampling
#####         model with unknown mu and unknown sigma.
##### Part 3: maximum-likelihood point and interval estimates.
##### Console output from a previous run is recorded in '#' comments.

y <- c( 409, 400, 406, 399, 402, 406, 401, 403, 401, 403,
        398, 403, 407, 402, 401, 399, 400, 401, 405, 402,
        408, 399, 399, 402, 399, 397, 407, 401, 399, 401,
        403, 400, 410, 401, 407, 423, 406, 406, 402, 405,
        405, 409, 399, 402, 407, 406, 413, 409, 404, 402,
        404, 406, 407, 405, 411, 410, 410, 410, 401, 402,
        404, 405, 392, 407, 406, 404, 403, 408, 404, 407,
        412, 406, 409, 400, 408, 404, 401, 404, 408, 406,
        408, 406, 401, 412, 393, 437, 418, 415, 404, 401,
        401, 407, 412, 375, 409, 406, 398, 406, 403, 404 )

print( n <- length( y ) )
# [1] 100

######################### preliminary descriptive analysis

sort( y )
# 375 392 393 397 398 398 399 399 399 399 399 399 399 400 400 400 400 401
# 401 401 401 401 401 401 401 401 401 401 401 402 402 402 402 402 402 402
# 402 403 403 403 403 403 403 404 404 404 404 404 404 404 404 404 405 405
# 405 405 405 406 406 406 406 406 406 406 406 406 406 406 406 407 407 407
# 407 407 407 407 407 408 408 408 408 408 409 409 409 409 409 410 410 410
# 410 411 412 412 412 413 415 418 423 437

quantile( y )
#   0%  25%  50%  75% 100%
#  375  401  404  407  437

mean( y )
# [1] 404.59
sd( y )
# [1] 6.466846

hist( y, breaks = 20, main = '', xlab = 'NB10 Measurements', prob = TRUE )

# Normal qqplot of the data, with the straight line implied by the
# sample ( mean, SD ) superimposed.
qqnorm( y, main = 'Normal qqplot of NB10 data' )
abline( mean( y ), sd( y ), lwd = 2, col = 'red' )

# Simulation envelope: qq-traces of M Gaussian datasets with the same
# ( n, mean, SD ) as the data, to calibrate how far from straight a
# genuinely Gaussian qqplot of this size can wander.
plot( 0, 0, type = 'n', xlab = 'Theoretical Quantiles',
  ylab = 'Sample Quantiles', xlim = c( -2.6, 2.6 ), ylim = c( 370, 440 ),
  main = 'Normal qqplot of simulated data' )
abline( mean( y ), sd( y ), lwd = 2, col = 'red' )

M <- 200
seed <- 1
set.seed( seed )   # reproducible simulation envelope

# Standard-normal plotting positions ( i - 0.5 ) / n for a sorted sample
# of size n; computed once instead of inside the loop.
normal.quantiles <- qnorm( ( seq_len( n ) - 0.5 ) / n )

for ( m in seq_len( M ) ) {
  y.star <- rnorm( n, mean( y ), sd( y ) )
  lines( normal.quantiles, sort( y.star ), lty = 2, lwd = 2, col = 'black' )
}
lines( normal.quantiles, sort( y ), lty = 1, lwd = 2, col = 'blue' )

# Normal qqplot with the two extreme outliers ( 375 and 437 ) removed.
y.trimmed <- sort( y )[ 2:( n - 1 ) ]
qqnorm( y.trimmed, main = 'Normal qqplot of NB10 without the outliers' )
abline( mean( y.trimmed ), sd( y.trimmed ), lwd = 2, col = 'red' )

######################### large-sample likelihood analysis

print( mu.hat <- mean( y ) )
# [1] 404.59
print( se.mu.hat <- sd( y ) / sqrt( n ) )
# [1] 0.6466846

n.mu.grid <- 500
mu.grid <- seq( mu.hat - 3 * se.mu.hat, mu.hat + 3 * se.mu.hat,
  length.out = n.mu.grid )

print( approx.sigma.hat <- sd( y ) )
print( approx.se.sigma.hat <- sd( y ) / sqrt( 2 * n ) )

n.sigma.grid <- 500
sigma.grid <- seq( approx.sigma.hat - 3 * approx.se.sigma.hat,
  approx.sigma.hat + 3 * approx.se.sigma.hat, length.out = n.sigma.grid )

# Gaussian log likelihood ( additive constants dropped ) evaluated on the
# ( mu, sigma ) grid; rows index mu, columns index sigma.  The sum of
# squares about each mu is computed once per grid value, and outer()
# replaces the original scalar double loop with the same element-wise
# arithmetic, vectorized.
ss.mu <- vapply( mu.grid, function( mu ) sum( ( y - mu )^2 ), numeric( 1 ) )
log.likelihood <- outer( ss.mu, sigma.grid,
  function( ss, sigma ) - n * log( sigma ) - ss / ( 2 * sigma^2 ) )

print( max.log.likelihood <- max( log.likelihood ) )
# [1] -236.1664

# Likelihood surface normalized to a maximum of 1; subtracting the max
# on the log scale before exponentiating avoids underflow.
likelihood <- exp( log.likelihood - max.log.likelihood )

par( mfrow = c( 1, 2 ) )
persp( mu.grid, sigma.grid, likelihood, xlab = 'mu', ylab = 'sigma',
  zlab = 'Likelihood', theta = 30, phi = 45 )
persp( mu.grid, sigma.grid, likelihood, xlab = 'mu', ylab = 'sigma',
  zlab = 'Likelihood', theta = 30, phi = 0 )
par( mfrow = c( 1, 1 ) )

contour( mu.grid, sigma.grid, likelihood, xlab = 'mu', ylab = 'sigma' )
image( mu.grid, sigma.grid, likelihood, xlab = 'mu', ylab = 'sigma' )

persp( mu.grid, sigma.grid, log.likelihood, xlab = 'mu', ylab = 'sigma',
  zlab = 'Log Likelihood', theta = 30, phi = 45 )
contour( mu.grid, sigma.grid, log.likelihood, xlab = 'mu', ylab = 'sigma' )
image( mu.grid, sigma.grid, log.likelihood, xlab = 'mu', ylab = 'sigma' )

############################################################

# The data are re-entered here ( identical values to the vector above )
# so that this section can be run on its own.
y <- c( 409, 400, 406, 399, 402, 406, 401, 403, 401, 403,
        398, 403, 407, 402, 401, 399, 400, 401, 405, 402,
        408, 399, 399, 402, 399, 397, 407, 401, 399, 401,
        403, 400, 410, 401, 407, 423, 406, 406, 402, 405,
        405, 409, 399, 402, 407, 406, 413, 409, 404, 402,
        404, 406, 407, 405, 411, 410, 410, 410, 401, 402,
        404, 405, 392, 407, 406, 404, 403, 408, 404, 407,
        412, 406, 409, 400, 408, 404, 401, 404, 408, 406,
        408, 406, 401, 412, 393, 437, 418, 415, 404, 401,
        401, 407, 412, 375, 409, 406, 398, 406, 403, 404 )

print( n <- length( y ) )
# [1] 100

print( mu.hat.mle <- mean( y ) )
# [1] 404.59

# The MLE of sigma divides the sum of squares by n, whereas sd( y )
# divides by ( n - 1 ); hence the sqrt( ( n - 1 ) / n ) correction.
print( sigma.hat.mle <- sqrt( ( n - 1 ) / n ) * sd( y ) )
# [1] 6.434431
sd( y )
# [1] 6.466846

# note that with n = 100 the square root of the unbiased
# estimator s.u^2 of sigma^2 (which involves dividing by ( n - 1 ) )
# is hardly different from the MLE of sigma (which involves
# dividing by n)

print( se.mu.hat.mle <- sigma.hat.mle / sqrt( n ) )
# [1] 0.6434431

alpha <- 0.05   # 95% confidence

# 100 ( 1 - alpha )% large-sample maximum-likelihood
# confidence interval for mu in the Gaussian sampling model
# with unknown mu and unknown sigma:
c( mu.hat.mle - qnorm( 1 - alpha / 2 ) * se.mu.hat.mle,
   mu.hat.mle + qnorm( 1 - alpha / 2 ) * se.mu.hat.mle )
# 403.3289 405.8511

print( se.sigma.hat.mle <- sigma.hat.mle / sqrt( 2 * n ) )
# [1] 0.454983

# 100 ( 1 - alpha )% large-sample maximum-likelihood
# confidence interval for sigma in the Gaussian sampling model
# with unknown mu and unknown sigma:
c( sigma.hat.mle - qnorm( 1 - alpha / 2 ) * se.sigma.hat.mle,
   sigma.hat.mle + qnorm( 1 - alpha / 2 ) * se.sigma.hat.mle )
# 5.542681 7.326181