##### NB10 case study: 100 repeated weighings of the NB10 reference weight.
##### Part 1: preliminary descriptive analysis and normal qqplots.
##### Part 2: large-sample likelihood analysis in the Gaussian sampling
#####         model with unknown mu and unknown sigma.
##### Part 3: maximum-likelihood point and interval estimates.
##### Console output from a previous run is recorded in '#' comments.

y <- c( 409, 400, 406, 399, 402, 406, 401, 403, 401, 403,
        398, 403, 407, 402, 401, 399, 400, 401, 405, 402,
        408, 399, 399, 402, 399, 397, 407, 401, 399, 401,
        403, 400, 410, 401, 407, 423, 406, 406, 402, 405,
        405, 409, 399, 402, 407, 406, 413, 409, 404, 402,
        404, 406, 407, 405, 411, 410, 410, 410, 401, 402,
        404, 405, 392, 407, 406, 404, 403, 408, 404, 407,
        412, 406, 409, 400, 408, 404, 401, 404, 408, 406,
        408, 406, 401, 412, 393, 437, 418, 415, 404, 401,
        401, 407, 412, 375, 409, 406, 398, 406, 403, 404 )

print( n <- length( y ) )
# [1] 100

######################### preliminary descriptive analysis

sort( y )
# 375 392 393 397 398 398 399 399 399 399 399 399 399 400 400 400 400 401
# 401 401 401 401 401 401 401 401 401 401 401 402 402 402 402 402 402 402
# 402 403 403 403 403 403 403 404 404 404 404 404 404 404 404 404 405 405
# 405 405 405 406 406 406 406 406 406 406 406 406 406 406 406 407 407 407
# 407 407 407 407 407 408 408 408 408 408 409 409 409 409 409 410 410 410
# 410 411 412 412 412 413 415 418 423 437

quantile( y )
#   0%  25%  50%  75% 100%
#  375  401  404  407  437

mean( y )
# [1] 404.59
sd( y )
# [1] 6.466846

hist( y, breaks = 20, main = '', xlab = 'NB10 Measurements', prob = TRUE )

# Normal qqplot of the data, with the straight line implied by the
# sample ( mean, SD ) superimposed.
qqnorm( y, main = 'Normal qqplot of NB10 data' )
abline( mean( y ), sd( y ), lwd = 2, col = 'red' )

# Simulation envelope: qq-traces of M Gaussian datasets with the same
# ( n, mean, SD ) as the data, to calibrate how far from straight a
# genuinely Gaussian qqplot of this size can wander.
plot( 0, 0, type = 'n', xlab = 'Theoretical Quantiles',
  ylab = 'Sample Quantiles', xlim = c( -2.6, 2.6 ), ylim = c( 370, 440 ),
  main = 'Normal qqplot of simulated data' )
abline( mean( y ), sd( y ), lwd = 2, col = 'red' )

M <- 200
seed <- 1
set.seed( seed )   # reproducible simulation envelope

# Standard-normal plotting positions ( i - 0.5 ) / n for a sorted sample
# of size n; computed once instead of inside the loop.
normal.quantiles <- qnorm( ( seq_len( n ) - 0.5 ) / n )

for ( m in seq_len( M ) ) {
  y.star <- rnorm( n, mean( y ), sd( y ) )
  lines( normal.quantiles, sort( y.star ), lty = 2, lwd = 2, col = 'black' )
}
lines( normal.quantiles, sort( y ), lty = 1, lwd = 2, col = 'blue' )

# Normal qqplot with the two extreme outliers ( 375 and 437 ) removed.
y.trimmed <- sort( y )[ 2:( n - 1 ) ]
qqnorm( y.trimmed, main = 'Normal qqplot of NB10 without the outliers' )
abline( mean( y.trimmed ), sd( y.trimmed ), lwd = 2, col = 'red' )

######################### large-sample likelihood analysis

print( mu.hat <- mean( y ) )
# [1] 404.59
print( se.mu.hat <- sd( y ) / sqrt( n ) )
# [1] 0.6466846

n.mu.grid <- 500
mu.grid <- seq( mu.hat - 3 * se.mu.hat, mu.hat + 3 * se.mu.hat,
  length.out = n.mu.grid )

print( approx.sigma.hat <- sd( y ) )
print( approx.se.sigma.hat <- sd( y ) / sqrt( 2 * n ) )

n.sigma.grid <- 500
sigma.grid <- seq( approx.sigma.hat - 3 * approx.se.sigma.hat,
  approx.sigma.hat + 3 * approx.se.sigma.hat, length.out = n.sigma.grid )

# Gaussian log likelihood ( additive constants dropped ) evaluated on the
# ( mu, sigma ) grid; rows index mu, columns index sigma.  The sum of
# squares about each mu is computed once per grid value, and outer()
# replaces the original scalar double loop with the same element-wise
# arithmetic, vectorized.
ss.mu <- vapply( mu.grid, function( mu ) sum( ( y - mu )^2 ), numeric( 1 ) )
log.likelihood <- outer( ss.mu, sigma.grid,
  function( ss, sigma ) - n * log( sigma ) - ss / ( 2 * sigma^2 ) )

print( max.log.likelihood <- max( log.likelihood ) )
# [1] -236.1664

# Likelihood surface normalized to a maximum of 1; subtracting the max
# on the log scale before exponentiating avoids underflow.
likelihood <- exp( log.likelihood - max.log.likelihood )

par( mfrow = c( 1, 2 ) )
persp( mu.grid, sigma.grid, likelihood, xlab = 'mu', ylab = 'sigma',
  zlab = 'Likelihood', theta = 30, phi = 45 )
persp( mu.grid, sigma.grid, likelihood, xlab = 'mu', ylab = 'sigma',
  zlab = 'Likelihood', theta = 30, phi = 0 )
par( mfrow = c( 1, 1 ) )

contour( mu.grid, sigma.grid, likelihood, xlab = 'mu', ylab = 'sigma' )
image( mu.grid, sigma.grid, likelihood, xlab = 'mu', ylab = 'sigma' )

persp( mu.grid, sigma.grid, log.likelihood, xlab = 'mu', ylab = 'sigma',
  zlab = 'Log Likelihood', theta = 30, phi = 45 )
contour( mu.grid, sigma.grid, log.likelihood, xlab = 'mu', ylab = 'sigma' )
image( mu.grid, sigma.grid, log.likelihood, xlab = 'mu', ylab = 'sigma' )

############################################################

# The data are re-entered here ( identical values to the vector above )
# so that this section can be run on its own.
y <- c( 409, 400, 406, 399, 402, 406, 401, 403, 401, 403,
        398, 403, 407, 402, 401, 399, 400, 401, 405, 402,
        408, 399, 399, 402, 399, 397, 407, 401, 399, 401,
        403, 400, 410, 401, 407, 423, 406, 406, 402, 405,
        405, 409, 399, 402, 407, 406, 413, 409, 404, 402,
        404, 406, 407, 405, 411, 410, 410, 410, 401, 402,
        404, 405, 392, 407, 406, 404, 403, 408, 404, 407,
        412, 406, 409, 400, 408, 404, 401, 404, 408, 406,
        408, 406, 401, 412, 393, 437, 418, 415, 404, 401,
        401, 407, 412, 375, 409, 406, 398, 406, 403, 404 )

print( n <- length( y ) )
# [1] 100

print( mu.hat.mle <- mean( y ) )
# [1] 404.59

# The MLE of sigma divides the sum of squares by n, whereas sd( y )
# divides by ( n - 1 ); hence the sqrt( ( n - 1 ) / n ) correction.
print( sigma.hat.mle <- sqrt( ( n - 1 ) / n ) * sd( y ) )
# [1] 6.434431
sd( y )
# [1] 6.466846

# note that with n = 100 the square root of the unbiased
# estimator s.u^2 of sigma^2 (which involves dividing by ( n - 1 ) )
# is hardly different from the MLE of sigma (which involves
# dividing by n)

print( se.mu.hat.mle <- sigma.hat.mle / sqrt( n ) )
# [1] 0.6434431

alpha <- 0.05   # 95% confidence

# 100 ( 1 - alpha )% large-sample maximum-likelihood
# confidence interval for mu in the Gaussian sampling model
# with unknown mu and unknown sigma:
c( mu.hat.mle - qnorm( 1 - alpha / 2 ) * se.mu.hat.mle,
   mu.hat.mle + qnorm( 1 - alpha / 2 ) * se.mu.hat.mle )
# 403.3289 405.8511

print( se.sigma.hat.mle <- sigma.hat.mle / sqrt( 2 * n ) )
# [1] 0.454983

# 100 ( 1 - alpha )% large-sample maximum-likelihood
# confidence interval for sigma in the Gaussian sampling model
# with unknown mu and unknown sigma:
c( sigma.hat.mle - qnorm( 1 - alpha / 2 ) * se.sigma.hat.mle,
   sigma.hat.mle + qnorm( 1 - alpha / 2 ) * se.sigma.hat.mle )
# 5.542681 7.326181