# setwd( "C:/Teaching/AMS-206-132/Winter-2017" ) y <- scan( "income-data.txt" ) n <- length( y ) theta.pretend <- mean( y ) M <- 100000 alpha <- 0.05 n.hit <- 0 y.bar.star <- rep( NA, M ) seed <- 12 set.seed( seed ) for ( i in 1:M ) { y.star <- sample( y, replace = T ) y.bar.star[ i ] <- mean( y.star ) s.star <- sd( y.star ) ci <- c( y.bar.star[ i ] - qnorm( 1 - alpha / 2 ) * s.star / sqrt( n ), y.bar.star[ i ] + qnorm( 1 - alpha / 2 ) * s.star / sqrt( n ) ) if ( ( ci[ 1 ] < theta.pretend ) & ( theta.pretend < ci[ 2 ] ) ) { n.hit <- n.hit + 1 } if ( ( i %% 1000 ) == 0 ) print( i ) } print( hit.rate <- n.hit / M ) qqnorm( y.bar.star ) abline( mean( y.bar.star ), sd( y.bar.star ), col = 'red', lwd = 3 ) quantile( y.bar.star, probs = c( 0.025, 0.975 ) ) # 2.5% 97.5% # 76.36481 89.87092 c( mean( y ) - qnorm( 1 - alpha / 2 ) * sd( y ) / sqrt( n ), mean( y ) + qnorm( 1 - alpha / 2 ) * sd( y ) / sqrt( n ) ) # 76.12607 89.64344 hist( y.bar.star, breaks = 500, xlab = 'Simulated Means', ylab = 'Density', prob = T, main = 'Bootstrapped Sample Means' ) y.bar.grid <- seq( 70, 100, length = 500 ) lines( y.bar.grid, dnorm( y.bar.grid, mean( y.bar.star ), sd( y.bar.star ) ), lty = 1, lwd = 3, col = 'red' ) ####################################################################### hist( y, breaks = 100, main = 'U.S. Family Income (2009)', xlab = 'Income', ylab = 'Density', prob = T, ylim = c( 0, 0.012 ) ) w <- log( y ) mu.hat.mle <- mean( w ) sigma.hat.mle <- sd( w ) * sqrt( ( n - 1 ) / n ) library( stats ) y.grid <- seq( 0, 1000, length = 500 ) lines( y.grid, dlnorm( y.grid, mu.hat.mle, sigma.hat.mle ), lty = 1, lwd = 3, col = 'red' ) text( 200, 0.011, 'Lognormal', col = 'red' ) ####################################################################### hist( y, breaks = 100, main = 'U.S. Family Income (2009)', xlab = 'Income', ylab = 'Density', prob = T, ylim = c( 0, 0.012 ) ) lines( y.grid, dgamma( y.grid, shape = 1.305, rate = 0.01575 ), lty = 1, lwd = 3, col = 'blue' ) text( 250, 0.004, 'Gamma', col = 'blue' ) ####################################################################### rdirichlet <- function( n, alpha ) { p <- length( alpha ) g <- matrix( NA, n, p ) for ( i in 1:n ) { for ( j in 1:p ) { g[ i, j ] <- rgamma( 1, rate = 1, shape = alpha[ j ] ) } g[ i, ] <- g[ i, ] / sum( g[ i, ] ) } return( g ) } p <- 3 N <- c( 727, 583, 137 ) epsilon <- 0.01 alpha.prior <- c( epsilon, epsilon, epsilon ) alpha.posterior <- alpha.prior + N M <- 100000 theta.star <- rdirichlet( M, alpha.posterior ) theta.1.star <- theta.star[ , 1 ] theta.2.star <- theta.star[ , 2 ] theta.3.star <- theta.star[ , 3 ] print( posterior.mean.theta.1 <- mean( theta.1.star ) ) 0.5023507 print( posterior.sd.theta.1 <- sd( theta.1.star ) ) 0.01311638 print( mcse.mean.theta.1 <- sd( theta.1.star ) / sqrt( M ) ) 4.147763e-05 hist( theta.1.star, breaks = 250, prob = T, main = 'Marginal Posterior', xlab = 'theta.1', ylab = 'Density' ) gamma.star <- theta.1.star - theta.2.star print( posterior.mean.gamma <- mean( gamma.star ) ) 0.0993945 print( posterior.sd.gamma <- sd( gamma.star ) ) 0.0248164 hist( gamma.star, breaks = 250, prob = T, main = 'Marginal Posterior', xlab = 'gamma', ylab = 'Density' ) mean( gamma.star > 0 ) 0.99995