Write a program that illustrates the central limit theorem as follows: as its argument, the program takes a function f that generates data from some probability distribution. It then generates samples of size 1, 5, 10 and 20 and computes the mean of each sample. This is repeated 1000 times. Finally, histograms of the sample means for each of the four cases are shown, using ggplot2. Here is an example of what this should look like:
#' Illustrate the central limit theorem with histograms of sample means
#'
#' Draws `B` independent samples of size 20 from `f`, computes the mean of the
#' first 1, 5, 10 and 20 observations of each sample, and shows the four
#' resulting sets of sample means as density histograms in a 2 x 2 grid.
#' Each panel is overlaid with the normal density that has the same mean and
#' standard deviation as the means plotted in that panel. All panels share
#' the same x-axis limits and bin width so they can be compared directly.
#'
#' The ggplot2 and grid functions are called unqualified, so both packages
#' must be attached by the calling script.
#'
#' @param f Function of one argument `k` that returns `k` random draws,
#'   e.g. `runif` or `function(k) rchisq(k, 1)`.
#' @param B Number of repetitions, i.e. the number of sample means that go
#'   into each histogram. A single positive whole number.
#' @return Invisibly, the last panel (n = 20). The function is called for
#'   its side effect of drawing on a fresh graphics page.
hw2 <- function(f, B = 1000) {
  stopifnot(
    is.function(f),
    is.numeric(B), length(B) == 1, is.finite(B),
    B >= 1, B == trunc(B)
  )

  n <- c(1, 5, 10, 20)
  n_max <- max(n)

  # Request exactly as many draws as the B x n_max matrix needs; a fixed
  # count would only be right for the default B.
  draws <- f(B * n_max)
  stopifnot(length(draws) == B * n_max)
  X <- matrix(draws, B, n_max)

  # Column i holds the B means of the first n[i] observations of each row.
  A <- matrix(0, B, length(n))
  colnames(A) <- paste0("n = ", n)
  for (i in seq_along(n)) {
    A[, i] <- rowMeans(X[, seq_len(n[i]), drop = FALSE])
  }

  # Common axis range and bin width across all panels.
  rg <- range(A)
  bw <- diff(rg) / 50

  plt <- vector("list", length(n))
  for (i in seq_along(n)) {
    df1 <- data.frame(x = A[, i])
    mu <- mean(A[, i])
    sigma <- sd(A[, i])
    # NOTE(review): `..density..` is deprecated in recent ggplot2 releases in
    # favour of after_stat(density); kept here so older versions still work.
    plt[[i]] <- ggplot(df1, aes(x)) +
      geom_histogram(
        aes(y = ..density..),
        color = "black",
        fill = "white",
        binwidth = bw
      ) +
      labs(title = colnames(A)[i], x = "x", y = "Density") +
      stat_function(
        fun = dnorm, colour = "blue",
        args = list(mean = mu, sd = sigma)
      ) +
      lims(x = rg)
  }

  # Start from a clean page and pop the layout viewport on exit, so repeated
  # calls neither draw over each other nor leave nested viewports behind.
  grid.newpage()
  pushViewport(viewport(layout = grid.layout(2, 2)))
  on.exit(popViewport(), add = TRUE)

  # Fill the 2 x 2 layout row by row.
  for (i in seq_along(plt)) {
    print(
      plt[[i]],
      vp = viewport(
        layout.pos.row = (i - 1) %/% 2 + 1,
        layout.pos.col = (i - 1) %% 2 + 1
      )
    )
  }

  invisible(plt[[length(plt)]])
}
# Demo 1: draws from the standard uniform distribution.
hw2(f = runif)
# Demo 2: draws from the chi-square distribution with 1 degree of freedom,
# which is strongly right-skewed.
hw2(f = function(k) rchisq(k, df = 1))