Solution to Homework 7

Problem 1

Write a routine that generates random variates from a distribution with density proportional to \(g(x)=\sqrt{(1+x)^3(1-x)^5}\), \(-1<x<1\). Use only runif in your routine. Show that your routine generates the correct data.

The graph of this curve looks as follows:

curve(sqrt((1+x)^3*(1-x)^5), -1, 1, lwd=2)

the normalizing constant is

K <- integrate(function(x) sqrt((1+x)^3*(1-x)^5), -1, 1)$value
K

## [1] 1.178097

We can use accept-reject with \(Y\sim U[-1,1]\), which we can generate with 2*runif(1)-1 and which has density g(x)=1/2. Then

\[ \begin{aligned} &\frac{f(x)}{g(x)} = \frac1{K}\sqrt{(1+x)^3(1-x)^5}/(1/2)\\ &\frac{d}{dx}\sqrt{(1+x)^3(1-x)^5} =\\ &\frac{3(1+x)^2(1-x)^5+(1+x)^3(-5)(1-x)^4}{2\sqrt{(1+x)^3(1-x)^5}} =\\ &-(1+4x)\frac{(1+x)^2(1-x)^4}{\sqrt{(1+x)^3(1-x)^5}} =0\\ &x=-\frac14\\ &c=\max\{\frac{f(x)}{g(x)},-1<x<1\}=\\ &\frac2{K}\sqrt{(1-\frac14)^3(1-(-\frac14))^5}=\\ &\frac2{1.178}\sqrt{(\frac34)^3(\frac54))^5}=1.9265\\ \end{aligned} \]

so we have

rhw7p1 <- function(B=1e4) {
  K1 <- (2/1.178)/1.9264
  f.c <- function(x) 
    K1*sqrt((1+x)^3*(1-x)^5)
  x <- rep(0, B)
  for(i in 1:B) {
    repeat {
      Y <- runif(1, -1, 1)
      if(runif(1)<f.c(Y)) {
        x[i] <- Y
        break
      }
    }
  }
  x
}

Let’s check:

x <- rhw7p1()
hist(x, 50, freq=FALSE, main="")
curve(sqrt((1+x)^3*(1-x)^5)/1.178, -1, 1, 
      col="blue", lwd=2, add=TRUE)

Problem 2

Write a routine that generates random variates from a distribution with density proportional to \(g(k)=\exp(-\sqrt{k})\), \(k=0, 1, ...\). Show that your routine generates the correct data.

Again we want to use accept-reject. One solution would be to find N such that \(P(X>N)=0.99999\) and then use the sample command:

x <- 0:10000
K <- sum(exp(-sqrt(x)))
K

## [1] 2.670407

x <- (-1)
p <- 0
repeat {
  x <- x+1
  p <- p+exp(-sqrt(x))
  if(p>0.99999*K) break
}
x

## [1] 194

g is stricly decreasing, so

\[ \begin{aligned} &c=\max\{\frac{f(x)}{g(x)},x=0,1\} = \\ &\frac{f(0)}{g(0)} = \frac{1/2.67}{1/195}=73.41\\ \end{aligned} \]

rhw7p2 <- function(B=1e4) {
  K1 <-195/2.67/73.41 
  f.c <- function(x) 
     K1*exp(-sqrt(x))
  x <- rep(0, B)
  for(i in 1:B) {
    repeat {
      Y <- sample(0:194, 1)
      if(runif(1)<f.c(Y)) {
        x[i] <- Y
        break
      }
    }
  }
  x
}
x <- 0:10
p1 <- exp(-sqrt(x))/2.67
x <- rhw7p2()
p2 <- table(x)[1:11]/1e4
tmp <- round(cbind("Theory"=p1, "Simulation"=p2), 3)
kable.nice(tmp, do.row.names = FALSE)

Theory	Simulation
0.375	0.385
0.138	0.133
0.091	0.092
0.066	0.064
0.051	0.051
0.040	0.039
0.032	0.032
0.027	0.025
0.022	0.023
0.019	0.019
0.016	0.016

Problem 3

Write a routine that generates random variates from a distribution with density \[f(x,y)=1.732\exp\left(-[2+\sin(2\pi x)] y\right)\] \(0<x<1\), \(y>0\). Use the simulated data to find F_X(0.5), F(0.5, 1), E[X], Var[X], E[Y], Var[Y], Cor(X,Y), F_X|Y=1(0.5|1) and E[X|Y=1].

BONUS: Draw the graph of f_X|Y=0.1(x|0.1).

I want to use accept-reject, so I need a random variable that I can generate. Note that \(-[2+\sin(2\pi x)] \le -1\), so let’s try (U, V) with \(U\sim U[0, 1]\), \(V\sim Exp(1)\) and independent. Therefore \(g(x,y)=I_{(0,1)}(x)e^{-y}\) and

\[ \begin{aligned} &\frac{f(x,y)}{g(x,y)}= \\ &\frac{1.732\exp\left(-[2+\sin(2\pi x)] y\right)}{e^{-y}} \le \\ &k \frac{\exp\left(-y\right)}{e^{-y}} \le 1.732\\ &c = \max\left\{ \frac{f(x,y)}{g(x,y)} \right\} \le 1.732 \\ &\frac{f(x,y)}{cg(x,y)} = \exp\left(-y-\sin(2\pi x)y\right) \end{aligned} \]

B=1e5
rhw7p3 <- function(B=1e5) {
  f.c <- function(x) 
     exp(-x[2]-sin(2*pi*x[1])*x[2])
  xy <- matrix(0, B, 2)
  for(i in 1:B) {
    repeat {
      Y <- c(runif(1), rexp(1, 1))
      if(runif(1)<f.c(Y)) {
        xy[i, ] <- Y
        break
      }
    }
  }
  xy
}
xy <- rhw7p3()

I will start by verifying the simulation. For that I need the marginals, but here I will find the numerically:

hist(xy[, 1], 50, freq=FALSE, main="X")
x=seq(0, 1, length=250)
fx=x
f=function(y, x) 1.732*exp(-(2+sin(2*pi*x))*y)
for(i in seq_along(x)) 
  fx[i] <- integrate(f, 0, Inf, x=x[i])$value
lines(x, fx, col="blue",lwd=2)

hist(xy[, 2], 50, freq=FALSE, main="Y")
y=seq(0, 5, length=250)
fy=y
f=function(x, y) exp(-(2+sin(2*pi*x))*y)
for(i in seq_along(y)) 
  fy[i] <- 1.732*integrate(f, 0, 1, y=y[i])$value
lines(y, fy, col="blue",lwd=2)

and

#F_X(0.5)
round(sum(xy[, 1]<0.5)/B, 3)

## [1] 0.331

#F(0.5, 1)
round(sum(xy[, 1]<0.5 & xy[, 2]<1)/B, 3)

## [1] 0.305

#E[X], Var[X]
round(c(mean(xy[, 1]), var(xy[, 1])), 3)

## [1] 0.584 0.074

#E[X], Var[X]
round(c(mean(xy[, 2]), var(xy[, 2])), 3)

## [1] 0.672 0.557

#Cor(X,y)
round(cor(xy)[1, 2], 3)

## [1] 0.229

#F_X|Y=1(0.5|1) and E[X|Y=1]
x=xy[xy[, 2]>0.95 & xy[, 2]<1.05 ]
round(c(length(x), sum(x<0.5)/length(x), mean(x)), 3)

## [1] 5900.000    0.115    0.816

BONUS: Finally the graph of f_X|Y=0.1(x|0.1). One idea is first to find F_X|Y=0.1(x|0.1) and then use the definition of a derivative, \(f'(x) = \lim_{h\rightarrow 0} [f(x+h)-f(x)]/h\)

x=xy[xy[, 2]>0.05 & xy[, 2]<0.15 ]
t=seq(0, 1, length=100)
y=t
for(i in seq_along(t))
  y[i]<-sum(x<t[i])/length(x)
hist(x, 50, freq=FALSE)
lines(t[-1], 100*(y[-1]-y[-100]), type="l", col="blue", lwd=2)

This is a so-called non-parametric density estimate. There are however much better ones:

hist(x, 50, freq=FALSE)
lines(density(x, bw=0.03), type="l", col="blue", lwd=2)