\documentclass[12pt]{article}
\usepackage[margin=1.06in]{geometry}                
\geometry{letterpaper}                  
\usepackage{graphicx}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{hyperref, multicol}
\usepackage{pifont}
\usepackage{marvosym}
% Link appearance for hyperref (used by the \href to the course webpage).
% NOTE(review): per the hyperref manual, allcolors only colours link text when
% colorlinks=true; with colorlinks=false links are marked by a border box
% instead and allcolors=blue has no visible effect. Confirm which look is
% intended (colorlinks=true for blue text, or hidelinks for no decoration).
\hypersetup{colorlinks=false, allcolors=blue}
% ---- Layout shortcut ------------------------------------------------------
% \noin: abbreviation for \noindent, used to start each lettered sub-part.
\newcommand*{\noin}{\noindent}

% ---- Moment notation (upright labels for use inside math) -----------------
\newcommand*{\SD}{\textnormal{SD}}
\newcommand*{\var}{\textnormal{Var}}
\newcommand*{\cov}{\textnormal{Cov}}
\newcommand*{\corr}{\textnormal{Corr}}

% ---- Distribution labels (upright), written as e.g. \Unif(0,1) ------------
% Discrete
\newcommand*{\Bern}{\textnormal{Bern}}
\newcommand*{\Bin}{\textnormal{Bin}}
\newcommand*{\Geom}{\textnormal{Geom}}
\newcommand*{\FS}{\textnormal{FS}}
\newcommand*{\HGeom}{\textnormal{HGeom}}
\newcommand*{\NBin}{\textnormal{NBin}}
\newcommand*{\Pois}{\textnormal{Pois}}
% Continuous
\newcommand*{\Expo}{\textnormal{Expo}}
\newcommand*{\Unif}{\textnormal{Unif}}
\newcommand*{\Beta}{\textnormal{Beta}}
\newcommand*{\Gam}{\textnormal{Gamma}}
% Calligraphic N, as in \N(0,1).
\newcommand*{\N}{\mathcal{N}}

                     
\begin{document}
 
\noindent {\large  \textbf{Stat 110 Penultimate Homework, Fall 2017}} 

\bigskip

\noindent \textbf{Due}: Friday 11/17 at 5:00 pm, submitted as a PDF via the  \href{https://canvas.harvard.edu/courses/27764}{{course webpage}}. Please check carefully to make sure you upload the correct file. Your submission must be a single PDF file, no more than $20$ MB in size. It can be typeset or scanned, but must be clear and easily legible (not blurry or faint) and correctly rotated. No submissions on paper or by email will be accepted. Please show your work and give clear, careful, convincing justifications. See the syllabus for the collaboration policy. 

\bigskip

\noindent 1. (BH 9.1) Fred wants to travel from Blotchville to Blissville, and is deciding between $3$ options (involving different routes or different forms of transportation). The $j$th option would take an average of $\mu_j$ hours, with a standard deviation of $\sigma_j$ hours. Fred randomly chooses between the $3$ options, with equal probabilities. Let $T$ be how long it takes for him to get from Blotchville to Blissville.

\medskip

\noin (a) Find $E(T)$. Is it simply $(\mu_1+\mu_2+\mu_3)/3$, the average of the expectations?

\medskip

\noin (b) Find $\var(T)$. Is it simply $(\sigma^2_1+\sigma^2_2+\sigma^2_3)/3$, the average of the variances?


\bigskip

\noindent 2. (BH 9.3)  A group of $21$ women and $14$ men are enrolled in a medical study. Each of them has a certain disease with probability $p$, independently. It is then found (through extremely reliable testing) that exactly $5$ of the people have the disease. Given this information, what is the expected number of women who have the disease? 

\bigskip

\noindent 3. (BH 9.8) There are two envelopes, each of which has a check for a $\Unif(0,1)$ amount of money, measured in thousands of dollars. The amounts in the two envelopes are independent. You get to choose an envelope and open it, and then you can either keep that amount or switch to the other envelope and get whatever amount is in that envelope. 

Suppose that you use the following strategy: choose an envelope and open it. If you observe $U$, then stick with that envelope with probability $U$, and switch to the other envelope with probability $1-U$. 

\medskip

\noin (a) Find the probability that you get the larger of the two amounts.

\medskip

\noin (b) Find the expected value of what you will receive. 

\bigskip

\noindent 4. (BH 9.18) Let $X$ be the height of a randomly chosen adult man, and $Y$ be his father's height, where $X$ and $Y$ have been standardized to have mean $0$ and standard deviation $1$. Suppose that $(X,Y)$ is Bivariate Normal, with $X,Y \sim \N(0,1)$ and $\corr(X,Y)=\rho$.

\medskip

\noin (a) Let $y=ax+b$ be the equation of the best line for predicting $Y$ from $X$ (in the sense of minimizing the mean squared error), e.g., if we were to observe $X=1.3$ then we would predict that $Y$ is $1.3a+b$. Now suppose that we want to use $Y$ to predict $X$, rather than using $X$ to predict $Y$. Give and explain an \emph{intuitive guess} for what the slope is of the best line for predicting $X$ from $Y$.

\medskip

\noin (b) Find a constant $c$ (in terms of $\rho$) and an r.v.~$V$ such that $Y=cX+V$, with $V$ independent of $X$. 

\medskip

\noin Hint: Start by finding $c$ such that $\cov(X,Y-cX)=0$. 

\medskip

\noin (c) Find a constant $d$ (in terms of $\rho$) and an r.v.~$W$ such that $X=dY+W$, with $W$ independent of $Y$. 

\medskip

\noin (d) Find $E(Y|X)$ and $E(X|Y)$.

\medskip

\noin (e) Reconcile (a) and (d), giving a clear and correct intuitive explanation.

\bigskip

\noindent 5. (BH 9.24) Kelly makes a series of $n$  bets, each of which she has probability $p$ of winning, independently. Initially, she has $x_0$ dollars. Let $X_j$ be the amount she has immediately after her $j$th bet is settled. Let $f$ be a constant in $(0,1)$, called the \emph{betting fraction}. On each bet, Kelly wagers a fraction $f$ of her wealth, and then she either wins or loses that amount. For example, if her current wealth is \$100 and $f = 0.25$, then she bets \$25 and either gains or loses that amount. (A famous choice when $p>1/2$ is $f=2p-1$, which is known as the \emph{Kelly criterion}.) Find $E(X_n)$ (in terms of $n,p,f,x_0$).

\bigskip

\noindent 6. (BH 9.37)  Show that for any r.v.s $X$ and $Y$,
\[
E(Y|E(Y|X)) = E(Y|X).
\]
This has a nice intuitive interpretation if we think of $E(Y|X)$ as the prediction we would make for $Y$ based on $X$: given the prediction we would use for predicting $Y$ from $X$, we no longer need to know $X$ to predict $Y$---we can just use the prediction we have! For example, letting $E(Y|X)=g(X)$, if we observe $g(X)=7$, then we may or may not know what $X$ is (since $g$ may not be one-to-one). But even without knowing $X$, we know that the prediction for $Y$ based on $X$ is $7$.

\bigskip

\noindent 7. (BH 9.42) An actuary wishes to estimate various quantities related to the number of insurance claims and the dollar amounts of those claims for someone named Fred.  Suppose that Fred will make $N$ claims next year, where $N|\lambda \sim \Pois(\lambda)$. But $\lambda$ is unknown, so the actuary, taking a Bayesian approach, gives $\lambda$ a prior distribution based on past experience. Specifically, the prior is $\lambda \sim \Expo(1)$. The dollar amount of a claim is Log-Normal with parameters $\mu$ and $\sigma^2$ (here $\mu$ and $\sigma^2$ are the mean and variance of the underlying Normal), with $\mu$ and $\sigma^2$ known. The dollar amounts of the claims are i.i.d.~and independent of $N$.

\medskip

\noin (a) Find $E(N)$ and $\var(N)$ using properties of conditional expectation (your answers should not depend on $\lambda$, since $\lambda$ is unknown and being treated as an r.v.!). 

\medskip

\noin (b) Find the mean and variance of the total dollar amount of all the claims.

\medskip

\noin (c) Find the distribution of $N$. If it is a named distribution we have studied, give its name and parameters.

\medskip

\noin (d) Find the posterior distribution of $\lambda$, given that it is observed that Fred makes $N=n$ claims next year. If it is a named distribution we have studied, give its name and parameters.

\end{document}