% MVSA_Assignments (4).tex
\documentclass[a4paper, 10.5pt, twoside, openany]{book}
\usepackage{amsfonts}
\usepackage{array}
\usepackage{boxedminipage, fancybox}
\usepackage{caption}
\usepackage{color}
\usepackage[colorlinks,linkcolor=blue]{hyperref}
\usepackage{ctex}
\usepackage{datetime}
\usepackage[dvipsnames]{xcolor}
\usepackage{enumerate}
\usepackage{epsfig,graphicx,subfigure}
\usepackage{extarrows}
\usepackage{fancyheadings}
\usepackage{float}
\usepackage{geometry}
\usepackage{listings}
\usepackage{longtable}
\usepackage{makeidx}
\usepackage{mathrsfs}
\usepackage{multirow}
\usepackage{natbib}
\usepackage{pifont}
\usepackage{rotating}
\usepackage{setspace}
\usepackage{shadow}
\usepackage{stmaryrd, amssymb, amsmath}
\usepackage{tabularx}
\usepackage{url}
\usepackage{varioref}
\usepackage{verbatim}
\usepackage{wrapfig}
\usepackage{xcolor}

\geometry{left=2.0cm, right=2.0cm, top=2.5cm, bottom=2.5cm}
\linespread{1.5}
% Light gray (85% white) used as the background of inline code snippets.
\definecolor{mygray}{rgb}{0.85, 0.85, 0.85}
% \codeinline{<code>}: typeset <code> with \lstinline on a mygray \colorbox.
% The \lstinline delimiter is |, so a literal | inside #1 would end it early.
% NOTE(review): \lstinline is used inside a macro argument here, so special
% characters in #1 may not be treated verbatim -- confirm in the listings manual.
\newcommand{\codeinline}[1]{\colorbox{mygray}{\lstinline|#1|}}

%% ----------------------------------------------------------------------------------------------------------------------------------------------------------------------

\title{\Huge \bf 《多元统计分析》课后作业}
\author{\kaishu 姓名:\underline{\quad 你的姓名 \quad} \\[5mm]
                 \kaishu 学号:\underline{\quad 你的学号 \quad} \\[5mm]
                 \kaishu 班级:\underline{\quad 统计 23-X 班 \quad} \\[50mm]
                 \kaishu 中国石油大学(北京)克拉玛依校区文理学院数学与统计系
                 }
\date{\today}

\begin{document}

% -------------------------------------------- 封面页 --------------------------------------------
\frontmatter
\maketitle

% -------------------------------------------- 作业要求 --------------------------------------------
\chapter{作业要求}
\begin{enumerate}
	\item 可以和其他同学讨论作业当中的问题,但应当自己独立完成作业
	\item 计算、证明等要有过程,要有主要步骤的说明
	\item 请将计算、绘图所用的 R 代码以及生成的结果和图像一并添加在作业文件当中
	\item 请使用 \LaTeX 编辑并生成 PDF 格式的文件,第X周作业文件命名方式:学号-姓名-X.pdf
	\item 评分标准:每一问得分 $\in \left\{ 2 ,\, 1 ,\, 0 \right\}$
		\begin{itemize}
			\item 2:~ 按时完成并上交作业,且答案基本正确
			\item 1:~ 按时完成并上交作业,且答案部分正确
			\item 0:~ 答案完全错误,或者迟交作业(规定时间72小时之后)
		\end{itemize}
	\item 请将完成的 PDF 格式的作业文件发送至邮箱:xiaolei@cup.edu.cn
	\item 每位同学可以有一次迟交作业的机会,但不得晚于规定时间三日之后
	\item 第4周作业截止时间:2026年4月10日24:00
\end{enumerate}

\tableofcontents

% -------------------------------------------- 正文部分 --------------------------------------------
\mainmatter



% -------------------------------------------- 第 4 周作业 --------------------------------------------
\chapter{第 4 周作业}

{\kaishu \color{blue} 第 4 周作业截止时间:} 2026年4月10日24:00

{\kaishu \color{blue} 第 4 周作业完成时间:} \today \space \currenttime      % 请勿编辑、删除本行!

\begin{enumerate}
	\item {\color{TealBlue} [2 分]}  证明
		\begin{equation}
			f_{\boldsymbol{Y}} (\boldsymbol{y}) = \begin{cases} \dfrac{1}{2} \, y_1 - \dfrac{1}{4} \, y_2 \,, & 0 \leq y_1 \leq 2 ,\, \left| y_2 \right| \leq 1 - \left| 1 - y_1 \right| \,, \\[2mm]
			0 \,, & \text{其它} \end{cases}
		\end{equation}
		是一个概率密度函数.

		{\color{red} \heiti 【证明】} 要验证这是一个概率密度函数需验证非负性以及积分等于 \(1\)。

\paragraph{非负性:}
在定义域内,\(y_1 \ge 0\),且 \(|y_2| \le 1 - |1-y_1|\)。分两种情况:
\begin{itemize}
    \item 当 \(0 \le y_1 \le 1\) 时,\(|1-y_1| = 1-y_1\),则 \(|y_2| \le y_1\)。此时
    \[
    \frac12 y_1 - \frac14 y_2 \ge \frac12 y_1 - \frac14 y_1 = \frac14 y_1 \ge 0.
    \]
    \item 当 \(1 \le y_1 \le 2\) 时,\(|1-y_1| = y_1-1\),则 \(|y_2| \le 2-y_1\)。此时
    \[
    \frac12 y_1 - \frac14 y_2 \ge \frac12 y_1 - \frac14 (2-y_1) = \frac34 y_1 - \frac12 \ge \frac34\cdot1 - \frac12 = \frac14 > 0.
    \]
\end{itemize}
因此 \(f_{\boldsymbol{Y}}(\boldsymbol{y}) \ge 0\) 处处成立。

\paragraph{归一性:}
计算二重积分
\[
I = \iint_{\mathbb{R}^2} f_{\boldsymbol{Y}}(\boldsymbol{y})\,\mathrm{d}y_1\mathrm{d}y_2.
\]
先对 \(y_2\) 积分。对于固定的 \(y_1\),令 \(L = 1 - |1-y_1|\),则 \(y_2\) 从 \(-L\) 到 \(L\),且被积函数关于 \(y_2\) 为线性,奇次项积分为零:
\[
\int_{-L}^{L} \left(\frac12 y_1 - \frac14 y_2\right)\mathrm{d}y_2 = \frac12 y_1 \cdot (2L) = y_1 L.
\]
于是
\[
I = \int_{0}^{2} y_1 \bigl(1 - |1-y_1|\bigr)\,\mathrm{d}y_1.
\]
分段计算:
\[
\begin{aligned}
\int_{0}^{1} y_1 \cdot y_1\,\mathrm{d}y_1 &= \int_{0}^{1} y_1^2\,\mathrm{d}y_1 = \frac13, \\
\int_{1}^{2} y_1 \cdot (2-y_1)\,\mathrm{d}y_1 &= \int_{1}^{2} (2y_1 - y_1^2)\,\mathrm{d}y_1 = \left[ y_1^2 - \frac13 y_1^3 \right]_{1}^{2} \\
&= \left(4 - \frac83\right) - \left(1 - \frac13\right) = \frac43 - \frac23 = \frac23.
\end{aligned}
\]
故 \(I = \frac13 + \frac23 = 1\)。积分值为 \(1\),又具有非负性,因此 \(f_{\boldsymbol{Y}}\) 是一个概率密度函数。\(\square\)


	\item 设 $\boldsymbol{X} = \left( X_1 ,\, X_2 \right)^{\rm T}$ 的概率密度函数为
		\begin{equation}
			f \left( x_1 \,,\, x_2 \right) = \begin{cases} 4 \, x_1 x_2 \, {\rm e}^{-x^2_1} \,, & x_1 > 0 \,,\, 0< x_2 < 1 \,, \\ 0 \,, & \text{其它.} \end{cases}
		\end{equation}
		\begin{enumerate}
			\item {\color{TealBlue} [2 分]} 计算 $\mathbb{E} (\boldsymbol{X})$ 与 $\mathbb{V}{\rm ar} (\boldsymbol{X})$.

			{\color{red} \heiti 【解】} 边际密度为:
    \[
    f_{X_1}(x_1) = \int_0^1 4x_1x_2\mathrm{e}^{-x_1^2}\,\mathrm{d}x_2 = 2x_1\mathrm{e}^{-x_1^2},\quad x_1>0,
    \]
    \[
    f_{X_2}(x_2) = \int_0^\infty 4x_1x_2\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2x_2,\quad 0<x_2<1.
    \]
    由于对任意 $(x_1, x_2)$ 都有
    \begin{equation}
			f \left( x_1 \,,\, x_2 \right) = f_{X_1}(x_1)\cdot f_{X_2}(x_2) \,,
		\end{equation}
    可见 $X_1$ 与 $X_2$ 相互独立。

    \[
    \begin{aligned}
    \mathbb{E}(X_1) &= \int_0^\infty x_1\cdot 2x_1\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2\int_0^\infty x_1^2\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2\cdot\frac{\sqrt\pi}{4} = \frac{\sqrt\pi}{2},\\
    \mathbb{E}(X_2) &= \int_0^1 x_2\cdot 2x_2\,\mathrm{d}x_2 = 2\int_0^1 x_2^2\,\mathrm{d}x_2 = \frac23.
    \end{aligned}
    \]
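    由于独立,协方差为 $0$;又由 $\mathbb{E}(X_1^2) = 1$、$\mathbb{E}(X_2^2) = \frac12$(计算过程见第 (c) 小问)得 $\operatorname{Var}(X_1) = 1 - \frac{\pi}{4}$,$\operatorname{Var}(X_2) = \frac12 - \frac49 = \frac{1}{18}$。故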
    \[
    \mathbb{E}(\boldsymbol{X}) = \begin{pmatrix} \frac{\sqrt\pi}{2} \\[4pt] \frac23 \end{pmatrix},\qquad
    \mathbb{V}\mathrm{ar}(\boldsymbol{X}) = \begin{pmatrix}
    1-\frac{\pi}{4} & 0 \\[4pt]
    0 & \frac{1}{18}
    \end{pmatrix}.
    \]

          \item {\color{TealBlue} [2 分]} 计算 $\mathbb{E} \left( X_1 \left| X_2 \right. \right)$ 与 $\mathbb{E} \left( X_2 \left| X_1 \right. \right)$.

			{\color{red} \heiti 【解】} 由独立性,条件期望等于无条件期望:
    \[
    \mathbb{E}(X_1\mid X_2) = \mathbb{E}(X_1) = \frac{\sqrt\pi}{2},\qquad
    \mathbb{E}(X_2\mid X_1) = \mathbb{E}(X_2) = \frac23.
    \]

			\item {\color{TealBlue} [2 分]} 计算 $\mathbb{V}{\rm ar} \left( X_1 \left| X_2 \right. \right)$ 与 $\mathbb{V}{\rm ar} \left( X_2 \left| X_1 \right. \right)$.

			{\color{red} \heiti 【解】} 由独立性,条件方差等于无条件方差:
    \[
    \begin{aligned}
    \mathbb{E}(X_1^2) &= \int_0^\infty x_1^2\cdot 2x_1\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2\int_0^\infty x_1^3\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2\cdot\frac12 = 1, \\
    \operatorname{Var}(X_1) &= \mathbb{E}(X_1^2) - \bigl(\mathbb{E}(X_1)\bigr)^2 = 1 - \frac{\pi}{4};
    \end{aligned}
    \]
    \[
    \begin{aligned}
    \mathbb{E}(X_2^2) &= \int_0^1 x_2^2\cdot 2x_2\,\mathrm{d}x_2 = 2\int_0^1 x_2^3\,\mathrm{d}x_2 = \frac12, \\
    \operatorname{Var}(X_2) &= \mathbb{E}(X_2^2) - \bigl(\mathbb{E}(X_2)\bigr)^2 = \frac12 - \left(\frac23\right)^2 = \frac{1}{18}.
    \end{aligned}
    \]

    \[
\begin{aligned}  
     \operatorname{Var}(X_1\mid X_2) = \operatorname{Var}(X_1) = 1 - \frac{\pi}{4},\quad
    \operatorname{Var}(X_2\mid X_1) = \operatorname{Var}(X_2) = \frac{1}{18}.
\end{aligned}
    \]
		\end{enumerate}

	\item {\color{TealBlue} [2 分]} 设 $\boldsymbol{X} = \left( X_1 ,\, X_2 \right)^{\rm T}$ 的概率密度函数为
		\begin{equation}
			f \left( x_1 \,,\, x_2 \right) = \begin{cases} \dfrac{1}{2 \pi} \,, & 0 < x_1 < 2 \pi \,,\, 0 < x_2 < 1 \,, \\[2mm] 0 \,, & \text{其它.} \end{cases}
		\end{equation}
		令 
		\begin{equation}
			\begin{cases} U_1 = \left( \sin X_1 \right) \sqrt{-2 \, \ln X_2} \\[2mm] U_2 = \left( \cos X_1 \right) \sqrt{-2 \, \ln X_2} \end{cases}
		\end{equation}
		求 $\boldsymbol{U} = \left( U_1 ,\, U_2 \right)^{\rm T}$ 的概率密度函数 $g \left( u_1 \,,\, u_2 \right)$.

		{\color{red} \heiti 【解】} 由题易知 $X_1$ 与 $X_2$ 独立,$X_1 \sim \mathrm{Uniform}(0,2\pi)$,$X_2 \sim \mathrm{Uniform}(0,1)$。定义中间变量
\[
R = \sqrt{-2\ln X_2},\qquad \Theta = X_1,
\]
则 $U_1 = R\sin\Theta$, $U_2 = R\cos\Theta$,又变换 $(X_1,X_2) \mapsto (U_1,U_2)$ 是一一映射。其逆变换为
\[
X_1 = \Theta = \operatorname{atan2}(U_1,U_2) \bmod 2\pi,\qquad X_2 = \exp\!\left(-\frac{R^2}{2}\right) = \exp\!\left(-\frac{U_1^2+U_2^2}{2}\right).
\]
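计算雅可比行列式。极坐标变换 $(R,\Theta) \mapsto (U_1,U_2)$ 的雅可比行列式绝对值为 $\left|\partial(U_1,U_2)/\partial(R,\Theta)\right| = R$,故其逆变换 $(U_1,U_2) \mapsto (R,\Theta)$ 的雅可比行列式绝对值为 $\left|\partial(R,\Theta)/\partial(U_1,U_2)\right| = 1/R$。再计算 $(X_1,X_2)$ 关于 $(R,\Theta)$ 的雅可比行列式: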
\[
X_1 = \Theta,\quad X_2 = e^{-R^2/2},
\]
则
\[
\frac{\partial(X_1,X_2)}{\partial(R,\Theta)} = \begin{vmatrix}
0 & 1 \\[2pt]
-Re^{-R^2/2} & 0
\end{vmatrix} = R e^{-R^2/2}.
\]
因此从 $(U_1,U_2)$ 到 $(X_1,X_2)$ 的雅可比行列式绝对值为
\[
\left|\frac{\partial(X_1,X_2)}{\partial(U_1,U_2)}\right| = \left|\frac{\partial(X_1,X_2)}{\partial(R,\Theta)}\right| \cdot \left|\frac{\partial(R,\Theta)}{\partial(U_1,U_2)}\right| = \bigl(R e^{-R^2/2}\bigr) \cdot \frac{1}{R} = e^{-(U_1^2+U_2^2)/2}.
\]
于是,$\boldsymbol{U}$ 的联合密度为
\[
g(u_1,u_2) = f\bigl(x_1(u_1,u_2), x_2(u_1,u_2)\bigr) \cdot e^{-(u_1^2+u_2^2)/2}.
\]
由于 $f(x_1,x_2)=1/(2\pi)$ 在定义域内恒成立,且 $(u_1,u_2)$ 的取值范围除去一个零测集(半直线 $u_1 = 0,\ u_2 \ge 0$)外为整个 $\mathbb{R}^2$,故
\[
g(u_1,u_2) = \frac{1}{2\pi}\, e^{-(u_1^2+u_2^2)/2},\qquad (u_1,u_2)\in\mathbb{R}^2.
\]
即 $\boldsymbol{U}$ 服从标准二元正态分布 $N_2(\boldsymbol{0},I_2)$。


	\item {\color{TealBlue} [2 分]}  设 $\boldsymbol{X} \sim N_p ( \boldsymbol{\mu} ,\, \mathnormal{\Sigma})$,其概率密度函数为
		\begin{equation}
			f (\boldsymbol{x}) = \left| 2 \pi \mathnormal{\Sigma} \right|^{-1/2} \exp
			\left\{ -\dfrac{1}{2} (\boldsymbol{x} - \boldsymbol{\mu})^{\rm T} \mathnormal{\Sigma}^{-1} (\boldsymbol{x} - \boldsymbol{\mu}) \right\} \,.
		\end{equation}
		若 $\mathcal{A}$ 为 $p \times p$ 的非奇异矩阵,$\boldsymbol{c} \in \mathbb{R}^p$ 为常数向量. 证明:
		\begin{equation}
			\boldsymbol{Y} = \mathcal{A} \, \boldsymbol{X} + \boldsymbol{c} \sim N_p \left( \mathcal{A} \, \boldsymbol{\mu} + \boldsymbol{c} ,\, 
				\mathcal{A} \, \mathnormal{\Sigma} \mathcal{A}^{\rm T} \right) \,.
		\end{equation}

		{\color{red} \heiti 【证明】} 由于 $\mathcal{A}$ 非奇异,变换 $\boldsymbol{y} = \mathcal{A}\boldsymbol{x}+\boldsymbol{c}$ 是一一映射,其逆变换为 $\boldsymbol{x} = \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c})$。雅可比行列式的绝对值为
\[
\left| \frac{\partial \boldsymbol{x}}{\partial \boldsymbol{y}} \right| = |\det(\mathcal{A}^{-1})| = |\det(\mathcal{A})|^{-1}.
\]
因此 $\boldsymbol{Y}$ 的密度函数为
\[
g(\boldsymbol{y}) = f\!\left( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) \right) \cdot |\det(\mathcal{A})|^{-1}.
\]

代入 $f$ 的表达式:
\[
g(\boldsymbol{y}) = |2\pi\Sigma|^{-1/2} \exp\left\{ -\frac12 \bigl( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} \bigr)^{\mathsf T} \Sigma^{-1} \bigl( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} \bigr) \right\} \cdot |\det(\mathcal{A})|^{-1}.
\]

将指数中的括号合并:
\[
\mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} = \mathcal{A}^{-1}\bigl( \boldsymbol{y} - \boldsymbol{c} - \mathcal{A}\boldsymbol{\mu} \bigr) = \mathcal{A}^{-1}\bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr).
\]
于是二次型成为
\[
\bigl( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} \bigr)^{\mathsf T} \Sigma^{-1} \bigl( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} \bigr) = \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr)^{\mathsf T} (\mathcal{A}^{-1})^{\mathsf T} \Sigma^{-1} \mathcal{A}^{-1} \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr).
\]

注意到
\[
(\mathcal{A}^{-1})^{\mathsf T} \Sigma^{-1} \mathcal{A}^{-1} = (\mathcal{A} \Sigma \mathcal{A}^{\mathsf T})^{-1},
\]
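这是因为 $(\mathcal{A}^{-1})^{\mathsf T} = (\mathcal{A}^{\mathsf T})^{-1}$,从而 $(\mathcal{A} \Sigma \mathcal{A}^{\mathsf T})^{-1} = (\mathcal{A}^{\mathsf T})^{-1} \Sigma^{-1} \mathcal{A}^{-1} = (\mathcal{A}^{-1})^{\mathsf T} \Sigma^{-1} \mathcal{A}^{-1}$。所以指数部分变为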
\[
-\frac12 \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr)^{\mathsf T} (\mathcal{A}\Sigma\mathcal{A}^{\mathsf T})^{-1} \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr).
\]

常数因子部分:
\[
|2\pi\Sigma|^{-1/2} \cdot |\det(\mathcal{A})|^{-1} = \bigl( (2\pi)^p |\Sigma| \bigr)^{-1/2} |\det(\mathcal{A})|^{-1} = \bigl( (2\pi)^p |\mathcal{A}\Sigma\mathcal{A}^{\mathsf T}| \bigr)^{-1/2},
\]
因为 $|\mathcal{A}\Sigma\mathcal{A}^{\mathsf T}| = |\mathcal{A}| \cdot |\Sigma| \cdot |\mathcal{A}^{\mathsf T}| = |\mathcal{A}|^2 |\Sigma|$,所以 $|\mathcal{A}\Sigma\mathcal{A}^{\mathsf T}|^{1/2} = |\det(\mathcal{A})| \cdot |\Sigma|^{1/2}$,从而
\[
|\det(\mathcal{A})|^{-1} |\Sigma|^{-1/2} = |\mathcal{A}\Sigma\mathcal{A}^{\mathsf T}|^{-1/2}.
\]

因此
\[
g(\boldsymbol{y}) = |2\pi \mathcal{A}\Sigma\mathcal{A}^{\mathsf T}|^{-1/2} \exp\left\{ -\frac12 \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr)^{\mathsf T} (\mathcal{A}\Sigma\mathcal{A}^{\mathsf T})^{-1} \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr) \right\},
\]
这正是 $N_p\bigl( \mathcal{A}\boldsymbol{\mu}+\boldsymbol{c},\; \mathcal{A}\Sigma\mathcal{A}^{\mathsf T} \bigr)$ 的密度函数。$\square$


	\item 考虑矩不存在的 Cauchy 分布,从而中心极限定理 (CLT) 无法应用.
		\begin{enumerate}
			\item {\color{TealBlue} [2 分]} 取三个不同的样本容量 $n$,对来自 Cauchy 分布总体的样本均值 $\overline{\boldsymbol{x}}$ 进行模拟,作直方图以及相应的核密度曲线图.
				{\color{red} \bf  提示:} {\kaishu Cauchy 分布可以通过 rcauchy(n, location = 0, scale = 1) 进行模拟.}

			{\color{red} \heiti 【解】} 样本容量取 $n = 5,\;30,\;100$,重复次数 $N_{\text{sim}} = 5000$。
\begin{figure}[H]
    \centering
    \includegraphics[width=0.8\textwidth]{C:/Users/35297/Documents/Rplot01.png}
    \caption{直方图}
    \label{fig:kde}
\end{figure}
\begin{verbatim}
						
		library(plotly)

set.seed(123)

# Simulation settings: sample sizes n and number of replications B per n.
n_values <- c(5, 30, 100)
B <- 5000

# For each n: B simulated sample means of n i.i.d. Cauchy(0, 1) draws.
mean_1 <- replicate(B, mean(rcauchy(n_values[1], 0, 1)))
mean_2 <- replicate(B, mean(rcauchy(n_values[2], 0, 1)))
mean_3 <- replicate(B, mean(rcauchy(n_values[3], 0, 1)))

# Build one panel: a density-scaled histogram of x with its kernel
# density estimate (KDE) drawn on top.
#   x     - numeric vector of simulated sample means
#   label - legend entry of the histogram trace
#   fill  - bar colour of the histogram
hist_with_kde <- function(x, label, fill) {
  # from/to only set the evaluation grid (the plotted window);
  # the estimate itself is still based on all of x.
  kde <- density(x, from = -20, to = 20)
  plot_ly() %>%
    add_histogram(
      x = x,
      histnorm = "probability density",
      name = label,
      marker = list(color = fill),
      opacity = 0.75
    ) %>%
    add_lines(
      x = kde$x,
      y = kde$y,
      name = paste(label, "(KDE)"),
      line = list(color = "black", width = 1.5),
      inherit = FALSE
    )
}

fig1 <- hist_with_kde(mean_1, "n = 5", "#4C78A8")
fig2 <- hist_with_kde(mean_2, "n = 30", "#F58518")
fig3 <- hist_with_kde(mean_3, "n = 100", "#54A24B")

# Three panels side by side on a common y axis and a common x window.
subplot(
  fig1, fig2, fig3,
  nrows = 1,
  shareY = TRUE
) %>%
  layout(
    title = "Cauchy分布总体下不同样本容量的样本均值分布",
    
    plot_bgcolor = '#e5ecf6',
    
    xaxis = list(title = "样本均值", range = c(-20,20)),
    xaxis2 = list(title = "样本均值", range = c(-20,20)),
    xaxis3 = list(title = "样本均值", range = c(-20,20)),
    
    yaxis = list(title = "Density"),
    
    legend = list(
      x = 0.82,
      y = 1.02,
      font = list(size = 14)
    )
  )
					\end{verbatim}
			\item {\color{TealBlue} [2 分]} 当 $n \rightarrow \infty$ 时,你预期会出现什么情况?

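			{\color{red} \heiti 【解】} 由 Cauchy 分布的特征函数 $\varphi(t) = {\rm e}^{-|t|}$ 可得 $\varphi_{\overline{X}}(t) = \bigl[\varphi(t/n)\bigr]^n = {\rm e}^{-|t|}$,即对任意 $n$,样本均值 $\overline{X}$ 仍服从标准 Cauchy 分布 $C(0,1)$。因此当 $n \rightarrow \infty$ 时,样本均值的分布不随 $n$ 变化,依然具有尖峰和厚尾:它既不会向某个常数集中(大数定律不成立),标准化后也不会趋于正态分布(中心极限定理不适用)。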
		\end{enumerate}
\end{enumerate}
\end{document}