% ------------------------------------------------------------------ Preamble
% Class options: `book' only provides 10pt/11pt/12pt. The former `10.5pt' was
% an unknown option that LaTeX ignored (with an "unused global option"
% warning), so the body was typeset at the 10pt default; that size is now
% stated explicitly and the output is unchanged.
% NOTE(review): if a true 10.5pt body font (Chinese size 5) is wanted, the
% ctex manual describes a `zihao=5' package option -- confirm before enabling.
\documentclass[a4paper, 10pt, twoside, openany]{book}
% Colour support: xcolor is loaded exactly once, with its options, before any
% other package can pull it in implicitly. It supersedes the plain `color'
% package, which was previously loaded as well.
\usepackage[dvipsnames]{xcolor}
\usepackage{amsfonts}
\usepackage{array}
\usepackage{boxedminipage, fancybox}
\usepackage{caption}
\usepackage{ctex}
% datetime supplies \currenttime, used in the homework header.
\usepackage{datetime}
\usepackage{enumerate}
\usepackage{epsfig,graphicx,subfigure}
\usepackage{extarrows}
% fancyhdr replaces the obsolete `fancyheadings' wrapper, which only emitted
% a warning and then loaded fancyhdr itself.
\usepackage{fancyhdr}
% float supplies the [H] placement specifier used by the figure below.
\usepackage{float}
\usepackage{geometry}
\usepackage{listings}
\usepackage{longtable}
\usepackage{makeidx}
\usepackage{mathrsfs}
\usepackage{multirow}
\usepackage{natbib}
\usepackage{pifont}
\usepackage{rotating}
\usepackage{setspace}
\usepackage{shadow}
\usepackage{stmaryrd, amssymb, amsmath}
\usepackage{tabularx}
\usepackage{url}
\usepackage{varioref}
\usepackage{verbatim}
\usepackage{wrapfig}
% hyperref is loaded last: it patches commands of packages such as natbib,
% varioref, float and caption, so those must already be loaded.
\usepackage[colorlinks,linkcolor=blue]{hyperref}
% Page layout and line spacing.
\geometry{left=2.0cm, right=2.0cm, top=2.5cm, bottom=2.5cm}
\linespread{1.5}
% Light grey used as the background of inline code.
\definecolor{mygray}{rgb}{0.85, 0.85, 0.85}
% \codeinline{<code>}: typesets <code> with \lstinline on a grey box.
% NOTE(review): \lstinline is used inside a macro argument here, so special
% characters (e.g. % or #) in <code> may not survive -- verify before use.
\newcommand{\codeinline}[1]{\colorbox{mygray}{\lstinline|#1|}}
%% ----------------------------------------------------------------------------------------------------------------------------------------------------------------------
\title{\Huge \bf 《多元统计分析》课后作业}
\author{\kaishu 姓名:\underline{\quad 你的姓名 \quad} \\[5mm]
\kaishu 学号:\underline{\quad 你的学号 \quad} \\[5mm]
\kaishu 班级:\underline{\quad 统计 23-X 班 \quad} \\[50mm]
\kaishu 中国石油大学(北京)克拉玛依校区文理学院数学与统计系
}
\date{\today}
\begin{document}
% -------------------------------------------- 封面页 --------------------------------------------
\frontmatter
\maketitle
% -------------------------------------------- 作业要求 --------------------------------------------
\chapter{作业要求}
\begin{enumerate}
\item 可以和其他同学讨论作业当中的问题,但应当自己独立完成作业
\item 计算、证明等要有过程,要有主要步骤的说明
\item 请将计算、绘图所用的 R 代码以及生成的结果和图像一并添加在作业文件当中
\item 请使用 \LaTeX 编辑并生成 PDF 格式的文件,第X周作业文件命名方式:学号-姓名-X.pdf
\item 评分标准:每一问得分 $\in \left\{ 2 ,\, 1 ,\, 0 \right\}$
\begin{itemize}
\item 2:~ 按时完成并上交作业,且答案基本正确
\item 1:~ 按时完成并上交作业,且答案部分正确
\item 0:~ 答案完全错误,或者迟交作业(规定时间72小时之后)
\end{itemize}
\item 请将完成的 PDF 格式的作业文件发送至邮箱:xiaolei@cup.edu.cn
\item 每位同学可以有一次迟交作业的机会,但不得晚于规定时间三日之后
\item 第4周作业截止时间:2026年4月10日24:00
\end{enumerate}
\tableofcontents
% -------------------------------------------- 正文部分 --------------------------------------------
\mainmatter
% -------------------------------------------- 第 4 周作业 --------------------------------------------
\chapter{第 4 周作业}
{\kaishu \color{blue} 第 4 周作业截止时间:} 2026年4月10日24:00
{\kaishu \color{blue} 第 4 周作业完成时间:} \today \space \currenttime % 请勿编辑、删除本行!
\begin{enumerate}
\item {\color{TealBlue} [2 分]} 证明
\begin{equation}
f_{\boldsymbol{Y}} (\boldsymbol{y}) = \begin{cases} \dfrac{1}{2} \, y_1 - \dfrac{1}{4} \, y_2 \,, & 0 \leq y_1 \leq 2 ,\, \left| y_2 \right| \leq 1 - \left| 1 - y_1 \right| \,, \\[2mm]
0 \,, & \text{其它} \end{cases}
\end{equation}
是一个概率密度函数.
{\color{red} \heiti 【证明】} 要验证这是一个概率密度函数需验证非负性以及积分等于 \(1\)。
\paragraph{非负性:}
在定义域内,\(y_1 \ge 0\),且 \(|y_2| \le 1 - |1-y_1|\)。分两种情况:
\begin{itemize}
\item 当 \(0 \le y_1 \le 1\) 时,\(|1-y_1| = 1-y_1\),则 \(|y_2| \le y_1\)。此时
\[
\frac12 y_1 - \frac14 y_2 \ge \frac12 y_1 - \frac14 y_1 = \frac14 y_1 \ge 0.
\]
\item 当 \(1 \le y_1 \le 2\) 时,\(|1-y_1| = y_1-1\),则 \(|y_2| \le 2-y_1\)。此时
\[
\frac12 y_1 - \frac14 y_2 \ge \frac12 y_1 - \frac14 (2-y_1) = \frac34 y_1 - \frac12 \ge \frac34\cdot1 - \frac12 = \frac14 > 0.
\]
\end{itemize}
因此 \(f_{\boldsymbol{Y}}(\boldsymbol{y}) \ge 0\) 处处成立。
\paragraph{归一性:}
计算二重积分
\[
I = \iint_{\mathbb{R}^2} f_{\boldsymbol{Y}}(\boldsymbol{y})\,\mathrm{d}y_1\mathrm{d}y_2.
\]
先对 \(y_2\) 积分。对于固定的 \(y_1\),令 \(L = 1 - |1-y_1|\),则 \(y_2\) 从 \(-L\) 到 \(L\),且被积函数关于 \(y_2\) 为线性,奇次项积分为零:
\[
\int_{-L}^{L} \left(\frac12 y_1 - \frac14 y_2\right)\mathrm{d}y_2 = \frac12 y_1 \cdot (2L) = y_1 L.
\]
于是
\[
I = \int_{0}^{2} y_1 \bigl(1 - |1-y_1|\bigr)\,\mathrm{d}y_1.
\]
分段计算:
\[
\begin{aligned}
\int_{0}^{1} y_1 \cdot y_1\,\mathrm{d}y_1 &= \int_{0}^{1} y_1^2\,\mathrm{d}y_1 = \frac13, \\
\int_{1}^{2} y_1 \cdot (2-y_1)\,\mathrm{d}y_1 &= \int_{1}^{2} (2y_1 - y_1^2)\,\mathrm{d}y_1 = \left[ y_1^2 - \frac13 y_1^3 \right]_{1}^{2} \\
&= \left(4 - \frac83\right) - \left(1 - \frac13\right) = \frac43 - \frac23 = \frac23.
\end{aligned}
\]
故 \(I = \frac13 + \frac23 = 1\)。积分值为 \(1\),又具有非负性,因此 \(f_{\boldsymbol{Y}}\) 是一个概率密度函数。\(\square\)
\item 设 $\boldsymbol{X} = \left( X_1 ,\, X_2 \right)^{\rm T}$ 的概率密度函数为
\begin{equation}
f \left( x_1 \,,\, x_2 \right) = \begin{cases} 4 \, x_1 x_2 \, {\rm e}^{-x^2_1} \,, & x_1 > 0 \,,\, 0< x_2 < 1 \,, \\ 0 \,, & \text{其它.} \end{cases}
\end{equation}
\begin{enumerate}
\item {\color{TealBlue} [2 分]} 计算 $\mathbb{E} (\boldsymbol{X})$ 与 $\mathbb{V}{\rm ar} (\boldsymbol{X})$.
{\color{red} \heiti 【解】} 边际密度为:
\[
f_{X_1}(x_1) = \int_0^1 4x_1x_2\mathrm{e}^{-x_1^2}\,\mathrm{d}x_2 = 2x_1\mathrm{e}^{-x_1^2},\quad x_1>0,
\]
\[
f_{X_2}(x_2) = \int_0^\infty 4x_1x_2\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2x_2,\quad 0<x_2<1.
\]
\begin{equation}
f \left( x_1 \,,\, x_2 \right) = f_{X_1}(x_1)\cdot f_{X_2}(x_2)
\end{equation}
对任意 $(x_1, x_2)$ 成立,可见 $X_1$ 与 $X_2$ 相互独立。于是
\[
\begin{aligned}
\mathbb{E}(X_1) &= \int_0^\infty x_1\cdot 2x_1\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2\int_0^\infty x_1^2\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2\cdot\frac{\sqrt\pi}{4} = \frac{\sqrt\pi}{2},\\
\mathbb{E}(X_2) &= \int_0^1 x_2\cdot 2x_2\,\mathrm{d}x_2 = 2\int_0^1 x_2^2\,\mathrm{d}x_2 = \frac23.
\end{aligned}
\]
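由于 $X_1$ 与 $X_2$ 独立,协方差为 $0$;又 $\mathbb{V}{\rm ar}(X_1) = \mathbb{E}(X_1^2) - \bigl[\mathbb{E}(X_1)\bigr]^2 = 1 - \frac{\pi}{4}$,$\mathbb{V}{\rm ar}(X_2) = \mathbb{E}(X_2^2) - \bigl[\mathbb{E}(X_2)\bigr]^2 = \frac12 - \frac49 = \frac{1}{18}$(其中 $\mathbb{E}(X_1^2) = 1$、$\mathbb{E}(X_2^2) = \frac12$ 的积分计算见下文条件方差一问)。故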
\[
\mathbb{E}(\boldsymbol{X}) = \begin{pmatrix} \frac{\sqrt\pi}{2} \\[4pt] \frac23 \end{pmatrix},\qquad
\mathbb{V}\mathrm{ar}(\boldsymbol{X}) = \begin{pmatrix}
1-\frac{\pi}{4} & 0 \\[4pt]
0 & \frac{1}{18}
\end{pmatrix}.
\]
\item {\color{TealBlue} [2 分]} 计算 $\mathbb{E} \left( X_1 \left| X_2 \right. \right)$ 与 $\mathbb{E} \left( X_2 \left| X_1 \right. \right)$.
{\color{red} \heiti 【解】} 由独立性,条件期望等于无条件期望:
\[
\mathbb{E}(X_1\mid X_2) = \mathbb{E}(X_1) = \frac{\sqrt\pi}{2},\qquad
\mathbb{E}(X_2\mid X_1) = \mathbb{E}(X_2) = \frac23.
\]
\item {\color{TealBlue} [2 分]} 计算 $\mathbb{V}{\rm ar} \left( X_1 \left| X_2 \right. \right)$ 与 $\mathbb{V}{\rm ar} \left( X_2 \left| X_1 \right. \right)$.
{\color{red} \heiti 【解】} 由独立性,条件方差等于无条件方差:
\[
\begin{aligned}
\mathbb{E}(X_1^2) &= \int_0^\infty x_1^2\cdot 2x_1\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2\int_0^\infty x_1^3\mathrm{e}^{-x_1^2}\,\mathrm{d}x_1 = 2\cdot\frac12 = 1,\\[6pt]
\mathbb{V}\mathrm{ar}(X_1) &= \mathbb{E}(X_1^2) - \bigl(\mathbb{E}(X_1)\bigr)^2 = 1 - \frac{\pi}{4},
\end{aligned}
\]
\[
\begin{aligned}
\mathbb{E}(X_2^2) &= \int_0^1 x_2^2\cdot 2x_2\,\mathrm{d}x_2 = 2\int_0^1 x_2^3\,\mathrm{d}x_2 = \frac12,\\[6pt]
\mathbb{V}\mathrm{ar}(X_2) &= \mathbb{E}(X_2^2) - \bigl(\mathbb{E}(X_2)\bigr)^2 = \frac12 - \left(\frac23\right)^2 = \frac{1}{18}.
\end{aligned}
\]
\[
\begin{aligned}
\mathbb{V}\mathrm{ar}(X_1\mid X_2) = \mathbb{V}\mathrm{ar}(X_1) = 1 - \frac{\pi}{4},\quad
\mathbb{V}\mathrm{ar}(X_2\mid X_1) = \mathbb{V}\mathrm{ar}(X_2) = \frac{1}{18}.
\end{aligned}
\]
\end{enumerate}
\item {\color{TealBlue} [2 分]} 设 $\boldsymbol{X} = \left( X_1 ,\, X_2 \right)^{\rm T}$ 的概率密度函数为
\begin{equation}
f \left( x_1 \,,\, x_2 \right) = \begin{cases} \dfrac{1}{2 \pi} \,, & 0 < x_1 < 2 \pi \,,\, 0 < x_2 < 1 \,, \\[2mm] 0 \,, & \text{其它.} \end{cases}
\end{equation}
令
\begin{equation}
\begin{cases} U_1 = \left( \sin X_1 \right) \sqrt{-2 \, \ln X_2} \\[2mm] U_2 = \left( \cos X_1 \right) \sqrt{-2 \, \ln X_2} \end{cases}
\end{equation}
求 $\boldsymbol{U} = \left( U_1 ,\, U_2 \right)^{\rm T}$ 的概率密度函数 $g \left( u_1 \,,\, u_2 \right)$.
{\color{red} \heiti 【解】} 由题易知 $X_1$ 与 $X_2$ 独立,$X_1 \sim \mathrm{Uniform}(0,2\pi)$,$X_2 \sim \mathrm{Uniform}(0,1)$。定义中间变量
\[
R = \sqrt{-2\ln X_2},\qquad \Theta = X_1,
\]
则 $U_1 = R\sin\Theta$,$U_2 = R\cos\Theta$。变换 $(X_1,X_2) \mapsto (U_1,U_2)$ 是从 $(0,2\pi)\times(0,1)$ 到 $\mathbb{R}^2 \setminus \{(0,u_2) : u_2 \ge 0\}$ 的一一映射(被去掉的射线是零测集,不影响密度),其逆变换为
\[
X_1 = \Theta = \operatorname{atan2}(U_1,U_2) \bmod 2\pi,\qquad X_2 = \exp\!\left(-\frac{R^2}{2}\right) = \exp\!\left(-\frac{U_1^2+U_2^2}{2}\right).
\]
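计算雅可比行列式。极坐标变换 $(R,\Theta) \mapsto (U_1,U_2)$ 的雅可比行列式绝对值为 $\left|\partial(U_1,U_2)/\partial(R,\Theta)\right| = R$,故其逆变换 $(U_1,U_2) \mapsto (R,\Theta)$ 的雅可比行列式绝对值为 $\left|\partial(R,\Theta)/\partial(U_1,U_2)\right| = 1/R$。再计算 $(R,\Theta) \mapsto (X_1,X_2)$ 的雅可比行列式: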
\[
X_1 = \Theta,\quad X_2 = e^{-R^2/2},
\]
则
\[
\frac{\partial(X_1,X_2)}{\partial(R,\Theta)} = \begin{vmatrix}
0 & 1 \\[2pt]
-Re^{-R^2/2} & 0
\end{vmatrix} = R e^{-R^2/2}.
\]
因此从 $(U_1,U_2)$ 到 $(X_1,X_2)$ 的雅可比行列式绝对值为
\[
\left|\frac{\partial(X_1,X_2)}{\partial(U_1,U_2)}\right| = \left|\frac{\partial(X_1,X_2)}{\partial(R,\Theta)}\right| \cdot \left|\frac{\partial(R,\Theta)}{\partial(U_1,U_2)}\right| = \bigl(R e^{-R^2/2}\bigr) \cdot \frac{1}{R} = e^{-(U_1^2+U_2^2)/2}.
\]
于是,$\boldsymbol{U}$ 的联合密度为
\[
g(u_1,u_2) = f\bigl(x_1(u_1,u_2), x_2(u_1,u_2)\bigr) \cdot e^{-(u_1^2+u_2^2)/2}.
\]
由于在 $0 < x_1 < 2\pi$、$0 < x_2 < 1$ 上 $f(x_1,x_2)=1/(2\pi)$,且变换后 $(u_1,u_2)$ 取遍 $\mathbb{R}^2$(至多相差一个零测集),故
\[
g(u_1,u_2) = \frac{1}{2\pi}\, e^{-(u_1^2+u_2^2)/2},\qquad (u_1,u_2)\in\mathbb{R}^2.
\]
即 $\boldsymbol{U}$ 服从标准二元正态分布 $N_2(\boldsymbol{0},I_2)$。
\item {\color{TealBlue} [2 分]} 设 $\boldsymbol{X} \sim N_p ( \boldsymbol{\mu} ,\, \mathnormal{\Sigma})$,其概率密度函数为
\begin{equation}
f (\boldsymbol{x}) = \left| 2 \pi \mathnormal{\Sigma} \right|^{-1/2} \exp
\left\{ -\dfrac{1}{2} (\boldsymbol{x} - \boldsymbol{\mu})^{\rm T} \mathnormal{\Sigma}^{-1} (\boldsymbol{x} - \boldsymbol{\mu}) \right\} \,.
\end{equation}
若 $\mathcal{A}$ 为 $p \times p$ 的非奇异矩阵,$\boldsymbol{c} \in \mathbb{R}^p$ 为常数向量. 证明:
\begin{equation}
\boldsymbol{Y} = \mathcal{A} \, \boldsymbol{X} + \boldsymbol{c} \sim N_p \left( \mathcal{A} \, \boldsymbol{\mu} + \boldsymbol{c} ,\,
\mathcal{A} \, \mathnormal{\Sigma} \mathcal{A}^{\rm T} \right) \,.
\end{equation}
{\color{red} \heiti 【证明】} 由于 $\mathcal{A}$ 非奇异,变换 $\boldsymbol{y} = \mathcal{A}\boldsymbol{x}+\boldsymbol{c}$ 是一一映射,其逆变换为 $\boldsymbol{x} = \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c})$。雅可比行列式的绝对值为
\[
\left| \frac{\partial \boldsymbol{x}}{\partial \boldsymbol{y}} \right| = |\det(\mathcal{A}^{-1})| = |\det(\mathcal{A})|^{-1}.
\]
因此 $\boldsymbol{Y}$ 的密度函数为
\[
g(\boldsymbol{y}) = f\!\left( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) \right) \cdot |\det(\mathcal{A})|^{-1}.
\]
代入 $f$ 的表达式:
\[
g(\boldsymbol{y}) = |2\pi\Sigma|^{-1/2} \exp\left\{ -\frac12 \bigl( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} \bigr)^{\mathsf T} \Sigma^{-1} \bigl( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} \bigr) \right\} \cdot |\det(\mathcal{A})|^{-1}.
\]
将指数中的括号合并:
\[
\mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} = \mathcal{A}^{-1}\bigl( \boldsymbol{y} - \boldsymbol{c} - \mathcal{A}\boldsymbol{\mu} \bigr) = \mathcal{A}^{-1}\bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr).
\]
于是二次型成为
\[
\bigl( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} \bigr)^{\mathsf T} \Sigma^{-1} \bigl( \mathcal{A}^{-1}(\boldsymbol{y}-\boldsymbol{c}) - \boldsymbol{\mu} \bigr) = \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr)^{\mathsf T} (\mathcal{A}^{-1})^{\mathsf T} \Sigma^{-1} \mathcal{A}^{-1} \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr).
\]
注意到
\[
(\mathcal{A}^{-1})^{\mathsf T} \Sigma^{-1} \mathcal{A}^{-1} = (\mathcal{A} \Sigma \mathcal{A}^{\mathsf T})^{-1},
\]
因为 $(\mathcal{A} \Sigma \mathcal{A}^{\mathsf T})^{-1} = (\mathcal{A}^{\mathsf T})^{-1} \Sigma^{-1} \mathcal{A}^{-1}$。所以指数部分变为
\[
-\frac12 \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr)^{\mathsf T} (\mathcal{A}\Sigma\mathcal{A}^{\mathsf T})^{-1} \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr).
\]
常数因子部分:
\[
|2\pi\Sigma|^{-1/2} \cdot |\det(\mathcal{A})|^{-1} = \bigl( (2\pi)^p |\Sigma| \bigr)^{-1/2} |\det(\mathcal{A})|^{-1} = \bigl( (2\pi)^p |\mathcal{A}\Sigma\mathcal{A}^{\mathsf T}| \bigr)^{-1/2},
\]
因为 $|\mathcal{A}\Sigma\mathcal{A}^{\mathsf T}| = |\mathcal{A}| \cdot |\Sigma| \cdot |\mathcal{A}^{\mathsf T}| = |\mathcal{A}|^2 |\Sigma|$,所以 $|\mathcal{A}\Sigma\mathcal{A}^{\mathsf T}|^{1/2} = |\det(\mathcal{A})| \cdot |\Sigma|^{1/2}$,从而
\[
|\det(\mathcal{A})|^{-1} |\Sigma|^{-1/2} = |\mathcal{A}\Sigma\mathcal{A}^{\mathsf T}|^{-1/2}.
\]
因此
\[
g(\boldsymbol{y}) = |2\pi \mathcal{A}\Sigma\mathcal{A}^{\mathsf T}|^{-1/2} \exp\left\{ -\frac12 \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr)^{\mathsf T} (\mathcal{A}\Sigma\mathcal{A}^{\mathsf T})^{-1} \bigl( \boldsymbol{y} - (\mathcal{A}\boldsymbol{\mu}+\boldsymbol{c}) \bigr) \right\},
\]
这正是 $N_p\bigl( \mathcal{A}\boldsymbol{\mu}+\boldsymbol{c},\; \mathcal{A}\Sigma\mathcal{A}^{\mathsf T} \bigr)$ 的密度函数。$\square$
\item 考虑矩不存在的 Cauchy 分布,从而中心极限定理 (CLT) 无法应用.
\begin{enumerate}
\item {\color{TealBlue} [2 分]} 取三个不同的样本容量 $n$,对来自 Cauchy 分布总体的样本均值 $\overline{\boldsymbol{x}}$ 进行模拟,作直方图以及相应的核密度曲线图.
{\color{red} \bf 提示:} {\kaishu Cauchy 分布可以通过 rcauchy(n, location = 0, scale = 1) 进行模拟.}
{\color{red} \heiti 【解】} 样本容量取 $n = 5,\;30,\;100$,每个样本容量下重复模拟 $B = 5000$ 次(与下方代码中的 \texttt{B} 一致)。
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{C:/Users/35297/Documents/Rplot01.png}
\caption{不同样本容量($n = 5,\,30,\,100$)下 Cauchy 总体样本均值的直方图}
\label{fig:kde}
\end{figure}
\begin{verbatim}
# Simulate sample means of Cauchy(location = 0, scale = 1) samples for three
# sample sizes and draw one histogram per sample size, side by side.
# NOTE(review): only histogram traces are created below; no kernel density
# curve is added -- confirm whether one is still required.
library(plotly)
# Fix the random seed before drawing any samples.
set.seed(123)
# The three sample sizes n.
n_values <- c(5, 30, 100)
# Number of replicated samples (and hence sample means) per sample size.
B <- 5000
# mean_k: B sample means, each computed from n_values[k] Cauchy(0, 1) draws.
mean_1 <- replicate(B, mean(rcauchy(n_values[1], 0, 1)))
mean_2 <- replicate(B, mean(rcauchy(n_values[2], 0, 1)))
mean_3 <- replicate(B, mean(rcauchy(n_values[3], 0, 1)))
# Histogram of the sample means for n = 5; histnorm requests the
# "probability density" normalisation instead of raw counts.
fig1 <- plot_ly(
x = mean_1,
type = "histogram",
histnorm = "probability density",
name = "n = 5",
marker = list(color = "#4C78A8"),
opacity = 0.75
)
# Same histogram settings for n = 30 (different colour).
fig2 <- plot_ly(
x = mean_2,
type = "histogram",
histnorm = "probability density",
name = "n = 30",
marker = list(color = "#F58518"),
opacity = 0.75
)
# Same histogram settings for n = 100 (different colour).
fig3 <- plot_ly(
x = mean_3,
type = "histogram",
histnorm = "probability density",
name = "n = 100",
marker = list(color = "#54A24B"),
opacity = 0.75
)
# Combine the three histograms into one row of panels with shareY = TRUE.
subplot(
fig1, fig2, fig3,
nrows = 1,
shareY = TRUE
) %>%
# Figure-level layout: title, background colour, axis titles, and the
# x-axis range of every panel set to [-20, 20].
layout(
title = "Cauchy分布总体下不同样本容量的样本均值分布",
plot_bgcolor = '#e5ecf6',
xaxis = list(title = "样本均值", range = c(-20,20)),
xaxis2 = list(title = "样本均值", range = c(-20,20)),
xaxis3 = list(title = "样本均值", range = c(-20,20)),
yaxis = list(title = "Density"),
# Legend position and font size.
legend = list(
x = 0.82,
y = 1.02,
font = list(size = 14)
)
)
\end{verbatim}
\item {\color{TealBlue} [2 分]} 当 $n \rightarrow \infty$ 时,你预期会出现什么情况?
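{\color{red} \heiti 【解】} 预期样本均值的分布不会随 $n$ 的增大而集中。事实上,$\mathrm{Cauchy}(0,1)$ 的特征函数为 $\varphi(t) = \mathrm{e}^{-|t|}$,故样本均值 $\overline{X}_n$ 的特征函数为 $\bigl[\varphi(t/n)\bigr]^n = \mathrm{e}^{-|t|}$,即对任意 $n$,样本均值仍服从 $\mathrm{Cauchy}(0,1)$ 分布:依然有尖峰和厚尾,既不依概率收敛到某个常数(大数定律不成立),标准化后也不趋于正态分布(中心极限定理不适用)。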
\end{enumerate}
\end{enumerate}
\end{document}