# clear variables and close windows rm(list = ls(all = TRUE)) graphics.off() options(digits = 3) ###################################### Car Marks 数据 ###################################### # load data 载入数据 load("~/Desktop/2026_Multivariate Statistical Analysis/R_Code/Data/carmean2.rda") (x = carmean2) ###################################### 先作主成分分析 NPCA.car = princomp(x, cor = TRUE, scores = TRUE, fix_sign = TRUE) summary(NPCA.car) # 主成分的标准差 NPCA.car$sdev # 主成分在原变量的载荷(权重) NPCA.car$loadings # 碎石图 screeplot(NPCA.car, ylim = c(0, 6), main = 'Scree Plot of NPCA for Car Marks') # 观测值在各主成分的投影 NPCA.car$scores # 各变量与第一主成分的相关系数 cor.x_NPC1 = NPCA.car$sdev[1] * NPCA.car$loadings[, 1] # 各变量与第二主成分的相关系数 cor.x_NPC2 = NPCA.car$sdev[2] * NPCA.car$loadings[, 2] # 各变量方差由前两个主成分解释的比例 NPC2.prop = cor.x_NPC1^2 + cor.x_NPC2^2 cbind(NPC1 = cor.x_NPC1, NPC2 = cor.x_NPC2, Sum_Squares = NPC2.prop) # 变量投影在前两个主成分上的散点图 graphics.off() ucircle = cbind(cos((0:360)/180 * pi), sin((0:360)/180 * pi)) plot(ucircle, type = "l", lty = "solid", col = "blue", xlab = "First PC", ylab = "Second PC", main = "Carmeans", cex.lab = 1.2, cex.axis = 1.2, cex.main = 1.8, lwd = 2, asp = 1) abline(h = 0, v = 0) # label = c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8") label = names(cor.x_NPC1) text(-cor.x_NPC1, cor.x_NPC2, label, cex = 1) # 观测值在前两个主成分的散点图 graphics.off() plot(cbind(NPCA.car$scores[, 1], NPCA.car$scores[, 2]), type = "n", xlab = "PC 1", ylab = "PC 2", main = "First NPC vs. Second NPC", cex.lab = 1.2, cex.axis = 1.2, cex.main = 1.6) abline(h = 0, v = 0, lty = 2) label = 1:NPCA.car$n.obs text(NPCA.car$scores[, 1], NPCA.car$scores[, 2], label) ###################################### 再来确定因子数 # correlation matrix 计算相关矩阵 (r = cor(x)) # 将相关矩阵主对角元素为零,记为 m m = r for (i in 1:ncol(r)) { m[i, i] = r[i, i] - 1 } m # 计算 psi_{jj} psi = matrix(0, 8, 8) for (i in 1:8) { psi[i, i] = 1 - max(abs(m[, i])) } psi # 对 R - Psi 作谱分解 spectral decomposition eig = eigen(r - psi) # 特征值: 前两个特征值为正,且与其它相比较大,所以取 k = 2 个因子即可 ee = eig$values ee ee = eig$values[1:2] # 取前两个大的特征值 ee # 特征向量 vv = eig$vectors vv vv = eig$vectors[, 1:2] # 前两个特征向量 vv = t(t(vv[, 1:2]) * sign(vv[2, 1:2])) # 调整特征向量的符号 q1 = sqrt(ee[1]) * vv[, 1] # 变量在第一个主方向上的投影 q2 = sqrt(ee[2]) * vv[, 2] # 变量在第二个主方向上的投影 (q = cbind(q1, q2)) # plot 变量在前两个因子上的投影散点图 plot(q, type = "n", xlab = "First Factor", ylab = "Second Factor", main = "Car Marks Data", xlim = c(-1.2, 1.2), ylim = c(-0.4, 0.9), cex.lab = 1.4, cex.axis = 1.4, cex.main = 1.8) text(q, colnames(x), cex = 1.2, xpd = NA) abline(v = 0) abline(h = 0) ###################################### Consumer-Preference Study ###################################### rm(list = ls(all = TRUE)) graphics.off() x = c(1.00, 0.02, 0.96, 0.42, 0.01, 0.02, 1.00, 0.13, 0.71, 0.85, 0.96, 0.13, 1.00, 0.50, 0.11, 0.42, 0.71, 0.50, 1.00, 0.79, 0.01, 0.85, 0.11, 0.79, 1.00) R = matrix(x, nrow = 5, byrow = TRUE) R R.eig = eigen(R) # 谱分解 R.eig$values # 特征值 sum(R.eig$values[1:2]) / 5 # 两个公共因子的累积方差占总方差的比例 # 公共因子的载荷 (k=2) load.F = R.eig$vectors %*% diag(sqrt(R.eig$values)) round(load.F[, 1:2], digits = 2) # 公共因子方差 (k=2) h = load.F^2 h = h[, 1] + h[, 2] round(h, digits = 2) # 特殊因子方差 (k=2) Psi = 1 - h round(Psi, digits = 2) # 放在一个数据框中 FA = cbind(Load_F1 = load.F[, 1], Load_F2 = load.F[, 2], Communalities = h, S_Variances = Psi) round(FA, digits = 2) # 验证估计的效果 load.F[, 1:2] %*% t(load.F[, 1:2]) + diag(Psi) ###################################### Boston Housing 数据集的因子分析 ###################################### # clear variables and close windows rm(list = ls(all = TRUE)) library(MASS) data = Boston head(data) # transform data 变量数据的变换 xt = data xt[, 1] = log(data[, 1]) xt[, 2] = data[, 2]/10 xt[, 3] = log(data[, 3]) xt[, 5] = log(data[, 5]) xt[, 6] = log(data[, 6]) xt[, 7] = (data[, 7]^(2.5))/10000 xt[, 8] = log(data[, 8]) xt[, 9] = log(data[, 9]) xt[, 10] = log(data[, 10]) xt[, 11] = exp(0.4 * data[, 11])/1000 xt[, 12] = data[, 12]/100 xt[, 13] = sqrt(data[, 13]) xt[, 14] = log(data[, 14]) data = xt[, -4] # 剔除变量 X_4 round(head(data), digits = 4) # rename variables 重新命名变量 colnames(data) = c("X1", "X2", "X3", "X5", "X6", "X7", "X8", "X9", "X10", "X11", "X12", "X13", "X14") round(head(data), digits = 4) # standardize variables 变量标准化 da = scale(data) round(head(da), digits = 4) # correlation matrix 计算相关矩阵 dat = cor(da) round(dat, digits = 4) # R 中的因子分析 ?factanal # Maximum Likelihood Factor Analysis without varimax rotation: factanal 极大似然因子分析,未作 varimax 旋转 mlm = factanal(da, 3, rotation = "none", covmat = dat) str(mlm) # estimated factor loadings 因子载荷的估计值 load = mlm$loadings load # the estimated factor loadings matrix 因子载荷矩阵 ld = cbind(load[, 1], load[, 2], load[, 3]) round(ld, digits = 4) # communalities are calculated 计算公共因子方差 com = diag(ld %*% t(ld)) round(com, digits = 4) # specific variances are calculated 计算特殊因子方差 psi = diag(dat) - diag(ld %*% t(ld)) round(psi, digits = 4) # 因子载荷、公共因子方差、特殊因子方差的矩阵 tbl = cbind(L_F1 = load[, 1], L_F2 = load[, 2], L_F3 = load[, 3], com, psi) round(tbl, digits = 4) # plot first factor against second 变量在因子 1 和因子 2 平面上的散点图 graphics.off() plot(load[, 1], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors21 - theta21", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1.0, 1.0), ylim = c(-0.6, 0.6), asp = 1) text(load[, 1], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) # plot first factor against third 变量在因子 1 和因子 3 平面上的散点图 graphics.off() plot(load[, 1], load[, 3], type = "n", xlab = "x", ylab = "y", main = "Factors31 - theta31", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1.0, 1.0), ylim = c(-0.6, 0.6), asp = 1) text(load[, 1], load[, 3], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) # plot second factor against third 变量在因子 2 和因子 3 平面上的散点图 graphics.off() plot(load[, 3], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors23 - theta23", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1.0, 1.0), ylim = c(-0.6, 0.6), asp = 1) text(load[, 3], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) # 三张图放在一起 graphics.off() par(mfcol = c(2, 2)) plot(load[, 1], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors21 - theta21", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1.0, 1.0), ylim = c(-0.6, 0.6), asp = 1) text(load[, 1], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) plot(load[, 1], load[, 3], type = "n", xlab = "x", ylab = "y", main = "Factors31 - theta31", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1.0, 1.0), ylim = c(-0.6, 0.6), asp = 1) text(load[, 1], load[, 3], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) plot(load[, 3], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors23 - theta23", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1.0, 1.0), ylim = c(-0.6, 0.6), asp = 1) text(load[, 3], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) # R 中的因子旋转 ?varimax # Maximum Likelihood Factor Analysis after varimax rotation 极大似然方法的因子分析,因子旋转之后 # rotates the factor loadings matrix 因子载荷矩阵的旋转 var = varimax(ld) str(var) # estimated factor loadings after varimax 旋转后的因子载荷 load = var$loadings round(load, digits = 4) # 旋转后的因子载荷矩阵 vl = cbind(load[, 1], load[, 2], load[, 3]) round(vl, digits = 4) # 旋转矩阵 rm = var$rotmat round(rm, digits = 4) # communalities are calculated 计算公共因子方差 com = diag(vl %*% t(vl)) round(com, digits = 4) # specific variances are calculated 计算特殊因子方差 psi = diag(dat) - diag(vl %*% t(vl)) round(psi, digits = 4) # 旋转后的因子载荷、公共因子方差、特殊因子方差的矩阵 tbl = cbind(RL_F1 = load[, 1], RL_F2 = load[, 2], RL_F3 = load[, 3], com, psi) round(tbl, digits = 4) graphics.off() par(mfcol = c(2, 2)) # plot first factor against second 旋转后变量在第一、第二公共因子的散点图 plot(load[, 1], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors21 - theta21", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1, 1), ylim = c(-1, 1), asp = 1) text(load[, 1], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0) # plot first factor against third 旋转后变量在第一、第三公共因子的散点图 plot(load[, 1], load[, 3], type = "n", xlab = "x", ylab = "y", main = "Factors31 - theta31", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1, 1), ylim = c(-1, 1), asp = 1) text(load[, 1], load[, 3], colnames(data), cex = 1.1) abline(h = 0, v = 0) # plot second factor against third 旋转后变量在第二、第三公共因子的散点图 plot(load[, 3], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors23 - theta23", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1, 1), ylim = c(-1, 1), asp = 1) text(load[, 3], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0) #### Principal Component Method after varimax rotation spectral decomposition 主成分方法的因子分析,旋转后 # 相关矩阵的谱分解 e = eigen(dat) # 特征值 eigval.all = e$values # 全部特征值 round(eigval.all, digits = 4) eigval = e$values[1:3] # 前三个最大特征值 round(eigval, digits = 4) # 特征向量 eigvec.all = e$vectors # 全部特征向量 round(eigvec.all, digits = 4) eigvec = e$vectors[, 1:3] # 前三个特征向量 round(eigvec, digits = 4) # the estimated factor loadings matrix 计算因子载荷矩阵的估计值 Q = eigvec %*% sqrt(diag(eigval)) round(Q, digits = 4) # rotates the factor loadings matrix 因子载荷矩阵的旋转 pcm = varimax(Q) load = pcm$loadings # estimated factor loadings after varimax 旋转后的因子载荷 ld = cbind(load[, 1], load[, 2], load[, 3]) # 旋转后的因子载荷矩阵 round(ld, digits = 4) rm = pcm$rotmat # 旋转矩阵 round(rm, digits = 4) # communalities are calculated 计算公共因子方差 com = diag(ld %*% t(ld)) names(com) = row.names(dat) round(com, digits = 4) # specific variances are calculated 计算特殊因子方差 psi = diag(dat) - diag(ld %*% t(ld)) round(psi, digits = 4) # 旋转后的因子载荷、公共因子方差、特殊因子方差的矩阵 tbl = cbind(LF1 = load[, 1], LF2 = load[, 2], LF3 = load[, 3], com, psi) round(tbl, digits = 4) graphics.off() par(mfcol = c(2, 2)) # plot first factor against second 旋转后变量在第一、第二公共因子的散点图 plot(load[, 1], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors21 - theta21", xlim = c(-1, 1), font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, ylim = c(-1, 1), asp = 1) text(load[, 1], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) # plot first factor against third 旋转后变量在第一、第三公共因子的散点图 plot(load[, 1], load[, 3], type = "n", xlab = "x", ylab = "y", main = "Factors31 - theta31", xlim = c(-1, 1), font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, ylim = c(-1, 1), asp = 1) text(load[, 1], load[, 3], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) # plot second factor against third 旋转后变量在第二、第三公共因子的散点图 plot(load[, 3], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors23 - theta23", xlim = c(-1, 1), font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, ylim = c(-1, 1), asp = 1) text(load[, 3], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) #### Principal Factor Method after varimax rotation inverse of the correlation matrix #### 主因子方法的因子分析,旋转后,从相关矩阵的逆矩阵出发 f = solve(dat) # 相关矩阵的逆矩阵 # preliminary estimate of psi - (psi 的) 初始值 psiini = diag(1/f[row(f) == col(f)]) psi = psiini round(psi, digits = 4) # 迭代求解因子载荷矩阵 for (i in 1:10) { ee = eigen(dat - psi) # R - Psi 的谱分解 eigval = ee$values[1:3] # 取前三个特征值 eigvec = ee$vectors[, 1:3] # 取前三个特征向量 EE = matrix(eigval, nrow(dat), ncol = 3, byrow = T) QQ = sqrt(EE) * eigvec # 计算载荷矩阵 psiold = psi psi = diag(as.vector(1 - t(colSums(t(QQ * QQ))))) # 利用载荷矩阵计算 psi 的估计值 i = i + 1 z = psi - psiold convergence = z[row(z) == col(z)] } round(QQ, digits = 4) # 未旋转的因子载荷矩阵 # rotates the factor loadings matrix 因子载荷矩阵的旋转 pfm = varimax(QQ) # estimated factor loadings after varimax 旋转后的因子载荷矩阵 load = pfm$loadings ld = cbind(load[, 1], load[, 2], load[, 3]) round(ld, digits = 4) # communalities are calculated 计算公共因子方差 com = diag(ld %*% t(ld)) round(com, digits = 4) # specific variances are calculated 计算特殊因子方差 psi = diag(dat) - diag(ld %*% t(ld)) round(psi, digits = 4) # 旋转后的因子载荷、公共因子方差、特殊因子方差的矩阵 tbl = cbind(L_P1 = load[, 1], L_P2 = load[, 2], L_P3 = load[, 3], com, psi) round(tbl, digits = 4) graphics.off() par(mfcol = c(2, 2)) # plot first factor against second 旋转后变量在第一、第二公共因子的散点图 plot(load[, 1], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors21 - theta21", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1, 1), ylim = c(-1, 1), asp = 1) text(load[, 1], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) # plot first factor against third 旋转后变量在第一、第三公共因子的散点图 plot(load[, 1], load[, 3], type = "n", xlab = "x", ylab = "y", main = "Factors31 - theta31", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1, 1), ylim = c(-1, 1), asp = 1) text(load[, 1], load[, 3], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) # plot second factor against third 旋转后变量在第二、第三公共因子的散点图 plot(load[, 3], load[, 2], type = "n", xlab = "x", ylab = "y", main = "Factors23 - theta24", font.main = 1, cex.lab = 1.1, cex.axis = 1.1, cex.main = 1.4, xlim = c(-1, 1), ylim = c(-1, 1), asp = 1) text(load[, 3], load[, 2], colnames(data), cex = 1.1) abline(h = 0, v = 0, lty = 2) ######################################## Ability and Intelligence Tests 主成分分析 # clear variables and close windows rm(list = ls(all = TRUE)) graphics.off() ?ability.cov ability.cov options(digits = 2) # 利用函数 cov2cor 计算相关矩阵 x = ability.cov$cov # 读入协方差矩阵数据 cor_x = cov2cor(x) # 利用协方差矩阵计算相关矩阵 cor_x library(psych) ?fa.parallel # fa.parallel 的帮助文件: 数据或相关矩阵的碎石图 graphics.off() fa.parallel(cor_x, n.obs = 112, fa = "both", n.iter = 20, show.legend = FALSE, main = "Scree plots with parallel analysis") ?fa # fa 函数的帮助文件:因子分析 fa_x = fa(cor_x, nfactors = 2, n.obs = 112, rotate = "none", fm = "pa", scores = "regression" ) fa_x # 正交旋转 fa_x.varimax = fa(cor_x, nfactors = 2, n.obs = 112, rotate = "varimax", fm = "pa", scores = "regression" ) fa_x.varimax # 正交旋转效果图 graphics.off() par(mfrow = c(1, 2)) factor.plot(fa_x.varimax, labels = rownames(fa_x.varimax$loadings)) fa.diagram(fa_x.varimax, simple = TRUE)