rm(list = ls(all = TRUE)) # 清空当前环境 graphics.off() # 清空绘图区域 ## ---------------------------- Covariance matrix for the bank data set ---------------------------- library(mclust) data(banknote) str(banknote) head(banknote) S <- cov(banknote[, 2:7]) round(S, digits = 2) library(ggplot2) fig_1 <- ggplot(data = banknote, aes(x = Left, y = Diagonal, color = Status, size = 1.5)) + geom_point() fig_1 fig_2 <- ggplot(data = banknote, aes(x = Right, y = Top, color = Status, size = 1.5)) + geom_point() fig_2 counterfeit <- subset(banknote, Status == "counterfeit") S_f <- cov(counterfeit[, 2:7]) round(S_f, digits = 3) fig_3 <- ggplot(data = counterfeit, aes(x = Left, y = Diagonal, size = 1.5, color = 'orange')) + geom_point() fig_3 fig_4 <- ggplot(data = counterfeit, aes(x = Right, y = Top, size = 1.5, color = 'orange')) + geom_point() fig_4 genuine <- subset(banknote, Status == "genuine") S_g <- cov(genuine[, 2:7]) round(S_g, digits = 3) fig_5 <- ggplot(data = genuine, aes(x = Left, y = Diagonal, size = 1.5)) + geom_point() fig_5 fig_6 <- ggplot(data = genuine, aes(x = Right, y = Top, size = 1.5,)) + geom_point() fig_6 ################################ Correlation matrix ############################### R_g <- cor(genuine[, 2:7]) round(R_g, digits = 2) R_f <- cor(counterfeit[, 2:7]) round(R_f, digits = 2) library(corrgram) str(auto) head(auto) cor(auto$MPG, auto$Weight) library(ggplot2) ggplot(auto, aes(x = Origin, y = MPG)) + geom_boxplot() ggplot(auto, aes(x = MPG, y = Weight, color = Origin, shape = Origin, size = 2)) + geom_point() w <- log((1 + cor(auto$MPG, auto$Weight)) / (1 - cor(auto$MPG, auto$Weight))) / 2 w z <- (w - 0) / sqrt(1/(74-3)) z p_value <- 2 * pnorm(abs(z), 0, 1, lower.tail = FALSE) p_value E_w <- log((1 + (-0.75)) / (1 - (-0.75))) / 2 E_w z1 <- (w - (E_w)) / sqrt(1/(74-3)) z1 p_value <- 2 * pnorm(abs(z1), 0, 1, lower.tail = FALSE) p_value x <- c(230, 125, 200, 109, 181, 99, 55, 107, 165, 97, 105, 98, 150, 115, 85, 71, 97, 120, 0, 82, 192, 100, 150, 103, 181, 80, 85, 111, 189, 90, 120, 93, 172, 95, 110, 86, 170, 125, 130, 78) pullover <- matrix(x, ncol = 4, byrow = TRUE) pullover <- as.data.frame(pullover) colnames(pullover) <- c("X1", "X2", "X3", "X4") pullover ggplot(pullover, aes(x = X4, y = X1, size = 1.5)) + geom_point() r_14 <- cor(pullover$X1, pullover$X4) r_14 w <- 0.5 * (log((1 + r_14)/(1 - r_14))) w w_star <- w - (3 * w + tanh(w)) / (4 * (10 - 1)) w_star z <- (w_star - 0) / (sqrt(1/(10-1))) z p_value <- 2 * pnorm(abs(z), 0, 1, lower.tail = FALSE) p_value ################################ Summary Statistics ############################### pullover pullover.mean <- sapply(pullover, mean) pullover.mean pullover <- as.matrix(pullover) pullover.mean <- as.matrix(pullover.mean) pullover.cov <- (1/10) * t(pullover) %*% pullover - pullover.mean %*% t(pullover.mean) pullover.cov pullover.cov.u <- (10/9) * pullover.cov pullover.cov.u pullover.cov.u2 <- cov(pullover) pullover.cov.u2 pullover.cor <- cor(pullover) round(pullover.cor, digits = 2) pullover A <- matrix(c(0, 0, 1, 10), nrow=1) A %*% pullover.mean S_Y <- A %*% pullover.cov.u2 %*% t(A) S_Y ################################ Linear Model for Two Variables ############################### pullover = as.data.frame(pullover) plot(X1 ~ X2, data = pullover, xlab = "Price (X2)", ylab = "Sales (X1)", pch = 16, cex = 1.5) pullover.lm <- lm(X1 ~ X2, data = pullover) abline(pullover.lm, col = "red", lwd = 2) summary(pullover.lm) ################################ Simple Analysis of Variance ############################### Sales <- c(9, 11, 10, 12, 7, 11, 12, 10, 11, 13, 10, 15, 11, 15, 15, 13, 7, 15, 13, 10, 18, 14, 17, 9, 14, 17, 16, 14, 17, 15) Strategy <- as.factor(c(rep(1, 10), rep(2, 10), rep(3, 10))) Pullover <- data.frame(Sales = Sales, Strategy = Strategy) Pullover library(ggplot2) p1 <- ggplot(data = Pullover) + geom_boxplot(aes(x = Strategy, y = Sales, fill = Strategy)) p1 library(dplyr) group_by(Pullover, Strategy) %>% summarise( n_i = n(), hat_mean_i = mean(Sales), hat_sd_i = sd(Sales) ) sales.lm <- lm(Sales ~ Strategy, data = Pullover) summary(sales.lm) anova(sales.lm) sales.aov <- aov(Sales ~ Strategy, data = Pullover) summary(sales.aov) ################################ Multiple Linear Model ############################### pullover pullover.lm2 <- lm(X1 ~ ., data = pullover) summary(pullover.lm2) ################################ Boston Housing ############################### library(MASS) str(Boston) head(Boston) ?Boston Boston_mean <- sapply(Boston, mean) Boston_median <- sapply(Boston, median) Boston_var <- sapply(Boston, var) Boston_sd <- sapply(Boston, sd) Boston_descriptive <- tibble(Mean = Boston_mean, Median = Boston_median, Variance = Boston_var, Std = Boston_sd) Boston_descriptive S <- cov(Boston) round(S, digits = 2) R <- cor(Boston) round(R, digits = 2) R_14 <- R[14, 1:13] W_14 <- 0.5 * log((1 + R_14) / (1 - R_14)) ## Fisher's Z-transformation W_14 Z_14 <- (W_14 - 0) / sqrt(1/(dim(Boston)[1]-3)) ## Z statistics Z_14 p_value <- 2 * pnorm(abs(Z_14), lower.tail = FALSE) ## testing H_0: r = 0 p_value Boston.lm <- lm(medv ~ ., data = Boston) summary(Boston.lm)