统计与R讲座的演示程序

上传人:沈*** 文档编号:82490961 上传时间:2022-04-29 格式:DOC 页数:20 大小:156.02KB
返回 下载 相关 举报
统计与R讲座的演示程序_第1页
第1页 / 共20页
统计与R讲座的演示程序_第2页
第2页 / 共20页
统计与R讲座的演示程序_第3页
第3页 / 共20页
点击查看更多>>
资源描述
# 统计与R讲座的演示程序。# 演示用的数据 # 19个学生的身高体重数据。d.class -structure(list(name = structure(c(2L, 3L, 5L, 10L, 11L, 12L, 15L, 16L, 17L, 1L, 4L, 6L, 7L, 8L, 9L, 13L, 14L, 18L, 19L), .Label = c(Alfred, Alice, Becka, Duke, Gail, Guido, James, Jeffrey, John, Karen, Kathy, Mary, Philip, Robert, Sandy, Sharon, Tammy, Thomas, William), class = factor), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c(F, M), class = factor), age = c(13L, 13L, 14L, 12L, 12L, 15L, 11L, 15L, 14L, 14L, 14L, 15L, 12L, 13L, 12L, 16L, 12L, 11L, 15L), height = c(56.5, 65.3, 64.3, 56.3, 59.8, 66.5, 51.3, 62.5, 62.8, 69, 63.5, 67, 57.3, 62.5, 59, 72, 64.8, 57.5, 66.5), weight = c(84, 98, 90, 77, 84.5, 112, 50.5, 112.5, 102.5, 112.5, 102.5, 133, 83, 84, 99.5, 150, 128, 85, 112), .Names = c(name, sex, age, height, weight), class = data.frame, row.names = c(NA, -19L)# 100个学生的语文、数学、英语、物理、化学、生物成绩。主成份和因子分析。d.scores -structure(list(学号 = c(225040819, 210050718, 226170534, 226191045, 229120703, 225102012, 210060331, 226090835, 225180415, 225072225, 228192837, 228160809, 226071147, 226191019, 228032508, 205093429, 228121617, 210070122, 226190523, 205154816, 205021825, 228221122, 203050439, 205253820, 228211614, 225052428, 210070332, 203102326, 228200918, 226020307, 205134827, 226121101, 229120707, 205134418, 225092127, 228171508, 221060722, 226080741, 229080931, 228020803, 205034506, 203041406, 226080420, 205063410, 225172323, 225040827, 205134308, 203131832, 205173806, 226190443, 228212122, 226141416, 203020227, 228051109, 205053908, 228070939, 210111213, 229081026, 203021502, 226171716, 225080604, 226081301, 205241919, 203010233, 210070218, 228122620, 225122710, 225051018, 226081020, 226020322, 203071407, 225031614, 203060719, 228182104, 221020221, 205172627, 229091404, 228072710, 205011714, 226191139, 226151322, 205172106, 203060550, 203161626, 203061014, 205233330, 203211013, 205084213, 203112725, 217040329, 225040817, 203121211, 228032313, 225020208, 203142140, 226020335, 205223328, 210091321, 225102218, 225071708), 语文 = c(97, 88, 93, 81, 89, 98, 82, 97, 93, 84, 84, 105, 92, 88, 85, 75, 87, 77, 94, 83, 92, 103, 84, 88, 94, 79, 80, 92, 87, 102, 73, 85, 75, 87, 79, 103, 80, 86, 80, 107, 76, 93, 79, 83, 94, 93, 84, 83, 83, 81, 81, 100, 94, 88, 79, 99, 72, 98, 73, 89, 96, 98, 92, 98, 93, 91, 67, 95, 92, 88, 91, 84, 103, 91, 87, 98, 82, 92, 96, 94, 91, 87, 98, 0, 89, 77, 96, 83, 38, 78, 102, 80, 110, 96, 77, 103, 94, 70, 74, 80), 数学 = c(81, 31, 96, 95, 75, 76, 40, 107, 98, 62, 25, 90, 116, 102, 0, 79, 90, 57, 116, 50, 111, 105, 102, 96, 80, 28, 45, 69, 87, 100, 58, 90, 101, 45, 44, 98, 5, 116, 65, 107, 56, 66, 98, 77, 42, 93, 60, 82, 55, 107, 86, 93, 102, 102, 80, 96, 15, 40, 88, 73, 114, 104, 81, 112, 55, 76, 28, 89, 103, 109, 56, 76, 70, 89, 52, 91, 20, 77, 68, 91, 98, 96, 82, 0, 85, 68, 105, 59, 0, 52, 89, 106, 0, 110, 58, 118, 77, 15, 65, 72), 英语 = c(78.5, 33, 70.5, 74, 60.5, 65.5, 61, 71, 91, 72, 52.5, 82.5, 78, 65, 0, 47.5, 63.5, 41, 70.5, 52, 74, 90.5, 81, 49.5, 91.5, 48, 63.5, 38.5, 82, 82, 71.5, 80, 55.5, 42, 62.5, 71, 46, 94.5, 55.5, 101, 53.5, 72.5, 74, 59.5, 64.5, 79, 37, 58, 43, 70, 58, 71, 97, 84, 44, 95, 28.5, 59, 48, 64.5, 94, 62.5, 72, 90.5, 73.5, 66, 58.5, 85, 79, 71.5, 64.5, 70, 85, 82.5, 79, 70, 57.5, 68.5, 81, 64.5, 75.5, 57, 94.5, 0, 48.5, 29, 56.5, 57, 60.5, 51.5, 82, 57, 0, 90.5, 52, 79.5, 75.5, 28.5, 49, 82), 物理 = c(49, 34, 33, 3, 40, 50, 51, 22, 22, 49, 15, 43, 11, 23, 21, 89, 36, 61, 20, 44, 15, 58, 20, 46, 36, 36, 39, 59, 17, 21, 57, 34, 36, 43, 45, 42, 28, 29, 61, 29, 19, 67, 34, 48, 58, 32, 66, 24, 27, 17, 40, 0, 9, 27, 26, 14, 15, 56, 72, 82, 39, 23, 44, 19, 53, 30, 34, 52, 38, 65, 57, 26, 54, 53, 15, 17, 17, 61, 6, 21, 36, 43, 49, 21, 56, 49, 50, 63, 26, 43, 46, 9, 39, 20, 29, 16, 67, 33, 43, 14), 化学 = c(56, 24, 57, 18, 17, 13, 51, 19, 22, 57, 14, 49, 32, 20, 24, 92, 46, 56, 28, 33, 31, 68, 12, 62, 70, 39, 18, 69, 8, 25, 50, 20, 18, 48, 49, 40, 32, 47, 84, 68, 16, 60, 53, 47, 47, 25, 32, 35, 22, 22, 32, 0, 25, 39, 59, 31, 18, 56, 59, 85, 24, 26, 22, 28, 74, 35, 28, 39, 55, 67, 63, 31, 53, 75, 23, 42, 40, 49, 16, 44, 41, 52, 71, 19, 18, 72, 50, 87, 14, 81, 40, 14, 9, 63, 35, 28, 80, 22, 46, 8), 生物 = c(42, 28, 59, 16, 42, 43, 59, 44, 44, 60, 9, 60, 26, 7, 7, 84, 54, 46, 41, 63, 35, 58, 4, 32, 66, 53, 44, 61, 31, 30, 60, 20, 27, 67, 46, 80, 20, 46, 63, 44, 22, 64, 51, 60, 70, 30, 65, 47, 44, 29, 59, 0, 27, 51, 71, 65, 27, 21, 65, 79, 29, 17, 35, 26, 56, 40, 22, 58, 61, 67, 57, 21, 51, 46, 11, 48, 36, 56, 17, 52, 64, 51, 72, 30, 54, 36, 61, 60, 42, 69, 67, 27, 34, 44, 49, 22, 69, 33, 60, 11), .Names = c(学号, 语文, 数学, 英语, 物理, 化学, 生物), row.names = c(225040819, 210050718, 226170534, 226191045, 229120703, 225102012, 210060331, 226090835, 225180415, 225072225, 228192837, 228160809, 226071147, 226191019, 228032508, 205093429, 228121617, 210070122, 226190523, 205154816, 205021825, 228221122, 203050439, 205253820, 228211614, 225052428, 210070332, 203102326, 228200918, 226020307, 205134827, 226121101, 229120707, 205134418, 225092127, 228171508, 221060722, 226080741, 229080931, 228020803, 205034506, 203041406, 226080420, 205063410, 225172323, 225040827, 205134308, 203131832, 205173806, 226190443, 228212122, 226141416, 203020227, 228051109, 205053908, 228070939, 210111213, 229081026, 203021502, 226171716, 225080604, 226081301, 205241919, 203010233, 210070218, 228122620, 225122710, 225051018, 226081020, 226020322, 203071407, 225031614, 203060719, 228182104, 221020221, 205172627, 229091404, 228072710, 205011714, 226191139, 226151322, 205172106, 203060550, 203161626, 203061014, 205233330, 203211013, 205084213, 203112725, 217040329, 225040817, 203121211, 228032313, 225020208, 203142140, 226020335, 205223328, 210091321, 225102218, 225071708), class = data.frame)# 癌症病人康复的logistic回归。d.remiss -structure(list(remiss = c(1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L), cell = c(0.8, 0.9, 0.8, 1, 0.9, 1, 0.95, 0.95, 1, 0.95, 0.85, 0.7, 0.8, 0.2, 1, 1, 0.65, 1, 0.5, 1, 1, 0.9, 1, 0.95, 1, 1, 1), smear = c(0.83, 0.36, 0.88, 0.87, 0.75, 0.65, 0.97, 0.87, 0.45, 0.36, 0.39, 0.76, 0.46, 0.39, 0.9, 0.84, 0.42, 0.75, 0.44, 0.63, 0.33, 0.93, 0.58, 0.32, 0.6, 0.69, 0.73), infil = c(0.66, 0.32, 0.7, 0.87, 0.68, 0.65, 0.92, 0.83, 0.45, 0.34, 0.33, 0.53, 0.37, 0.08, 0.9, 0.84, 0.27, 0.75, 0.22, 0.63, 0.33, 0.84, 0.58, 0.3, 0.6, 0.69, 0.73), li = c(1.9, 1.4, 0.8, 0.7, 1.3, 0.6, 1, 1.9, 0.8, 0.5, 0.7, 1.2, 0.4, 0.8, 1.1, 1.9, 0.5, 1, 0.6, 1.1, 0.4, 0.6, 1, 1.6, 1.7, 0.9, 0.7), blast = c(1.1, 0.74, 0.176, 1.053, 0.519, 0.519, 1.23, 1.354, 0.322, 0, 0.279, 0.146, 0.38, 0.114, 1.037, 2.064, 0.114, 1.322, 0.114, 1.072, 0.176, 1.591, 0.531, 0.886, 0.964, 0.398, 0.398), temp = c(0.996, 0.992, 0.982, 0.986, 0.98, 0.982, 0.992, 1.02, 0.999, 1.038, 0.988, 0.982, 1.006, 0.99, 0.99, 1.02, 1.014, 1.004, 0.99, 0.986, 1.01, 1.02, 1.002, 0.988, 0.99, 0.986, 0.986), .Names = c(remiss, cell, smear, infil, li, blast, temp), class = data.frame, row.names = c(NA, -27L)# 工作性质与工作满意度关系。典型相关分析。d.jobs -structure(list(Career = c(72L, 63L, 96L, 96L, 84L, 66L, 31L, 45L, 42L, 79L, 39L, 54L, 60L, 63L), Supervis = c(26L, 76L, 31L, 98L, 94L, 10L, 40L, 14L, 18L, 74L, 12L, 35L, 75L, 45L), Finance = c(9L, 7L, 7L, 6L, 6L, 5L, 9L, 2L, 6L, 4L, 2L, 3L, 5L, 5L), Variety = c(10L, 85L, 83L, 82L, 36L, 28L, 64L, 19L, 33L, 23L, 37L, 23L, 45L, 22L), Feedback = c(11L, 22L, 63L, 75L, 77L, 24L, 23L, 15L, 13L, 14L, 13L, 74L, 58L, 67L), Autonomy = c(70L, 93L, 73L, 97L, 97L, 75L, 75L, 50L, 70L, 90L, 70L, 53L, 83L, 53L), .Names = c(Career, Supervis, Finance, Variety, Feedback, Autonomy), class = data.frame, row.names = c(NA, -14L)# 北京地区1949-1964年洪水受灾面积与成灾面积数据。时间序列。d.flood - matrix(c(1949 , 1 , 331.12 , 243.96 ,1950 , 2 , 380.44 , 293.90 ,1951 , 3 , 59.63 , 59.63,1952 , 4 , 37.89 , 18.09,1953 , 5 , 103.66 ,72.92,1954 , 6 , 316.67 , 244.57,1955 , 7 , 208.72 , 155.77,1956 , 8 , 288.79 , 255.22,1957 , 9 , 25.00 , 0.50,1958 , 10 , 84.72 , 48.59,1959 , 11 , 260.89 ,202.96,1960, 12 , 27.18 ,15.02,1961, 13 , 20.74 ,17.09,1962 , 14 , 52.99 ,14.66,1963 , 15 , 99.25 , 45.61,1964 , 16 , 55.36 ,41.90), byrow=T, ncol=4, dimnames=list(1949:1964, c(year, t, area1, area2)# 第一讲 # R初步演示。x1 - 0:100x2 - x1 * 2 * pi / 100y1 - sin(x2)plot(x2, y1, type=l)abline(h=0, lwd=2)abline(v=(0:4)/2*pi, lty=3, col=gray)y2 - cos(x2)lines(x2, y2, lty=2, col=green)demo(graphics)demo(image)cl - read.csv(class.csv, header=TRUE)summary(cl)mean(cl$height)var(cl$height)lm1 - lm(weight height + age + sex, data=cl)print(summary(lm1)lm2 - step(lm1, weight height + age + sex, data=cl)sink(myres.txt, split=TRUE)print(A)print(Ai)sink()# R向量演示。marks - c(10, 6, 4, 7, 8)x - c(1:3, 10:13)x1 - c(1, 2)x2 - c(3, 4)x - c(x1, x2)xx - c(1, 10)x + 2x - 2x * 2x / 2x 22 / x2 xx1 - c(1, 10)x2 - c(4, 2)x1 + x2x1 - x2x1 * x2x1 / x2x1 - c(1, 10)x2 - c(1, 3, 5, 7)x1 + x2x1 * x2sort(c(3, 5, 1)clorder(cl$height),ages - c(李明=30, 张聪=25, 刘颖=28)ages - c(30, 25, 28)names(ages) - c(李明, 张聪, 刘颖)# R矩阵演示。A - matrix(1:6, nrow=3, ncol=2)B - matrix(1,-1, 1,1, nrow=2, ncol=2, byrow=TRUE)C1 - A %*% BC2 - A + 2C3 - A / 2colnames(A) - c(x, y)A,y# R函数演示。fsub - function(x, y=0) cat(x=, x, y=, y, n) x - yfsub(5,3)fsub(5)fsub(x=5, y=3)fsub(y=3, x=5)# R探索性数据分析演示。x - cl$sexprint(x)table(x)table(x)/length(x)barplot(table(x)x - rnorm(30, 10, 2)summary(x)hist(x)boxplot(x)qqnorm(x);qqline(x)x - rexp(30)summary(x)hist(x)boxplot(x)qqnorm(x);qqline(x)# 第二讲 # 用optim求正态MLE的演示。objf - function(theta, x) mu - theta1 s2 - exp(theta2) n - length(x) res - n*log(s2) + 1/s2*sum(x - mu)2) ressim - function(n=30) mu0 - 20 sigma0 - 2 x - rnorm(n, mu0, sigma0) theta0 - c(0,0) ores - optim(theta0, objf, x=x) print(ores) theta - ores$par mu - theta1 sigma , mu0, n) cat(sigma: , sigma, = , sigma0, n)# 用optimize求正态MLE的演示。sim1 - function(n=30) mu0 - 20 sigma0 - 2 x - rnorm(n, mu0, sigma0) mu - mean(x) ss - sum(x - mu)2)/length(x) objf - function(delta,ss) log(delta) + 1/delta*ss ores - optimize(objf, lower=0.0001, upper=1000, ss=ss) delta - ores$minimum sigma , mu0, n) cat(sigma: , sigma, = , sigma0, n)# R置信区间演示。x - rnorm(100,10,3)mu - mean(x)sig - sd(x)p = pnorm(15,mu,sig) - pnorm(5,mu,sig)cat(Prob in 5,15 = , p, n)# 用t.test求正态均值置信区间。x - c(11.67, 9.29, 10.45, 9.01, 12.67, 16.24, 11.64, 7.73, 12.23)t.test(x, conf.level=0.95)# 单总体双侧均值t检验。x - c(490, 506, 508, 502, 498, 511, 510, 515, 512)t.test(x, mu=500, side=two-sided)# 单总体右侧方差卡方检验。x - c(42,65,75,78,59,71,57,68,54,55)n - length(x)chi2 - (n-1)*var(x)/80p - 1-pchisq(chi2,n-1)cat(Chi-squared = , chi2, p-value = , p, n)# 两样本t检验。x - c(20.5, 19.8, 19.7, 20.4, 20.1, 20.0, 19.0, 19.9)y - c(20.7, 19.8, 19.5, 20.8, 20.4, 19.6, 20.2)t.test(x, y)# 方差比较。var.test(x, y)# 成对t检验。x - c(20.5, 18.8, 19.8, 20.9, 21.5, 19.5, 21.0, 21.2)y - c(17.7, 20.3, 20.0, 18.8, 19.0, 20.1, 20.0, 19.1)t.test(x, y, paired=TRUE)# 单总体比例检验。# 12次中5次成功,与40%比较。binom.test(5, 12, 0.4)prop.test(5, 12, 0.4)# 单总体比例检验。# 120次中35次成功,与25%比较。prop.test(35, 120, 0.25)# 两个比例的比较。102中成功23和135中成功25的比例比较。prop.test(c(23,25), c(102,135)# 第三讲 方差分析 # Wilcoxon秩和检验。x - c(20.5, 19.8, 19.7, 20.4, 20.1, 20.0, 19.0, 19.9)y - c(20.7, 19.8, 19.5, 20.8, 20.4, 19.6, 20.2)wilcox.test(x, y)# 符号检验。x - c(20.5, 18.8, 19.8, 20.9, 21.5, 19.5, 21.0, 21.2)y - c(17.7, 20.3, 20.0, 18.8, 19.0, 20.1, 20.0, 19.1)t.test(x, y, paired=TRUE)z 0), sum(z != 0), p=0.5)# Wilcoxon 符号秩检验wilcox.test(x, y, paired=TRUE)# 单总体拟合优度卡方检验。x - c(18, 13, 17, 21, 15, 16)p - rep(1/6, 6)chisq.test(x, p)# 独立性卡方检验tab - matrix(c(60, 3, 32, 11), nrow=2, ncol=2, byrow=TRUE, dimnames=list(c(病人, 健康人), c(吸烟, 不吸烟)chisq.test(tab)# 单因素方差分析A - factor(rep(1:5, each=4)y - c(25.6, 22.2, 28.0, 29.8, 24.4, 30.0, 29.0, 27.5, 25.0, 27.7, 23.0, 32.2, 28.8, 28.0, 31.5, 25.9, 20.6, 21.2, 22.0, 21.2)d - data.frame(A, y)plot(y A, data=d)summary(aov(y A, data=d)tapply(d$y, d$A, mean)# 控制单次比较错误率的多重比较pairwise.t.test(y, A, p.adjust=none)# 控制总错误率的多重比较pairwise.t.test(y, A)# 控制错误发现率的多重比较pairwise.t.test(y, A, p.adjust=fdr)# 用Tukey同时置信区间做多重比较。TukeyHSD(aov(y A, data=d)# Bartlett方差齐性检验bartlett.test(y A, data=d)# Levene方差齐性检验require(car)leveneTest(y A, data=d)# 方差不相等情形下单因素方差分析的Welch检验oneway.test(y A, data=d)# 非参数单因素方差分析的Kruskal-Wallis检验kruskal.test(y A, data=d)# 两因素方差分析rats - data.frame(y = c(0.31, 0.45, 0.46, 0.43, # (1,1) 0.82, 1.10, 0.88, 0.72, # (1,2) 0.43, 0.45, 0.63, 0.76, # (1,3) 0.45, 0.71, 0.66, 0.62, # (1,4) 0.36, 0.29, 0.40, 0.23, # (2,1) 0.92, 0.61, 0.49, 1.24, # (2,2) 0.44, 0.35, 0.31, 0.40, # (2,3) 0.56, 1.02, 0.71, 0.38, # (2,4) 0.22, 0.21, 0.18, 0.23, # (3,1) 0.30, 0.37, 0.38, 0.29, # (3,2) 0.23, 0.25, 0.24, 0.22, # (3,3) 0.30, 0.36, 0.31, 0.33), # (3,4)Toxicant=factor(rep(1:3, each=4*4),Cure=factor(rep(rep(1:4, each=4), 3)# 比较各组的盒形图opar - par(mfrow=c(1,2)plot(y Toxicant + Cure, data=rats)par(opar)# 带交互作用的两因素方差分析res - aov(y Toxicant + Cure + Toxicant:Cure, data=rats)summary(res)# 无交互作用的两因素方差分析res2 - aov(y Toxicant + Cure, data=rats)summary(res2)tapply(rats$y, rats$Toxicant, mean)tapply(rats$y, rats$Cure, mean)# 协方差分析, 用HH包。require(HH)ancova(y A + x, data=d)# 协方差分析,用lm函数。anova(lm(y A + x, data=d) # 三个三水平因素的正交设计。烟灰砖试验d - data.frame(A = factor(rep(1:3, each=3),B = factor(rep(1:3, 3),C = factor(c(1,2,3, 2,3,1, 3,1,2),y = c(16.9, 19.1, 16.7, 19.8, 23.7, 19.0, 25.0, 20.4, 23.1)# 画图比较opar - par(mfrow=c(1,3)plot(y A + B + C, data=d)par(opar)# 方差分析summary(aov(y A + B + C, data=d)# 第四讲 统计模型 # 相关与回归 # 生成线性回归模拟数据nsamp - 30x - runif(nsamp, -10, 10)y - 20 + 0.5*x + rnorm(nsamp,0,0.5)plot(x, y)# 生成二次曲线回归模拟数据y2 - 0.5*x2 + rnorm(nsamp,0,2)plot(x, y2)# 生成指数函数回归模拟数据y3 - exp(0.2*(x+10) + rnorm(nsamp,0,2)plot(x, y3)# 生成对数函数回归模拟数据y4 - log(10*(x+12) + rnorm(nsamp,0,0.1)plot(x, y4)# 计算相关系数并检验:线性相关例子cor(x, y)cor.test(x, y)# 计算相关系数并检验:二次曲线相关例子cor(x, y2)cor.test(x, y2)# 模拟数据的一元回归plot(x, y)abline(lm(y x), col=red, lwd=2)res - lm(y x); ressummary(res)# 19个学生身高体重的回归。d - read.csv(class.csv, header=TRUE)lm1 - lm(weight height, data=d); summary(lm1)plot(weight height, data=d)abline(lm1, col=red, lwd=2)plot(lm1)# 样条回归例子。nsamp - 30x - runif(nsamp, -10, 10)x - sort(x)y - 10*sin(x/10*pi)2 + rnorm(nsamp,0,0.2)plot(x, y)require(splines)res - smooth.spline(x, y)lines(spline(x, res$y), col=red)# 体重对身高和年龄的多元回归。lm2 - lm(weight height + age, data=d)summary(lm2)# 只对年龄回归。lm3 - lm(weight age, data=d)summary(lm3)# 逐步回归。lm4 - step(lm(weight height + age + sex, data=d)summary(lm4)# 以因子为自变量的回归。lm5 - lm(weight height + sex, data=d)summary(lm5)# 癌症病人康复概率的logistic回归。remiss - read.csv(remiss.csv, header=TRUE)res1 - glm(remiss cell+smear+infil+li +blast+temp, family=binomial, data=remiss)summary(res1)# 逐步剔除不重要的自变量。res2 - glm(remiss cell+smear+infil+li +temp, family=binomial, data=remiss)summary(res2)res3 - glm(remiss cell+infil+li +temp, family=binomial, data=remiss)summary(res3)res4 - glm(remiss cell+li+temp, family=binomial, data=remiss)summary(res4)# step()逐步回归。ress - step(glm(remiss cell+smear+infil+li +blast+temp, family=binomial, data=remiss)summary(ress)# 多元分析 # 主成份分析。# 模拟例子nsamp - 30; s - 1p01 - runif(nsamp, -10, 10)p02 - runif(nsamp, 0, 5)x1 - 0.5*p01 + 0.1*p02 + rnorm(nsamp, 0, s)x2 - 0.1*p01 - 0.5*p02 + rnorm(nsamp, 0, s)x3 - 0.5*p01 - 0.1*p02 + rnorm(nsamp, 0, s)x4 - -0.1*p01 + 0.5*p02 + rnorm(nsamp, 0, s)# 用协方差阵。res1 - princomp(x1+x2+x3+x4, cor=FALSE, scores=TRUE)summary(res1)# 用相关阵。res2 - princomp(x1+x2+x3+x4, cor=TRUE)summary(res2)# 计算主成份得分。pr1 - predict(res1)plot(d,1, pr1,1)# 因子分析。# 100个学生的语文、数学、英语、物理、化学、生物成绩。scores - read.csv(scores.csv, header=TRUE)dd - scores,2:7res - factanal(dd, factors=2)res# 判别分析。# 鸢尾花数据的Fisher判别分析。data(iris)plot(Sepal.Length Species, data=iris)require(MASS)res - lda(Species Sepal.Length+Sepal.Width +Petal.Length+Petal.Width, data=iris)res# 对比判别效果。pr - predict(res)$classtable(iris,Species, pr)# 聚类分析。# 鸢尾花数据的聚类分析。data(iris)# 计算距离。di - dist(iris,1:4, method=euclidean)print(as.matrix(di)1:10,1:10)# 谱系聚类。res - hclust(di, method=complete)plot(res, labels=FALSE)# 在结果树形图中切分出三个类。rect.hclust(res, k=3)# 得到分三个类的分类结果。clus - cutree(res, k=3)# 与真实类对比。table(iris,Species, clus)# 典型相关分析# 鸢尾花数据花萼长、宽与花瓣长、宽的典型相关。data(iris)res - cancor(iris,1:2, iris,3:4)print(res)# 工作性质与工作满意度的典型相关。jobs - read.csv(jobs.csv, header=TRUE)res - cancor(jobs,1:3, jobs,4:6)print(res)# 时间序列分析 # 北京地区1949-1964年洪灾受灾和成灾面积数据,年数据。print(d.flood)flood.area1 - ts(d.flood,area1, frequency=1, start=1949)flood.area2 - ts(d.flood,area2, frequency=1, start=1949)plot(flood.area1)lines(flood.area2, col=red, lty=2)# 自相关系数函数ACF。acf(flood.area1)# 居民用煤消耗季度数据,从1991年到1996年。coal.consumption - ts(c( 6878.4 , 5343.7 , 4847.9 , 6421.9 , 6815.4 , 5532.6 , 4745.6 , 6406.2 , 6634.4 , 5658.5 , 4674.8 , 6445.5 , 7130.2 , 5532.6 , 4989.6 , 6642.3 , 74
展开阅读全文
相关资源
正为您匹配相似的精品文档
相关搜索

最新文档


当前位置:首页 > 办公文档


copyright@ 2023-2025  zhuangpeitu.com 装配图网版权所有   联系电话:18123376007

备案号:ICP2024067431-1 川公网安备51140202000466号


本站为文档C2C交易模式,即用户上传的文档直接被用户下载,本站只是中间服务平台,本站所有文档下载所得的收益归上传人(含作者)所有。装配图网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对上载内容本身不做任何修改或编辑。若文档所含内容侵犯了您的版权或隐私,请立即通知装配图网,我们立即给予删除!