
Source Code for R in Action: Machine Learning and Data Analysis (Part 3)

2016-06-14 11:45
This post collects the code from Chapters 12 through 15 of R in Action: Machine Learning and Data Analysis (《R语言实战——机器学习与数据分析》, Publishing House of Electronics Industry, 2016), mainly covering the EM algorithm, support vector machines, and artificial neural networks, among other topics. The book's introduction is available at the following link:

/article/11896969.html



About the book: It systematically introduces the most important and widely used techniques in statistical analysis and machine learning together with their underlying principles, and demonstrates, through numerous R examples built on detailed explanations of the algorithms, how to apply the theory in practice. The material is organized into three parts: R programming fundamentals, statistics-based data analysis methods, and machine learning theory. The statistics and machine learning parts cover parameter estimation, hypothesis testing, maximum likelihood estimation, nonparametric methods (including contingency-table analysis, the sign test, and the signed-rank test), analysis of variance, linear regression (including ridge regression and the Lasso), logistic regression, support vector machines, cluster analysis (including the K-means and EM algorithms), and artificial neural networks. The statistical theory also deepens the reader's understanding of the later machine learning material. The structure and pacing balance a step-by-step learning progression with the need to build solid foundations.

Online bookstores:

Publishing House of Electronics Industry (official site)

China-pub (China Interactive Publishing Network)

JD.com (1)

JD.com (2)

Chapter 12

P279~280

qf(0.05, 2, 15, lower.tail = FALSE)   # critical value F(0.05; 2, 15)
pf(4.698, 2, 15, lower.tail = FALSE)  # p-value for the observed F statistic

X <- c(4.2, 3.3, 3.7, 4.3, 4.1, 3.3,
       4.5, 4.4, 3.5, 4.2, 4.6, 4.2,
       5.6, 3.6, 4.5, 5.1, 4.9, 4.7)
A <- factor(rep(1:3, each = 6))
my.data <- data.frame(X, A)
my.aov <- aov(X ~ A, data = my.data)
summary(my.aov)
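
As a quick supplement (not from the book), the qf/pf calls above can be checked against the aov output directly: pull the observed F statistic out of the summary table and reproduce its p-value with pf().

f.stat <- summary(my.aov)[[1]][["F value"]][1]  # observed F statistic for factor A
pf(f.stat, 2, 15, lower.tail = FALSE)           # should match the Pr(>F) column above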


P285

qf(0.05, 2, 10, lower.tail = FALSE)   # critical value for factor B (2, 10 df)
pf(4.24, 2, 10, lower.tail = FALSE)   # p-value for factor B's F statistic

qf(0.05, 5, 10, lower.tail = FALSE)   # critical value for factor A (5, 10 df)
pf(29.49, 5, 10, lower.tail = FALSE)  # p-value for factor A's F statistic

x <- c(64, 65, 73, 53, 54, 59, 71, 68, 79,
       41, 46, 38, 50, 58, 65, 42, 40, 46)
my.data <- data.frame(x, A = gl(6, 3), B = gl(3, 1, 18))  # A: 6 levels in blocks of 3; B: 3 levels cycling

my.aov <- aov(x ~ A+B, data = my.data)
summary(my.aov)
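
The same kind of check works for the two-way table; a small supplement (not book code) that reproduces both p-values at once, since pf() is vectorized:

tab <- summary(my.aov)[[1]]  # rows: A (5 df), B (2 df), Residuals (10 df)
pf(tab[["F value"]][1:2], tab[["Df"]][1:2], tab[["Df"]][3], lower.tail = FALSE)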


P286~287

pancakes <- data.frame(
  supp = rep(c("no supplement", "supplement"), each = 12),
  whey = rep(rep(c("0%", "10%", "20%", "30%"), each = 3), 2),
  quality = c(4.4, 4.5, 4.3, 4.6, 4.5, 4.8,
              4.5, 4.8, 4.8, 4.6, 4.7, 5.1,
              3.3, 3.2, 3.1, 3.8, 3.7, 3.6,
              5, 5.3, 4.8, 5.4, 5.6, 5.3))

round(tapply(pancakes$quality, pancakes[, 1:2], mean), 2)

library(stats)  # stats is attached by default; the call is kept for completeness
interaction.plot(pancakes$whey, pancakes$supp, pancakes$quality)


P289

pancakes.lm <- lm(quality ~ supp * whey, data = pancakes)
anova(pancakes.lm)

my.aov <- aov(quality ~ supp * whey, data = pancakes)
summary(my.aov)


P290~291

p.adjust.methods  # the p-value adjustment methods available for multiple comparisons

pairwise.t.test(X, A, p.adjust.method = "bonferroni")  # X and A from the P279~280 example
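
For comparison (a supplement, not book code), the same test under Holm's step-down adjustment, which controls the familywise error rate while being no less powerful than Bonferroni:

pairwise.t.test(X, A, p.adjust.method = "holm")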


P292~293

x <- c(4.2, 3.3, 3.7, 4.3, 4.1, 3.3,
       4.5, 4.4, 3.5, 4.2, 4.6, 4.2,
       5.6, 3.6, 4.5, 5.1, 4.9, 4.7)
group <- factor(rep(LETTERS[1:3], each = 6))
mice <- data.frame(x, group)
mice.aov <- aov(x ~ group, data = mice)
summary(mice.aov)

library(multcomp)
mice.Dunnett <- glht(mice.aov, linfct=mcp(group = "Dunnett"))
summary(mice.Dunnett)
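
Besides summary(), glht objects also support simultaneous confidence intervals; a one-line supplement (not from the book):

confint(mice.Dunnett)  # simultaneous intervals for the treatment-vs-control contrasts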


P294

windows(width = 5, height = 3, pointsize = 10)  # Windows-only graphics device; use dev.new() elsewhere
plot(mice.Dunnett, sub = "Mice Data")
mtext("Dunnett's Method", side = 3, line = 0.5)

qtukey(0.05, 3, 15, lower.tail = F)


P296~297

posthoc <- TukeyHSD(mice.aov, 'group')
posthoc

# studentized-range p-values; these reproduce the 'p adj' column of the TukeyHSD output above
ptukey(1.96785, 3, 15, lower.tail = F)
ptukey(4.32925, 3, 15, lower.tail = F)
ptukey(2.36140, 3, 15, lower.tail = F)

library(agricolae)
comparison <- HSD.test(mice.aov, 'group', console = T)


P298~299

print(comparison$groups)

library(agricolae)
comparison <- SNK.test(mice.aov, "group", console = T)


P301

# the same q statistics, now with the "number of means" equal to the span of each comparison,
# as in the SNK procedure
ptukey(1.968, 2, 15, lower.tail = F)
ptukey(4.329, 3, 15, lower.tail = F)
ptukey(2.361, 2, 15, lower.tail = F)


P303

pchisq(1.4947, 2, lower.tail = F)  # p-value of the Bartlett statistic with 2 df

bartlett.test(X ~ A, data = my.data)
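
Equivalently (a supplement, not book code), the statistic and degrees of freedom can be read off the returned htest object instead of being typed in by hand:

b <- bartlett.test(X ~ A, data = my.data)
pchisq(b$statistic, b$parameter, lower.tail = FALSE)  # matches b$p.value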


P305~306

X <- c(0.383, 0.517, 0.117, 0.483, 0.283, 0.517,
       0.267, 0.167, 0.733, 0.033, 0.367, 0.033,
       0.867, 1.133, 0.233, 0.367, 0.167, 0.033)
A <- factor(rep(1:3, each=6))
my.data <- data.frame(X, A)
my.aov <- aov(X~A, data = my.data)
summary(my.aov)

library(car)
leveneTest(X ~ A, data = my.data)

leveneTest(X ~ A, data = my.data, center = mean)

library(lawstat)
levene.test(X, A, location="median")

levene.test(X, A, location="mean")


Chapter 13

P313

countries = read.csv("c:/countries_data.csv")
head(countries)

# use the country names as row labels, then keep only the two numeric columns
rownames(countries) = as.character(countries$countries)
countries = countries[,2:3]
names(countries) = c("Services(%)", "Aged_Population(%)")
head(countries)

# k-means uses random starting centers; call set.seed() first for reproducible clusters
my.km <- kmeans(countries, centers = 2)
my.km$centers

head(my.km$cluster)

plot(countries, col = my.km$cluster)
points(my.km$centers, col = 1:2, pch = 8, cex = 2)
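
The book fixes k = 2 here. A common heuristic for choosing k (a supplement, not book code) is to plot the total within-cluster sum of squares over a range of k and look for an "elbow":

wss <- sapply(1:6, function(k) kmeans(countries, centers = k, nstart = 10)$tot.withinss)
plot(1:6, wss, type = "b", xlab = "number of clusters k", ylab = "total within-cluster SS")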


P323~324

library(mclust)  # provides Mclust() and mclust2Dplot()
my.em <- Mclust(countries)
summary(my.em)

summary(my.em, parameters = TRUE)

mclust2Dplot(countries, parameters = my.em$parameters,
             z = my.em$z, what = "classification", main = TRUE)
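
Mclust chooses the number of components and the covariance model by BIC; to inspect the comparison it performed (a supplement, not book code):

plot(my.em, what = "BIC")  # BIC of each covariance model across candidate numbers of components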


P325

model_density <- densityMclust(countries)
plot(model_density, countries, col = "cadetblue",
nlevels = 25, what = "density")

plot(model_density, what = "density", type = "persp", theta = 235)


Chapter 14

P351~352

library(lattice)
xyplot(Petal.Length ~ Petal.Width, data = iris, groups = Species,
       auto.key = list(corner = c(1, 0)))

data(iris)
attach(iris)
subdata <- iris[iris$Species != 'virginica',]
subdata$Species <- factor(subdata$Species)  # drop the unused 'virginica' level
library(e1071)  # provides svm()
model1 <- svm(Species ~ Petal.Length + Petal.Width, data = subdata)


P353~354

plot(model1, subdata, Petal.Length ~ Petal.Width)
model2 <- svm(Species ~ ., data = iris)

summary(model2)

x = iris[, -5]  # feature variables: every column of iris except the 5th
y = iris[, 5]   # response variable: the 5th column (Species)
model3 = svm(x, y, kernel = "radial",
             gamma = if (is.vector(x)) 1 else 1 / ncol(x))


P355~356

pred <- predict(model3, x)
table(pred, y)
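
The confusion table can be condensed into a single training-accuracy figure (a supplement, not book code):

mean(pred == y)  # proportion of training samples classified correctly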

pred <- predict(model3, x, decision.values = TRUE)
attr(pred, "decision.values")[1:4,]

pred[77:78]

plot(cmdscale(dist(iris[,-5])),
     col = c("orange","blue","green")[as.integer(iris[,5])],
     pch = c("o","+")[1:150 %in% model3$index + 1])
legend(1.8, -0.8, c("setosa","versicolor","virginica"),
       col = c("orange","blue","green"), lty = 1)
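
gamma and cost were left at their defaults above. e1071 also provides a grid-search utility; a sketch of cross-validated tuning, with an illustrative grid (not from the book):

tuned <- tune.svm(Species ~ ., data = iris, gamma = 10^(-3:0), cost = 10^(0:2))
summary(tuned)  # cross-validation error for each gamma/cost combination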


Chapter 15

P372~373

library(nnet)  # provides nnet() and class.ind()
# the sampling below is random; call set.seed() first for reproducible splits
samp <- c(sample(1:50,25), sample(51:100,25), sample(101:150,25))
ird <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]),
                  species = factor(c(rep("s",50), rep("c", 50), rep("v", 50))))
ir.nn1 <- nnet(species ~ ., data = ird, subset = samp, size = 2,
               rang = 0.1, decay = 5e-4, maxit = 200)

targets <- class.ind(c(rep("s", 50), rep("c", 50), rep("v", 50)))
ir <- rbind(iris3[,,1], iris3[,,2], iris3[,,3])
ir.nn2 <- nnet(ir[samp,], targets[samp,], size = 2, rang = 0.1,
               decay = 5e-4, maxit = 200)

summary(ir.nn1)
table(ird$species[-samp], predict(ir.nn1, ird[-samp,], type = "class"))
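
The hold-out confusion table can likewise be summarized as an accuracy figure (a supplement, not book code):

mean(predict(ir.nn1, ird[-samp,], type = "class") == ird$species[-samp])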


P374

# build a confusion matrix from one-hot targets and network outputs;
# max.col() picks the column (class) with the largest value in each row
pre.matrix <- function(true, pred) {
  name <- c("c","s","v")
  true <- name[max.col(true)]
  cres <- name[max.col(pred)]
  table(true, cres)
}

pre.matrix(targets[-samp,], predict(ir.nn2, ir[-samp,]))