R语言实战分析预测海藻数量
2016-12-06 20:54
471 查看
********************预测海藻数量R语言脚本************************
---加载数据包
library(DMwR)
head(algae)
---对于数据给出行名称
algae=read.table("Analysis.txt",
header=F,
dec='.',
col.names=c('season','size','speed','mxPH','mno2','cl','no3','nh4','opo4','po4','chla','a1','a2','a3','a4','a5','a6','a7'),
na.strings=c('XXXXXXX'))
---绘制PH直方图
hist(algae$mxPH,prob=T)
---绘制PH直方图加密度图,用QQ图查看数据是否符合正态分布
library(car)
par(mfrow=c(1,2))
hist(algae$mxPH,prob=T,xlab='',main='Histogram of maximum ph value',ylim=0:1)
lines(density(algae$mxPH,na.rm=T))
rug(jitter(algae$mxPH))
qq.plot(algae$mxPH,main='Normal QQ Plot of maximum PH')
par(mfrow=c(1,1))
---绘制opo4箱线图
boxplot(algae$opo4,ylab="orthophosphate (opo4)")
rug(jitter(algae$opo4),side=2)
abline(h=mean(algae$opo4,na.rm=T),lty=2)
---离群值的检测,三条线分别表示均值,均值加标准差,中位数
plot(algae$nh4,xlab="")
abline(h=mean(algae$nh4,na.rm=T),lty=1)
abline(h=mean(algae$nh4,na.rm=T)+sd(algae$nh4,na.rm=T),lty=2)
abline(h=median(algae$nh4,na.rm=T),lty=3)
identify(algae$nh4)
---离群值的检测
plot(algae$nh4,xlab="")
clicked.lines=identify(algae$nh4)
algae[clicked.lines, ]
algae[algae$nh4.line>19000, ]
---因子变量绘制lattice箱线图(在规模较小的河流中,a1的频率较高)
library(lattice)
bwplot(size~a1,data=algae,ylab='Rive Size',xlab='Algal A1')
---分位箱线图
library(Hmisc)
bwplot(size~a1,data=algae,panel=panel.bpplot,
probs=seq(.01,.49,by=.01),datadensity=TRUE,
yalb='River Size',xlab='Algal A1'
)
---两个条件的影响绘图
mino2=equal.count(na.omit(algae$mno2),number=5,overlap=1/5)
stripplot(season ~ a3/mino2,data=algae[!is.na(algae$mno2), ])
******数据缺失处理*******
---剔除
library(DMwR)
data(algae)
algae[!complete.cases(algae),]
nrow(algae[!complete.cases(algae),])
algae=na.omit(algae)
algae=algae[-c(62,199),]
---找出每行数据缺失数据
apply(algae,1,function(x) sum(is.na(x)))
---设定值
data(algae)
manyNAs(algae,0.2)
algae=algae[-manyNAs(algae),]
---用最高频值填补缺失值
--均值
algae[48,"mxPH"]=mean(algae$mxPH,na.rm=T)
--中位数
algae[is.na(algae$Chla),"Chla"]=median(algae$Chla,na.rm=T)
--众数(函数包)
data(algae)
algae=algae[-manyNAs(algae),]
algae=centralImputation(algae)
--通过变量的相关关系来填写缺失值
cor(algae[,4:18],use="complete.obs")
symnum(cor(algae[,4:18],use="complete.obs"))
---通过线性相关po4和opo4值的关系填写缺失值
data(algae)
algae=algae[-manyNAs(algae),]
lm(PO4~oPO4,data=algae)
algae[28,"PO4"]=42.897+1.293*algae[28,"oPO4"]
---构造函数计算opo4和po4的值
data(algae)
algae=algae[manyNAs(algae),]
fillPO4=function(op){
if(is.na(op))
return(NA)
else return(42.897+1.239*op)
}
algae[is.na(algae$PO4),"PO4"]=sapply(algae[is.na(algae$PO4),
"oPO4"],fillPO4)
---不同季节直方图
histogram(~mxPH | season ,data=algae)
algae$season=factor(algae$season, levels=c("spring","summer","autum","winter"))
---扩展
histogram(~mxPH | size*speed,data=algae)
stripplot(size~mxPH | speed,data=algae,jitter= T)
*****通过探索案例之间的相似性来填补缺失值
data(algae)
algae=algae[-manyNAs(algae),]
---一般采用欧式距离
algae=knnImputation(algae,k=10)
--中位数
algae=knnImputation(algae,k=10,meth="median")
*********获取预测模型***********
---多元线性回归函数模型
--获取不含缺失值的数据框
library(rpart)
data(algae)
algae=algae[-manyNAs(algae),]
clean.algae=knnImputation(algae,k=10)
---建立一个预测海藻的线性回归模型
lm.a1=lm(a1~.,data=clean.algae[,1:12])
---获取更多线性模型
summary(lm.a1)
---利用anova做精简的线性模型,进行比较
anova(lm.a1)
---做出微小调整
lm2.a1=update(lm.a1,.~.-season)
summary(lm2.a1)
---对于两个模型进行正式比较
anova(lm.a1,lm2.a1)
---采用向后消元法得到一个新的模型
final.lm=step(lm.a1)
---获得最后模型
summary(final.lm)
--这个模型解释的方差比例仍然不是很乐观,这样的假设对于海藻案例应用不是很合适
**********决策树预测************
决策树模型可以处理缺失值
library(rpart)
data(algae)
algae=algae[-manyNAs(algae),]
---获取回归树
rt.a1=rpart(a1~.,data=algae[,1:12])
rt.a1
---绘制树形图
prettyTree(rt.a1)
---复杂度损失修剪方法
printcp(rt.a1)
--通过使用不同的CP值来建立这棵树
rt2.a1=prune(rt.a1,cp=0.08)
rt2.a1
---rpartXse函数自动完成
(rt.a1=rpartXse(a1~.,data=algae[,1:12]))
--使用函数snip.rpart()交互对树进行剪枝
first.tree=rpart(a1~.,data=algae[,1:12])
snip.rpart(first.tree,c(4,7))
---画出回归树
prettyTree(first.tree)
snip.rpart(first.tree)
********模型的评估**********
--回归模型的评估方法是将目标变量的预测值与实际值进行比较得到,并从这些比较中计算耨写平均误差的度量。
一种度量方法是平均绝对误差(MAE)
lm.predictions.a1=predict(final.lm,clean.algae)
rt.predictions.a1=predict(rt.a1,algae)
(mae.a1.lm=mean(abs(lm.predictions.a1-algae[,"a1"])))
(mae.a1.rt=mean(abs(rt.predictions.a1-algae[,"a1"])))
---另外一种度量误差的方法是均方误差(MSE)
(mse.a1.lm=mean(lm.predictions.a1-algae[,"a1"])^2)
(mse.a1.rt=mean(rt.predictions.a1-algae[,"a1"])^2)
---还有一种度量方法是标准化后的平均绝对误差(NMSE)
(nmse.a1.lm=mean((lm.predictions.a1-algae[,'a1'])^2)/
mean((mean(algae[,'a1'])-algae[,'a1'])^2))
(nmse.a1.rt=mean((rt.predictions.a1-algae[,'a1'])^2)/
mean((mean(algae[,'a1'])-algae[,'a1'])^2))
---NMSE的值越小越好,模型的性能越好
---利用函数来计算回归模型的性能指标
regr.eval(algae[,"a1"],rt.predictions.a1,train.y=algae[,"a1"])
---可视化展现预测模型的值
old.par=par(mfrow=c(1,2))
plot(lm.predictions.a1,algae[,"a1"],main="Liner model",xlab="predictions",ylab="True values")
abline(0,1,lty=2)
plot(rt.predictions.a1,algae[,"a1"],main="Regrsson Tree",xlab="Predictions",ylab="True value")
abline(0,1,lty=2)
par=(old.par)
--用函数identify()来检查哪些预测特别差的检查,该函数可以让用户通过互动方式点击图形的点
plot(lm.predictions.a1,algae[,'a1'],main="Liner Model",xlab="Predictions ",ylab="True Values")
abline(0,1,lty=2)
algae[identify(lm.predictions.a1,algae[,'a1']),]
--去除小于零的数
sensible.lm.predictions.a1=ifelse(lm.predictions.a1<0,0,lm.predictions.a1)
regr.eval(algae[,"a1"],lm.predictions.a1,stats=c("mae","mse"))
regr.eval(algae[,"a1"],sensible.lm.predictions.a1,stats=c("mae","mse"))
******预测任务的决策
1.为预测任务选择模型
2.比较模型性能评估指标
3.选择获取评估指标的可靠估计的实验方法
---给出目标模型函数
cv.rpart=function(form.train,test,...){
m=rpartXse(form,train,...)
p=predict(m,test)
mse=mean((p-resp(form,test))^2)
c(nmse=mse/mean(mean(resp(form,train))-resp(form,test))^2)
}
cv.lm=function(form,train,test,...){
m=lm(form,train,...)
p=predict(m,test)
p=ifelse(p<0,0,p)
mse=mean((p-resp(form,test))^2)
c(nmse=mse/mean((mean(resp(form,train))-resp(form,test))^2))
}
---对于模型交叉验证
res=experimentalComparison(
c(dataset(a1~.,clean.algae[,1:12],'a1')),
c(variants('cv.lm'),
variants('cv.rpart',se=c(0,0.5,1))),
cvSettings(3,10,1234))
cv.lm variant=cv.lm.defaults
cv.rpart variant=cv.rpart.v1
cv.rpart variant=cv.rpart.v2
summary(res)
---绘制图形
plot(res)
--由图可以看出模型NMSE最好,但是不明显,通过下面给出
getVariant("cv.rpart.v1",res)
对所有的7个预测模型任务进行与上面相似的比较实验
DSs=sapply(names(clean.algae)[12:18],
function(x,names.attrs){
f=as.formula(paste(x,"~ ."))
dataset(f,clean.algae[,c(names.attrs,x)],x)
},
names(clean.algae)[1:11])
res.all=experimentalComparison(
DSs,
c(variants('cv.lm'),
variants('cv.rpart',se=c(0,0.5,1))
),
cvSettings(5,10,1234))
plot(res.all)
bestScores(res.all)
**********组合模型-随机森林***********
library(randomForest)
cv.rf=function(form,train,test,...){
m=randomForest(form,train,...)
p=predict(m,test)
mse=mean((p-resp(form,test))^2)
c(nmse=mse/mean((mean(resp(form,train))-resp(form,test))^2))
}
res.all=experimentalComparison(
DSs,
c(variants('cv.lm'),
variants('cv.rpart',se=c(0,0.5,1)),
variants('cv.rf',ntree=c(200,500,700))
),
cvSettings(5,10,1234))
--应用函数bestScores()能正是组合方法的优势
bestScores(res.all)
---论述统计的显著性
compAnalysis(res.all,against='cv.rf.v3',
datasets=c('a1','a2','a4','a6'))
****预测7类海藻的频率***
--获得7个模型
bestModelNames=sapply(bestScores(res.all),
function(x) x['nmse','system'])
learners=c(rf='randomForest',rpart='rpartXse')
funs=learners[sapply(strsplit(bestModelsNames,'\\.'),
function(x) x[2])]
parSetts=lapply(bestModelsNames,function(x)getVariant(x,res.all)@pars)
bestModels=list()
for(a in 1:7){
form=as.formula(paste(names(clean.algae)[11+a],'~.'))
bestModels[[a]]=do.call(funcs[a],c(list(form,clean.algae[,c(1:11,11+a)]),parSetts[[a]]))}
--用训练数据集来填补测试数据集的缺失数据集
clean.test.algae=knnImputation(test.algae,k=10,distData=algae[,1:11])
--获取整个测试数据集的预测值矩阵
preds=matrus(ncol=7,nrow=140)
for(i in 1:nrow(clean.test.algae))
preds[i,]=sapply(1:7,function(x)
predict(bestModel[[x]],clean.test.algae[i,]))
--计算模型NMSE值
avg.preds=apply(algae[,12:18],2,mean)
apply(((algae.sols-preds)^2), 2,mean)/apply((scale(algae.sols,avg.preds,F)^2),2,mean)
---加载数据包
library(DMwR)
head(algae)
---对于数据给出行名称
algae=read.table("Analysis.txt",
header=F,
dec='.',
col.names=c('season','size','speed','mxPH','mno2','cl','no3','nh4','opo4','po4','chla','a1','a2','a3','a4','a5','a6','a7'),
na.strings=c('XXXXXXX'))
---绘制PH直方图
hist(algae$mxPH,prob=T)
---绘制PH直方图加密度图,用QQ图查看数据是否符合正态分布
library(car)
par(mfrow=c(1,2))
hist(algae$mxPH,prob=T,xlab='',main='Histogram of maximum ph value',ylim=0:1)
lines(density(algae$mxPH,na.rm=T))
rug(jitter(algae$mxPH))
qq.plot(algae$mxPH,main='Normal QQ Plot of maximum PH')
par(mfrow=c(1,1))
---绘制opo4箱线图
boxplot(algae$opo4,ylab="orthophosphate (opo4)")
rug(jitter(algae$opo4),side=2)
abline(h=mean(algae$opo4,na.rm=T),lty=2)
---离群值的检测,三条线分别表示均值,均值加标准差,中位数
plot(algae$nh4,xlab="")
abline(h=mean(algae$nh4,na.rm=T),lty=1)
abline(h=mean(algae$nh4,na.rm=T)+sd(algae$nh4,na.rm=T),lty=2)
abline(h=median(algae$nh4,na.rm=T),lty=3)
identify(algae$nh4)
---离群值的检测
plot(algae$nh4,xlab="")
clicked.lines=identify(algae$nh4)
algae[clicked.lines, ]
algae[algae$nh4.line>19000, ]
---因子变量绘制lattice箱线图(在规模较小的河流中,a1的频率较高)
library(lattice)
bwplot(size~a1,data=algae,ylab='Rive Size',xlab='Algal A1')
---分位箱线图
library(Hmisc)
bwplot(size~a1,data=algae,panel=panel.bpplot,
probs=seq(.01,.49,by=.01),datadensity=TRUE,
yalb='River Size',xlab='Algal A1'
)
---两个条件的影响绘图
mino2=equal.count(na.omit(algae$mno2),number=5,overlap=1/5)
stripplot(season ~ a3/mino2,data=algae[!is.na(algae$mno2), ])
******数据缺失处理*******
---剔除
library(DMwR)
data(algae)
algae[!complete.cases(algae),]
nrow(algae[!complete.cases(algae),])
algae=na.omit(algae)
algae=algae[-c(62,199),]
---找出每行数据缺失数据
apply(algae,1,function(x) sum(is.na(x)))
---设定值
data(algae)
manyNAs(algae,0.2)
algae=algae[-manyNAs(algae),]
---用最高频值填补缺失值
--均值
algae[48,"mxPH"]=mean(algae$mxPH,na.rm=T)
--中位数
algae[is.na(algae$Chla),"Chla"]=median(algae$Chla,na.rm=T)
--众数(函数包)
data(algae)
algae=algae[-manyNAs(algae),]
algae=centralImputation(algae)
--通过变量的相关关系来填写缺失值
cor(algae[,4:18],use="complete.obs")
symnum(cor(algae[,4:18],use="complete.obs"))
---通过线性相关po4和opo4值的关系填写缺失值
data(algae)
algae=algae[-manyNAs(algae),]
lm(PO4~oPO4,data=algae)
algae[28,"PO4"]=42.897+1.293*algae[28,"oPO4"]
---构造函数计算opo4和po4的值
data(algae)
algae=algae[manyNAs(algae),]
fillPO4=function(op){
if(is.na(op))
return(NA)
else return(42.897+1.239*op)
}
algae[is.na(algae$PO4),"PO4"]=sapply(algae[is.na(algae$PO4),
"oPO4"],fillPO4)
---不同季节直方图
histogram(~mxPH | season ,data=algae)
algae$season=factor(algae$season, levels=c("spring","summer","autum","winter"))
---扩展
histogram(~mxPH | size*speed,data=algae)
stripplot(size~mxPH | speed,data=algae,jitter= T)
*****通过探索案例之间的相似性来填补缺失值
data(algae)
algae=algae[-manyNAs(algae),]
---一般采用欧式距离
algae=knnImputation(algae,k=10)
--中位数
algae=knnImputation(algae,k=10,meth="median")
*********获取预测模型***********
---多元线性回归函数模型
--获取不含缺失值的数据框
library(rpart)
data(algae)
algae=algae[-manyNAs(algae),]
clean.algae=knnImputation(algae,k=10)
---建立一个预测海藻的线性回归模型
lm.a1=lm(a1~.,data=clean.algae[,1:12])
---获取更多线性模型
summary(lm.a1)
---利用anova做精简的线性模型,进行比较
anova(lm.a1)
---做出微小调整
lm2.a1=update(lm.a1,.~.-season)
summary(lm2.a1)
---对于两个模型进行正式比较
anova(lm.a1,lm2.a1)
---采用向后消元法得到一个新的模型
final.lm=step(lm.a1)
---获得最后模型
summary(final.lm)
--这个模型解释的方差比例仍然不是很乐观,这样的假设对于海藻案例应用不是很合适
**********决策树预测************
决策树模型可以处理缺失值
library(rpart)
data(algae)
algae=algae[-manyNAs(algae),]
---获取回归树
rt.a1=rpart(a1~.,data=algae[,1:12])
rt.a1
---绘制树形图
prettyTree(rt.a1)
---复杂度损失修剪方法
printcp(rt.a1)
--通过使用不同的CP值来建立这棵树
rt2.a1=prune(rt.a1,cp=0.08)
rt2.a1
---rpartXse函数自动完成
(rt.a1=rpartXse(a1~.,data=algae[,1:12]))
--使用函数snip.rpart()交互对树进行剪枝
first.tree=rpart(a1~.,data=algae[,1:12])
snip.rpart(first.tree,c(4,7))
---画出回归树
prettyTree(first.tree)
snip.rpart(first.tree)
********模型的评估**********
--回归模型的评估方法是将目标变量的预测值与实际值进行比较得到,并从这些比较中计算耨写平均误差的度量。
一种度量方法是平均绝对误差(MAE)
lm.predictions.a1=predict(final.lm,clean.algae)
rt.predictions.a1=predict(rt.a1,algae)
(mae.a1.lm=mean(abs(lm.predictions.a1-algae[,"a1"])))
(mae.a1.rt=mean(abs(rt.predictions.a1-algae[,"a1"])))
---另外一种度量误差的方法是均方误差(MSE)
(mse.a1.lm=mean(lm.predictions.a1-algae[,"a1"])^2)
(mse.a1.rt=mean(rt.predictions.a1-algae[,"a1"])^2)
---还有一种度量方法是标准化后的平均绝对误差(NMSE)
(nmse.a1.lm=mean((lm.predictions.a1-algae[,'a1'])^2)/
mean((mean(algae[,'a1'])-algae[,'a1'])^2))
(nmse.a1.rt=mean((rt.predictions.a1-algae[,'a1'])^2)/
mean((mean(algae[,'a1'])-algae[,'a1'])^2))
---NMSE的值越小越好,模型的性能越好
---利用函数来计算回归模型的性能指标
regr.eval(algae[,"a1"],rt.predictions.a1,train.y=algae[,"a1"])
---可视化展现预测模型的值
old.par=par(mfrow=c(1,2))
plot(lm.predictions.a1,algae[,"a1"],main="Liner model",xlab="predictions",ylab="True values")
abline(0,1,lty=2)
plot(rt.predictions.a1,algae[,"a1"],main="Regrsson Tree",xlab="Predictions",ylab="True value")
abline(0,1,lty=2)
par=(old.par)
--用函数identify()来检查哪些预测特别差的检查,该函数可以让用户通过互动方式点击图形的点
plot(lm.predictions.a1,algae[,'a1'],main="Liner Model",xlab="Predictions ",ylab="True Values")
abline(0,1,lty=2)
algae[identify(lm.predictions.a1,algae[,'a1']),]
--去除小于零的数
sensible.lm.predictions.a1=ifelse(lm.predictions.a1<0,0,lm.predictions.a1)
regr.eval(algae[,"a1"],lm.predictions.a1,stats=c("mae","mse"))
regr.eval(algae[,"a1"],sensible.lm.predictions.a1,stats=c("mae","mse"))
******预测任务的决策
1.为预测任务选择模型
2.比较模型性能评估指标
3.选择获取评估指标的可靠估计的实验方法
---给出目标模型函数
cv.rpart=function(form.train,test,...){
m=rpartXse(form,train,...)
p=predict(m,test)
mse=mean((p-resp(form,test))^2)
c(nmse=mse/mean(mean(resp(form,train))-resp(form,test))^2)
}
cv.lm=function(form,train,test,...){
m=lm(form,train,...)
p=predict(m,test)
p=ifelse(p<0,0,p)
mse=mean((p-resp(form,test))^2)
c(nmse=mse/mean((mean(resp(form,train))-resp(form,test))^2))
}
---对于模型交叉验证
res=experimentalComparison(
c(dataset(a1~.,clean.algae[,1:12],'a1')),
c(variants('cv.lm'),
variants('cv.rpart',se=c(0,0.5,1))),
cvSettings(3,10,1234))
cv.lm variant=cv.lm.defaults
cv.rpart variant=cv.rpart.v1
cv.rpart variant=cv.rpart.v2
summary(res)
---绘制图形
plot(res)
--由图可以看出模型NMSE最好,但是不明显,通过下面给出
getVariant("cv.rpart.v1",res)
对所有的7个预测模型任务进行与上面相似的比较实验
DSs=sapply(names(clean.algae)[12:18],
function(x,names.attrs){
f=as.formula(paste(x,"~ ."))
dataset(f,clean.algae[,c(names.attrs,x)],x)
},
names(clean.algae)[1:11])
res.all=experimentalComparison(
DSs,
c(variants('cv.lm'),
variants('cv.rpart',se=c(0,0.5,1))
),
cvSettings(5,10,1234))
plot(res.all)
bestScores(res.all)
**********组合模型-随机森林***********
library(randomForest)
cv.rf=function(form,train,test,...){
m=randomForest(form,train,...)
p=predict(m,test)
mse=mean((p-resp(form,test))^2)
c(nmse=mse/mean((mean(resp(form,train))-resp(form,test))^2))
}
res.all=experimentalComparison(
DSs,
c(variants('cv.lm'),
variants('cv.rpart',se=c(0,0.5,1)),
variants('cv.rf',ntree=c(200,500,700))
),
cvSettings(5,10,1234))
--应用函数bestScores()能正是组合方法的优势
bestScores(res.all)
---论述统计的显著性
compAnalysis(res.all,against='cv.rf.v3',
datasets=c('a1','a2','a4','a6'))
****预测7类海藻的频率***
--获得7个模型
bestModelNames=sapply(bestScores(res.all),
function(x) x['nmse','system'])
learners=c(rf='randomForest',rpart='rpartXse')
funs=learners[sapply(strsplit(bestModelsNames,'\\.'),
function(x) x[2])]
parSetts=lapply(bestModelsNames,function(x)getVariant(x,res.all)@pars)
bestModels=list()
for(a in 1:7){
form=as.formula(paste(names(clean.algae)[11+a],'~.'))
bestModels[[a]]=do.call(funcs[a],c(list(form,clean.algae[,c(1:11,11+a)]),parSetts[[a]]))}
--用训练数据集来填补测试数据集的缺失数据集
clean.test.algae=knnImputation(test.algae,k=10,distData=algae[,1:11])
--获取整个测试数据集的预测值矩阵
preds=matrus(ncol=7,nrow=140)
for(i in 1:nrow(clean.test.algae))
preds[i,]=sapply(1:7,function(x)
predict(bestModel[[x]],clean.test.algae[i,]))
--计算模型NMSE值
avg.preds=apply(algae[,12:18],2,mean)
apply(((algae.sols-preds)^2), 2,mean)/apply((scale(algae.sols,avg.preds,F)^2),2,mean)
相关文章推荐
- 数据挖掘与R语言——第二章(预测海藻数量)
- 【数据挖掘与R语言笔记】预测海藻数量(一)观察数据和处理数据缺失
- 【数据挖掘与R语言笔记】预测海藻数量(二)线性模型和回归树模型
- R语言实战之基本统计分析
- R语言实战(二)——数据分析基础知识
- R语言实战:机器学习与数据分析源代码1
- 【数据分析 R语言实战】学习笔记 第六章 参数估计与R实现(下)
- 【数据分析 R语言实战】学习笔记 第七章 假设检验及R实现
- 【数据分析 R语言实战】学习笔记 第四章 数据的图形描述
- R语言实战--源码分析
- 【数据分析 R语言实战】学习笔记 第三章 数据预处理 (下)
- R语言实战笔记--第十章 功效分析&样本量
- 盘口语言的实战分析
- R语言实战:机器学习与数据分析源代码6(最终弹)
- spark高级数据分析实战---用决策树预测森林植被
- R语言实战:机器学习与数据分析源代码2
- 【数据分析 R语言实战】学习笔记 第六章 参数估计与R实现(上)
- 【数据分析 R语言实战】学习笔记 第一章 数据分析导引
- 【数据分析 R语言实战】学习笔记 第十一章 对应分析
- R语言实战:机器学习与数据分析源代码4