# Demo script: fit and plot classification trees on the built-in `iris` data
# with rpart and C5.0.

# Install only the packages that are missing, instead of reinstalling all of
# them on every run of the script.
required_pkgs <- c("rpart.plot", "rattle", "RColorBrewer", "C50")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(rpart)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(C50)

# Train/test split ----
# The split is created before any model uses `test`. The sample size is based
# on the number of ROWS (nrow); length(iris) is the number of columns (5) and
# would select only 3 rows. The sampled 70% of rows form the training set and
# the remaining rows the test set.
select <- sample(seq_len(nrow(iris)), size = floor(nrow(iris) * 0.7))
train <- iris[select, ]
test <- iris[-select, ]
train <- na.omit(train)

# rpart: default tree ----
# NOTE(review): this model is fitted on the full `iris` data, which includes
# the rows in `test`, so predictions on `test` are optimistic. Fit on `train`
# if an honest hold-out evaluation is wanted.
model <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris,
  method = "class"
)
plot(model)
text(model)
fancyRpartPlot(model)
Prediction <- predict(model, test, type = "class")

# rpart: fully grown tree (minsplit = 2, cp = 0 disables early stopping) ----
model1 <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris,
  method = "class",
  control = rpart.control(minsplit = 2, cp = 0)
)
fancyRpartPlot(model1)

# C5.0 ----
ls("package:C50")
tc <- C5.0Control(
  subset = FALSE,
  CF = 0.25,
  winnow = FALSE,
  noGlobalPruning = FALSE,
  minCases = 20
)
model2 <- C5.0(Species ~ ., data = train, rules = FALSE, control = tc)
summary(model2)
plot(model2)
C5imp(model2)
library(plyr)
#' Entropy (base 2) of one column of a data frame.
#'
#' Computes `-sum(p * log2(p))` over the relative frequencies `p` of the
#' distinct values found in the class column.
#'
#' @param trainData A data frame.
#' @param nClass A single column index (numeric) or column name (character)
#'   selecting the class column of `trainData`.
#' @return A single numeric value: the entropy in bits. `NA` labels are counted
#'   as a class of their own. An empty data frame yields 0.
cal_HD <- function(trainData, nClass) {
  # Invalid input now raises an error; the bare strings used before were
  # evaluated and silently discarded, so nothing was ever rejected.
  if (!is.data.frame(trainData)) {
    stop("input error", call. = FALSE)
  }
  if (length(nClass) != 1L || is.na(nClass) ||
      !(is.numeric(nClass) || is.character(nClass))) {
    stop("input error", call. = FALSE)
  }
  if (is.numeric(nClass)) {
    if (nClass < 1) {
      stop("input error", call. = FALSE)
    }
    if (length(trainData) < nClass) {
      stop("nClass is larger than the length of trainData", call. = FALSE)
    }
  } else if (!(nClass %in% names(trainData))) {
    stop("input error", call. = FALSE)
  }

  labels <- trainData[[nClass]]
  counts <- table(labels, useNA = "ifany")
  # Unused factor levels have a count of 0; drop them so 0 * log2(0) = NaN
  # cannot leak into the sum.
  probs <- as.vector(counts[counts > 0]) / length(labels)
  -sum(probs * log2(probs))
}
#' Conditional entropy of the class column given one attribute column.
#'
#' Partitions `trainData` by the distinct values of attribute `nA`, computes
#' `cal_HD()` on each partition, and returns the sum of those entropies
#' weighted by each partition's share of the rows.
#'
#' @param trainData A data frame.
#' @param nClass Column index or name of the class column (passed to
#'   `cal_HD()`).
#' @param nA Column index or name of the attribute to condition on.
#' @return A single numeric value. An empty data frame yields 0.
cal_HDA <- function(trainData, nClass, nA) {
  rownum <- nrow(trainData)
  if (rownum == 0L) {
    # Nothing to partition; avoids dividing by zero below.
    return(0)
  }

  # addNA() turns missing attribute values into a group of their own, so every
  # row belongs to exactly one partition and the weights sum to 1. Comparing
  # with `==` would silently drop NA rows while still counting their weight.
  grouping <- addNA(factor(trainData[[nA]]), ifany = TRUE)
  partitions <- split(trainData, grouping, drop = TRUE)

  weights <- vapply(partitions, nrow, integer(1)) / rownum
  entropies <- vapply(
    partitions,
    function(part) cal_HD(part, nClass),
    numeric(1)
  )
  sum(weights * entropies)
}
#' Information gain of an attribute with respect to the class column.
#'
#' @param trainData A data frame.
#' @param nClass Class column selector, forwarded to `cal_HD()` and
#'   `cal_HDA()`.
#' @param nA Attribute column selector, forwarded to `cal_HDA()`.
#' @return `cal_HD(trainData, nClass)` minus
#'   `cal_HDA(trainData, nClass, nA)`.
g_DA <- function(trainData, nClass, nA) {
  entropy_before <- cal_HD(trainData, nClass)
  entropy_after <- cal_HDA(trainData, nClass, nA)
  entropy_before - entropy_after
}
#' Recursively build a decision tree as a flat table of edges.
#'
#' At each node the candidate attribute with the largest `g_DA()` value is
#' chosen (the last one wins on ties). The node becomes a leaf when only one
#' class remains, when no candidate attributes are left, or when the best gain
#' is below `e`.
#'
#' @param trainData A data frame holding the rows that reach this node.
#' @param strRoot Name of the parent attribute (stored in `preName`).
#' @param strRootAttri Value of the parent attribute on the incoming branch
#'   (stored in `preValue`).
#' @param nClass Column index or name of the class column.
#' @param cAttri Vector of candidate attribute columns (indices or names).
#' @param e Minimum gain required to split.
#' @return A data frame with character columns `preName`, `preValue`,
#'   `curName`, `curValue`. A leaf row holds the majority class in `curName`
#'   and `""` in `curValue`; a split row holds the chosen attribute and one of
#'   its values. Rows whose split attribute is `NA` get no branch. An empty
#'   `trainData` yields a zero-row data frame.
gen_decision_tree <- function(trainData, strRoot, strRootAttri, nClass,
                              cAttri, e) {
  # Every row is built through this helper so all return paths share the same
  # column names and types and can be combined with rbind().
  make_row <- function(cur_name, cur_value) {
    data.frame(
      preName = as.character(strRoot),
      preValue = as.character(strRootAttri),
      curName = as.character(cur_name),
      curValue = as.character(cur_value),
      stringsAsFactors = FALSE
    )
  }
  empty_tree <- data.frame(
    preName = character(0),
    preValue = character(0),
    curName = character(0),
    curValue = character(0),
    stringsAsFactors = FALSE
  )

  class_counts <- table(trainData[[nClass]])
  class_counts <- class_counts[class_counts > 0]
  if (nrow(trainData) == 0L || length(class_counts) == 0L) {
    return(empty_tree)
  }

  # names() yields the label text, so factor class labels are not turned into
  # their integer codes.
  majority_class <- names(class_counts)[which.max(class_counts)]
  leaf <- make_row(majority_class, "")

  if (length(class_counts) == 1L || length(cAttri) == 0L) {
    return(leaf)
  }

  gains <- vapply(
    cAttri,
    function(attr) g_DA(trainData, nClass, attr),
    numeric(1)
  )
  best_gain <- max(gains)
  if (is.na(best_gain) || best_gain < e) {
    return(leaf)
  }

  best_pos <- max(which(gains == best_gain))
  best_attr <- cAttri[best_pos]
  best_name <- if (is.character(best_attr)) {
    best_attr
  } else {
    names(trainData)[best_attr]
  }
  # Remove the chosen attribute from the candidates so the recursion makes
  # progress and terminates.
  remaining <- cAttri[cAttri != best_attr]

  attr_column <- trainData[[best_name]]
  branch_values <- unique(attr_column)
  branch_values <- branch_values[!is.na(branch_values)]

  pieces <- lapply(seq_along(branch_values), function(k) {
    value <- branch_values[k]
    branch_rows <- trainData[which(attr_column == value), , drop = FALSE]
    rbind(
      make_row(best_name, value),
      gen_decision_tree(
        branch_rows, best_name, as.character(value), nClass, remaining, e
      )
    )
  })

  if (length(pieces) == 0L) {
    # The chosen attribute has no non-missing value to branch on.
    return(leaf)
  }
  decision_tree <- do.call(rbind, pieces)
  rownames(decision_tree) <- NULL
  decision_tree
}