-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandomForest_votingAggregation.R
90 lines (73 loc) · 2.65 KB
/
randomForest_votingAggregation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#Random Forest using CARET package
#goal: evaluate different models to predict bug-covering questions
## Model-1 based on majority voting
## Model-2 based on threshold of number of YES answers
## Model-3 based on ranking of number of YES answers
# Package setup ----
# Install only the packages that are not available yet. The original script
# re-installed every package on each run (and rpart.plot twice), which is slow
# and fails when there is no network access.
required_packages <- c("randomForest", "rpart.plot", "rattle", "RColorBrewer")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

library(randomForest)
library(rpart)
library(RColorBrewer)
library(rattle)
library(rpart.plot)

# Presumably defines loadAnswers(), which is called when importing the data.
# NOTE(review): absolute, machine-specific path - consider a project-relative
# path so the script runs on other machines.
source("C://Users//chris//OneDrive//Documentos//GitHub//randomForestWorkerConfidenceDifficulty//loadAnswers.R")
##
# Import data ----
# loadAnswers() is not defined in this file; presumably it comes from the
# sourced loadAnswers.R.
dataf <- loadAnswers("answerList_data.csv")
summary(dataf$Answer.confidence)

# Consolidate answers by question:
# count the number of YES, NO, IDK for each question.
dataf$Answer.option

# NOTE(review): summaryTable (one row per question with Question.ID,
# bugCovering and ranking) is never built in this script. Presumably it is
# created by loadAnswers.R or by a consolidation step that is missing here -
# confirm. Fail early with a clear message instead of an obscure lookup error.
if (!exists("summaryTable")) {
  stop(
    "summaryTable is not defined; build the per-question summary first.",
    call. = FALSE
  )
}

# Train/test split ----
# First 70% of the questions train the model, the remaining 30% are held out.
totalData <- length(summaryTable$Question.ID)
trainingSize <- trunc(totalData * 0.7)
# The hold-out set starts right after the last training row. The original
# `totalData - trainingSize` is the *size* of the test set, not its first
# index, so the test rows would have overlapped the training rows.
startTestIndex <- trainingSize + 1
endTestIndex <- totalData
stopifnot(trainingSize >= 1, startTestIndex <= endTestIndex)

# Hold-out rows; the original script used `test` without ever defining it.
test <- as.data.frame(summaryTable[startTestIndex:endTestIndex, ])

# Random forest: bugCovering (as a class label) explained by ranking.
# NOTE(review): `type` is not a documented randomForest() argument; kept from
# the original call - confirm whether it has any effect.
model <- randomForest(
  as.factor(bugCovering) ~ ranking,
  data = as.data.frame(summaryTable[seq_len(trainingSize), ]),
  importance = TRUE,
  ntree = 2000,
  type = "class"
)
varImpPlot(model)

# Predict on the hold-out questions; type = "vote" requests per-class votes
# rather than a single predicted label.
Prediction <- predict(model, newdata = test, type = "vote")
submit <- data.frame(
  Question.ID = test$Question.ID,
  PredictedLevel = Prediction,
  Actual = test$bugCovering
)
write.csv(
  submit,
  file = "C://Users//chris//OneDrive//Documentos//GitHub//randomForestWorkerConfidenceDifficulty//firstforest_ranking.csv",
  row.names = FALSE
)

# Inspect the fitted model (auto-printed when run interactively).
model$predicted
model$confusion
model$votes
#---------------------------------------------------------------------
library(party)

# Decision tree ----
# One classification tree for bugCovering (as a class label) using ranking as
# the only predictor, fitted on every row of summaryTable.
fit <- rpart(
  formula = as.factor(bugCovering) ~ ranking,
  data = as.data.frame(summaryTable),
  # Training-only alternative kept from the original script:
  # data = as.data.frame(summaryTable[1:trainingSize,]),
  method = "class"
)

# Complexity-parameter table and plot, then the full fit summary.
printcp(fit)
plotcp(fit)
summary(fit)

# Draw the tree: first with default spacing, then with uniform branch spacing
# and a title, and finally label the nodes of the last plot.
plot(fit)
plot(
  fit,
  uniform = TRUE,
  main = "Classification of bugCovering by ranking"
)
text(fit, use.n = TRUE, all = TRUE, cex = 1)

# Author's reading of the tree: a ranking higher than 2.5 implies non-bug.
# TODO: interpret these results from summary().
# TODO: run the decision tree for the majority and threshold models too.
######################################################################
# RANDOM FOREST USING CARET package ----

# The package name must be a quoted string; the original passed the bare
# symbol ElemStatLearn, which fails with "object not found". Install only
# when the package is missing.
# NOTE(review): nothing from ElemStatLearn or klaR is used in the code below,
# and ElemStatLearn may no longer be available on CRAN - confirm these are
# still needed.
if (!requireNamespace("ElemStatLearn", quietly = TRUE)) {
  install.packages("ElemStatLearn")
}
library(ElemStatLearn)
library(klaR)
library(caret)

# Resampling setup: 10-fold cross-validation repeated 5 times.
# twoClassSummary reports ROC (area under the curve), sensitivity and
# specificity, and needs class probabilities.
myControl <- trainControl(
  method = "repeatedcv", # was "repeatcv", which caret does not recognise
  number = 10,
  repeats = 5,
  summaryFunction = twoClassSummary,
  classProbs = TRUE, # IMPORTANT!
  verboseIter = TRUE
)

# Modelling data. The original call used `data = wine`, a dataset that is not
# defined in this script and has no bugCovering column; the per-question
# summary used by the other models is used instead.
# NOTE(review): `bugCovering ~ .` uses every other column as a predictor,
# including identifiers such as Question.ID - confirm the intended predictors.
caretData <- as.data.frame(summaryTable)
# classProbs = TRUE requires a factor outcome whose levels are valid R names
# (e.g. "TRUE"/"FALSE" or "0"/"1" are rejected), hence make.names().
# twoClassSummary treats the first factor level as the event of interest.
caretData$bugCovering <- factor(
  make.names(as.character(caretData$bugCovering))
)

# Tune a ranger random forest, selecting the model by area under the ROC
# curve. The original call ended with a trailing comma (an empty argument),
# which has been removed.
model <- train(
  bugCovering ~ .,
  data = caretData,
  method = "ranger",
  metric = "ROC",
  tuneLength = 4,
  trControl = myControl
)