-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaggregateVotes.R
89 lines (69 loc) · 2.82 KB
/
aggregateVotes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
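# Scoring functions for all vote aggregations:
# rankingVote: ranks questions by their number of YES votes
# majorityVote: difference between the number of YES and NO votes
# positiveByNegativeVote: ratio of the number of YES votes to NO votes
# thresholdVote: whether the number of YES votes is above a given threshold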
#' Add the majority-vote score to a summary table.
#'
#' @param summaryTable Table with `Yes.Count` and `No.Count` columns.
#' @return `summaryTable` with a `majorityVote` column holding
#'   `Yes.Count - No.Count` for every row.
scoreMajorityVote <- function(summaryTable) {
  voteBalance <- summaryTable$Yes.Count - summaryTable$No.Count
  summaryTable["majorityVote"] <- voteBalance
  summaryTable
}
#' Add the YES-to-NO ratio score to a summary table.
#'
#' Rows whose `No.Count` is zero follow R's arithmetic rules and yield `Inf`
#' (or `NaN` when `Yes.Count` is zero as well).
#'
#' @param summaryTable Table with `Yes.Count` and `No.Count` columns.
#' @return `summaryTable` with a `positiveByNegativeVote` column holding
#'   `Yes.Count / No.Count` for every row.
scorePositiveByNegativeVote <- function(summaryTable) {
  yesToNoRatio <- summaryTable$Yes.Count / summaryTable$No.Count
  summaryTable["positiveByNegativeVote"] <- yesToNoRatio
  summaryTable
}
#' Flag the rows whose number of YES votes exceeds a threshold.
#'
#' @param summaryTable Table with a `Yes.Count` column.
#' @param threshold Value that `Yes.Count` must be strictly greater than.
#' @return `summaryTable` with a logical `thresholdVote` column that is `TRUE`
#'   where `Yes.Count > threshold`.
scoreThresholdVote <- function(summaryTable, threshold) {
  exceedsThreshold <- summaryTable[, "Yes.Count"] > threshold
  summaryTable["thresholdVote"] <- exceedsThreshold
  summaryTable
}
# Provides the list of questions that have to be considered
# and the list of questions that cover bugs
#' Rank the questions of each Java method by their number of YES votes.
#'
#' Within every method the distinct `Yes.Count` values are ordered from highest
#' to lowest and each question receives the position of its value in that
#' order (rank 1 = most YES votes; questions with equal counts share a rank).
#' Methods that have no rows in `summaryTable` are skipped, and rows that
#' belong to no listed method keep `NA` (or their previous `rankingVote`).
#'
#' @param summaryTable Table with `JavaMethod`, `Yes.Count` and `Question.ID`
#'   columns. Ranks are written to row `Question.ID + 1`, so the table is
#'   assumed to hold one row per question ordered by a zero-based
#'   `Question.ID` -- TODO confirm against the caller.
#' @param javaMethods Values of `JavaMethod` to rank. Defaults to `1:8`, the
#'   range that was previously hard-coded.
#' @return `summaryTable` with an integer `rankingVote` column.
scoreRanking <- function(summaryTable, javaMethods = 1:8) {
  # Create the column up front so it exists even when no method matches.
  if (!("rankingVote" %in% names(summaryTable))) {
    summaryTable[["rankingVote"]] <- rep(NA_integer_, nrow(summaryTable))
  }

  for (method in javaMethods) {
    # which() drops NA comparisons, so rows with a missing JavaMethod are
    # ignored instead of turning into all-NA rows.
    methodRows <- which(summaryTable$JavaMethod == method)
    if (length(methodRows) == 0L) {
      next
    }

    yesCounts <- summaryTable$Yes.Count[methodRows]
    # sort() drops NA, so a missing Yes.Count gets an NA rank.
    rankedLevels <- sort(unique(yesCounts), decreasing = TRUE)
    ranks <- match(yesCounts, rankedLevels)

    targetRows <- summaryTable$Question.ID[methodRows] + 1
    summaryTable[targetRows, "rankingVote"] <- ranks
  }

  summaryTable
}
# Rank questions based on the number of YES votes
#' Assign a rank to every question according to its number of YES votes.
#'
#' @param selection Table with a `Yes.Count` column.
#' @param labels Two-column matrix: column 1 holds `Yes.Count` values and
#'   column 2 the rank to assign to rows with that value. May have zero rows.
#' @return `selection` with a `rankingVote` column; rows whose `Yes.Count`
#'   matches no label keep `NA` (or their previous `rankingVote`).
rankQuestions <- function(selection, labels) {
  # Create the column up front so it exists even when `labels` is empty.
  if (!("rankingVote" %in% names(selection))) {
    selection[["rankingVote"]] <- rep(NA, nrow(selection))
  }

  # seq_len() yields an empty loop for zero labels, whereas 1:0 would visit
  # the non-existent rows 1 and 0.
  for (i in seq_len(nrow(labels))) {
    matchedRows <- which(selection$Yes.Count == labels[i, 1])
    selection[matchedRows, "rankingVote"] <- labels[i, 2]
  }

  selection
}
#######################################
#Select top questions within each JavaMethod
#' Select the highest-utility questions of each failing method.
#'
#' For every distinct `FailingMethod`, the questions sampled for that method
#' are looked up in `utility_Table`, the `questionsToSelect` largest distinct
#' utility values are determined, and every question whose utility reaches the
#' smallest of those values is kept. Ties can therefore return more than
#' `questionsToSelect` questions per method.
#'
#' @param utility_Table Table with a `utility` column. Its rows are indexed
#'   with the `Question.ID` values themselves -- presumably row i (or row name
#'   i) describes question i; verify against the caller.
#' @param sampled_dataf Table with `FailingMethod` and `Question.ID` columns.
#' @param questionsToSelect Number of distinct top utility values to keep per
#'   method; values below 1 select nothing.
#' @return A data frame with the selected rows of `utility_Table` for all
#'   methods stacked, or an empty list when `sampled_dataf` has no methods.
selectTopQuestionsByJavaMethod <- function(utility_Table,
                                           sampled_dataf,
                                           questionsToSelect) {
  selectedPerMethod <- list()

  for (javaMethod in unique(sampled_dataf$FailingMethod)) {
    # which() ignores NA comparisons instead of producing all-NA rows.
    methodRows <- which(sampled_dataf$FailingMethod == javaMethod)
    questionList <- unique(sampled_dataf$Question.ID[methodRows])
    utilitySelection <- utility_Table[questionList, , drop = FALSE]

    # Distinct utilities, best first; sort() drops missing values.
    utilityValues <- sort(unique(utilitySelection$utility), decreasing = TRUE)
    # Fewer distinct values than requested: take all of them.
    topCount <- min(length(utilityValues), questionsToSelect)

    if (topCount >= 1) {
      cutoff <- utilityValues[topCount]
      keepRows <- which(utilitySelection$utility >= cutoff)
    } else {
      keepRows <- integer(0)
    }

    selectedPerMethod[[length(selectedPerMethod) + 1L]] <-
      data.frame(utilitySelection[keepRows, , drop = FALSE])
  }

  if (length(selectedPerMethod) == 0L) {
    return(list())
  }
  # Bind once at the end rather than growing a data frame inside the loop.
  do.call(rbind, selectedPerMethod)
}