|
# Seed the random number generator so repeated runs give the same results
set.seed(seed = 123)

# Make the project folder the working directory; the relative "results/..."
# paths read further down in this script are resolved against it
setwd(dir = "C:/Users/adamen/OneDrive - Universitetet i Oslo/documents/Doktorgrad/Artikkel 4/R/")
| 6 | + |
# Aggregate a set of confusion matrices into one mean confusion matrix.
#
# Each selected confusion matrix may cover only a subset of the classes. Its
# row and column names are read as numeric class indices and used to place its
# counts in a full-size square template before averaging.
#
# Arguments:
#   ED_matrix       Object whose number of columns gives the number of classes
#                   (only ncol(ED_matrix) is used).
#   confusion_list  List of confusion matrices whose dimnames are numeric
#                   class indices stored as strings.
#   elements        Indices into confusion_list of the matrices to aggregate.
#   progress        If TRUE, print the fraction of matrices processed so far.
#
# Returns:
#   A numeric ncol(ED_matrix) x ncol(ED_matrix) matrix holding the cell-wise
#   mean of the selected matrices. If `elements` is empty, a warning is raised
#   and an all-NA matrix of that size is returned.
confusion.matrix <- function(ED_matrix, confusion_list, elements, progress = FALSE) {

  n_classes <- ncol(ED_matrix)
  n_models <- length(elements)

  # Nothing to average: report it instead of failing with a cryptic error
  if (n_models == 0L) {
    warning("No confusion matrices selected; returning an all-NA matrix.",
            call. = FALSE)
    return(matrix(NA_real_, n_classes, n_classes))
  }

  # One full-size slice per selected confusion matrix
  confusion_array <- array(0, dim = c(n_classes, n_classes, n_models))

  for (k in seq_along(elements)) {

    # Template with all classes for storing confusion matrix k
    confusion_template <- matrix(0, n_classes, n_classes)
    confusion_matrix <- confusion_list[[elements[k]]]

    # Parse the class indices once per matrix rather than once per cell
    row_index <- as.numeric(rownames(confusion_matrix))
    col_index <- as.numeric(colnames(confusion_matrix))

    # Cell-by-cell accumulation so repeated class labels add up
    for (i in seq_len(ncol(confusion_matrix))) {
      for (j in seq_len(nrow(confusion_matrix))) {
        confusion_template[row_index[j], col_index[i]] <-
          confusion_template[row_index[j], col_index[i]] + confusion_matrix[j, i]
      }
    }

    confusion_array[, , k] <- confusion_template

    if (isTRUE(progress)) {
      print(k / n_models)
    }
  }

  # Aggregate the confusion matrices with the cell-wise mean
  apply(confusion_array, c(1, 2), mean)
}
| 49 | + |
# Locate the Excel workbooks that hold the class conversion keys
file_paths <- list.files(
  "C:/Users/adamen/OneDrive - Universitetet i Oslo/documents/Doktorgrad/Artikkel 4/ED/",
  pattern = "xlsx$",
  full.names = TRUE
)

# Fail early with a clear message; later steps index conversion_list[[1]]..[[4]]
if (length(file_paths) == 0) {
  stop("No .xlsx conversion files were found in the ED directory.", call. = FALSE)
}

# Read every workbook once and keep it as a plain data frame
conversion_list <- lapply(file_paths, function(path) {
  as.data.frame(readxl::read_xlsx(path))
})

# Column names of each workbook, taken from the tables already in memory
# instead of reading every file a second time
conversion_schemes <- lapply(conversion_list, colnames)
| 71 | + |
# Stored confusion matrices: one object for the classifiers, one for the interpreters
classifier_confusion <- readRDS(file = "results/classifier_confusion.rds")
interpreter_confusion <- readRDS(file = "results/interpreter_confusion.rds")

# Result tables for the classifiers and the interpreters; the first column of
# each file is dropped. The name `classfier_data` (sic) is kept because later
# code refers to it.
classfier_data <- read.csv(file = "results/results.csv")[, -1]
interpreter_data <- read.csv(file = "results/interpreter_results.csv")[, -1]
| 83 | + |
# Data frame with one row per interpreter / hierarchical level combination
# (the first two columns of the interpreter results)
data_frame_classifier <- interpreter_data[, 1:2]

# One aggregated classifier confusion matrix per combination (preallocated)
aggregated_classifier_list <- vector("list", nrow(data_frame_classifier))

for (h in seq_len(nrow(data_frame_classifier))) {

  current_level <- data_frame_classifier$hierarchicallevel[h]
  current_interpreter <- data_frame_classifier$interpreter[h]

  # Pick the conversion key for this hierarchical level; within "gtype1" the
  # interpreters listed below use the second key instead of the first.
  # An unrecognised level stops the script rather than silently reusing the
  # key left over from the previous iteration.
  if (current_level == "gtype1") {
    if (current_interpreter %in% c("A5", "F5", "D5", "C5", "B5", "E5", "X5")) {
      conversion_scheme <- conversion_list[[2]]
    } else {
      conversion_scheme <- conversion_list[[1]]
    }
  } else if (current_level == "htype1") {
    conversion_scheme <- conversion_list[[3]]
  } else if (current_level == "htypegr1") {
    conversion_scheme <- conversion_list[[4]]
  } else {
    stop("Unknown hierarchical level in row ", h, ": ",
         as.character(current_level), call. = FALSE)
  }

  # Rows of the classifier results that match both the interpreter and the
  # hierarchical level of combination h
  rows <- which(
    classfier_data$interpreter == current_interpreter &
      classfier_data$hierarchicallevel == current_level
  )

  # Aggregate the confusion matrices of the matching classifiers
  aggregated_classifier_list[[h]] <- confusion.matrix(
    conversion_scheme, classifier_confusion, rows, progress = FALSE
  )
}
| 116 | + |
# Data frame with one row per interpreter / hierarchical level combination
# (the first two columns of the interpreter results)
data_frame_interpreter <- interpreter_data[, 1:2]

# One interpreter confusion matrix per combination (preallocated)
aggregated_interpreter_list <- vector("list", nrow(data_frame_interpreter))

for (h in seq_len(nrow(data_frame_interpreter))) {

  current_level <- data_frame_interpreter$hierarchicallevel[h]
  current_interpreter <- data_frame_interpreter$interpreter[h]

  # Pick the conversion key for this hierarchical level; within "gtype1" the
  # interpreters listed below use the second key instead of the first.
  # An unrecognised level stops the script rather than silently reusing the
  # key left over from the previous iteration.
  if (current_level == "gtype1") {
    if (current_interpreter %in% c("A5", "F5", "D5", "C5", "B5", "E5", "X5")) {
      conversion_scheme <- conversion_list[[2]]
    } else {
      conversion_scheme <- conversion_list[[1]]
    }
  } else if (current_level == "htype1") {
    conversion_scheme <- conversion_list[[3]]
  } else if (current_level == "htypegr1") {
    conversion_scheme <- conversion_list[[4]]
  } else {
    stop("Unknown hierarchical level in row ", h, ": ",
         as.character(current_level), call. = FALSE)
  }

  # Combination h uses exactly one interpreter confusion matrix: element h
  rows <- h

  # Expand the interpreter confusion matrix to the full class template
  aggregated_interpreter_list[[h]] <- confusion.matrix(
    conversion_scheme, interpreter_confusion, rows, progress = FALSE
  )
}
| 145 | + |
# Column for the correlation estimate of each interpreter / hierarchical level
# combination
data_frame_classifier$confusion_correlation <- NA_real_

# Loop over all aggregated confusion matrices
for (i in seq_along(aggregated_interpreter_list)) {

  # Flatten the interpreter and classifier confusion matrices to vectors
  interpreter_confusion_vector <- as.numeric(aggregated_interpreter_list[[i]])
  classifier_confusion_vector <- as.numeric(aggregated_classifier_list[[i]])

  # Keep only the cells that are non-zero in at least one of the two matrices
  existent_values <- unique(c(which(interpreter_confusion_vector != 0),
                              which(classifier_confusion_vector != 0)))

  interpreter_cells <- interpreter_confusion_vector[existent_values]
  classifier_cells <- classifier_confusion_vector[existent_values]

  # cor.test() stops with an error when fewer than three complete pairs are
  # left; record NA for that combination instead of aborting the whole script
  n_complete <- sum(!is.na(interpreter_cells) & !is.na(classifier_cells))
  if (n_complete < 3) {
    warning("Combination ", i, " has fewer than 3 complete cell pairs; ",
            "correlation set to NA.", call. = FALSE)
    next
  }

  # Pearson correlation between the classifier and interpreter confusion
  data_frame_classifier$confusion_correlation[i] <- cor.test(
    interpreter_cells,
    classifier_cells,
    alternative = "two.sided",
    method = "pearson"
  )$estimate
}

# Print the results explicitly so they also appear when the script is run
# through source(); combinations recorded as NA above are left out
print(mean(data_frame_classifier$confusion_correlation, na.rm = TRUE))
print(sd(data_frame_classifier$confusion_correlation, na.rm = TRUE))
| 168 | + |
0 commit comments