
Commit 72ba4de

Add files via upload
1 parent ddf8fbb commit 72ba4de

File tree

2 files changed: +1003, -0 lines changed


Diff for: generate_results/interpreter_error.R

+370
@@ -0,0 +1,370 @@
#Import libraries
library(readxl)
library(sf)
library(caret)

#Define function for calculating the mode
calculate.mode <- function(vector_object, map_scale) {
  
  #Order the columns by interpreter experience
  if (map_scale == "fine") {
    vector_object <- vector_object[c(1, 5, 4, 3, 6, 2)]
  } else {
    vector_object <- vector_object[c(4, 3, 1, 6, 2, 1)]
  }
  
  #Identify the classes for each data point
  unique_values <- apply(vector_object, 1, unique)
  
  #Identify the frequency with which each class was mapped
  frequency_values <- apply(vector_object, 1, table)
  
  #Sort the frequencies in decreasing order
  sorted_list <- lapply(frequency_values, function(x) sort(x, decreasing = TRUE))
  
  #Check if there were ties
  ties <- lapply(sorted_list, function(x) return(any(x[-1] == x[1])))
  
  #Identify the elements with ties
  tied_elements <- which(unlist(ties))
  
  #Count how many classes are tied with the most frequent class for each data point
  ties_count <- lapply(sorted_list, function(x) return(sum(x[-1] == x[1])))
  
  #Count how many data points have a tie for the mode
  length_tie <- length(which(ties_count != 0))
  
  if (length_tie > 1) {
    
    #Create list for storing the aggregated experience for the tied groups
    experience_sum <- list()
    
    tied_indices <- which(ties_count != 0)
    
    #Create vector for storing the index for the mode
    mode_index <- numeric()
    
    for (i in 1:length_tie) {
      
      tie_result <- sorted_list[tied_elements][[i]]
      
      match_tie <- match(vector_object[tied_elements, ][i, ], names(tie_result))
      
      sum_tie <- numeric()
      for (k in 1:length(tie_result)) {
        sum_tie[k] <- sum(which(match_tie == k))
      }
      experience_sum[[i]] <- sum_tie
      
      #Identify mode object
      mode_index[i] <- which.min(sum_tie)
      
      sorted_list[tied_elements][[i]] <- sorted_list[tied_elements][[i]][mode_index[i]]
      
      length_experience_tie <- list()
      
      #Check how many classes are tied with the minimum experience sum
      length_experience_tie[[i]] <- length(which(experience_sum[[i]] == min(experience_sum[[i]])))
      
      if (length_experience_tie[[i]] > 1) {
        
        #Create list for storing the index of the group with the most experienced interpreter
        max_experience <- list()
        for (j in 1:length_experience_tie[[i]]) {
          
          #Store the ranked experience in each group
          max_experience[[i]] <- which.min(which(vector_object[tied_indices[[i]], ] %in% unique_values[tied_indices[i]][[1]][1:(ties_count[[tied_indices[i]]] + 1)]))
          
          names(sorted_list[tied_elements][[i]]) <- as.character(vector_object[tied_indices[[i]], ][max_experience[[i]]])
        }
      }
    }
  }
  
  #Extract the most frequent class for each element in the list
  mode <- unlist(sapply(sorted_list, function(x) names(x)[1]))
  
  mode <- as.character(mode)
  
  #Return the mode class
  return(mode)
}
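
#Illustrative usage sketch (not part of the original commit): with a hypothetical toy data
#frame of six interpreter label columns (one row per test point), calculate.mode() should
#return the most frequent class per row; the class labels below are made up for illustration.
#example_labels <- data.frame(A5 = c("T4", "V1"), F5 = c("T4", "V1"), D5 = c("T2", "V1"),
#                             C5 = c("T4", "T2"), B5 = c("T2", "T2"), E5 = c("T4", "T32"))
#calculate.mode(example_labels, "fine")   #expected: c("T4", "V1")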

#Define function for renaming columns depending on map scale
conversion.classes <- function(data, interpreter_scale, map_scale) {
  for (i in interpreter_scale) {
    
    #Rename columns by adding suffix depending on the map scale for the interpreter
    colnames(data)[grepl(paste("gtype1_", i, sep = ""), colnames(data))] <- paste("gtype1_", map_scale, "_", i, sep = "")
  }
  return(data)
}
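
#Illustrative sketch (not part of the original commit): for a hypothetical data frame df
#with columns "gtype1_A5" and "htype1_A5", conversion.classes(df, c("A5"), "5") renames
#"gtype1_A5" to "gtype1_5_A5" and leaves "htype1_A5" unchanged, because only gtype1
#columns are matched by the grepl() pattern.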

#Define function for converting from character labels to integer values
convert.labels <- function(x, y) {
  as.integer(factor(x, levels = y))
}
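
#Worked example (illustrative, with made-up class labels):
#convert.labels(c("T4", "V1", "T4"), y = c("T2", "T4", "V1")) returns c(2L, 3L, 2L);
#labels that do not occur in the conversion key y become NA.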

#Define function to compute ecological distance
compute.ED <- function(x, test_data, column, test_predictions) {
  conversion_list[[which(hierarchical_level == column)]][test_predictions[x], test_data[, column][x]]
}
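
#Note: compute.ED depends on the global objects conversion_list and hierarchical_level
#defined further down; for observation x it looks up the ecological distance between the
#predicted class (test_predictions[x]) and the reference class (test_data[,column][x]).
#Illustrative call (mirroring its use at the end of this script):
#sapply(1:nrow(test_data), compute.ED, test_data, "htype1", interpreter_vector)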

#Set random seed
set.seed(123)

#Set working directory
setwd("C:/Users/adamen/OneDrive - Universitetet i Oslo/documents/Doktorgrad/Artikkel 4/R/")

#Specify file paths
file_paths <- list.files("C:/Users/adamen/OneDrive - Universitetet i Oslo/documents/Doktorgrad/Artikkel 4/ED/", pattern = "xlsx$", full.names = TRUE)

#Create list for class codes
conversion_list <- list()

#Import the files with ecological distance matrices and store them in a list
for (i in 1:length(file_paths)) {
  conversion_list[[i]] <- as.data.frame(read_xlsx(file_paths[i]))
}

#Specify file paths
file_paths <- list.files("C:/Users/adamen/OneDrive - Universitetet i Oslo/documents/Doktorgrad/Artikkel 4/ED/", pattern = "xlsx$", full.names = TRUE)

#Create list for class names
conversion_schemes <- list()

#Import the files and store their class names (column headers) in a list
for (i in 1:length(file_paths)) {
  conversion_schemes[[i]] <- colnames(as.data.frame(read_xlsx(file_paths[i])))
}

#Import test data
test_data <- read.csv("data/processed/test/test_data.csv")[-c(1)]

#Specify column names
test_labels <- colnames(test_data)[1:4]

#Convert test labels from numeric to factor
for (i in test_labels) {
  test_data[, i] <- as.factor(test_data[, i])
}

#Specify file names
interpreters <- c("A5","F5","D5","C5","B5","E5","I20","K20","H20","G20","L20","J20")

#Create list for storing vector layers
interpreter_list <- list()

#Import and reproject vector data
for (i in 1:length(interpreters)) {
  
  #Import vectors
  interpreter_list[[i]] <- st_read(dsn = "data/raw/target/", layer = interpreters[i])
  
  #Reproject vectors
  interpreter_list[[i]] <- st_transform(interpreter_list[[i]], crs = "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs")
}

#Import data
vector_layer <- st_read(dsn = "data/raw/test/", layer = "consensus")

#Reproject data
vector_layer <- st_transform(vector_layer, crs = "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs")

#Obtain coordinates for the vector
coords <- st_coordinates(vector_layer)

#Create list for storing interpreter matrices
interpreter_matrices <- list()

#Find interpreter results for test data points
for (i in 1:length(interpreters)) {
  
  #Find intersection between interpreter polygons and test points
  intersection <- st_intersection(interpreter_list[[i]], vector_layer)
  
  #Select columns with classes
  interpreter_matrices[[i]] <- as.data.frame(intersection[, c("gtype1","htype1","htypegr1")])[, c(1, 2, 3)]
  
  #Reset the row names
  rownames(interpreter_matrices[[i]]) <- NULL
  
  #Add interpreter to column names
  colnames(interpreter_matrices[[i]]) <- paste(colnames(interpreter_matrices[[i]]), "_", interpreters[i], sep = "")
}

#Bind interpreter matrices
interpreter_matrix <- do.call(cbind, interpreter_matrices)
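
#At this point interpreter_matrix holds one row per test point (assuming every test point
#intersects exactly one polygon per interpreter) and 36 columns: the gtype1_/htype1_/htypegr1_
#labels for each of the 12 interpreters.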

#Specify hierarchical levels
hierarchical_levels <- c("gtype1","htype1","htypegr1")

#Specify interpreters with 1:5000 maps
fine <- c("A5","F5","D5","C5","B5","E5","X5")

#Specify interpreters with 1:20000 maps
coarse <- c("I20","K20","H20","G20","L20","J20","X20")

#Create mode classification for the hierarchical levels in each block
for (j in 1:length(hierarchical_levels)) {
  
  #Specify hierarchical level
  columns <- interpreter_matrix[c(grepl(hierarchical_levels[j], colnames(interpreter_matrix)))]
  
  #Select 1:5000 labels
  data_frame <- columns[, sub(paste(hierarchical_levels[j], "_", sep = ""), "", colnames(columns)) %in% fine]
  
  #Calculate the mode of all labels for each observation
  interpreter_matrix$mode_column <- calculate.mode(data_frame, "fine")
  
  #Rename the column
  colnames(interpreter_matrix)[ncol(interpreter_matrix)] <- paste(hierarchical_levels[j], "_X5", sep = "")
  
  #Select 1:20000 labels
  data_frame <- columns[, sub(paste(hierarchical_levels[j], "_", sep = ""), "", colnames(columns)) %in% coarse]
  
  #Calculate the mode of all labels for each observation
  interpreter_matrix$mode_column <- calculate.mode(data_frame, "coarse")
  
  #Rename the column
  colnames(interpreter_matrix)[ncol(interpreter_matrix)] <- paste(hierarchical_levels[j], "_X20", sep = "")
}
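
#The loop above appends six mode ("X5"/"X20") columns to interpreter_matrix:
#gtype1_X5, gtype1_X20, htype1_X5, htype1_X20, htypegr1_X5 and htypegr1_X20.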

#Specify hierarchical levels
hierarchical_levels <- c("gtype1_20","gtype1_5","htype1","htypegr1")

#Convert column names depending on map scale and convert character labels to integer values
for (i in 1:length(hierarchical_levels)) {
  
  #Convert column names for 1:5000 interpreters
  interpreter_matrix <- conversion.classes(interpreter_matrix, fine, "5")
  
  #Convert column names for 1:20000 interpreters
  interpreter_matrix <- conversion.classes(interpreter_matrix, coarse, "20")
  
  #Specify vector with conversion key
  conversion_scheme <- conversion_schemes[[i]]
  
  #Specify columns that will be converted
  columns <- grepl(hierarchical_levels[[i]], colnames(interpreter_matrix))
  
  #Apply function to convert from character to integer values
  converted_labels <- apply(interpreter_matrix[, columns], 2, convert.labels, conversion_scheme)
  
  #Convert to data frame
  interpreter_matrix[, columns] <- as.data.frame(converted_labels)
}

#Specify hierarchical levels used as keys for the ecological distance matrices
hierarchical_level <- c("gtype1_20","gtype1_5","htype1","htypegr1")

#Specify hierarchical levels
hierarchical_levels <- c("gtype1","htype1","htypegr1")

#Specify interpreters
interpreters <- c("A5","F5","D5","C5","B5","E5","X5","I20","K20","H20","G20","L20","J20","X20")

#Create data frame for storing results
output_matrix <- expand.grid(interpreter = interpreters,
                             hierarchicallevel = hierarchical_levels)

#Create column for map scale
output_matrix$mapscale <- ifelse(output_matrix$interpreter %in% c("A5","F5","D5","C5","B5","E5","X5"), 5, 20)

#Create column for interpreter error
output_matrix$interpreter_error <- NA

#Create column for interpreter ecological distance
output_matrix$interpreter_ED <- NA

#Create column for number of classes
output_matrix$interpreter_classes <- NA

#Create data frame for storing spatial error results
spatial_error <- as.data.frame(matrix(NA, nrow(output_matrix), nrow(test_data)))

#Create data frame for storing spatial ecological distance results
spatial_ED <- as.data.frame(matrix(NA, nrow(output_matrix), nrow(test_data)))

#Create list for storing confusion matrices
confusion_list <- list()

#Specify start row
start_row <- 1

#Specify end row
end_row <- nrow(output_matrix)

#Generate results for each interpreter
for (i in start_row:end_row) {
  
  #Specify interpreter
  interpreter <- output_matrix$interpreter[i]
  
  #Specify hierarchical level
  hierarchicallevel <- output_matrix$hierarchicallevel[i]
  
  #Specify map scale
  mapscale <- output_matrix$mapscale[i]
  
  #Select column based on interpreter, map scale, and hierarchical level
  column_name <- paste(hierarchicallevel, "_", interpreter, sep = "")
  if (hierarchicallevel == "gtype1") {
    if (mapscale == 5) {
      column_name <- paste(hierarchicallevel, "_5_", interpreter, sep = "")
      test <- as.factor(test_data[, c("gtype1_5")])
    } else {
      column_name <- paste(hierarchicallevel, "_20_", interpreter, sep = "")
      test <- as.factor(test_data[, c("gtype1_20")])
    }
  } else {
    test <- as.factor(test_data[, as.character(hierarchicallevel)])
  }
  
  #Convert the vector from numeric to factor
  interpreter_vector <- as.factor(interpreter_matrix[, column_name])
  
  #Harmonize the factor levels of the test data and the interpreter labels, and vice versa
  test <- factor(test, unique(c(levels(interpreter_vector), levels(test))))
  interpreter_vector <- factor(interpreter_vector, unique(c(levels(interpreter_vector), levels(test))))
  
  #Compute error
  output_matrix$interpreter_error[i] <- 1 - (sum(interpreter_vector == test) / length(interpreter_vector))
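  #For example (illustrative numbers): if an interpreter matches the reference label at
  #85 of 100 test points, interpreter_error = 1 - 85/100 = 0.15.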
  
  #Generate the confusion matrix
  confusion_matrix <- confusionMatrix(interpreter_vector, test)$table
  
  #Store the confusion matrix in a list
  confusion_list[[i]] <- confusion_matrix
  
  #Strip the interpreter suffix to get the hierarchical level (and map scale) key
  test_column <- gsub(paste("_", interpreter, sep = ""), "", column_name)
  
  #Specify the relevant ecological distance matrix
  ED_matrix <- conversion_list[[which(hierarchical_level == test_column)]][as.numeric(colnames(confusion_matrix)), as.numeric(colnames(confusion_matrix))]
  
  #Compute ecological distance
  output_matrix$interpreter_ED[i] <- sum(confusion_matrix * ED_matrix) / sum(confusion_matrix)
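  #This is the confusion-weighted mean ecological distance: each cell of the confusion
  #matrix is weighted by the ecological distance between its predicted and reference class,
  #and the sum is divided by the total number of test points.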
  
  #Store the number of classes
  output_matrix$interpreter_classes[i] <- length(levels(interpreter_vector))
  
  #Record agreement (1 = correct, 0 = error) for each test point
  spatial_error[i, ] <- as.numeric(interpreter_vector == test)
  
  #Compute ecological distance for each test point
  spatial_ED[i, ] <- sapply(1:length(test), compute.ED, test_data, test_column, interpreter_vector)
}

#Save files
#write.csv(output_matrix, "results/interpreter_results.csv")
#write.csv(spatial_error, "results/spatial_interpreter_error.csv")
#write.csv(spatial_ED, "results/spatial_interpreter_ED.csv")

#Save confusion matrices
#saveRDS(confusion_list, "results/interpreter_confusion.rds")
