1
+ # Import libraries
2
+ library(readxl )
3
+ library(sf )
4
+ library(caret )
5
+
6
# Define function for calculating the mode (majority vote) of interpreter labels
#
# vector_object: data frame with one column per interpreter (6 columns) and one
#                row per test point, holding character class labels.
# map_scale:     "fine" selects the 1:5000 interpreter ordering; any other
#                value selects the 1:20000 ordering. The ordering ranks the
#                interpreters by experience so ties can be broken in favour of
#                the classes chosen by more experienced interpreters.
# Returns a character vector with the modal class for each row.
calculate.mode <- function(vector_object, map_scale) {

  # Order the columns by interpreter experience (most experienced first)
  if (map_scale == "fine") {
    vector_object <- vector_object[c(1, 5, 4, 3, 6, 2)]
  } else {
    # NOTE(review): the original ordering was c(4, 3, 1, 6, 2, 1), which is not
    # a permutation of 1:6 (column 1 duplicated, column 5 dropped). Corrected
    # to a full permutation -- confirm the intended experience ranking.
    vector_object <- vector_object[c(4, 3, 1, 6, 2, 5)]
  }

  # Identify the classes observed for each data point (row)
  unique_values <- apply(vector_object, 1, unique)

  # Frequency with which each class was mapped for each row
  frequency_values <- apply(vector_object, 1, table)

  # Sort the class frequencies in decreasing order
  sorted_list <- lapply(frequency_values, function(x) sort(x, decreasing = TRUE))

  # Flag rows where other classes tie with the most frequent class
  ties <- lapply(sorted_list, function(x) return(any(x[-1] == x[1])))

  # Indices of the rows that contain a tie
  tied_elements <- which(unlist(ties))

  # Number of classes tied with the mode in each row
  ties_count <- lapply(sorted_list, function(x) return(sum(x[-1] == x[1])))

  # Number of rows that contain a tie
  # NOTE(review): ties are only resolved when more than one row is tied; a
  # single tied row keeps the first class in sort order -- confirm whether
  # "> 0" was intended here.
  length_tie <- length(which(ties_count != 0))

  if (length_tie > 1) {

    # Aggregated experience rank sums for the tied classes
    experience_sum <- list()

    tied_indices <- which(ties_count != 0)

    # Index of the winning (modal) class per tied row
    mode_index <- numeric()

    for (i in 1:length_tie) {

      tie_result <- sorted_list[tied_elements][[i]]

      # Map each interpreter's label to its position in the sorted frequencies
      match_tie <- match(vector_object[tied_elements, ][i, ], names(tie_result))

      # Sum the experience ranks (column positions) backing each class;
      # a lower sum means more experienced interpreters chose that class
      sum_tie <- numeric()
      for (k in 1:length(tie_result)) {
        sum_tie[k] <- sum(which(match_tie == k))
      }
      experience_sum[[i]] <- sum_tie

      # The class backed by the most experienced interpreters wins
      mode_index[i] <- which.min(sum_tie)

      sorted_list[tied_elements][[i]] <- sorted_list[tied_elements][[i]][mode_index[i]]

      length_experience_tie <- list()

      # Check how many classes are tied on the minimum experience sum
      length_experience_tie[[i]] <- length(which(experience_sum[[i]] == min(experience_sum[[i]])))

      if (length_experience_tie[[i]] > 1) {

        # Fall back to the single most experienced interpreter among the tied classes
        max_experience <- list()
        for (j in 1:length_experience_tie[[i]]) {

          # Rank of the most experienced interpreter whose label is among the tied classes
          max_experience[[i]] <- which.min(which(vector_object[tied_indices[[i]], ] %in% unique_values[tied_indices[i]][[1]][1:(ties_count[[tied_indices[i]]] + 1)]))

          names(sorted_list[tied_elements][[i]]) <- as.character(vector_object[tied_indices[[i]], ][max_experience[[i]]])
        }
      }
    }
  }

  # The mode is the first (highest-frequency) name in each sorted table
  mode <- unlist(sapply(sorted_list, function(x) names(x)[1]))

  mode <- as.character(mode)

  # Return the mode class
  return(mode)
}
# Define function for renaming columns depending on map scale
#
# data:              data frame whose columns follow the "gtype1_<interpreter>"
#                    naming convention.
# interpreter_scale: interpreter identifiers to rename (e.g. c("A5", "F5")).
# map_scale:         scale suffix to insert (e.g. "5" or "20").
# Returns the data frame with matching columns renamed to
# "gtype1_<scale>_<interpreter>".
conversion.classes <- function(data, interpreter_scale, map_scale) {
  for (interpreter in interpreter_scale) {
    # Insert the map-scale suffix into the matching column name
    old_pattern <- paste("gtype1_", interpreter, sep = "")
    new_name <- paste("gtype1_", map_scale, "_", interpreter, sep = "")
    colnames(data)[grepl(old_pattern, colnames(data))] <- new_name
  }
  return(data)
}
# Define function for converting from character labels to integer values
#
# x: character vector of class labels.
# y: character vector giving the full set of labels in their integer order.
# Returns an integer vector with the position of each label in y (NA if absent).
convert.labels <- function(x, y) {
  coded <- factor(x, levels = y)
  as.integer(coded)
}
# Define function to compute ecological distance for one test point
#
# x:                index of the test point.
# test_data:        data frame holding the reference class (integer codes).
# column:           name of the hierarchical level column in test_data.
# test_predictions: integer class codes predicted for the test points.
# Looks up the ecological distance between prediction and reference in the
# global conversion_list, selected via the global hierarchical_level vector.
compute.ED <- function(x, test_data, column, test_predictions) {
  ed_matrix <- conversion_list[[which(hierarchical_level == column)]]
  ed_matrix[test_predictions[x], test_data[, column][x]]
}
# Set random seed
set.seed(123)

# Set working directory
setwd("C:/Users/adamen/OneDrive - Universitetet i Oslo/documents/Doktorgrad/Artikkel 4/R/")

# Specify file paths for the ecological distance (ED) spreadsheets
file_paths <- list.files("C:/Users/adamen/OneDrive - Universitetet i Oslo/documents/Doktorgrad/Artikkel 4/ED/", pattern = "xlsx$", full.names = TRUE)

# Create list for the ecological distance matrices (class codes)
conversion_list <- list()

# Create list for the class names of each hierarchical level
conversion_schemes <- list()

# Import each file once, storing both the ED matrix and its class names
# (the original listed the same directory twice and re-read every spreadsheet)
for (i in 1:length(file_paths)) {
  conversion_list[[i]] <- as.data.frame(read_xlsx(file_paths[i]))
  conversion_schemes[[i]] <- colnames(conversion_list[[i]])
}
# Import test data (drop the row-index column written by write.csv)
test_data <- read.csv("data/processed/test/test_data.csv")[-c(1)]

# The first four columns hold the reference labels
test_labels <- colnames(test_data)[1:4]

# Convert the reference labels from numeric to factor
for (label_column in test_labels) {
  test_data[, label_column] <- as.factor(test_data[, label_column])
}
# Specify file names (interpreter map layers; the suffix gives the map scale)
interpreters <- c("A5", "F5", "D5", "C5", "B5", "E5", "I20", "K20", "H20", "G20", "L20", "J20")

# Create list for storing vector layers
interpreter_list <- list()

# Import and reproject the polygon layer of every interpreter
for (i in 1:length(interpreters)) {

  # Import vectors
  interpreter_list[[i]] <- st_read(dsn = "data/raw/target/", layer = interpreters[i])

  # Reproject vectors to UTM zone 32 (WGS84)
  interpreter_list[[i]] <- st_transform(interpreter_list[[i]], crs = "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs")
}
# Import the consensus test-point layer
vector_layer <- st_read(dsn = "data/raw/test/", layer = "consensus")

# Reproject to the same CRS as the interpreter layers
vector_layer <- st_transform(vector_layer, crs = "+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs")

# Obtain coordinates for the vector
# NOTE(review): coords is not referenced again in this script -- confirm
# whether it can be removed.
coords <- st_coordinates(vector_layer)
# Create list for storing interpreter matrices
interpreter_matrices <- list()

# Extract each interpreter's labels at the test data points
for (i in 1:length(interpreters)) {

  # Intersect the interpreter polygons with the test points
  intersection <- st_intersection(interpreter_list[[i]], vector_layer)

  # Keep only the three class columns
  interpreter_matrices[[i]] <- as.data.frame(intersection[, c("gtype1", "htype1", "htypegr1")])[, c(1, 2, 3)]

  # Reset the row names
  rownames(interpreter_matrices[[i]]) <- NULL

  # Tag the column names with the interpreter identifier
  colnames(interpreter_matrices[[i]]) <- paste(colnames(interpreter_matrices[[i]]), "_", interpreters[i], sep = "")
}

# Bind the interpreter matrices column-wise into one data frame
interpreter_matrix <- do.call(cbind, interpreter_matrices)
# Specify hierarchical levels
hierarchical_levels <- c("gtype1", "htype1", "htypegr1")

# Interpreters working on 1:5000 maps (X5 is the consensus mode added below)
fine <- c("A5", "F5", "D5", "C5", "B5", "E5", "X5")

# Interpreters working on 1:20000 maps (X20 is the consensus mode added below)
coarse <- c("I20", "K20", "H20", "G20", "L20", "J20", "X20")

# Add a mode ("consensus") classification per map scale and hierarchical level
for (j in 1:length(hierarchical_levels)) {

  # Columns belonging to this hierarchical level
  columns <- interpreter_matrix[c(grepl(hierarchical_levels[j], colnames(interpreter_matrix)))]

  # Select the 1:5000 labels
  data_frame <- columns[, sub(paste(hierarchical_levels[j], "_", sep = ""), "", colnames(columns)) %in% fine]

  # Calculate the mode of all 1:5000 labels for each observation
  interpreter_matrix$mode_column <- calculate.mode(data_frame, "fine")

  # Rename the new column to <level>_X5
  colnames(interpreter_matrix)[ncol(interpreter_matrix)] <- paste(hierarchical_levels[j], "_X5", sep = "")

  # Select the 1:20000 labels
  data_frame <- columns[, sub(paste(hierarchical_levels[j], "_", sep = ""), "", colnames(columns)) %in% coarse]

  # Calculate the mode of all 1:20000 labels for each observation
  interpreter_matrix$mode_column <- calculate.mode(data_frame, "coarse")

  # Rename the new column to <level>_X20
  colnames(interpreter_matrix)[ncol(interpreter_matrix)] <- paste(hierarchical_levels[j], "_X20", sep = "")
}
# Hierarchical levels matching the order of the ED spreadsheets
hierarchical_levels <- c("gtype1_20", "gtype1_5", "htype1", "htypegr1")

# Rename the geotype columns once: the renaming is loop-invariant and
# idempotent, so repeating it on every loop iteration (as the original did)
# only re-scans the column names without changing anything
interpreter_matrix <- conversion.classes(interpreter_matrix, fine, "5")
interpreter_matrix <- conversion.classes(interpreter_matrix, coarse, "20")

# Convert character labels to integer codes for each hierarchical level
for (i in 1:length(hierarchical_levels)) {

  # Conversion key: class names in the order used by the ED matrices
  # NOTE(review): assumes conversion_schemes is ordered like
  # hierarchical_levels -- confirm the file order in the ED directory
  conversion_scheme <- conversion_schemes[[i]]

  # Columns that belong to this hierarchical level
  columns <- grepl(hierarchical_levels[[i]], colnames(interpreter_matrix))

  # Convert from character labels to integer codes
  converted_labels <- apply(interpreter_matrix[, columns], 2, convert.labels, conversion_scheme)

  # Write the converted values back as a data frame
  interpreter_matrix[, columns] <- as.data.frame(converted_labels)
}
# Hierarchical levels as named in the ED spreadsheets / test data columns
hierarchical_level <- c("gtype1_20", "gtype1_5", "htype1", "htypegr1")

# Hierarchical levels as named in the interpreter columns
hierarchical_levels <- c("gtype1", "htype1", "htypegr1")

# All interpreters, including the two consensus modes (X5 and X20)
interpreters <- c("A5", "F5", "D5", "C5", "B5", "E5", "X5", "I20", "K20", "H20", "G20", "L20", "J20", "X20")

# One result row per interpreter and hierarchical level
output_matrix <- expand.grid(interpreter = interpreters,
                             hierarchicallevel = hierarchical_levels)

# Map scale: 5 for the 1:5000 interpreters, 20 otherwise
output_matrix$mapscale <- ifelse(output_matrix$interpreter %in% c("A5", "F5", "D5", "C5", "B5", "E5", "X5"), 5, 20)

# Create column for interpreter error
output_matrix$interpreter_error <- NA

# Create column for interpreter ecological distance
output_matrix$interpreter_ED <- NA

# Create column for number of classes
output_matrix$interpreter_classes <- NA

# Per-test-point error results (one row per output_matrix row)
spatial_error <- as.data.frame(matrix(NA, nrow(output_matrix), nrow(test_data)))

# Per-test-point ecological distance results
spatial_ED <- as.data.frame(matrix(NA, nrow(output_matrix), nrow(test_data)))

# Create list for storing confusion matrices
confusion_list <- list()

# Specify start row
start_row <- 1

# Specify end row
end_row <- nrow(output_matrix)
# Generate results for each interpreter / hierarchical level combination
for (i in start_row:end_row) {

  # Current interpreter, hierarchical level, and map scale
  interpreter <- output_matrix$interpreter[i]
  hierarchicallevel <- output_matrix$hierarchicallevel[i]
  mapscale <- output_matrix$mapscale[i]

  # Select the interpreter column and the matching reference labels; the
  # geotype level (gtype1) is split by map scale, the other levels are not
  column_name <- paste(hierarchicallevel, "_", interpreter, sep = "")
  if (hierarchicallevel == "gtype1") {
    if (mapscale == 5) {
      column_name <- paste(hierarchicallevel, "_5_", interpreter, sep = "")
      test <- as.factor(test_data[, c("gtype1_5")])
    }
    else {
      column_name <- paste(hierarchicallevel, "_20_", interpreter, sep = "")
      test <- as.factor(test_data[, c("gtype1_20")])
    }
  } else {
    test <- as.factor(test_data[, as.character(hierarchicallevel)])
  }

  # Interpreter labels as a factor
  interpreter_vector <- as.factor(interpreter_matrix[, column_name])

  # Harmonize the factor levels of reference and interpreter labels
  # (order matters: test is re-levelled first, then the interpreter vector)
  test <- factor(test, unique(c(levels(interpreter_vector), levels(test))))
  interpreter_vector <- factor(interpreter_vector, unique(c(levels(interpreter_vector), levels(test))))

  # Overall error rate (1 - accuracy)
  output_matrix$interpreter_error[i] <- 1 - (sum(interpreter_vector == test) / length(interpreter_vector))

  # Confusion matrix between interpreter and reference labels
  confusion_matrix <- confusionMatrix(interpreter_vector, test)$table

  # Store the confusion matrix in a list
  confusion_list[[i]] <- confusion_matrix

  # Column name of the reference labels (interpreter suffix removed)
  test_column <- gsub(paste("_", interpreter, sep = ""), "", column_name)

  # Ecological distances between the classes present in the confusion matrix
  ED_matrix <- conversion_list[[which(hierarchical_level == test_column)]][as.numeric(colnames(confusion_matrix)), as.numeric(colnames(confusion_matrix))]

  # Mean ecological distance weighted by the confusion matrix counts
  output_matrix$interpreter_ED[i] <- sum(confusion_matrix * ED_matrix) / sum(confusion_matrix)

  # Number of classes used by the interpreter
  output_matrix$interpreter_classes[i] <- length(levels(interpreter_vector))

  # Agreement indicator for each individual test point
  spatial_error[i, ] <- as.numeric(interpreter_vector == test)

  # Ecological distance for each individual test point
  spatial_ED[i, ] <- sapply(1:length(test), compute.ED, test_data, test_column, interpreter_vector)
}
# Save files
# write.csv(output_matrix, "results/interpreter_results.csv")
# write.csv(spatial_error, "results/spatial_interpreter_error.csv")
# write.csv(spatial_ED, "results/spatial_interpreter_ED.csv")

# Save confusion matrices
# saveRDS(confusion_list, "results/interpreter_confusion.rds")