forked from seanyx/lake-ice-classification
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClassification.Rmd
86 lines (61 loc) · 2.94 KB
/
Classification.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
---
title: "Classification"
author: "Xiao Yang"
date: "8/22/2019"
output: html_document
---
```{r setup, include=FALSE}
# Attach dependencies with library() so a missing package stops the knit here;
# require() only warns and returns FALSE, which postpones the failure to the
# first call that needs the package.
# randomForest supplies randomForest(), varImpPlot() and importance(), which
# the modelling code calls. It is attached first so that dplyr and ggplot2 keep
# precedence for the names they share with it (combine, margin).
library(randomForest)
library(tidyverse)
library(rpart)
```
```{r}
# Read the training samples and drop the `system:index` column.
dat <- read_csv("data/outputTraining_11042019.csv") %>%
  select(-`system:index`)
# mutate(scl_int = factor(scl_int, levels = 1:4, labels = c("Thin ice", "Cloud", "Opaque ice/snow", "Water")))

# Print the distinct values present in `class`.
distinct(dat, class)

# Classes kept for modelling; their order fixes the factor levels used below.
target_classes <- c("opaque_ice", "clear_ice", "FSI_clouds", "water", "snow")

# Keep only rows in the target classes with a non-zero fmask value, then derive
# the factor columns used for plotting, modelling and evaluation:
#   class_factor     - one display label per target class
#   class_factor_sim - simplified labels: opaque ice, clear ice and snow are
#                      merged into "Snow/Ice"
#   fmask_factor     - fmask codes 3 -> "Snow/Ice", 2 and 4 -> "Cloud",
#                      1 -> "Water"; any other code becomes NA
#   sensor_id        - third character of LANDSAT mapped 5/7/8 -> TM/ETM+/OLI;
#                      any other character becomes NA
#                      (assumes that character is the sensor digit - TODO
#                      confirm against the LANDSAT ID format)
datfil <- dat %>%
  filter(class %in% target_classes, fmask != 0) %>%
  mutate(
    class_factor = factor(
      class,
      levels = target_classes,
      labels = c("Opaque Ice", "Clear Ice", "Cloud", "Water", "Snow")
    ),
    class_factor_sim = factor(
      class,
      levels = target_classes,
      labels = c("Snow/Ice", "Snow/Ice", "Cloud", "Water", "Snow/Ice")
    ),
    fmask_factor = factor(
      fmask,
      levels = c(3, 2, 4, 1),
      labels = c("Snow/Ice", "Cloud", "Cloud", "Water")
    ),
    sensor_id = factor(
      substr(LANDSAT, start = 3, stop = 3),
      levels = c("5", "7", "8"),
      labels = c("TM", "ETM+", "OLI")
    )
  )
# filter(sensor_id == "TM")
# Exploratory boxplots: distribution of selected predictors per class.
# Nir_stdDev is shown against the detailed classes, Swir1 and Blue_log10_grad
# against the simplified classes.
ggplot(datfil) +
  geom_boxplot(aes(x = class_factor, y = Nir_stdDev))

ggplot(datfil) +
  geom_boxplot(aes(x = class_factor_sim, y = Swir1))

ggplot(datfil) +
  geom_boxplot(aes(x = class_factor_sim, y = Blue_log10_grad))
# One formula shared by both models so their predictor sets cannot drift apart.
class_formula <- class_factor_sim ~ Blue + Red + Green + Nir + Swir1 + Swir2 +
  Nir_stdDev + Blue_stdDev + Nir_log10_grad + Blue_log10_grad +
  WICI + hue + saturation + value + sensor_id

# Single classification tree on the simplified classes.
fit <- rpart(class_formula, data = datfil, method = "class")

# Random forest on the same formula. The randomForest functions are
# namespace-qualified because the setup chunk does not attach the package.
# importance = TRUE is needed so that the permutation (mean decrease in
# accuracy) measure requested below with type = 1 is actually computed.
fitf <- randomForest::randomForest(
  class_formula,
  data = datfil,
  importance = TRUE
)

# type = 2: mean decrease in node impurity, shown as a dot chart.
randomForest::varImpPlot(fitf, type = 2)
# type = 1: mean decrease in accuracy, printed as a table.
randomForest::importance(fitf, type = 1)
# Draw the fitted classification tree and label its nodes.
# FALSE is spelled out because F is an ordinary, reassignable binding.
plot(fit, uniform = FALSE, branch = 0, compress = FALSE,
  main = "Classification Tree for lake surface types")
text(fit, use.n = TRUE, cex = 0.8, all = FALSE)

printcp(fit) # display the complexity-parameter (cp) table
plotcp(fit) # visualize cross-validation results
summary(fit) # detailed summary of splits

# create additional plots
# par(mfrow=c(1,2)) # two plots on one page
# NOTE(review): rsq.rpart() is documented for "anova" (regression) trees; `fit`
# is a method = "class" tree, so expect a warning and treat these plots with
# care - confirm they are still wanted here.
rsq.rpart(fit)
# Blue vs. Blue_log10_grad for the samples with Swir1 below 0.0929, one panel
# per simplified class.
# NOTE(review): 0.0929 looks like a split threshold taken from the fitted tree
# - confirm and keep it in sync if the tree is refit.
ggplot(data = filter(datfil, Swir1 < 0.0929)) +
  geom_point(
    mapping = aes(x = Blue, y = Blue_log10_grad, color = class_factor_sim),
    size = 0.2,
    alpha = 0.3
  ) +
  facet_wrap(~class_factor_sim)
## confusion matrices against the labelled (simplified) classes
# Reference labels shared by both comparisons.
reference_sim_class <- datfil[["class_factor_sim"]]

# 1) rpart tree: predictions are made on datfil, the same rows passed to
#    predict() as newdata, so this is a resubstitution estimate.
predicted_sim_class <- predict(fit, newdata = datfil, type = "class")
caret::confusionMatrix(
  data = predicted_sim_class,
  reference = reference_sim_class
)

# 2) fmask: the fmask-derived labels are compared with the same reference.
fmask_sim_class <- datfil[["fmask_factor"]]
caret::confusionMatrix(
  data = fmask_sim_class,
  reference = reference_sim_class
)
```
## Example of using the R tree in Google Earth Engine (GEE)
https://code.earthengine.google.com/af98af92f61b76874d55b680e0382831
## Lab
```{r}
# Scratch experiment: conditional inference tree on the unfiltered data.
# library() is used instead of require() so a missing `party` installation
# stops here rather than failing later at ctree().
library(party)
# NOTE(review): the response is `scl_int` and `.` pulls in every other column
# of `dat` as a predictor. The factor conversion of `scl_int` is commented out
# where `dat` is loaded, so confirm that the column exists with the intended
# type and that all remaining columns are valid ctree() inputs.
fit_ctree <- ctree(scl_int ~ ., data = dat)
# plot(fit_ctree, main="Conditional Inference Tree for lake surface types")
```