Skip to content

Commit 27e5963

Browse files
Merge pull request #10 from trias-project/craete-new-cubes-with-kingdom
Create new cubes with kingdom
2 parents ca526b8 + c725759 commit 27e5963

File tree

5 files changed

+39
-35
lines changed

5 files changed

+39
-35
lines changed

data/raw/gbif_downloads.tsv

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
gbif_download_key input_checklist gbif_download_created gbif_download_status gbif_download_doi
2-
0003809-190415153152247 NA 2019-04-24 17:52:31 SUCCEEDED https://doi.org/10.15468/dl.7gwm6j
3-
0008507-190621201848488 NA 2019-07-09 08:31:24 SUCCEEDED https://doi.org/10.15468/dl.1eycss
4-
0030713-190918142434337 NA 2019-10-28 09:05:48 SUCCEEDED https://doi.org/10.15468/dl.g1z7y7
5-
0000537-200127171203522 NA 2020-01-28 14:23:32 SUCCEEDED https://doi.org/10.15468/dl.apwtzv
6-
0002883-200127171203522 NA 2020-02-01 18:40:05 SUCCEEDED https://doi.org/10.15468/dl.oztfun
7-
0003154-200613084148143 NA 2020-06-17 08:36:35 SUCCEEDED https://doi.org/10.15468/dl.97jyjt
8-
0003160-200613084148143 NA 2020-06-17 08:52:20 SUCCEEDED https://doi.org/10.15468/dl.as99qq
9-
0123848-200613084148143 NA 2020-11-27 14:29:15 SUCCEEDED https://doi.org/10.15468/dl.49ksep
10-
0124676-200613084148143 NA 2020-11-28 19:34:08 SUCCEEDED https://doi.org/10.15468/dl.v5xe9r
2+
0003809-190415153152247 NA 2019-04-24T17:52:31Z SUCCEEDED https://doi.org/10.15468/dl.7gwm6j
3+
0008507-190621201848488 NA 2019-07-09T08:31:24Z SUCCEEDED https://doi.org/10.15468/dl.1eycss
4+
0030713-190918142434337 NA 2019-10-28T09:05:48Z SUCCEEDED https://doi.org/10.15468/dl.g1z7y7
5+
0000537-200127171203522 NA 2020-01-28T14:23:32Z SUCCEEDED https://doi.org/10.15468/dl.apwtzv
6+
0002883-200127171203522 NA 2020-02-01T18:40:05Z SUCCEEDED https://doi.org/10.15468/dl.oztfun
7+
0003154-200613084148143 NA 2020-06-17T08:36:35Z SUCCEEDED https://doi.org/10.15468/dl.97jyjt
8+
0003160-200613084148143 NA 2020-06-17T08:52:20Z SUCCEEDED https://doi.org/10.15468/dl.as99qq
9+
0123848-200613084148143 NA 2020-11-27T14:29:15Z SUCCEEDED https://doi.org/10.15468/dl.49ksep
10+
0124676-200613084148143 NA 2020-11-28T19:34:08Z SUCCEEDED https://doi.org/10.15468/dl.v5xe9r
11+
0173465-210914110416597 2022-03-05T15:42:33Z SUCCEEDED https://doi.org/10.15468/dl.8aasr6
12+
0174724-210914110416597 2022-03-07T14:46:49Z SUCCEEDED https://doi.org/10.15468/dl.kmh5hs
13+
0174958-210914110416597 2022-03-07T17:10:00Z SUCCEEDED https://doi.org/10.15468/dl.b22yu3

src/1_download.Rmd

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ library(lubridate) # To work with dates
3737
We define the country for which we want to build the data cube:
3838

3939
```{r define_countries}
40-
countries <- c("LT")
40+
countries <- c("IT")
4141
```
4242

4343
## Basis of record
@@ -83,15 +83,15 @@ Trigger download:
8383

8484
```{r trigger_gbif_download}
8585
# Reuse existing download (comment to trigger new download)
86-
# gbif_download_key <- "0124676-200613084148143"
86+
gbif_download_key <- "0173465-210914110416597"
8787
8888
# Trigger new download (commented by default)
89-
gbif_download_key <- occ_download(
90-
pred_in("country", countries),
91-
pred_in("basisOfRecord", basis_of_record),
92-
pred_gte("year", year_begin),
93-
pred_lte("year", year_end),
94-
pred("hasCoordinate", hasCoordinate))
89+
# gbif_download_key <- occ_download(
90+
# pred_in("country", countries),
91+
# pred_in("basisOfRecord", basis_of_record),
92+
# pred_gte("year", year_begin),
93+
# pred_lte("year", year_end),
94+
# pred("hasCoordinate", hasCoordinate),
9595
# user = rstudioapi::askForPassword("GBIF username"),
9696
# pwd = rstudioapi::askForPassword("GBIF password"),
9797
# email = rstudioapi::askForPassword("Email address for notification")

src/2_create_db.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ library(RSQLite) # To interact with SQlite databases
3737
Define key returned in `1_download.Rmd` and country:
3838

3939
```{r define_key_countries}
40-
key <- "0003154-200613084148143"
41-
countries <- c("BE")
40+
key <- "0173465-210914110416597"
41+
countries <- c("IT")
4242
```
4343

4444
Download the occurrences from GBIF:

src/3_assign_grid.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ library(RSQLite) # To interact with SQlite databases
3535
Define key returned in `1_download.Rmd` and country:
3636

3737
```{r define_key_countries}
38-
key <- "0003154-200613084148143"
39-
countries <- c("BE")
38+
key <- "0173465-210914110416597"
39+
countries <- c("IT")
4040
```
4141

4242
Name and path of `.sqlite` file:

src/4_aggregate.Rmd

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ library(RSQLite) # To interact with SQlite databases
3333
Define key returned in `1_download.Rmd` and country:
3434

3535
```{r define_key_countries}
36-
key <- "0003154-200613084148143"
37-
countries <- c("BE")
36+
key <- "0173465-210914110416597"
37+
countries <- c("IT")
3838
```
3939

4040
Name and path of `.sqlite` file:
@@ -62,7 +62,7 @@ sqlite_occ <- dbConnect(SQLite(), dbname = sqlite_path)
6262

6363
Species, synonyms of that species and infraspecific taxa of that species all share the same `speciesKey`.
6464

65-
For speeding up the aggregation, we create an index on `year`, `eea_cell_code` and `speciesKey` if not present:
65+
For speeding up the aggregation, we create an index on `year`, `eea_cell_code` and `speciesKey` if not present:
6666

6767
```{r create_idx_speciesKey}
6868
idx_species_year_cell <- "idx_species_year_cell"
@@ -106,8 +106,8 @@ occ_cube_species <-
106106
dbGetQuery(sqlite_occ, query) %>%
107107
rename(
108108
n = "COUNT(_ROWID_)",
109-
min_coord_uncertainty = "MIN(`coordinateUncertaintyInMeters`)"
110-
)
109+
min_coord_uncertainty = "MIN(`coordinateUncertaintyInMeters`)") %>%
110+
mutate(speciesKey = as.numeric(speciesKey))
111111
```
112112

113113
Preview:
@@ -120,7 +120,7 @@ Number of occurrences linked to taxa with higher rank than species:
120120

121121
```{r occs_higher_rank_than_species}
122122
occ_cube_species %>%
123-
filter(speciesKey == 0) %>%
123+
filter(speciesKey == 0 | is.na(speciesKey)) %>%
124124
select(n) %>%
125125
colSums()
126126
```
@@ -130,14 +130,14 @@ We will discard them:
130130
```{r remove_occs_with_higher_rank}
131131
occ_cube_species <-
132132
occ_cube_species %>%
133-
filter(speciesKey != 0)
133+
filter(speciesKey != 0 & !is.na(speciesKey))
134134
```
135135

136136
## Map taxa
137137

138-
Grouping by `speciesKey`, we loose informations about which taxa share the same `speciesKey`. This information could be sometimes helpful. We extract it in a separate data.frame, `taxa_species`.
138+
Grouping by `speciesKey`, we lose information about which taxa share the same `speciesKey`. This information can sometimes be helpful. We extract it in a separate data.frame, `taxa_species`.
139139

140-
For speeding up the extraction, we create an index on `speciesKey`, `taxonKey` and `scientificName` if not present:
140+
For speeding up the extraction, we create an index on `speciesKey`, `taxonKey` and `scientificName` if not present:
141141

142142
```{r create_idx_speciesKey}
143143
idx_species_taxon_name <- "idx_species_taxon_name"
@@ -227,7 +227,7 @@ occ_cube_species_taxa %>%
227227
arrange(speciesKey, taxonKey)
228228
```
229229

230-
We create `taxa_species` by adding the taxonomic rank, `SPECIES`, and the taxonomic status of the species, one of `ACCEPTED` or `DOUBTFUL`, and create a column called `include` which contains all taxa whose occurrences are linked to the species:
230+
We create `taxa_species` by adding the taxonomic rank, `SPECIES`, and the taxonomic status of the species, one of `ACCEPTED` or `DOUBTFUL`, and create a column called `includes` which contains all taxa whose occurrences are linked to the species:
231231

232232
```{r make_taxa_species}
233233
taxa_species <-
@@ -249,7 +249,7 @@ taxa_species <-
249249
reduce(full_join) %>%
250250
251251
# select columns of interest
252-
select(speciesKey, scientificName, rank, taxonomicStatus) %>%
252+
select(speciesKey, scientificName, rank, taxonomicStatus, kingdom) %>%
253253
254254
# rename 'scientificName' to 'species_scientificName'
255255
rename(species_scientificName = scientificName) %>%
@@ -261,7 +261,8 @@ taxa_species <-
261261
group_by(speciesKey,
262262
species_scientificName,
263263
rank,
264-
taxonomicStatus) %>%
264+
taxonomicStatus,
265+
kingdom) %>%
265266
266267
# create 'includes' column
267268
summarize(includes = paste(
@@ -304,11 +305,11 @@ taxa_species_filename <- paste0(
304305
".csv"
305306
)
306307
write_csv(taxa_species,
307-
path = here::here("data",
308+
file = here::here("data",
308309
"processed",
309310
taxa_species_filename),
310311
na = "",
311-
quote_escape = FALSE
312+
escape = "none"
312313
)
313314
```
314315

0 commit comments

Comments
 (0)