Commit bbcc17e: Merge branch 'dev'

marcos-lg committed Jan 4, 2024
2 parents ae30c6e + a4b31f7

Showing 5 changed files with 76 additions and 74 deletions.
liquibase/129-duplicate-dataset-key-constraint.xml (new file, included from the master changelog below)
@@ -0,0 +1,13 @@
<databaseChangeLog
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-2.0.xsd">

<changeSet id="129" author="mlopez" runInTransaction="false">
<sql splitStatements="false" stripComments="false">
<![CDATA[
ALTER TABLE dataset ADD CONSTRAINT duplicate_key_not_equal_key CHECK(duplicate_of_dataset_key != key)
]]>
</sql>
</changeSet>
</databaseChangeLog>
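
For illustration, a minimal sketch of what the new check constraint enforces, assuming a PostgreSQL registry database and a hypothetical dataset key: once the constraint is in place, any insert or update that marks a dataset as a duplicate of itself is rejected.

-- Hypothetical example; the UUID is a placeholder.
UPDATE dataset
SET duplicate_of_dataset_key = key  -- violates duplicate_key_not_equal_key
WHERE key = '11111111-2222-3333-4444-555555555555';
-- ERROR: new row for relation "dataset" violates check constraint "duplicate_key_not_equal_key"

Rows whose duplicate_of_dataset_key is NULL still pass, since the check only fails when the two keys are equal.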
@@ -134,4 +134,5 @@
<include file="liquibase/126-wikidata-ncbi-identifier-types.xml" />
<include file="liquibase/127-organization-country-not-null.xml" />
<include file="liquibase/128-parent-code-occurrence-mapping.xml" />
<include file="liquibase/129-duplicate-dataset-key-constraint.xml" />
</databaseChangeLog>
@@ -88,11 +88,6 @@ public class DatasetJsonConverter {

private static final int MAX_FACET_LIMIT = 1200000;

// Collections
private static final String PROCESSING_NAMESPACE = "processing.gbif.org";
private static final String INSTITUTION_TAG_NAME = "institutionCode";
private static final String COLLECTION_TAG_NAME = "collectionCode";

// Gridded datasets
private static final String GRIDDED_DATASET_NAMESPACE = "griddedDataSet.jwaller.gbif.org";
private static final String GRIDDED_DATASET_NAME = "griddedDataset";
@@ -405,27 +400,6 @@ private void addTaxonKeys(Dataset dataset, ObjectNode datasetObjectNode) {
}

private void addMachineTags(Dataset dataset, ObjectNode datasetObjectNode) {
datasetObjectNode
.putArray("institutionKey")
.addAll(
dataset.getMachineTags().stream()
.filter(
mt ->
PROCESSING_NAMESPACE.equals(mt.getNamespace())
&& INSTITUTION_TAG_NAME.equals(mt.getName()))
.map(v -> new TextNode(v.getValue().split(":")[0]))
.collect(Collectors.toList()));
datasetObjectNode
.putArray("collectionKey")
.addAll(
dataset.getMachineTags().stream()
.filter(
mt ->
PROCESSING_NAMESPACE.equals(mt.getNamespace())
&& COLLECTION_TAG_NAME.equals(mt.getName()))
.map(v -> new TextNode(v.getValue().split(":")[0]))
.collect(Collectors.toList()));

// Gridded dataset
dataset.getMachineTags().stream()
.filter(
registry-search/src/main/resources/dataset-es-mapping.json: 2 changes (0 additions, 2 deletions)
@@ -136,8 +136,6 @@
"programmeAcronym": {"type": "keyword", "copy_to": "all"},
"keyword": {"type": "keyword", "copy_to": "all"},
"doi": {"type": "keyword", "copy_to": "all"},
"institutionKey": {"type": "keyword"},
"collectionKey": {"type": "keyword"},
"installationKey": {"type": "keyword"},
"publishingOrganizationKey": {"type": "keyword"},
"publishingOrganizationTitle": {"type": "text", "copy_to": "all"},
@@ -45,13 +45,7 @@
import org.gbif.api.service.registry.DatasetSearchService;
import org.gbif.api.service.registry.DatasetService;
import org.gbif.api.util.iterables.Iterables;
import org.gbif.api.vocabulary.Continent;
import org.gbif.api.vocabulary.Country;
import org.gbif.api.vocabulary.DatasetSubtype;
import org.gbif.api.vocabulary.DatasetType;
import org.gbif.api.vocabulary.IdentifierType;
import org.gbif.api.vocabulary.License;
import org.gbif.api.vocabulary.MetadataType;
import org.gbif.api.vocabulary.*;
import org.gbif.common.messaging.api.MessagePublisher;
import org.gbif.common.messaging.api.messages.Platform;
import org.gbif.common.messaging.api.messages.StartCrawlMessage;
@@ -237,13 +231,6 @@ public DatasetResource(
@Retention(RetentionPolicy.RUNTIME)
@Parameters(
value = {
@Parameter(
name = "country",
description =
"The 2-letter country code (as per ISO-3166-1) of the country publishing the dataset.",
schema = @Schema(implementation = Country.class),
in = ParameterIn.QUERY,
explode = Explode.FALSE),
@Parameter(
name = "type",
description = "The primary type of the dataset.",
@@ -256,24 +243,6 @@ public DatasetResource(
schema = @Schema(implementation = DatasetSubtype.class),
in = ParameterIn.QUERY,
explode = Explode.TRUE),
@Parameter(
name = "license",
description = "The dataset's licence.",
schema = @Schema(implementation = License.class),
in = ParameterIn.QUERY,
explode = Explode.TRUE),
@Parameter(
name = "identifier",
description = "An identifier such as a DOI or UUID.",
schema = @Schema(implementation = String.class),
in = ParameterIn.QUERY),
@Parameter(
name = "keyword",
description =
"Filters datasets by a case insensitive plain text keyword. The search is done on the merged "
+ "collection of tags, the dataset keywordCollections and temporalCoverages.",
schema = @Schema(implementation = String.class),
in = ParameterIn.QUERY),
@Parameter(
name = "publishingOrg",
description = "Filters datasets by their publishing organization UUID key",
@@ -285,9 +254,11 @@ public DatasetResource(
schema = @Schema(implementation = UUID.class),
in = ParameterIn.QUERY),
@Parameter(
name = "endorsingNodeKey",
description = "Node key that endorsed this dataset's publisher",
schema = @Schema(implementation = UUID.class),
name = "keyword",
description =
"Filters datasets by a case insensitive plain text keyword. The search is done on the merged "
+ "collection of tags, the dataset keywordCollections and temporalCoverages.",
schema = @Schema(implementation = String.class),
in = ParameterIn.QUERY),
@Parameter(
name = "decade",
@@ -304,14 +275,6 @@ public DatasetResource(
schema = @Schema(implementation = Country.class),
in = ParameterIn.QUERY,
explode = Explode.FALSE),
@Parameter(
name = "projectId",
description =
"Filter or facet based on the project ID of a given dataset. A dataset can have a project id if "
+ "it is the result of a project. multiple datasets can have the same project id.",
schema = @Schema(implementation = String.class),
in = ParameterIn.QUERY,
example = "AA003-AA003311F"),
@Parameter(
name = "hostingCountry",
description =
@@ -326,11 +289,64 @@ public DatasetResource(
in = ParameterIn.QUERY,
deprecated = true,
explode = Explode.FALSE),
@Parameter(
name = "license",
description = "The dataset's licence.",
schema = @Schema(implementation = License.class),
in = ParameterIn.QUERY,
explode = Explode.TRUE),
@Parameter(
name = "projectId",
description =
"Filter or facet based on the project ID of a given dataset. A dataset can have a project id if "
+ "it is the result of a project. multiple datasets can have the same project id.",
schema = @Schema(implementation = String.class),
in = ParameterIn.QUERY,
example = "AA003-AA003311F"),
@Parameter(
name = "taxonKey",
description = "A taxon key from the GBIF backbone.",
schema = @Schema(implementation = Integer.class),
in = ParameterIn.QUERY),
@Parameter(
name = "recordCount",
description =
"Number of records of the dataset. Accepts ranges and a '*' can be used as a wildcard.",
schema = @Schema(implementation = String.class),
in = ParameterIn.QUERY,
example = "100,*"),
@Parameter(
name = "modifiedDate",
description =
"Date when the dataset was modified the last time. Accepts ranges and a '*' can be used as a wildcard.",
schema = @Schema(implementation = String.class),
in = ParameterIn.QUERY,
example = "2022-05-01,*"),
@Parameter(
name = "doi",
description = "A DOI identifier.",
schema = @Schema(implementation = String.class),
in = ParameterIn.QUERY),
@Parameter(
name = "networkKey",
description = "Network associated to a dataset",
schema = @Schema(implementation = UUID.class),
in = ParameterIn.QUERY),
@Parameter(
name = "endorsingNodeKey",
description = "Node key that endorsed this dataset's publisher",
schema = @Schema(implementation = UUID.class),
in = ParameterIn.QUERY),
@Parameter(
name = "installationKey",
description = "Key of the installation that hosts the dataset.",
schema = @Schema(implementation = UUID.class),
in = ParameterIn.QUERY),
@Parameter(
name = "endpointType",
description = "Type of the endpoint of the dataset.",
schema = @Schema(implementation = EndpointType.class),
in = ParameterIn.QUERY),
@Parameter(name = "request", hidden = true),
@Parameter(name = "searchRequest", hidden = true),
@Parameter(name = "suggestRequest", hidden = true)
@@ -340,9 +356,9 @@ public DatasetResource(
@Target({ElementType.METHOD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Parameter(
name = "metadataKey",
description = "Key for the *metadata document* (not a dataset UUID).",
in = ParameterIn.PATH)
name = "metadataKey",
description = "Key for the *metadata document* (not a dataset UUID).",
in = ParameterIn.PATH)
@interface MetadataDocumentKeyParameter {}
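
// Illustrative usage only (hypothetical values, not part of this commit): the filters documented
// in the annotation above can be combined on the dataset search endpoint, for example
//   GET /v1/dataset/search?recordCount=100,*&modifiedDate=2022-05-01,*&endpointType=DWC_ARCHIVE
// where recordCount and modifiedDate accept ranges and '*' as a wildcard, as described above.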

@Operation(
