Issue #404/#481 test_vectorcube streamlining

soxofaan · soxofaan · commit 54fa08b79b72 · 2023-10-18T15:07:04.000+02:00
- use `dummy_backend` for more compact setup
- cover more combinations of server-side support, `auto_validate` and explicit `validate`
- leverage fixture parameterization more to avoid custom fixtures
diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py
@@ -1568,7 +1568,7 @@ def execute(
             json=pg_with_metadata,
             expected_status=200,
             timeout=timeout or DEFAULT_TIMEOUT_SYNCHRONOUS_EXECUTE,
-        ).json()
+        ).json()  # TODO: only do JSON decoding when mimetype is actually JSON?
 
     def create_job(
         self,
diff --git a/tests/rest/conftest.py b/tests/rest/conftest.py
@@ -72,7 +72,10 @@ def assert_oidc_device_code_flow(url: str = "https://oidc.test/dc", elapsed: flo
 
 @pytest.fixture
 def api_capabilities() -> dict:
-    """Fixture to be overridden for customizing the capabilities doc used by connection fixtures."""
+    """
+    Fixture to be overridden for customizing the capabilities doc used by connection fixtures.
+    To be used as kwargs for `build_capabilities`
+    """
     return {}
 
 
diff --git a/tests/rest/datacube/conftest.py b/tests/rest/datacube/conftest.py
@@ -67,29 +67,19 @@ def setup_collection_metadata(requests_mock, cid: str, bands: List[str]):
     })
 
 
-@pytest.fixture
-def support_udp() -> bool:
-    """Per-test overridable `build_capabilities_kwargs(udp=...)` value for connection fixtures"""
-    return False
-
 
 @pytest.fixture
-def connection(api_version, requests_mock) -> Connection:
+def connection(api_version, requests_mock, api_capabilities) -> Connection:
     """Connection fixture to a backend of given version with some image collections."""
-    return _setup_connection(api_version, requests_mock)
+    return _setup_connection(api_version, requests_mock, build_capabilities_kwargs=api_capabilities)
 
 
 @pytest.fixture
-def con100(requests_mock, support_udp) -> Connection:
+def con100(requests_mock, api_capabilities) -> Connection:
     """Connection fixture to a 1.0.0 backend with some image collections."""
-    return _setup_connection("1.0.0", requests_mock, build_capabilities_kwargs={"udp": support_udp})
+    return _setup_connection("1.0.0", requests_mock, build_capabilities_kwargs=api_capabilities)
 
 
-@pytest.fixture
-def connection_with_pgvalidation_datacube(api_version, requests_mock) -> Connection:
-    """Connection fixture to a backend that supports validation of the process graph."""
-    return _setup_connection("1.0.0", requests_mock, build_capabilities_kwargs={"udp": support_udp, "validation": True})
-
 
 @pytest.fixture
 def s2cube(connection, api_version) -> DataCube:
diff --git a/tests/rest/datacube/test_datacube.py b/tests/rest/datacube/test_datacube.py
@@ -4,20 +4,20 @@
 - 1.0.0-style DataCube
 
 """
-import json
 import pathlib
 from datetime import date, datetime
 from unittest import mock
 
 import numpy as np
 import pytest
-import requests
 import shapely
 import shapely.geometry
 
 from openeo.rest import BandMathException
+from openeo.rest._testing import build_capabilities
 from openeo.rest.connection import Connection
 from openeo.rest.datacube import DataCube
+from openeo.util import dict_no_none
 
 from ... import load_json_resource
 from .. import get_download_graph
@@ -812,94 +812,176 @@ def test_save_result_format_options_vs_execute_batch(elf, s2cube, get_create_job
         }
 
 
-class TestProcessGraphValidation:
-    JOB_ID = "j-123"
-    PROCESS_GRAPH_DICT = {"add1": {"process_id": "add", "arguments": {"x": 3, "y": 5}, "result": True}}
-    PROCESS_GRAPH_STRING = json.dumps(PROCESS_GRAPH_DICT)
+class TestDataCubeValidation:
+    """
+    Test (auto) validation of datacube execution with `download`, `execute`, ...
+    """
 
-    @pytest.fixture
-    def cube_add(self, requests_mock, connection_with_pgvalidation_datacube: Connection) -> DataCube:
-        requests_mock.post(API_URL + "/result", content=self._post_result_handler_json)
-        return connection_with_pgvalidation_datacube.datacube_from_json(self.PROCESS_GRAPH_STRING)
+    _PG_S2 = {
+        "loadcollection1": {
+            "process_id": "load_collection",
+            "arguments": {"id": "S2", "spatial_extent": None, "temporal_extent": None},
+            "result": True,
+        },
+    }
+    _PG_S2_SAVE = {
+        "loadcollection1": {
+            "process_id": "load_collection",
+            "arguments": {"id": "S2", "spatial_extent": None, "temporal_extent": None},
+        },
+        "saveresult1": {
+            "process_id": "save_result",
+            "arguments": {"data": {"from_node": "loadcollection1"}, "format": "GTiff", "options": {}},
+            "result": True,
+        },
+    }
 
-    def _post_jobs_handler_json(self, response: requests.Request, context):
-        context.headers["OpenEO-Identifier"] = self.JOB_ID
-        return b""
+    @pytest.fixture(params=[False, True])
+    def auto_validate(self, request) -> bool:
+        """Fixture to parametrize auto_validate setting."""
+        return request.param
 
-    def _post_result_handler_json(self, response: requests.Request, context):
-        pg = response.json()["process"]["process_graph"]
-        assert pg == self.PROCESS_GRAPH_DICT
-        return b'{"answer": 8}'
+    @pytest.fixture
+    def connection(self, api_version, requests_mock, api_capabilities, auto_validate) -> Connection:
+        requests_mock.get(API_URL, json=build_capabilities(api_version=api_version, **api_capabilities))
+        con = Connection(API_URL, **dict_no_none(auto_validate=auto_validate))
+        return con
+
+    @pytest.fixture(autouse=True)
+    def dummy_backend_setup(self, dummy_backend):
+        dummy_backend.next_validation_errors = [{"code": "NoAdd", "message": "Don't add numbers"}]
+
+    # Reusable list of (fixture) parameter sets, each one a tuple of values
+    # for ["api_capabilities", "auto_validate", "validate", "validation_expected"]
+    _VALIDATION_PARAMETER_SETS = [
+        # No validation supported by backend: don't attempt to validate
+        ({}, None, None, False),
+        ({}, True, True, False),
+        # Validation supported by backend, default behavior -> validate
+        ({"validation": True}, None, None, True),
+        # (Validation supported by backend) no explicit `validate` argument given: follow `auto_validate` setting
+        ({"validation": True}, True, None, True),
+        ({"validation": True}, False, None, False),
+        # (Validation supported by backend) follow explicit `validate` toggle regardless of auto_validate
+        ({"validation": True}, False, True, True),
+        ({"validation": True}, True, False, False),
+    ]
 
-    @pytest.mark.parametrize("validate", [True, False])
-    def test_create_job_with_pg_validation(
-        self,
-        requests_mock,
-        connection_with_pgvalidation_datacube: Connection,
-        validate,
-    ):
+    @pytest.mark.parametrize(
+        ["api_capabilities", "auto_validate", "validate", "validation_expected"],
+        _VALIDATION_PARAMETER_SETS,
+    )
+    def test_cube_download_validation(self, dummy_backend, connection, validate, validation_expected, caplog, tmp_path):
         """The DataCube should pass through request for the validation to the
         connection and the validation endpoint should only be called when
         validation was requested.
         """
-        m = requests_mock.post(API_URL + "/validation", json={"errors": []})
+        cube = connection.load_collection("S2")
 
-        requests_mock.post(API_URL + "/jobs", status_code=201, content=self._post_jobs_handler_json)
-        cube: DataCube = connection_with_pgvalidation_datacube.load_collection("S2")
-        cube.create_job(validate=validate)
+        output = tmp_path / "result.tiff"
+        cube.download(outputfile=output, **dict_no_none(validate=validate))
+        assert output.read_bytes() == b'{"what?": "Result data"}'
+        assert dummy_backend.get_sync_pg() == self._PG_S2_SAVE
 
-        # Validation should be called if and only if it was requested
-        expected_call_count = 1 if validate else 0
-        assert m.call_count == expected_call_count
+        if validation_expected:
+            assert dummy_backend.validation_requests == [self._PG_S2_SAVE]
+            assert caplog.messages == ["Preflight process graph validation raised: [NoAdd] Don't add numbers"]
+        else:
+            assert dummy_backend.validation_requests == []
+            assert caplog.messages == []
 
-    @pytest.mark.parametrize("validate", [True, False])
-    def test_execute_with_pg_validation(
-        self,
-        requests_mock,
-        cube_add: DataCube,
-        validate,
-    ):
+    @pytest.mark.parametrize("api_capabilities", [{"validation": True}])
+    def test_cube_download_validation_broken(self, dummy_backend, connection, requests_mock, caplog, tmp_path):
+        """Test resilience against broken validation response."""
+        requests_mock.post(
+            connection.build_url("/validation"), status_code=500, json={"code": "Internal", "message": "nope!"}
+        )
+
+        cube = connection.load_collection("S2")
+
+        output = tmp_path / "result.tiff"
+        cube.download(outputfile=output, validate=True)
+        assert output.read_bytes() == b'{"what?": "Result data"}'
+        assert dummy_backend.get_sync_pg() == self._PG_S2_SAVE
+
+        assert caplog.messages == ["Preflight process graph validation failed: [500] Internal: nope!"]
+
+    @pytest.mark.parametrize(
+        ["api_capabilities", "auto_validate", "validate", "validation_expected"],
+        _VALIDATION_PARAMETER_SETS,
+    )
+    def test_cube_execute_validation(self, dummy_backend, connection, validate, validation_expected, caplog):
         """The DataCube should pass through request for the validation to the
         connection and the validation endpoint should only be called when
         validation was requested.
         """
-        m = requests_mock.post(API_URL + "/validation", json={"errors": []})
-        requests_mock.post(API_URL + "/jobs", status_code=201, content=self._post_jobs_handler_json)
-        requests_mock.post(API_URL + "/result", content=self._post_result_handler_json)
+        cube = connection.load_collection("S2")
 
-        cube_add.execute(validate=validate)
+        res = cube.execute(**dict_no_none(validate=validate))
+        assert res == {"what?": "Result data"}
+        assert dummy_backend.get_sync_pg() == self._PG_S2
 
-        # Validation should be called if and only if it was requested
-        expected_call_count = 1 if validate else 0
-        assert m.call_count == expected_call_count
+        if validation_expected:
+            assert dummy_backend.validation_requests == [self._PG_S2]
+            assert caplog.messages == ["Preflight process graph validation raised: [NoAdd] Don't add numbers"]
+        else:
+            assert dummy_backend.validation_requests == []
+            assert caplog.messages == []
 
-    @pytest.mark.parametrize("validate", [True, False])
-    def test_execute_batch_with_pg_validation(
-        self,
-        requests_mock,
-        cube_add: DataCube,
-        validate,
+    @pytest.mark.parametrize(
+        ["api_capabilities", "auto_validate", "validate", "validation_expected"],
+        _VALIDATION_PARAMETER_SETS,
+    )
+    def test_cube_create_job_validation(
+        self, dummy_backend, connection: Connection, validate, validation_expected, caplog
     ):
         """The DataCube should pass through request for the validation to the
         connection and the validation endpoint should only be called when
         validation was requested.
         """
-        m = requests_mock.post(API_URL + "/validation", json={"errors": []})
-        requests_mock.post(API_URL + "/jobs", status_code=201, content=self._post_jobs_handler_json)
-        requests_mock.post(API_URL + f"/jobs/{self.JOB_ID}/results", status_code=202)
-        job_metadata = {
-            "id": self.JOB_ID,
-            "title": f"Job {self.JOB_ID,}",
-            "description": f"Job {self.JOB_ID,}",
-            "process": self.PROCESS_GRAPH_DICT,
-            "status": "finished",
-            "created": "2017-01-01T09:32:12Z",
-            "links": [],
-        }
-        requests_mock.get(API_URL + f"/jobs/{self.JOB_ID}", status_code=200, json=job_metadata)
+        cube = connection.load_collection("S2")
+        job = cube.create_job(**dict_no_none(validate=validate))
+        assert job.job_id == "job-000"
+        assert dummy_backend.get_batch_pg() == self._PG_S2_SAVE
+
+        if validation_expected:
+            assert dummy_backend.validation_requests == [self._PG_S2_SAVE]
+            assert caplog.messages == ["Preflight process graph validation raised: [NoAdd] Don't add numbers"]
+        else:
+            assert dummy_backend.validation_requests == []
+            assert caplog.messages == []
 
-        cube_add.execute_batch(validate=validate)
+    @pytest.mark.parametrize("api_capabilities", [{"validation": True}])
+    def test_cube_create_job_validation_broken(self, dummy_backend, connection, requests_mock, caplog, tmp_path):
+        """Test resilience against broken validation response."""
+        requests_mock.post(
+            connection.build_url("/validation"), status_code=500, json={"code": "Internal", "message": "nope!"}
+        )
 
-        # Validation should be called if and only if it was requested
-        expected_call_count = 1 if validate else 0
-        assert m.call_count == expected_call_count
+        cube = connection.load_collection("S2")
+        job = cube.create_job(validate=True)
+        assert job.job_id == "job-000"
+        assert dummy_backend.get_batch_pg() == self._PG_S2_SAVE
+
+        assert caplog.messages == ["Preflight process graph validation failed: [500] Internal: nope!"]
+
+    @pytest.mark.parametrize(
+        ["api_capabilities", "auto_validate", "validate", "validation_expected"],
+        _VALIDATION_PARAMETER_SETS,
+    )
+    def test_cube_execute_batch_validation(self, dummy_backend, connection, validate, validation_expected, caplog):
+        """The DataCube should pass through request for the validation to the
+        connection and the validation endpoint should only be called when
+        validation was requested.
+        """
+        cube = connection.load_collection("S2")
+        job = cube.execute_batch(**dict_no_none(validate=validate))
+        assert job.job_id == "job-000"
+        assert dummy_backend.get_batch_pg() == self._PG_S2_SAVE
+
+        if validation_expected:
+            assert dummy_backend.validation_requests == [self._PG_S2_SAVE]
+            assert caplog.messages == ["Preflight process graph validation raised: [NoAdd] Don't add numbers"]
+        else:
+            assert dummy_backend.validation_requests == []
+            assert caplog.messages == []
diff --git a/tests/rest/datacube/test_datacube100.py b/tests/rest/datacube/test_datacube100.py
@@ -27,7 +27,6 @@
 from openeo.internal.warnings import UserDeprecationWarning
 from openeo.processes import ProcessBuilder
 from openeo.rest import OpenEoClientException
-from openeo.rest._testing import build_capabilities
 from openeo.rest.connection import Connection
 from openeo.rest.datacube import THIS, UDF, DataCube
 
@@ -1966,9 +1965,9 @@ def test_custom_process_arguments_namespacd(con100: Connection):
     assert res.flat_graph() == expected
 
 
-@pytest.mark.parametrize("support_udp", [True])
+
+@pytest.mark.parametrize("api_capabilities", [{"udp": True}])
 def test_save_user_defined_process(con100, requests_mock):
-    requests_mock.get(API_URL + "/", json=build_capabilities(udp=True))
     requests_mock.get(API_URL + "/processes", json={"processes": [{"id": "add"}]})
 
     expected_body = load_json_resource("data/1.0.0/save_user_defined_process.json")
@@ -1990,9 +1989,8 @@ def check_body(request):
     assert adapter.called
 
 
-@pytest.mark.parametrize("support_udp", [True])
+@pytest.mark.parametrize("api_capabilities", [{"udp": True}])
 def test_save_user_defined_process_public(con100, requests_mock):
-    requests_mock.get(API_URL + "/", json=build_capabilities(udp=True))
     requests_mock.get(API_URL + "/processes", json={"processes": [{"id": "add"}]})
 
     expected_body = load_json_resource("data/1.0.0/save_user_defined_process.json")
diff --git a/tests/rest/datacube/test_vectorcube.py b/tests/rest/datacube/test_vectorcube.py
diff --git a/tests/rest/test_connection.py b/tests/rest/test_connection.py