diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 9a93fda..998047f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11"]
     steps:
     - name: Checkout repository and submodules
       uses: actions/checkout@v4
@@ -32,8 +32,8 @@ jobs:
       run: make test
     - name: Lint with flake8 ⚙️
       run: make lint
-      if: matrix.python-version == 3.7
+      if: matrix.python-version == 3.9
     - name: Build docs 🏗️
       run: make docs
-      if: matrix.python-version == 3.7
+      if: matrix.python-version == 3.9
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 1f36816..e866f47 100755
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -41,7 +41,7 @@
     "sphinx.ext.todo",
     "sphinx.ext.viewcode",
     "IPython.sphinxext.ipython_console_highlighting",
-    "nbsphinx",
+    # "nbsphinx",
     "pywps.ext_autodoc",
 ]
 
@@ -49,23 +49,51 @@
 # List of all tested working mock imports from all birds so new birds can
 # inherit without having to test which work which do not.
 autodoc_mock_imports = [
-    "numpy", "xarray", "fiona", "rasterio", "shapely",
-    "osgeo", "geopandas", "pandas", "statsmodels",
-    "affine", "rasterstats", "spotpy", "matplotlib",
-    "scipy", "unidecode", "gdal", "sentry_sdk", "dask",
-    "numba", "parse", "siphon", "sklearn", "cftime",
-    "netCDF4", "bottleneck", "ocgis", "geotiff", "geos",
-    "hdf4", "hdf5", "zlib", "pyproj", "proj", "cartopy",
-    "scikit-learn", "cairo"
+    "numpy",
+    "xarray",
+    "fiona",
+    "rasterio",
+    "shapely",
+    "osgeo",
+    "geopandas",
+    "pandas",
+    "statsmodels",
+    "affine",
+    "rasterstats",
+    "spotpy",
+    "matplotlib",
+    "scipy",
+    "unidecode",
+    "gdal",
+    "sentry_sdk",
+    "dask",
+    "numba",
+    "parse",
+    "siphon",
+    "sklearn",
+    "cftime",
+    "netCDF4",
+    "bottleneck",
+    "ocgis",
+    "geotiff",
+    "geos",
+    "hdf4",
+    "hdf5",
+    "zlib",
+    "pyproj",
+    "proj",
+    "cartopy",
+    "scikit-learn",
+    "cairo",
 ]
 
 # Monkeypatch constant because the following are mock imports.
 # Only works if numpy is actually installed and at the same time being mocked.
-#import numpy
-#numpy.pi = 3.1416
+# import numpy
+# numpy.pi = 3.1416
 
 # We are using mock imports in readthedocs, so probably safer to not run the notebooks
-nbsphinx_execute = 'never'
+nbsphinx_execute = "never"
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
@@ -112,7 +140,7 @@
 todo_include_todos = False
 
 # Suppress "WARNING: unknown mimetype for ..." when building EPUB.
-suppress_warnings = ['epub.unknown_project_files']
+suppress_warnings = ["epub.unknown_project_files"]
 
 # Avoid "configuration.rst:4:duplicate label configuration, other instance in configuration.rst"
 autosectionlabel_prefix_document = True
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 318de0c..aed6750 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -6,7 +6,6 @@
    installation
    configuration
-   notebooks/index
    dev_guide
    processes
    authors
diff --git a/docs/source/notebooks/example.ipynb b/docs/source/notebooks/example.ipynb
deleted file mode 100644
index a68d809..0000000
--- a/docs/source/notebooks/example.ipynb
+++ /dev/null
@@ -1,48 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Usage Example"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import parrot"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/docs/source/notebooks/index.rst b/docs/source/notebooks/index.rst
deleted file mode 100644
index cdbc518..0000000
--- a/docs/source/notebooks/index.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-========
-Examples
-========
-
-.. toctree::
-   :maxdepth: 1
-
-   example
diff --git a/environment.yml b/environment.yml
index d43ebef..10d38c4 100644
--- a/environment.yml
+++ b/environment.yml
@@ -5,9 +5,18 @@
 dependencies:
 - pip
 - python>=3.8,<3.12
-- pywps>=4.5.1,<4.6
+- pywps>=4.5.2,<4.7
 - jinja2
 - click
 - psutil
 # tests
 - pytest
+# provenance
+- prov>=2.0.0
+- pydot
+- graphviz
+- rdflib
+- rdflib-sqlalchemy
+- sqlalchemy<2
+- pandas
+- pyyaml
diff --git a/parrot/db.py b/parrot/db.py
new file mode 100644
index 0000000..3868c28
--- /dev/null
+++ b/parrot/db.py
@@ -0,0 +1,41 @@
+from rdflib import Graph, URIRef
+from rdflib.plugins.sparql import prepareQuery
+
+from pywps import configuration
+
+# Provide the path to the SQLite database in the local folder
+# DB_URL = configuration.get_config_value("provenance", "db_url")
+DB_URL = configuration.get_config_value("logging", "database")
+
+
+class GraphDB(object):
+    def __init__(self):
+        # Create a graph with a specific backend store
+        self.graph = Graph(
+            store="SQLAlchemy", identifier=URIRef("http://example.org/graph")
+        )
+        self.graph.open(DB_URL, create=True)
+
+    def add(self, data):
+        new_graph = Graph()
+        new_graph.parse(data=data, format="turtle")
+
+        # add rdf to existing graph
+        for triple in new_graph:
+            self.graph.add(triple)
+        # Commit changes to the store
+        self.graph.commit()
+
+    def query(self, query_str):
+        namespaces = {
+            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+            "foaf": "http://xmlns.com/foaf/0.1/",
+            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+            "prov": "http://www.w3.org/ns/prov#",
+            "provone": "http://purl.dataone.org/provone/2015/01/15/ontology#",
+            "dcterms": "http://purl.org/dc/terms/",
+            "clint": "urn:clint:",
+        }
+        query = prepareQuery(query_str, initNs=namespaces)
+        results = self.graph.query(query)
+        return results
diff --git a/parrot/processes/__init__.py b/parrot/processes/__init__.py
index 437441e..7a2dd77 100644
--- a/parrot/processes/__init__.py
+++ b/parrot/processes/__init__.py
@@ -1,5 +1,7 @@
 from .wps_say_hello import SayHello
+from .wps_dashboard import Dashboard
 
 processes = [
     SayHello(),
+    Dashboard(),
 ]
diff --git a/parrot/processes/wps_dashboard.py b/parrot/processes/wps_dashboard.py
new file mode 100644
index 0000000..c2ea766
--- /dev/null
+++ b/parrot/processes/wps_dashboard.py
@@ -0,0 +1,102 @@
+from pathlib import Path
+
+from pywps import Process, LiteralInput, ComplexOutput, Format
+
+from parrot import query
+
+
+class Dashboard(Process):
+    def __init__(self):
+        inputs = [
+            LiteralInput(
+                "time",
+                "Time Period",
+                abstract="The time period for the report separated by /. "
+                "Example: 2023-09-01/2023-09-30",
+                data_type="string",
+                default="2023-09-01/2023-09-30",
+                min_occurs=0,
+                max_occurs=1,
+            ),
+        ]
+        outputs = [
+            ComplexOutput(
+                "report",
+                "Generated HTML Report",
+                as_reference=True,
+                supported_formats=[Format("text/html")],
+            ),
+        ]
+
+        super(Dashboard, self).__init__(
+            self._handler,
+            identifier="dashboard",
+            title="Generate HTML Report",
+            version="1.0",
+            abstract="Generate an HTML report from a provenance database.",
+            inputs=inputs,
+            outputs=outputs,
+            status_supported=True,
+            store_supported=True,
+        )
+
+    def _handler(self, request, response):
+        workdir = Path(self.workdir)
+        # input_csv = request.inputs['input_csv'][0].file
+
+        # Query the provenance database ... result is a Pandas DataFrame
+        df = query.query()
+
+        # Generate an HTML report from the DataFrame
+        html_report = self.write_html(df, workdir)
+
+        print(f"report: {html_report}")
+        response.outputs["report"].file = html_report
+        # response.outputs["report"].output_format = Format("text/html")
+
+        return response
+
+    def write_html(self, df, workdir):
+        # Convert the DataFrame to an HTML table
+        html_table = df.to_html(escape=False, index=False)
+
+        # Define the HTML template
+        html_template = f"""
+{content}" + + +def query(): + query_str = """ + SELECT ?process ?dataset ?variable ?startTime ?endTime ?input ?output ?info ?histogram + WHERE { + ?exec rdf:type provone:Execution ; + rdfs:label ?process ; + clint:dataset_name ?dataset ; + clint:variable_name ?variable ; + prov:startedAtTime ?startTime ; + prov:endedAtTime ?endTime ; + clint:info ?info ; + clint:histogram ?histogram . + + ?input rdf:type prov:Entity . + + ?output rdf:type prov:Entity ; + prov:qualifiedDerivation [ prov:entity ?input; prov:hadActivity ?exec ] . + } + """ # noqa + graph_db = GraphDB() + results = graph_db.query(query_str) + + data = [] + for row in results: + # print(row) + process = row.process.split("/")[-1] + dataset = row.dataset.value + variable = row.variable.value + start_time = row.startTime.value + end_time = row.endTime.value + input = row.input.split("/")[-1] + input = input.split("urn:clint:")[-1] + output = row.output.split("/")[-1] + output = output.split("urn:clint:")[-1] + # min = row.min.value + # max = row.max.value + # mean = row.mean.value + # stddev = row.stddev.value + info = json.loads(row.info.value) + histogram = row.histogram.value + entry = { + "Process": process, + "Dataset": dataset, + "Variable": variable, + "Start Time": start_time, + "End Time": end_time, + "Input": input, + "Output": output, + # "Min": min, + # "Max": max, + # "Mean": mean, + # "StdDev": stddev, + "Histogram": display_image(histogram), + } + for key in info: + entry[key] = display_json(info[key]) + data.append(entry) + df = pd.DataFrame(data) + return df diff --git a/requirements.txt b/requirements.txt index 01cdf08..ca3fd9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,12 @@ click jinja2 psutil -pywps>=4.5.1,<4.6 +pywps>=4.5.2,<4.7 +# provenance +prov>=2.0.0 +pydot +rdflib +rdflib-sqlalchemy +sqlalchemy<2 +pandas +pyyaml diff --git a/tests/test_wps_caps.py b/tests/test_wps_caps.py index 693bdbf..e5fdac7 100644 --- a/tests/test_wps_caps.py +++ b/tests/test_wps_caps.py @@ -6,11 +6,11 @@ def test_wps_caps(): client = client_for(Service(processes=processes)) - resp = client.get(service='wps', request='getcapabilities', version='1.0.0') - names = resp.xpath_text('/wps:Capabilities' - '/wps:ProcessOfferings' - '/wps:Process' - '/ows:Identifier') + resp = client.get(service="wps", request="getcapabilities", version="1.0.0") + names = resp.xpath_text( + "/wps:Capabilities" "/wps:ProcessOfferings" "/wps:Process" "/ows:Identifier" + ) assert sorted(names.split()) == [ - 'hello', + "dashboard", + "hello", ]