Merge pull request #2 from climateintelligence/add-report-process

Add report process
climateintelligence · Apr 12, 2024 · 241eb21 · 241eb21
2 parents ac57812 + 5f345fa
commit 241eb21
Show file tree

Hide file tree

Showing 12 changed files with 290 additions and 81 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11"]
     steps:
     - name: Checkout repository and submodules
       uses: actions/checkout@v4
@@ -32,8 +32,8 @@ jobs:
       run: make test
     - name: Lint with flake8 ⚙️
       run: make lint
-      if: matrix.python-version == 3.7
+      if: matrix.python-version == 3.9
     - name: Build docs 🏗️
       run: make docs
-      if: matrix.python-version == 3.7
+      if: matrix.python-version == 3.9
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -41,31 +41,59 @@
     "sphinx.ext.todo",
     "sphinx.ext.viewcode",
     "IPython.sphinxext.ipython_console_highlighting",
-    "nbsphinx",
+    # "nbsphinx",
     "pywps.ext_autodoc",
 ]
 
 # To avoid having to install these and burst memory limit on ReadTheDocs.
 # List of all tested working mock imports from all birds so new birds can
 # inherit without having to test which work which do not.
 autodoc_mock_imports = [
-    "numpy", "xarray", "fiona", "rasterio", "shapely",
-    "osgeo", "geopandas", "pandas", "statsmodels",
-    "affine", "rasterstats", "spotpy", "matplotlib",
-    "scipy", "unidecode", "gdal", "sentry_sdk", "dask",
-    "numba", "parse", "siphon", "sklearn", "cftime",
-    "netCDF4", "bottleneck", "ocgis", "geotiff", "geos",
-    "hdf4", "hdf5", "zlib", "pyproj", "proj", "cartopy",
-    "scikit-learn", "cairo"
+    "numpy",
+    "xarray",
+    "fiona",
+    "rasterio",
+    "shapely",
+    "osgeo",
+    "geopandas",
+    "pandas",
+    "statsmodels",
+    "affine",
+    "rasterstats",
+    "spotpy",
+    "matplotlib",
+    "scipy",
+    "unidecode",
+    "gdal",
+    "sentry_sdk",
+    "dask",
+    "numba",
+    "parse",
+    "siphon",
+    "sklearn",
+    "cftime",
+    "netCDF4",
+    "bottleneck",
+    "ocgis",
+    "geotiff",
+    "geos",
+    "hdf4",
+    "hdf5",
+    "zlib",
+    "pyproj",
+    "proj",
+    "cartopy",
+    "scikit-learn",
+    "cairo",
 ]
 
 # Monkeypatch constant because the following are mock imports.
 # Only works if numpy is actually installed and at the same time being mocked.
-#import numpy
-#numpy.pi = 3.1416
+# import numpy
+# numpy.pi = 3.1416
 
 # We are using mock imports in readthedocs, so probably safer to not run the notebooks
-nbsphinx_execute = 'never'
+nbsphinx_execute = "never"
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
@@ -112,7 +140,7 @@
 todo_include_todos = False
 
 # Suppress "WARNING: unknown mimetype for ..." when building EPUB.
-suppress_warnings = ['epub.unknown_project_files']
+suppress_warnings = ["epub.unknown_project_files"]
 
 # Avoid "configuration.rst:4:duplicate label configuration, other instance in configuration.rst"
 autosectionlabel_prefix_document = True

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -6,7 +6,6 @@
 
    installation
    configuration
-   notebooks/index
    dev_guide
    processes
    authors

diff --git a/docs/source/notebooks/example.ipynb b/docs/source/notebooks/example.ipynb
diff --git a/docs/source/notebooks/index.rst b/docs/source/notebooks/index.rst
diff --git a/environment.yml b/environment.yml
@@ -5,9 +5,18 @@ channels:
 dependencies:
 - pip
 - python>=3.8,<3.12
-- pywps>=4.5.1,<4.6
+- pywps>=4.5.2,<4.7
 - jinja2
 - click
 - psutil
 # tests
 - pytest
+# provenance
+- prov>=2.0.0
+- pydot
+- graphviz
+- rdflib
+- rdflib-sqlalchemy
+- sqlalchemy<2
+- pandas
+- pyyaml
diff --git a/parrot/db.py b/parrot/db.py
@@ -0,0 +1,41 @@
+from rdflib import Graph, URIRef
+from rdflib.plugins.sparql import prepareQuery
+
+from pywps import configuration
+
+# Connection URL of the database backing the provenance graph store, read from
+# the PyWPS configuration at import time.
+# The store currently reuses the "database" option of the "logging" section.
+# The commented-out line keeps the alternative of a dedicated "provenance"
+# section with a "db_url" option.
+# NOTE(review): the original comment describes this as a SQLite database in
+# the local folder -- confirm against the deployed configuration.
+# DB_URL = configuration.get_config_value("provenance", "db_url")
+DB_URL = configuration.get_config_value("logging", "database")
+
+
+class GraphDB:
+    """RDF graph persisted through the ``SQLAlchemy`` rdflib store.
+
+    The graph is opened against ``DB_URL`` when the object is constructed
+    (the store is created if needed, ``create=True``).
+    """
+
+    # Prefixes made available to every SPARQL query passed to ``query``.
+    _NAMESPACES = {
+        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+        "foaf": "http://xmlns.com/foaf/0.1/",
+        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+        "prov": "http://www.w3.org/ns/prov#",
+        "provone": "http://purl.dataone.org/provone/2015/01/15/ontology#",
+        "dcterms": "http://purl.org/dc/terms/",
+        "clint": "urn:clint:",
+    }
+
+    def __init__(self):
+        """Create the store-backed graph and open it on ``DB_URL``."""
+        graph_id = URIRef("http://example.org/graph")
+        self.graph = Graph(store="SQLAlchemy", identifier=graph_id)
+        self.graph.open(DB_URL, create=True)
+
+    def add(self, data):
+        """Parse ``data`` as Turtle and add its triples to the stored graph.
+
+        The text is parsed into a separate in-memory graph first; its triples
+        are then added one by one to the persistent graph and committed.
+        """
+        incoming = Graph()
+        incoming.parse(data=data, format="turtle")
+
+        for subject, predicate, obj in incoming:
+            self.graph.add((subject, predicate, obj))
+
+        # Commit changes to the store
+        self.graph.commit()
+
+    def query(self, query_str):
+        """Run the SPARQL query ``query_str`` and return the rdflib result.
+
+        The query is prepared with the class-level namespace prefixes, so
+        callers can use them without declaring ``PREFIX`` lines.
+        """
+        prepared = prepareQuery(query_str, initNs=dict(self._NAMESPACES))
+        return self.graph.query(prepared)
diff --git a/parrot/processes/__init__.py b/parrot/processes/__init__.py
@@ -1,5 +1,7 @@
 from .wps_say_hello import SayHello
+from .wps_dashboard import Dashboard
 
 processes = [
     SayHello(),
+    Dashboard(),
 ]
diff --git a/parrot/processes/wps_dashboard.py b/parrot/processes/wps_dashboard.py
@@ -0,0 +1,102 @@
+from pathlib import Path
+
+from pywps import Process, LiteralInput, ComplexOutput, Format
+
+from parrot import query
+
+
+class Dashboard(Process):
+    """WPS process that renders the provenance query result as an HTML report.
+
+    Input:
+        time: optional string literal, a time period written as ``start/end``
+            (default ``2023-09-01/2023-09-30``).
+
+    Output:
+        report: the generated ``text/html`` file, returned as a reference.
+    """
+
+    def __init__(self):
+        inputs = [
+            LiteralInput(
+                "time",
+                "Time Period",
+                # Adjacent literals are concatenated, so the first one must end
+                # with its own punctuation and a space.
+                abstract="The time period for the report separated by /. "
+                "Example: 2023-09-01/2023-09-30",
+                data_type="string",
+                default="2023-09-01/2023-09-30",
+                min_occurs=0,
+                max_occurs=1,
+            ),
+        ]
+        outputs = [
+            ComplexOutput(
+                "report",
+                "Generated HTML Report",
+                as_reference=True,
+                supported_formats=[Format("text/html")],
+            ),
+        ]
+
+        super(Dashboard, self).__init__(
+            self._handler,
+            identifier="dashboard",
+            title="Generate HTML Report",
+            version="1.0",
+            abstract="Generate an HTML report from a provenance database.",
+            inputs=inputs,
+            outputs=outputs,
+            status_supported=True,
+            store_supported=True,
+        )
+
+    def _handler(self, request, response):
+        """Query the provenance database and publish the result as HTML.
+
+        The report file is written into the process working directory and
+        attached to the ``report`` output. Returns the populated ``response``.
+        """
+        workdir = Path(self.workdir)
+
+        # NOTE(review): the declared "time" input is not read here, so the
+        # report is not restricted to the requested period -- confirm whether
+        # query.query() should receive it.
+
+        # Query the provenance database ... result is a Pandas DataFrame
+        df = query.query()
+
+        # Generate an HTML report from the DataFrame
+        html_report = self.write_html(df, workdir)
+
+        print(f"report: {html_report}")
+        response.outputs["report"].file = html_report
+        # response.outputs["report"].output_format = Format("text/html")
+
+        return response
+
+    def write_html(self, df, workdir):
+        """Write ``df`` as a styled HTML table and return the file path.
+
+        Args:
+            df: DataFrame to render (anything providing ``to_html``).
+            workdir: ``pathlib.Path`` of the directory to write into.
+
+        Returns:
+            Path of the written file, ``workdir / "provenance_report.html"``.
+        """
+        # Convert the DataFrame to an HTML table
+        # NOTE(review): escape=False emits cell values verbatim as markup;
+        # confirm the provenance data is trusted or contains intended HTML.
+        html_table = df.to_html(escape=False, index=False)
+
+        # Define the HTML template
+        html_template = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <meta charset="utf-8">
+            <title>Provenance Report</title>
+            <style>
+                table {{
+                    border-collapse: collapse;
+                    width: 100%;
+                    border: 1px solid #ddd;
+                }}
+
+                th, td {{
+                    text-align: left;
+                    padding: 8px;
+                }}
+
+                th {{
+                    background-color: #f2f2f2;
+                }}
+
+                tr:nth-child(even) {{
+                    background-color: #f2f2f2;
+                }}
+            </style>
+        </head>
+        <body>
+            <h1>Provenance Report</h1>
+            {html_table}
+        </body>
+        </html>
+        """
+
+        # Write the HTML template to a file. The encoding is fixed to UTF-8
+        # (matching the declared charset) instead of the platform default, so
+        # non-ASCII values are written and displayed consistently.
+        outfile = workdir / "provenance_report.html"
+        with outfile.open(mode="w", encoding="utf-8") as file:
+            file.write(html_template)
+
+        return outfile