From 7f47f0f61135a52746b2ec2cfb162ea4e95d6e96 Mon Sep 17 00:00:00 2001
From: Pingu Carsti
Date: Fri, 12 Apr 2024 14:12:40 +0200
Subject: [PATCH] added missing dashboard code

---
Review note: the added files were revised during review; the blob ids on
the "index" lines below were not regenerated.

 parrot/data_stats.py | 126 +++++++++++++++++++++++++++++++++++++++++++
 parrot/db.py         |  55 +++++++++++++++++++
 parrot/query.py      |   2 +-
 3 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 parrot/data_stats.py
 create mode 100644 parrot/db.py

diff --git a/parrot/data_stats.py b/parrot/data_stats.py
new file mode 100644
index 0000000..1530a25
--- /dev/null
+++ b/parrot/data_stats.py
@@ -0,0 +1,126 @@
+"""Summary statistics and a histogram image for one dataset variable."""
+import xarray as xr
+import matplotlib.pyplot as plt
+import numpy as np
+import yaml
+import pathlib
+import io
+import base64
+
+
+def get_stats(data):
+    """Return NaN-ignoring min, max, mean and std of ``data`` as floats."""
+    return {
+        "min": float(np.nanmin(data)),
+        "max": float(np.nanmax(data)),
+        "mean": float(np.nanmean(data)),
+        "std": float(np.nanstd(data)),
+    }
+
+
+def _encode_histogram_png(hist, vmin, vmax, xlabel):
+    """Plot ``hist`` as an image and return the PNG as a base64 string."""
+    # Use a private figure so figures owned by other code are left alone.
+    fig, ax = plt.subplots()
+    try:
+        ax.imshow(
+            hist,
+            aspect="auto",
+            origin="lower",
+            extent=[vmin, vmax, 0, hist.shape[0]],
+            cmap="gist_ncar",
+        )
+        ax.grid(color="gray", linestyle="-.", linewidth=1)
+        ax.set_xlabel(xlabel)
+        ax.set_ylabel("Timesteps")
+        buffer = io.BytesIO()
+        fig.savefig(buffer, format="png")
+    finally:
+        # Release the figure even when plotting fails.
+        plt.close(fig)
+    return base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+
+class DataStats(object):
+    """Gather metadata and value statistics for one variable of a dataset.
+
+    ``gen_data_stats`` fills ``info`` (a dict) and ``histogram`` (a
+    base64-encoded PNG); both are ``None`` until it has been called.
+    """
+
+    def __init__(self, output_dir):
+        """Keep ``output_dir`` (str or ``pathlib.Path``) as a ``Path``."""
+        # pathlib.Path() accepts str and Path alike.
+        self.output_dir = pathlib.Path(output_dir)
+        self.info = None
+        self.histogram = None
+
+    def gen_data_stats(self, filename, var, nbins=100):
+        """Compute statistics and a per-timestep histogram for ``var``.
+
+        Args:
+            filename: Dataset path passed to ``xarray.open_dataset``.
+            var: Name of the variable to analyse. Its first axis is
+                iterated as timesteps; it needs at least two axes.
+            nbins: Number of equal-width bins between the variable's
+                minimum and maximum.
+        """
+        # The context manager closes the file even if an error occurs.
+        with xr.open_dataset(filename) as ds:
+            # Read the array once instead of once per timestep.
+            values = ds[var].values
+
+            vstats = get_stats(values)
+            bins = np.linspace(vstats["min"], vstats["max"], num=nbins + 1)
+
+            ntime = values.shape[0]
+            # Cells per timestep, whatever the number of remaining axes.
+            ncells = values[0].size
+            valid = np.zeros(ntime)
+            hist = np.zeros((ntime, nbins))
+            for i in range(ntime):
+                step = values[i]
+                mask = ~np.isnan(step)
+                valid[i] = mask.sum()
+                counts = np.histogram(step[mask], bins=bins)[0]
+                peak = counts.max()
+                # A timestep without valid cells has no counts: keep its
+                # row at zero instead of dividing by zero.
+                if peak > 0:
+                    hist[i] = counts / peak
+
+            # Fraction of missing (NaN) cells per timestep.
+            mratio = 1 - valid / ncells
+
+            # TODO: It would be great to store the distribution graph in a database
+            self.histogram = _encode_histogram_png(
+                hist, vstats["min"], vstats["max"], var
+            )
+
+            # The following information should be stored in a database
+            self.info = {
+                # Only string-valued global attributes are kept.
+                "Attrs": {
+                    key: value
+                    for key, value in ds.attrs.items()
+                    if isinstance(value, str)
+                },
+                # Dataset.sizes maps dimension names to lengths.
+                "Dims": dict(ds.sizes),
+                "Vars": list(ds.variables),
+                "Vstats": vstats,
+                "Mstats": get_stats(mratio),
+            }
+
+    def write_json(self):
+        """Dump ``info`` to ``info.txt`` in the output directory.
+
+        Despite the method name, the file content is YAML.
+
+        Returns:
+            pathlib.Path: Location of the written file.
+        """
+        outfile = self.output_dir / "info.txt"
+        with open(outfile.as_posix(), "w") as f:
+            yaml.dump(self.info, f)
+        return outfile
diff --git a/parrot/db.py b/parrot/db.py
new file mode 100644
index 0000000..ad3ae5f
--- /dev/null
+++ b/parrot/db.py
@@ -0,0 +1,55 @@
+"""RDF provenance graph kept in a SQLAlchemy-backed rdflib store."""
+from rdflib import Graph, URIRef
+from rdflib.plugins.sparql import prepareQuery
+# from pywps import configuration
+
+# Provide the path to the SQLite database in the local folder
+DB_URL = "sqlite:////var/lib/pywps/db/provenance.sqlite"
+# DB_URL = "sqlite:////tmp/provenance.sqlite"
+# DB_URL = configuration.get_config_value('provenance', 'db_url')
+
+# Prefixes bound for every query, so callers can omit PREFIX declarations.
+NAMESPACES = {
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+    "foaf": "http://xmlns.com/foaf/0.1/",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "prov": "http://www.w3.org/ns/prov#",
+    "provone": "http://purl.dataone.org/provone/2015/01/15/ontology#",
+    "dcterms": "http://purl.org/dc/terms/",
+    "clint": "urn:clint:",
+}
+
+
+class GraphDB(object):
+    """Small wrapper around a persistent rdflib ``Graph``."""
+
+    def __init__(self, db_url=DB_URL):
+        """Open the graph store at ``db_url``, creating it if necessary."""
+        # Create a graph with a specific backend store
+        self.graph = Graph(
+            store="SQLAlchemy", identifier=URIRef("http://example.org/graph")
+        )
+        self.graph.open(db_url, create=True)
+
+    def close(self):
+        """Close the underlying store."""
+        self.graph.close()
+
+    def add(self, data):
+        """Parse Turtle text ``data`` and add its triples to the store."""
+        new_graph = Graph()
+        new_graph.parse(data=data, format="turtle")
+
+        # add rdf to existing graph
+        for triple in new_graph:
+            self.graph.add(triple)
+        # Commit changes to the store
+        self.graph.commit()
+
+    def query(self, query_str):
+        """Run the SPARQL ``query_str`` and return rdflib's query result.
+
+        The prefixes in ``NAMESPACES`` are pre-bound for the query.
+        """
+        query = prepareQuery(query_str, initNs=NAMESPACES)
+        return self.graph.query(query)
diff --git a/parrot/query.py b/parrot/query.py
index 4876937..457637e 100644
--- a/parrot/query.py
+++ b/parrot/query.py
@@ -1,4 +1,4 @@
-from duck.db import GraphDB
+from parrot.db import GraphDB
 import pandas as pd
 import json
 import yaml