From 375d4dfab8ca7e475c28d20bfcde0f0d0f45b430 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 9 Dec 2022 17:45:19 -0600 Subject: [PATCH 1/4] WIP: sorting out add and iadd" --- cf_pandas/vocab.py | 140 +++++++++++++++++++++++++++++++++++++++----- tests/test_vocab.py | 20 +++++-- 2 files changed, 140 insertions(+), 20 deletions(-) diff --git a/cf_pandas/vocab.py b/cf_pandas/vocab.py index 0574322..397862a 100644 --- a/cf_pandas/vocab.py +++ b/cf_pandas/vocab.py @@ -4,7 +4,7 @@ import pathlib from collections import defaultdict -from typing import DefaultDict, Dict, Optional, Union +from typing import DefaultDict, Dict, Optional, Sequence, Union from .utils import astype @@ -51,21 +51,96 @@ def make_entry( expressions = astype(expressions, list) entry: DefaultDict[str, Dict[str, str]] = defaultdict(dict) entry[nickname][attr] = "|".join(expressions) - self.__add__(entry) - - def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): - """Add two Vocab objects together... - - by adding their `.vocab`s together. Expressions are piped together but otherwise not changed. - - Parameters - ---------- - other_vocab: Vocab - Other Vocab object to combine with. - """ + # import pdb; pdb.set_trace() + self.__iadd__(entry) + + # def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": + # """Add two Vocab objects together... + + # by adding their `.vocab`s together. Expressions are piped together but otherwise not changed. + + # Parameters + # ---------- + # other_vocab: Vocab + # Other Vocab object to combine with. + + # Returns + # ------- + # Vocab + # vocab + other_vocab + # """ + + # if isinstance(other_vocab, Vocab): + # other_vocab = other_vocab.vocab + + # new_vocab = Vocab() + + # nicknames = set(list(self.vocab.keys()) + list(other_vocab.keys())) + # for nickname in nicknames: + + # # gather all attributes under nickname as a set to compare their expressions + # attributes = set( + # list(self.vocab[nickname].keys()) + list(other_vocab[nickname].keys()) + # ) + + # # pipe together expressions for nickname-attribute pairs + # for attribute in attributes: + # new_expressions = ( + # self.vocab[nickname].get(attribute, "") + # + "|" + # + other_vocab[nickname].get(attribute, "") + # ).strip("|") + # new_vocab.vocab[nickname][attribute] = new_expressions + # # self.vocab[nickname][attribute] = new_expressions + # return new_vocab + + # def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": + # """Add two Vocab objects together... + + # by adding their `.vocab`s together. Expressions are piped together but otherwise not changed. + + # Parameters + # ---------- + # other_vocab: Vocab + # Other Vocab object to combine with. + + # Returns + # ------- + # Vocab + # vocab + other_vocab + # """ + + # if isinstance(other_vocab, Vocab): + # other_vocab = other_vocab.vocab + + # # new_vocab = Vocab() + + # nicknames = set(list(self.vocab.keys()) + list(other_vocab.keys())) + # for nickname in nicknames: + + # # gather all attributes under nickname as a set to compare their expressions + # attributes = set( + # list(self.vocab[nickname].keys()) + list(other_vocab[nickname].keys()) + # ) + + # # pipe together expressions for nickname-attribute pairs + # for attribute in attributes: + # new_expressions = ( + # self.vocab[nickname].get(attribute, "") + # + "|" + # + other_vocab[nickname].get(attribute, "") + # ).strip("|") + # # new_vocab.vocab[nickname][attribute] = new_expressions + # self.vocab[nickname][attribute] = new_expressions + # return self + + def add(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], + final_vocab) -> "Vocab": if isinstance(other_vocab, Vocab): other_vocab = other_vocab.vocab + + # new_vocab = Vocab() nicknames = set(list(self.vocab.keys()) + list(other_vocab.keys())) for nickname in nicknames: @@ -82,8 +157,34 @@ def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) + "|" + other_vocab[nickname].get(attribute, "") ).strip("|") - self.vocab[nickname][attribute] = new_expressions - return self + # new_vocab.vocab[nickname][attribute] = new_expressions + final_vocab.vocab[nickname][attribute] = new_expressions + return final_vocab + + def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): + return self.add(other_vocab, Vocab()) + + def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): + return self.add(other_vocab, self) + + def __radd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": + return self.__add__(other_vocab) + + # def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": + # """Allows for vocab1 += vocab2 + + # Parameters + # ---------- + # other_vocab: Vocab + # Other Vocab object to combine with. + + # Returns + # ------- + # Vocab + # vocab plus other_vocab + # """ + # self = self.__add__(other_vocab) + # return self def save(self, savename: Union[str, pathlib.PurePath]): """Save to file. @@ -108,3 +209,12 @@ def open_file(self, openname: Union[str, pathlib.PurePath]): return json.loads( open(pathlib.PurePath(openname).with_suffix(".json"), "r").read() ) + + +def merge(vocabs: Sequence[Vocab]) -> Vocab: + + final_vocab = vocabs[0] + for vocab in vocabs[1:]: + final_vocab += vocab + return final_vocab + diff --git a/tests/test_vocab.py b/tests/test_vocab.py index ebcb6d6..d69a7f7 100644 --- a/tests/test_vocab.py +++ b/tests/test_vocab.py @@ -20,18 +20,28 @@ def test_make_entry(): def test_add_vocabs(): vocab = cfp.Vocab() - vocab.make_entry("temp", ["a", "b"], attr="standard_name") - vocab.make_entry("salt", ["a", "b"], attr="name") + vocab.vocab = defaultdict(dict,{"temp": {"standard_name": "a|b"}, "salt": {"name": "a|b"}} ) + # vocab.vocab = {"temp": {"standard_name": "a|b"}, "salt": {"name": "a|b"}} + # vocab.make_entry("temp", ["a", "b"], attr="standard_name") + # vocab.make_entry("salt", ["a", "b"], attr="name") compare = {"temp": {"standard_name": "a|b|a|b"}, "salt": {"name": "a|b|a|b"}} + # import pdb; pdb.set_trace() assert (vocab + vocab).vocab == compare vocab2 = cfp.Vocab() - vocab2.make_entry("temp", ["a", "b"], attr="name") + vocab2.vocab = defaultdict(dict,{"temp": {"name": "a|b"}} ) + # vocab2.vocab = {"temp": {"name": "a|b"}} + # vocab2.make_entry("temp", ["a", "b"], attr="name") compare = { - "temp": {"name": "a|b", "standard_name": "a|b|a|b"}, - "salt": {"name": "a|b|a|b"}, + "temp": {"name": "a|b", "standard_name": "a|b"}, + "salt": {"name": "a|b"}, } assert (vocab + vocab2).vocab == compare + + # also iadd + vocab += vocab2 + assert vocab.vocab == compare + def test_make_more_entries(): From 0a582f6a8fc92fb529d0b2a13d8a53c10795720c Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 9 Dec 2022 18:23:11 -0600 Subject: [PATCH 2/4] finished adding add and iadd, and merge, plus tests --- cf_pandas/__init__.py | 2 +- cf_pandas/vocab.py | 147 ++++++++++++------------------------------ tests/test_vocab.py | 12 ++-- 3 files changed, 47 insertions(+), 114 deletions(-) diff --git a/cf_pandas/__init__.py b/cf_pandas/__init__.py index 4c249a8..610a4dc 100644 --- a/cf_pandas/__init__.py +++ b/cf_pandas/__init__.py @@ -8,7 +8,7 @@ from .options import set_options # noqa from .reg import Reg from .utils import always_iterable, astype, match_criteria_key, standard_names -from .vocab import Vocab +from .vocab import merge, Vocab from .widget import Selector, dropdown diff --git a/cf_pandas/vocab.py b/cf_pandas/vocab.py index 397862a..2d91154 100644 --- a/cf_pandas/vocab.py +++ b/cf_pandas/vocab.py @@ -51,96 +51,35 @@ def make_entry( expressions = astype(expressions, list) entry: DefaultDict[str, Dict[str, str]] = defaultdict(dict) entry[nickname][attr] = "|".join(expressions) - # import pdb; pdb.set_trace() self.__iadd__(entry) + + def add(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], + method: str) -> "Vocab": + """Add two Vocab objects together... - # def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": - # """Add two Vocab objects together... - - # by adding their `.vocab`s together. Expressions are piped together but otherwise not changed. + by adding their `.vocab`s together. Expressions are piped together but otherwise not changed. + This is used for both `__add__` and `__iadd__`. - # Parameters - # ---------- - # other_vocab: Vocab - # Other Vocab object to combine with. - - # Returns - # ------- - # Vocab - # vocab + other_vocab - # """ - - # if isinstance(other_vocab, Vocab): - # other_vocab = other_vocab.vocab - - # new_vocab = Vocab() - - # nicknames = set(list(self.vocab.keys()) + list(other_vocab.keys())) - # for nickname in nicknames: - - # # gather all attributes under nickname as a set to compare their expressions - # attributes = set( - # list(self.vocab[nickname].keys()) + list(other_vocab[nickname].keys()) - # ) - - # # pipe together expressions for nickname-attribute pairs - # for attribute in attributes: - # new_expressions = ( - # self.vocab[nickname].get(attribute, "") - # + "|" - # + other_vocab[nickname].get(attribute, "") - # ).strip("|") - # new_vocab.vocab[nickname][attribute] = new_expressions - # # self.vocab[nickname][attribute] = new_expressions - # return new_vocab - - # def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": - # """Add two Vocab objects together... - - # by adding their `.vocab`s together. Expressions are piped together but otherwise not changed. - - # Parameters - # ---------- - # other_vocab: Vocab - # Other Vocab object to combine with. - - # Returns - # ------- - # Vocab - # vocab + other_vocab - # """ - - # if isinstance(other_vocab, Vocab): - # other_vocab = other_vocab.vocab + Parameters + ---------- + other_vocab: Vocab + Other Vocab object to combine with. + method : str + Whether to run as "add" which returns a new Vocab object or "iadd" which adds to the original object. - # # new_vocab = Vocab() - - # nicknames = set(list(self.vocab.keys()) + list(other_vocab.keys())) - # for nickname in nicknames: - - # # gather all attributes under nickname as a set to compare their expressions - # attributes = set( - # list(self.vocab[nickname].keys()) + list(other_vocab[nickname].keys()) - # ) - - # # pipe together expressions for nickname-attribute pairs - # for attribute in attributes: - # new_expressions = ( - # self.vocab[nickname].get(attribute, "") - # + "|" - # + other_vocab[nickname].get(attribute, "") - # ).strip("|") - # # new_vocab.vocab[nickname][attribute] = new_expressions - # self.vocab[nickname][attribute] = new_expressions - # return self - - def add(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], - final_vocab) -> "Vocab": + Returns + ------- + Vocab + vocab + other_vocab either as a new object or in place. + """ if isinstance(other_vocab, Vocab): other_vocab = other_vocab.vocab - # new_vocab = Vocab() + if method == "add": + output = Vocab() + elif method == "iadd": + output = self nicknames = set(list(self.vocab.keys()) + list(other_vocab.keys())) for nickname in nicknames: @@ -157,35 +96,21 @@ def add(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], + "|" + other_vocab[nickname].get(attribute, "") ).strip("|") - # new_vocab.vocab[nickname][attribute] = new_expressions - final_vocab.vocab[nickname][attribute] = new_expressions - return final_vocab + output.vocab[nickname][attribute] = new_expressions + return output def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): - return self.add(other_vocab, Vocab()) + """vocab1 + vocab2""" + return self.add(other_vocab, "add") def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): - return self.add(other_vocab, self) + """vocab1 += vocab2""" + return self.add(other_vocab, "iadd") def __radd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": + """right add?""" return self.__add__(other_vocab) - # def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": - # """Allows for vocab1 += vocab2 - - # Parameters - # ---------- - # other_vocab: Vocab - # Other Vocab object to combine with. - - # Returns - # ------- - # Vocab - # vocab plus other_vocab - # """ - # self = self.__add__(other_vocab) - # return self - def save(self, savename: Union[str, pathlib.PurePath]): """Save to file. @@ -212,9 +137,21 @@ def open_file(self, openname: Union[str, pathlib.PurePath]): def merge(vocabs: Sequence[Vocab]) -> Vocab: + """Add together multiple Vocab objects. + + Parameters + ---------- + vocabs : Sequence[Vocab] + Sequence of Vocab objects to merge. + + Returns + ------- + Vocab + Single Vocab object made up of input vocabs. + """ - final_vocab = vocabs[0] - for vocab in vocabs[1:]: + final_vocab = Vocab() + for vocab in vocabs: final_vocab += vocab return final_vocab diff --git a/tests/test_vocab.py b/tests/test_vocab.py index d69a7f7..cbc93ca 100644 --- a/tests/test_vocab.py +++ b/tests/test_vocab.py @@ -21,29 +21,25 @@ def test_make_entry(): def test_add_vocabs(): vocab = cfp.Vocab() vocab.vocab = defaultdict(dict,{"temp": {"standard_name": "a|b"}, "salt": {"name": "a|b"}} ) - # vocab.vocab = {"temp": {"standard_name": "a|b"}, "salt": {"name": "a|b"}} - # vocab.make_entry("temp", ["a", "b"], attr="standard_name") - # vocab.make_entry("salt", ["a", "b"], attr="name") compare = {"temp": {"standard_name": "a|b|a|b"}, "salt": {"name": "a|b|a|b"}} - # import pdb; pdb.set_trace() assert (vocab + vocab).vocab == compare vocab2 = cfp.Vocab() vocab2.vocab = defaultdict(dict,{"temp": {"name": "a|b"}} ) - # vocab2.vocab = {"temp": {"name": "a|b"}} - # vocab2.make_entry("temp", ["a", "b"], attr="name") compare = { - "temp": {"name": "a|b", "standard_name": "a|b"}, + "temp": {"standard_name": "a|b", "name": "a|b"}, "salt": {"name": "a|b"}, } assert (vocab + vocab2).vocab == compare + + # also merge + assert cfp.merge([vocab, vocab2]).vocab == compare # also iadd vocab += vocab2 assert vocab.vocab == compare - def test_make_more_entries(): vocab = cfp.Vocab() vocab.make_entry("temp", ["a", "b"], attr="name") From 3185e92436a3949e3bbb8852368248babaa47494 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 9 Dec 2022 18:23:43 -0600 Subject: [PATCH 3/4] precommit --- cf_pandas/__init__.py | 2 +- cf_pandas/vocab.py | 26 ++++++++++++++------------ tests/test_vocab.py | 8 +++++--- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/cf_pandas/__init__.py b/cf_pandas/__init__.py index 610a4dc..2638fa9 100644 --- a/cf_pandas/__init__.py +++ b/cf_pandas/__init__.py @@ -8,7 +8,7 @@ from .options import set_options # noqa from .reg import Reg from .utils import always_iterable, astype, match_criteria_key, standard_names -from .vocab import merge, Vocab +from .vocab import Vocab, merge from .widget import Selector, dropdown diff --git a/cf_pandas/vocab.py b/cf_pandas/vocab.py index 2d91154..bd8568c 100644 --- a/cf_pandas/vocab.py +++ b/cf_pandas/vocab.py @@ -52,9 +52,10 @@ def make_entry( entry: DefaultDict[str, Dict[str, str]] = defaultdict(dict) entry[nickname][attr] = "|".join(expressions) self.__iadd__(entry) - - def add(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], - method: str) -> "Vocab": + + def add( + self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], method: str + ) -> "Vocab": """Add two Vocab objects together... by adding their `.vocab`s together. Expressions are piped together but otherwise not changed. @@ -66,7 +67,7 @@ def add(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], Other Vocab object to combine with. method : str Whether to run as "add" which returns a new Vocab object or "iadd" which adds to the original object. - + Returns ------- Vocab @@ -75,7 +76,7 @@ def add(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], if isinstance(other_vocab, Vocab): other_vocab = other_vocab.vocab - + if method == "add": output = Vocab() elif method == "iadd": @@ -98,16 +99,18 @@ def add(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], ).strip("|") output.vocab[nickname][attribute] = new_expressions return output - + def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): """vocab1 + vocab2""" return self.add(other_vocab, "add") - + def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): """vocab1 += vocab2""" return self.add(other_vocab, "iadd") - - def __radd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) -> "Vocab": + + def __radd__( + self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"] + ) -> "Vocab": """right add?""" return self.__add__(other_vocab) @@ -134,7 +137,7 @@ def open_file(self, openname: Union[str, pathlib.PurePath]): return json.loads( open(pathlib.PurePath(openname).with_suffix(".json"), "r").read() ) - + def merge(vocabs: Sequence[Vocab]) -> Vocab: """Add together multiple Vocab objects. @@ -149,9 +152,8 @@ def merge(vocabs: Sequence[Vocab]) -> Vocab: Vocab Single Vocab object made up of input vocabs. """ - + final_vocab = Vocab() for vocab in vocabs: final_vocab += vocab return final_vocab - diff --git a/tests/test_vocab.py b/tests/test_vocab.py index cbc93ca..ffe4e09 100644 --- a/tests/test_vocab.py +++ b/tests/test_vocab.py @@ -20,12 +20,14 @@ def test_make_entry(): def test_add_vocabs(): vocab = cfp.Vocab() - vocab.vocab = defaultdict(dict,{"temp": {"standard_name": "a|b"}, "salt": {"name": "a|b"}} ) + vocab.vocab = defaultdict( + dict, {"temp": {"standard_name": "a|b"}, "salt": {"name": "a|b"}} + ) compare = {"temp": {"standard_name": "a|b|a|b"}, "salt": {"name": "a|b|a|b"}} assert (vocab + vocab).vocab == compare vocab2 = cfp.Vocab() - vocab2.vocab = defaultdict(dict,{"temp": {"name": "a|b"}} ) + vocab2.vocab = defaultdict(dict, {"temp": {"name": "a|b"}}) compare = { "temp": {"standard_name": "a|b", "name": "a|b"}, "salt": {"name": "a|b"}, @@ -34,7 +36,7 @@ def test_add_vocabs(): # also merge assert cfp.merge([vocab, vocab2]).vocab == compare - + # also iadd vocab += vocab2 assert vocab.vocab == compare From 3281df4aaee53b255d8a6d8b06fa37626b2a9c86 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 9 Dec 2022 18:28:31 -0600 Subject: [PATCH 4/4] updated docs --- docs/demo_vocab.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/demo_vocab.md b/docs/demo_vocab.md index e370eda..113d784 100644 --- a/docs/demo_vocab.md +++ b/docs/demo_vocab.md @@ -6,7 +6,7 @@ jupytext: format_version: 0.13 jupytext_version: 1.14.0 kernelspec: - display_name: Python 3 (ipykernel) + display_name: Python 3.10.6 ('cf-pandas') language: python name: python3 --- @@ -139,6 +139,19 @@ vocab2.make_entry("other_variable_nickname", "match_that_string", attr="standard vocab1 + vocab2 ``` +Merge 2 or more Vocab objects: + +```{code-cell} ipython3 +cfp.merge([vocab1, vocab2]) +``` + +Can also add in place + +```{code-cell} ipython3 +# also works +vocab1 += vocab2 +``` + ## Use the `Reg` class to write regular expressions We used simple exact matching regular expressions above, but for anything more complicated it can be hard to write regular expressions. You can use the `Reg` class in `cf-pandas` to write regular expressions with several options, as demonstrated more in [another doc page](https://cf-pandas.readthedocs.io/en/latest/demo_reg.html), and briefly here.