-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Refactor * Make codespell and some pre-commit happy * Make precommit quasi happy * Update doc images
- Loading branch information
Showing
35 changed files
with
1,301 additions
and
330 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
|
||
__all__ = [ | ||
"open_tsdf", | ||
"open_tslong", | ||
"TSArray", | ||
"TSDtype", | ||
"TS", | ||
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Created on Mon Jun 12 17:58:15 2023. | ||
Created on Mon Jun 12 22:23:06 2023. | ||
@author: ghiggi | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Created on Mon Apr 8 17:24:09 2024. | ||
@author: ghiggi | ||
""" | ||
import yaml | ||
|
||
from tstore.archive.io import define_metadata_filepath | ||
|
||
|
||
def _write_yaml_metadata(metadata, fpath):
    """Serialize ``metadata`` as YAML into the file at ``fpath``.

    The file is opened in write mode, so any existing content is replaced.
    """
    with open(fpath, mode="w") as stream:
        yaml.dump(metadata, stream)
|
||
|
||
def write_tstore_metadata(base_dir, ts_variables, id_var, time_var, tstore_structure, partitioning):
    """Collect the TStore settings into a dictionary and write it as YAML.

    The destination path is obtained from ``define_metadata_filepath(base_dir)``.
    The arguments are stored unchanged under the keys ``ts_variables``,
    ``time_var``, ``id_var``, ``tstore_structure`` and ``partitioning``.
    """
    metadata = {
        "ts_variables": ts_variables,
        "time_var": time_var,
        "id_var": id_var,
        "tstore_structure": tstore_structure,
        "partitioning": partitioning,
    }
    _write_yaml_metadata(metadata=metadata, fpath=define_metadata_filepath(base_dir))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Created on Mon Apr 8 17:05:26 2024. | ||
@author: ghiggi | ||
""" | ||
|
||
|
||
def get_partitioning_mapping_dict(time_var, backend="pandas"):
    """Return the extractors used to derive partitioning components.

    Parameters
    ----------
    time_var : str
        Name of the dataframe column on which the ``.dt`` accessor is used.
    backend : str, optional
        Either ``"pandas"`` (default) or ``"polars"``.

    Returns
    -------
    dict
        Maps each partitioning component name (``year``, ``month``, ``day``,
        ``doy``, ``dow``, ``hh``, ``mm``, ``ss``) to a callable that takes a
        dataframe and returns the corresponding component of ``df[time_var]``.

    Raises
    ------
    NotImplementedError
        If ``backend`` is neither ``"pandas"`` nor ``"polars"``.
    """
    if backend not in ("pandas", "polars"):
        raise NotImplementedError(f"Backend {backend}")

    def _dt(df):
        # Shared access to the datetime accessor of the time column
        return df[time_var].dt

    # TODO: add quarter, daysinmonth, month_name and relevant checks
    # TODO: partitioning_str: (YYYY/MM/DD) or (YYYY/DOY/HH). Or list ?
    # TODO: provide proxy for year(YYYY) and month (MM) ? But month conflicts with minutes ?
    # NOTE(review): pandas `dayofweek` and polars `weekday` may number the days
    # differently (0-based vs 1-based) - confirm that "dow" agrees across backends.
    if backend == "pandas":
        # pandas exposes the datetime components as attributes
        return {
            "year": lambda df: _dt(df).year,
            "month": lambda df: _dt(df).month,
            "day": lambda df: _dt(df).day,
            "doy": lambda df: _dt(df).dayofyear,
            "dow": lambda df: _dt(df).dayofweek,
            # week TODO
            "hh": lambda df: _dt(df).hour,
            "mm": lambda df: _dt(df).minute,
            "ss": lambda df: _dt(df).second,
        }

    # polars exposes the datetime components as methods
    # TODO: for polars ('week': lambda df: df[time_var].dt.week())
    return {
        "year": lambda df: _dt(df).year(),
        "month": lambda df: _dt(df).month(),
        "day": lambda df: _dt(df).day(),
        "doy": lambda df: _dt(df).ordinal_day(),
        "dow": lambda df: _dt(df).weekday(),
        "hh": lambda df: _dt(df).hour(),
        "mm": lambda df: _dt(df).minute(),
        "ss": lambda df: _dt(df).second(),
    }
|
||
|
||
def get_valid_partitions():
    """Return the list of supported partitioning component names.

    The names are the keys of the default-backend mapping returned by
    ``get_partitioning_mapping_dict``; the ``time_var`` passed is a placeholder
    because only the keys are used.
    """
    mapping = get_partitioning_mapping_dict(time_var="dummy")
    return [*mapping]
|
||
|
||
def check_partitions(partitioning_str):
    """Validate the components of a partitioning string.

    Parameters
    ----------
    partitioning_str : str or None
        Partitioning components separated by ``/`` (e.g. ``"year/month"``).

    Returns
    -------
    list of str or None
        The lower-cased partitioning components, or ``None`` if
        ``partitioning_str`` is ``None``.

    Raises
    ------
    ValueError
        If a component is not a valid partition name, or if ``doy`` is
        combined with ``month`` or ``day``.
    """
    if partitioning_str is None:
        return None

    # Split the string into its individual components
    raw_components = partitioning_str.split("/")
    allowed = get_valid_partitions()

    # Normalize to lower case, rejecting the first unknown component
    partitions = []
    for raw in raw_components:
        name = raw.lower()
        if name not in allowed:
            raise ValueError(f"Invalid partitioning component '{raw}'")
        partitions.append(name)

    # 'doy' (day of year) cannot be combined with 'month' or 'day'
    if "doy" in partitions:
        if "month" in partitions:
            raise ValueError("Either specify 'month' or 'doy' (day of year).")
        if "day" in partitions:
            raise ValueError("Either specify 'day' or 'doy' (day of year).")

    return partitions
|
||
|
||
def check_partitioning(partitioning, ts_variables):
    """Check and normalize the ``to_tstore`` partitioning specification.

    Parameters
    ----------
    partitioning : dict, str or None
        Either a mapping from time series variable to partitioning string,
        or a single partitioning string (or ``None``) applied to every
        variable in ``ts_variables``.
    ts_variables : iterable
        Time series variable names; used only when ``partitioning`` is not
        a dictionary.

    Returns
    -------
    dict
        A new dictionary mapping each variable to its normalized partitioning
        string (validated components joined by ``/``), or to ``None`` when no
        partitioning is requested. The input dictionary is left untouched.

    Raises
    ------
    TypeError
        If ``partitioning`` is not a dict, a string or ``None``.
    ValueError
        If the partitioning of any variable fails validation.
    """
    if partitioning is not None and not isinstance(partitioning, (dict, str)):
        raise TypeError(
            "'partitioning' must be a dict, a string or None, "
            f"not {type(partitioning).__name__}.",
        )

    if isinstance(partitioning, dict):
        requested = partitioning
    else:
        # Broadcast the single specification to every time series variable
        requested = {ts_variable: partitioning for ts_variable in ts_variables}

    # Build a fresh dictionary so the caller's argument is never mutated
    checked = {}
    for ts_variable, partitioning_str in requested.items():
        try:
            partitions = check_partitions(partitioning_str)
        except Exception as e:
            # Re-raise with the variable name, keeping the original cause chained
            raise ValueError(f"Invalid partitioning for {ts_variable}: {e}") from e
        checked[ts_variable] = partitioning_str if partitions is None else "/".join(partitions)
    return checked
|
||
|
||
def add_partitioning_columns(df, partitioning_str, time_var, backend):
    """Add one column per partitioning component to the dataframe.

    Parameters
    ----------
    df : dataframe
        Dataframe holding the ``time_var`` column.
    partitioning_str : str or None
        Partitioning components separated by ``/``. If ``None``, the
        dataframe is returned unchanged.
    time_var : str
        Name of the time column the components are derived from.
    backend : str
        ``"pandas"`` or ``"polars"``.

    Returns
    -------
    tuple
        ``(df, partitions)`` where ``partitions`` is the list of validated
        component names, or ``None`` if ``partitioning_str`` is ``None``.
    """
    if partitioning_str is None:
        return df, None

    partitions = check_partitions(partitioning_str)
    extractors = get_partitioning_mapping_dict(time_var=time_var, backend=backend)
    for name in partitions:
        if backend == "pandas":
            # Column assignment on the dataframe that was passed in
            df[name] = extractors[name](df)
        elif backend == "polars":
            # with_columns returns a dataframe, so rebind the local name
            values = extractors[name](df)
            df = df.with_columns(values.alias(name))
        else:
            raise NotImplementedError
    return df, partitions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Created on Mon Jun 12 22:22:27 2023. | ||
Created on Mon Jun 12 22:23:06 2023. | ||
@author: ghiggi | ||
""" |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Created on Mon Jun 12 22:23:06 2023. | ||
@author: ghiggi | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.