style: apply pre-commit hooks to all files
martibosch committed Apr 24, 2024
1 parent 4282e01 commit 3a32604
Showing 22 changed files with 50 additions and 20 deletions.
1 change: 1 addition & 0 deletions tstore/archive/attributes/pandas.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import pandas as pd

from tstore.archive.checks import check_is_tstore, check_tstore_ids
5 changes: 4 additions & 1 deletion tstore/archive/attributes/pyarrow.py
@@ -4,13 +4,16 @@
@author: ghiggi
"""

import pyarrow.parquet as pq

from tstore.archive.checks import check_is_tstore, check_tstore_ids
from tstore.archive.io import define_attributes_filepath


def get_tstore_ids_filters(tstore_ids=None):
def get_tstore_ids_filters(
    tstore_ids=None,  # noqa: ARG001
):
    """Define filters for Parquet Dataset subsetting at read-time."""
    # TODO implement logic
    filters = None
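Note: the hunk above still leaves `filters = None` as a TODO. For orientation only, a minimal sketch of what the id filter could eventually return, using pyarrow's DNF filter-tuple format; the `tstore_id` column name is an assumption, not something this commit confirms:

```python
# Hypothetical sketch only -- not the implementation in this commit.
def get_tstore_ids_filters(tstore_ids=None):
    """Define filters for Parquet Dataset subsetting at read-time."""
    if tstore_ids is None:
        return None  # no subsetting: read all ids
    # pyarrow.parquet accepts DNF filter tuples of the form (column, op, value);
    # the "tstore_id" column name is assumed here.
    return [("tstore_id", "in", list(tstore_ids))]
```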
6 changes: 5 additions & 1 deletion tstore/archive/checks.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import datetime

import numpy as np
@@ -45,7 +46,10 @@ def check_ts_variables(ts_variables, base_dir):
    return ts_variables


def check_tstore_ids(tstore_ids, base_dir):
def check_tstore_ids(
    tstore_ids,
    base_dir,  # noqa: ARG001
):
"""Check valid tstore_ids.
If tstore_ids=None, return None.
1 change: 1 addition & 0 deletions tstore/archive/metadata/readers.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import yaml

from tstore.archive.io import define_metadata_filepath
1 change: 1 addition & 0 deletions tstore/archive/metadata/writers.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import yaml

from tstore.archive.io import define_metadata_filepath
1 change: 1 addition & 0 deletions tstore/archive/partitions.py
@@ -7,6 +7,7 @@


def get_partitioning_mapping_dict(time_var, backend="pandas"):
"""Get partitioning mapping dict."""
# Mapping of partitioning components to corresponding pandas attributes
if backend == "pandas":
partitioning_mapping = {
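As a rough illustration of what the partitioning mapping is for (the exact dict in the repository may differ), partition components for the `pandas` backend map onto `Series.dt` attributes:

```python
import pandas as pd

# Illustrative sketch: derive partition columns from a datetime column via
# the pandas attributes that a partitioning mapping would point to.
df = pd.DataFrame({"time": pd.date_range("2024-01-01", periods=4, freq="D")})
df["year"] = df["time"].dt.year    # "year" partition component
df["month"] = df["time"].dt.month  # "month" partition component
print(df)
```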
1 change: 1 addition & 0 deletions tstore/archive/ts/readers/dask.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import dask.dataframe as dd
import pandas as pd

5 changes: 3 additions & 2 deletions tstore/archive/ts/readers/polars.py
@@ -4,14 +4,15 @@
@author: ghiggi
"""

import polars as pl


def open_ts(
    fpath,
    partitions,
    start_time=None,
    end_time=None,
    start_time=None,  # noqa: ARG001
    end_time=None,  # noqa: ARG001
    # Options
    rechunk=True,
    use_statistics=True,
3 changes: 2 additions & 1 deletion tstore/archive/ts/readers/pyarrow.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import pyarrow.parquet as pq

from tstore.archive.ts.utility import get_time_filters
@@ -15,7 +16,7 @@ def open_ts(
    start_time=None,
    end_time=None,
    columns=None,
    split_row_groups=False,
    split_row_groups=False,  # noqa: ARG001
    # pyarrow-specific
    filesystem=None,
    use_threads=True,
5 changes: 4 additions & 1 deletion tstore/archive/ts/utility.py
@@ -6,7 +6,10 @@
"""


def get_time_filters(start_time=None, end_time=None):
def get_time_filters(
    start_time=None,  # noqa: ARG001
    end_time=None,  # noqa: ARG001
):
"""Define filters for Parquet Dataset subsetting at read-time."""
# TODO implement logic
filters = None
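Here too the logic is still a TODO. A hedged sketch of what the time filters might look like in pyarrow's filter-tuple format (the `time` column name is an assumption):

```python
# Hypothetical sketch only -- the commit itself returns None.
def get_time_filters(start_time=None, end_time=None):
    """Define filters for Parquet Dataset subsetting at read-time."""
    filters = []
    if start_time is not None:
        filters.append(("time", ">=", start_time))
    if end_time is not None:
        filters.append(("time", "<=", end_time))
    return filters or None  # None disables row-group filtering
```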
2 changes: 2 additions & 0 deletions tstore/archive/ts/writers/pyarrow.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import math
import os

@@ -102,6 +103,7 @@ def file_visitor(written_file):


def convert_size_to_bytes(size):
"""Convert size to bytes."""
if not isinstance(size, (str, int)):
raise TypeError("Expecting a string (i.e. 200MB) or the integer number of bytes.")
if isinstance(size, int):
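The diff only shows the head of `convert_size_to_bytes`. A minimal sketch of how such a helper can behave, assuming decimal units; the repository's actual parsing rules may differ:

```python
import math

# Sketch under assumed semantics: accept an int (bytes) or strings like "200MB".
_UNITS = {"KB": 10**3, "MB": 10**6, "GB": 10**9, "B": 1}  # longer suffixes first

def convert_size_to_bytes(size):
    """Convert size to bytes."""
    if not isinstance(size, (str, int)):
        raise TypeError("Expecting a string (i.e. 200MB) or the integer number of bytes.")
    if isinstance(size, int):
        return size
    text = size.strip().upper()
    for unit, factor in _UNITS.items():
        if text.endswith(unit):
            return math.ceil(float(text[: -len(unit)]) * factor)
    raise ValueError(f"Unrecognized size string: {size!r}")
```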
2 changes: 1 addition & 1 deletion tstore/tests/test_dummy.py
@@ -7,4 +7,4 @@


def test_dummy():
    assert 1 == 1
    assert 1 == 1  # noqa: PLR0133
2 changes: 1 addition & 1 deletion tstore/tsdf/extensions/array.py
@@ -154,7 +154,7 @@ def ts_class(self):
    # Required for all ExtensionArray subclasses
    def isna(self):
        """A 1-D array indicating if the TS is missing."""
        return pd.isnull(self._data)
        return pd.isna(self._data)


    # Required for all ExtensionArray subclasses
    def copy(self):
1 change: 1 addition & 0 deletions tstore/tsdf/extensions/ts_dtype.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import re
from typing import Any

1 change: 1 addition & 0 deletions tstore/tsdf/reader.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import pandas as pd

from tstore.archive.io import get_ts_info
1 change: 1 addition & 0 deletions tstore/tsdf/writer.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import numpy as np

from tstore.archive.io import (
2 changes: 1 addition & 1 deletion tstore/tslong/pandas.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import pandas as pd
import pyarrow as pa

@@ -120,7 +121,6 @@ def to_tstore(
    # Write to disk per identifier
    for tstore_id, df_group in df.groupby(id_var):
        for ts_variable, columns in ts_variables.items():

            # Retrieve columns of the TS object
            if columns is None:
                columns = [ts_variable]
1 change: 0 additions & 1 deletion tstore/tslong/polars.py
@@ -114,7 +114,6 @@ def to_tstore(
    # Write to disk per identifier
    for tstore_id, df_group in df.groupby(id_var):
        for ts_variable, columns in ts_variables.items():

            # Retrieve columns of the TS object
            if columns is None:
                columns = [ts_variable]
1 change: 1 addition & 0 deletions tstore/tslong/pyarrow.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

from functools import reduce

import numpy as np
1 change: 1 addition & 0 deletions tutorials/00_example_pandas.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""
# ruff: noqa: E402

import dask.datasets
import numpy as np
7 changes: 5 additions & 2 deletions tutorials/01_example_ts.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import dask.datasets
import pandas as pd
import polars as pl
@@ -52,7 +53,8 @@
# - Should be based on tstore.archive.ts.writers
# --> All code should likely exploit the pyarrow write_partitioned_dataset() function
# --> Maybe we should call the method TS.to_parquet()
# --> Also when writing TSLONG to TStore, we could create a TS to then write to disk (see comment in tstore.tslong.pandas/polars)
# --> Also when writing TSLONG to TStore, we could create a TS to then write to disk (see comment in
# tstore.tslong.pandas/polars)

# Write to disk
fpath = "/tmp/ts_dask.parquet"
@@ -69,7 +71,8 @@
#### Read TS from disk
# --> Pandas, Polars and pyarrow <read_parquet> functions loads data into memory !
# --> Should we exploit only the pyarrow read function and then convert to the backend of choice?
# --> Should we wrap the reading with dask.delayed to enable lazy reading (not actually read data into memory till needed ?)
# --> Should we wrap the reading with dask.delayed to enable lazy reading (not actually read data into memory till
# needed ?)
# --> Dask (dd.read_parquet) and LazyPolars (scan_parquet) functions allow direct lazy reads

# Dask code
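To make the lazy-read comparison in the comments above concrete, a small sketch of the two readers that support direct lazy reads (reusing the `fpath` from this tutorial):

```python
import dask.dataframe as dd
import polars as pl

fpath = "/tmp/ts_dask.parquet"

# Dask: dd.read_parquet builds a task graph; data is read only on .compute()
ddf = dd.read_parquet(fpath)
df_pandas = ddf.compute()  # materializes a pandas DataFrame

# Polars: scan_parquet returns a LazyFrame; .collect() triggers the read
ldf = pl.scan_parquet(fpath)
df_polars = ldf.collect()
```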
20 changes: 12 additions & 8 deletions tutorials/02_example_tsdf.py
@@ -4,6 +4,7 @@
@author: ghiggi
"""

import dask.datasets
import numpy as np
import pandas as pd
@@ -97,7 +98,8 @@

# If class inherit pandas.DataFrame or geopandas.DataFrame
# --> Need to redefine methods to returns TSDF (similar concept applies to TSLONG, ...)
# Or we avoid inheritance from *.DataFrame and we re-implement the relevant methods calling internally the *.DataFrame.<methods>
# Or we avoid inheritance from *.DataFrame and we re-implement the relevant methods calling internally the
# *.DataFrame.<methods>


#### TSDF - Write TStore
@@ -167,21 +169,23 @@
####--------------------------------------------------------------------------.
#### IMPLEMENTATIONS / THOUGHTS
# - Arrow/Pyarrow allows for zero-copy conversion between pandas/(dask)/polars. We should exploit that !
# - Pyarrow.Table object is difficult to manipulate (I would use it only for conversion across backend and reading/writing to/from disk)
# - Pyarrow.Table object is difficult to manipulate (I would use it only for conversion across backend and
# reading/writing to/from disk)
# - ID should be always stored as string (pyarrow.large_string), so to have some dtype when doing join/merge operations
# - I would suggest to first start implement the I/O and conversion with pandas and polars (with data in memory) (and eventually dask dataframe)
# - I would suggest to first start implement the I/O and conversion with pandas and polars (with data in memory) (and
# eventually dask dataframe)
# - When this is set (and eventually tested), we can implement the lazy reading of TSDF
# - dask.delayed for pandas and polars backends
# - dask.dataframe for dask backend
# - lazy polars for lazy_polars backend ...
# - I would expect that we enable to define a series of lazy operations (which can change the backend of the TS during computations),
# we must have something to control / enforce the final backend of the data to be written to disk. Because the dask delayed object does not
# know what will be the dtype of the TS.data
# - I would expect that we enable to define a series of lazy operations (which can change the backend of the TS during
# computations), we must have something to control / enforce the final backend of the data to be written to disk.
# Because the dask delayed object does not know what will be the dtype of the TS.data
# - A tool to switch TS object from an backend to another should be implemented
# - A method to .compute() data in memory should be also implemented

# - Ideally one should be able to apply functions to many TSArray etc, and the actual execution
# only taking place when writing to disk the timeseries (id per id), loading data into memory only at that moment and freeing the memory
# when the id is processed.
# only taking place when writing to disk the timeseries (id per id), loading data into memory only at that moment and
# freeing the memory when the id is processed.

####--------------------------------------------------------------------------.
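A short sketch of the Arrow-mediated conversions the first thought above refers to; column names are illustrative, and zero-copy applies only where dtypes allow it:

```python
import pandas as pd
import polars as pl
import pyarrow as pa

pdf = pd.DataFrame({"tstore_id": ["1", "2"], "value": [0.1, 0.2]})

# pandas -> Arrow -> polars (Arrow buffers are shared where possible)
table = pa.Table.from_pandas(pdf)
pldf = pl.from_arrow(table)

# polars -> Arrow -> pandas
pdf_back = pldf.to_arrow().to_pandas()
```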
