Skip to content

Commit c840e31

Browse files
authored
Merge pull request #626 from bashtage/improve-typing
TYP/DOC: Improve typing and docs
2 parents c5c697e + 3181008 commit c840e31

19 files changed

+97
-71
lines changed

examples/asset-pricing_examples.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@
412412
"name": "python",
413413
"nbconvert_exporter": "python",
414414
"pygments_lexer": "ipython3",
415-
"version": "3.10.12"
415+
"version": "3.12.7"
416416
},
417417
"pycharm": {
418418
"stem_cell": {

examples/asset-pricing_formulas.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@
139139
"name": "python",
140140
"nbconvert_exporter": "python",
141141
"pygments_lexer": "ipython3",
142-
"version": "3.10.12"
142+
"version": "3.12.7"
143143
},
144144
"pycharm": {
145145
"stem_cell": {

examples/iv_absorbing-regression.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@
151151
"name": "python",
152152
"nbconvert_exporter": "python",
153153
"pygments_lexer": "ipython3",
154-
"version": "3.10.12"
154+
"version": "3.12.7"
155155
},
156156
"pycharm": {
157157
"stem_cell": {

examples/iv_advanced-examples.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@
570570
"name": "python",
571571
"nbconvert_exporter": "python",
572572
"pygments_lexer": "ipython3",
573-
"version": "3.10.12"
573+
"version": "3.12.7"
574574
},
575575
"nbsphinx": {
576576
"allow_errors": true

examples/iv_using-formulas.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@
191191
"name": "python",
192192
"nbconvert_exporter": "python",
193193
"pygments_lexer": "ipython3",
194-
"version": "3.10.12"
194+
"version": "3.12.7"
195195
},
196196
"pycharm": {
197197
"stem_cell": {

examples/panel_data-formats.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@
230230
"name": "python",
231231
"nbconvert_exporter": "python",
232232
"pygments_lexer": "ipython3",
233-
"version": "3.10.12"
233+
"version": "3.12.7"
234234
},
235235
"pycharm": {
236236
"stem_cell": {

examples/panel_examples.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@
427427
"name": "python",
428428
"nbconvert_exporter": "python",
429429
"pygments_lexer": "ipython3",
430-
"version": "3.10.12"
430+
"version": "3.12.7"
431431
},
432432
"pycharm": {
433433
"stem_cell": {

examples/panel_using-formulas.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@
182182
"name": "python",
183183
"nbconvert_exporter": "python",
184184
"pygments_lexer": "ipython3",
185-
"version": "3.10.12"
185+
"version": "3.12.7"
186186
},
187187
"pycharm": {
188188
"stem_cell": {

examples/system_examples.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
"outputs": [],
4040
"source": [
4141
"# Common libraries\n",
42-
"%matplotlib inline\n",
4342
"import numpy as np\n",
4443
"import pandas as pd\n",
4544
"import statsmodels.api as sm"
@@ -733,6 +732,7 @@
733732
"outputs": [],
734733
"source": [
735734
"import statsmodels.api as sm\n",
735+
"\n",
736736
"from linearmodels.datasets import french\n",
737737
"\n",
738738
"data = french.load()\n",

examples/system_formulas.ipynb

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"source": [
2424
"import numpy as np\n",
2525
"import pandas as pd\n",
26+
"\n",
2627
"from linearmodels.datasets import fringe\n",
2728
"\n",
2829
"data = fringe.load()"
@@ -179,7 +180,7 @@
179180
"name": "python",
180181
"nbconvert_exporter": "python",
181182
"pygments_lexer": "ipython3",
182-
"version": "3.10.12"
183+
"version": "3.12.7"
183184
},
184185
"pycharm": {
185186
"stem_cell": {

examples/system_three-stage-ls.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@
353353
"name": "python",
354354
"nbconvert_exporter": "python",
355355
"pygments_lexer": "ipython3",
356-
"version": "3.10.12"
356+
"version": "3.12.7"
357357
},
358358
"pycharm": {
359359
"stem_cell": {

linearmodels/iv/absorbing.py

+58-33
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
from collections import defaultdict
44
from collections.abc import Hashable, Iterable
5-
from typing import Any, DefaultDict, TypeVar, Union, cast
5+
from hashlib import sha256
6+
from typing import Any, DefaultDict, Union, cast
67
import warnings
78

89
from numpy import (
@@ -58,23 +59,41 @@
5859
from linearmodels.shared.utility import DataFrameWrapper, SeriesWrapper
5960
import linearmodels.typing.data
6061

62+
HAVE_XXHASH = False
6163
try:
62-
from xxhash import xxh64 as hash_func
64+
from xxhash import xxh64
65+
66+
HAVE_XXHASH = True
6367
except ImportError:
64-
from hashlib import sha256 as hash_func
68+
pass
6569

66-
Hasher = TypeVar("Hasher", bound=hash_func)
6770

71+
class Hasher:
72+
def __init__(self):
73+
if HAVE_XXHASH:
74+
self._hasher = xxh64()
75+
self._use_xx = True
76+
else:
77+
self._hasher = sha256()
78+
self._use_xx = False
6879

69-
_VARIABLE_CACHE: DefaultDict[Hashable, dict[str, ndarray]] = defaultdict(dict)
80+
def reset(self):
81+
if self._use_xx:
82+
self._hasher = xxh64()
83+
else:
84+
self._hasher.reset()
7085

86+
def update(self, data: memoryview) -> None:
87+
self._hasher.update(data)
7188

72-
def _reset(hasher: Hasher) -> Hasher:
73-
try:
74-
hasher.reset()
75-
return hasher
76-
except AttributeError:
77-
return hash_func()
89+
def digest(self) -> bytes:
90+
return self._hasher.digest()
91+
92+
def hexdigest(self) -> str:
93+
return self._hasher.hexdigest()
94+
95+
96+
_VARIABLE_CACHE: DefaultDict[Hashable, dict[str, ndarray]] = defaultdict(dict)
7897

7998

8099
def clear_cache() -> None:
@@ -139,8 +158,8 @@ def lsmr_annihilate(
139158

140159
variable_digest = ""
141160
if use_cache:
142-
hasher = hash_func()
143-
hasher.update(ascontiguousarray(_y.data))
161+
hasher = Hasher()
162+
hasher.update(memoryview(ascontiguousarray(_y.data)))
144163
variable_digest = hasher.hexdigest()
145164

146165
if use_cache and variable_digest in _VARIABLE_CACHE[regressor_hash]:
@@ -153,7 +172,7 @@ def lsmr_annihilate(
153172
return column_stack(resids)
154173

155174

156-
def category_product(cats: linearmodels.typing.data.AnyPandas) -> Series:
175+
def category_product(cats: linearmodels.typing.AnyPandas) -> Series:
157176
"""
158177
Construct category from all combination of input categories
159178
@@ -171,7 +190,7 @@ def category_product(cats: linearmodels.typing.data.AnyPandas) -> Series:
171190
"""
172191
if isinstance(cats, Series):
173192
return cats
174-
193+
assert isinstance(cats, DataFrame)
175194
sizes = []
176195
for c in cats:
177196
# TODO: Bug in pandas-stubs
@@ -197,7 +216,7 @@ def category_product(cats: linearmodels.typing.data.AnyPandas) -> Series:
197216
dtype_val = dtype(dtype_str)
198217
codes = zeros(nobs, dtype=dtype_val)
199218
cum_size = 0
200-
for i, col in enumerate(cats):
219+
for i, col_name in enumerate(cats):
201220
if dtype_str == "int8":
202221
shift: int8 | int16 | int32 | int64 = int8(cum_size)
203222
elif dtype_str == "int16":
@@ -206,7 +225,7 @@ def category_product(cats: linearmodels.typing.data.AnyPandas) -> Series:
206225
shift = int32(cum_size)
207226
else: # elif dtype_str == "int64":
208227
shift = int64(cum_size)
209-
cat_codes = asarray(cats[col].cat.codes)
228+
cat_codes = asarray(cats[col_name].cat.codes)
210229
codes += cat_codes.astype(dtype_val) << shift
211230
cum_size += sizes[i]
212231

@@ -236,8 +255,8 @@ def category_interaction(
236255

237256

238257
def category_continuous_interaction(
239-
cat: linearmodels.typing.data.AnyPandas,
240-
cont: linearmodels.typing.data.AnyPandas,
258+
cat: linearmodels.typing.AnyPandas,
259+
cont: linearmodels.typing.AnyPandas,
241260
precondition: bool = True,
242261
) -> sp.csc_matrix:
243262
"""
@@ -420,21 +439,23 @@ def hash(self) -> list[tuple[str, ...]]:
420439
Construct a hash that will be invariant for any permutation of
421440
inputs that produce the same fit when used as regressors"""
422441
# Sorted hashes of any categoricals
423-
hasher = hash_func()
442+
hasher = Hasher()
424443
cat_hashes = []
425444
cat = self.cat
426445
for col in cat:
427-
hasher.update(ascontiguousarray(self.cat[col].cat.codes.to_numpy().data))
446+
hasher.update(
447+
memoryview(ascontiguousarray(self.cat[col].cat.codes.to_numpy().data))
448+
)
428449
cat_hashes.append(hasher.hexdigest())
429-
hasher = _reset(hasher)
450+
hasher.reset()
430451
sorted_hashes = tuple(sorted(cat_hashes))
431452

432453
hashes = []
433454
cont = self.cont
434455
for col in cont:
435-
hasher.update(ascontiguousarray(cont[col].to_numpy()).data)
456+
hasher.update(memoryview(ascontiguousarray(cont[col].to_numpy()).data))
436457
hashes.append(sorted_hashes + (hasher.hexdigest(),))
437-
hasher = _reset(hasher)
458+
hasher.reset()
438459

439460
return sorted(hashes)
440461

@@ -531,26 +552,30 @@ def approx_rank(self) -> int:
531552
@property
532553
def hash(self) -> tuple[tuple[str, ...], ...]:
533554
hashes: list[tuple[str, ...]] = []
534-
hasher = hash_func()
555+
hasher = Hasher()
535556
if self._cat is not None:
536557
for col in self._cat:
537558
hasher.update(
538-
ascontiguousarray(self._cat[col].cat.codes.to_numpy()).data
559+
memoryview(
560+
ascontiguousarray(self._cat[col].cat.codes.to_numpy()).data
561+
)
539562
)
540563
hashes.append((hasher.hexdigest(),))
541-
hasher = _reset(hasher)
564+
hasher.reset()
542565
if self._cont is not None:
543566
for col in self._cont:
544-
hasher.update(ascontiguousarray(self._cont[col].to_numpy()).data)
567+
hasher.update(
568+
memoryview(ascontiguousarray(self._cont[col].to_numpy()).data)
569+
)
545570
hashes.append((hasher.hexdigest(),))
546-
hasher = _reset(hasher)
571+
hasher.reset()
547572
if self._interactions is not None:
548573
for interact in self._interactions:
549574
hashes.extend(interact.hash)
550575
# Add weight hash if provided
551576
if self._weights is not None:
552-
hasher = hash_func()
553-
hasher.update(ascontiguousarray(self._weights.data))
577+
hasher = Hasher()
578+
hasher.update(memoryview(ascontiguousarray(self._weights.data)))
554579
hashes.append((hasher.hexdigest(),))
555580
return tuple(sorted(hashes))
556581

@@ -706,7 +731,7 @@ def __init__(
706731
self._index = self._dependent.rows
707732
self._method = "Absorbing LS"
708733

709-
self._const_col = 0
734+
self._const_col: int | None = 0
710735
self._has_constant = False
711736
self._has_constant_exog = self._check_constant()
712737
self._constant_absorbed = False
@@ -733,7 +758,7 @@ def _drop_missing(self) -> linearmodels.typing.data.BoolArray:
733758
def _check_constant(self) -> bool:
734759
col_delta = ptp(self.exog.ndarray, 0)
735760
has_constant = npany(col_delta == 0)
736-
self._const_col = where(col_delta == 0)[0][0] if has_constant else None
761+
self._const_col = int(where(col_delta == 0)[0][0]) if has_constant else None
737762
return bool(has_constant)
738763

739764
def _check_weights(self) -> None:

linearmodels/iv/data.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
type_err = "Only ndarrays, DataArrays and Series and DataFrames are supported"
1919

2020

21-
def convert_columns(
22-
s: pd.Series, drop_first: bool
23-
) -> linearmodels.typing.data.AnyPandas:
21+
def convert_columns(s: pd.Series, drop_first: bool) -> linearmodels.typing.AnyPandas:
2422
if isinstance(s.dtype, pd.CategoricalDtype):
2523
out = pd.get_dummies(s, drop_first=drop_first)
2624
# TODO: Remove once pandas typing fixed
@@ -172,7 +170,7 @@ def pandas(self) -> pd.DataFrame:
172170
return self._pandas
173171

174172
@property
175-
def ndarray(self) -> linearmodels.typing.data.NumericArray:
173+
def ndarray(self) -> linearmodels.typing.NumericArray:
176174
"""ndarray view of data, always 2d"""
177175
return self._ndarray
178176

linearmodels/panel/_utility.pxi

-4
This file was deleted.

linearmodels/panel/_utility.pyi

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from linearmodels.typing.data import IntArray
2+
3+
def _drop_singletons(
4+
meta: IntArray,
5+
orig_dest: IntArray,
6+
) -> None: ...

linearmodels/panel/data.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,10 @@ def __init__(self, df: pandas.DataFrame):
6363
@classmethod
6464
def from_array(
6565
cls,
66-
values: linearmodels.typing.data.NumericArray,
67-
items: Sequence[linearmodels.typing.data.Label],
68-
major_axis: Sequence[linearmodels.typing.data.Label],
69-
minor_axis: Sequence[linearmodels.typing.data.Label],
66+
values: linearmodels.typing.NumericArray,
67+
items: Sequence[linearmodels.typing.Label],
68+
major_axis: Sequence[linearmodels.typing.Label],
69+
minor_axis: Sequence[linearmodels.typing.Label],
7070
) -> _Panel:
7171
index = list(product(minor_axis, major_axis))
7272
multi_index = MultiIndex.from_tuples(index)
@@ -103,7 +103,7 @@ def to_frame(self) -> DataFrame:
103103

104104
def convert_columns(
105105
s: pandas.Series, drop_first: bool
106-
) -> linearmodels.typing.data.AnyPandas:
106+
) -> linearmodels.typing.AnyPandas:
107107
if is_string_dtype(s.dtype) and s.map(lambda v: isinstance(v, str)).all():
108108
s = s.astype("category")
109109

@@ -338,18 +338,18 @@ def nentity(self) -> int:
338338
return self._n
339339

340340
@property
341-
def vars(self) -> list[linearmodels.typing.data.Label]:
341+
def vars(self) -> list[linearmodels.typing.Label]:
342342
"""List of variable names"""
343343
return list(self._frame.columns)
344344

345345
@property
346-
def time(self) -> list[linearmodels.typing.data.Label]:
346+
def time(self) -> list[linearmodels.typing.Label]:
347347
"""List of time index names"""
348348
index = self.index
349349
return list(index.levels[1][index.codes[1]].unique())
350350

351351
@property
352-
def entities(self) -> list[linearmodels.typing.data.Label]:
352+
def entities(self) -> list[linearmodels.typing.Label]:
353353
"""List of entity index names"""
354354
index = self.index
355355
return list(index.levels[0][index.codes[0]].unique())

0 commit comments

Comments
 (0)