Skip to content

Commit ad1afcb

Browse files
authored
Merge pull request #477 from martinfleis/ids
API: Change the handling of IDs in from_dataframe constructors
2 parents 4b99c09 + c796673 commit ad1afcb

File tree

4 files changed

+348
-147
lines changed

4 files changed

+348
-147
lines changed

libpysal/weights/contiguity.py

Lines changed: 161 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import itertools
2+
import warnings
23

34
import numpy
45

@@ -133,10 +134,17 @@ def from_iterable(cls, iterable, sparse=False, **kwargs):
133134

134135
@classmethod
135136
def from_dataframe(
136-
cls, df, geom_col=None, idVariable=None, ids=None, id_order=None, **kwargs
137+
cls,
138+
df,
139+
geom_col=None,
140+
idVariable=None,
141+
ids=None,
142+
id_order=None,
143+
use_index=None,
144+
**kwargs,
137145
):
138146
"""
139-
Construct a weights object from a pandas dataframe with a geometry
147+
Construct a weights object from a (geo)pandas dataframe with a geometry
140148
column. This will cast the polygons to PySAL polygons, then build the W
141149
using ids from the dataframe.
142150
@@ -149,16 +157,24 @@ def from_dataframe(
149157
the name of the column in `df` that contains the
150158
geometries. Defaults to active geometry column.
151159
idVariable : string
160+
DEPRECATED - use `ids` instead.
152161
the name of the column to use as IDs. If nothing is
153162
provided, the dataframe index is used
154-
ids : list
155-
a list of ids to use to index the spatial weights object.
156-
Order is not respected from this list.
163+
ids : list-like, string
164+
a list-like of ids to use to index the spatial weights object or
165+
the name of the column to use as IDs. If nothing is
166+
provided, the dataframe index is used if `use_index=True` or
167+
a positional index is used if `use_index=False`.
168+
The order of this list is not respected in the resulting W.
157169
id_order : list
158-
an ordered list of ids to use to index the spatial weights
170+
DEPRECATED - this argument will be removed in a future release.
171+
An ordered list of ids to use to index the spatial weights
159172
object. If used, the resulting weights object will iterate
160173
over results in the order of the names provided in this
161-
argument.
174+
argument.
175+
use_index : bool
176+
use index of `df` as `ids` to index the spatial weights object.
177+
Defaults to False but in future will default to True.
162178
163179
See Also
164180
--------
@@ -167,17 +183,62 @@ def from_dataframe(
167183
"""
168184
if geom_col is None:
169185
geom_col = df.geometry.name
186+
170187
if id_order is not None:
188+
warnings.warn(
189+
"`id_order` is deprecated and will be removed in future.",
190+
FutureWarning,
191+
stacklevel=2,
192+
)
171193
if id_order is True and ((idVariable is not None) or (ids is not None)):
172194
# if idVariable is None, we want ids. Otherwise, we want the
173195
# idVariable column
174196
id_order = list(df.get(idVariable, ids))
175197
else:
176198
id_order = df.get(id_order, ids)
177-
elif idVariable is not None:
178-
ids = df.get(idVariable).tolist()
179-
elif isinstance(ids, str):
180-
ids = df.get(ids).tolist()
199+
200+
if idVariable is not None:
201+
if ids is None:
202+
warnings.warn(
203+
"`idVariable` is deprecated and will be removed in future. "
204+
"Use `ids` instead.",
205+
FutureWarning,
206+
stacklevel=2,
207+
)
208+
ids = idVariable
209+
else:
210+
warnings.warn(
211+
"Both `idVariable` and `ids` passed, using `ids`.",
212+
UserWarning,
213+
stacklevel=2,
214+
)
215+
216+
if ids is None:
217+
if use_index is None:
218+
warnings.warn(
219+
"`use_index` defaults to False but will default to True in future. "
220+
"Set True/False directly to control this behavior and silence this "
221+
"warning",
222+
FutureWarning,
223+
stacklevel=2,
224+
)
225+
use_index = False
226+
if use_index:
227+
ids = df.index.tolist()
228+
229+
else:
230+
if isinstance(ids, str):
231+
ids = df[ids]
232+
233+
if not isinstance(ids, list):
234+
ids = ids.tolist()
235+
236+
if len(ids) != len(df):
237+
raise ValueError("The length of `ids` does not match the length of df.")
238+
239+
if id_order is None:
240+
id_order = ids
241+
181242
return cls.from_iterable(
182243
df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs
183244
)
@@ -227,7 +288,7 @@ def from_xarray(
227288
Returns
228289
-------
229290
w : libpysal.weights.W/libpysal.weights.WSP
230-
instance of spatial weights class W or WSP with an index attribute
291+
instance of spatial weights class W or WSP with an index attribute
231292
232293
Notes
233294
-----
@@ -358,9 +419,18 @@ def from_iterable(cls, iterable, sparse=False, **kwargs):
358419
return w
359420

360421
@classmethod
361-
def from_dataframe(cls, df, geom_col=None, **kwargs):
422+
def from_dataframe(
423+
cls,
424+
df,
425+
geom_col=None,
426+
idVariable=None,
427+
ids=None,
428+
id_order=None,
429+
use_index=None,
430+
**kwargs,
431+
):
362432
"""
363-
Construct a weights object from a pandas dataframe with a geometry
433+
Construct a weights object from a (geo)pandas dataframe with a geometry
364434
column. This will cast the polygons to PySAL polygons, then build the W
365435
using ids from the dataframe.
366436
@@ -371,46 +441,93 @@ def from_dataframe(cls, df, geom_col=None, **kwargs):
371441
for spatial weights
372442
geom_col : string
373443
the name of the column in `df` that contains the
374-
geometries. Defaults to active geometry column
444+
geometries. Defaults to active geometry column.
375445
idVariable : string
446+
DEPRECATED - use `ids` instead.
376447
the name of the column to use as IDs. If nothing is
377448
provided, the dataframe index is used
378-
ids : list
379-
a list of ids to use to index the spatial weights object.
380-
Order is not respected from this list.
449+
ids : list-like, string
450+
a list-like of ids to use to index the spatial weights object or
451+
the name of the column to use as IDs. If nothing is
452+
provided, the dataframe index is used if `use_index=True` or
453+
a positional index is used if `use_index=False`.
454+
The order of this list is not respected in the resulting W.
381455
id_order : list
382-
an ordered list of ids to use to index the spatial weights
456+
DEPRECATED - this argument will be removed in a future release.
457+
An ordered list of ids to use to index the spatial weights
383458
object. If used, the resulting weights object will iterate
384459
over results in the order of the names provided in this
385-
argument.
460+
argument.
461+
use_index : bool
462+
use index of `df` as `ids` to index the spatial weights object.
463+
Defaults to False but in future will default to True.
386464
387465
See Also
388466
--------
389467
:class:`libpysal.weights.weights.W`
390468
:class:`libpysal.weights.contiguity.Queen`
391469
"""
392-
idVariable = kwargs.pop("idVariable", None)
393-
ids = kwargs.pop("ids", None)
394-
id_order = kwargs.pop("id_order", None)
395470
if geom_col is None:
396471
geom_col = df.geometry.name
472+
397473
if id_order is not None:
474+
warnings.warn(
475+
"`id_order` is deprecated and will be removed in future.",
476+
FutureWarning,
477+
stacklevel=2,
478+
)
398479
if id_order is True and ((idVariable is not None) or (ids is not None)):
399480
# if idVariable is None, we want ids. Otherwise, we want the
400481
# idVariable column
401-
ids = list(df.get(idVariable, ids))
402-
id_order = ids
403-
elif isinstance(id_order, str):
404-
ids = df.get(id_order, ids)
405-
id_order = ids
406-
elif idVariable is not None:
407-
ids = df.get(idVariable).tolist()
408-
elif isinstance(ids, str):
409-
ids = df.get(ids).tolist()
410-
w = cls.from_iterable(
482+
id_order = list(df.get(idVariable, ids))
483+
else:
484+
id_order = df.get(id_order, ids)
485+
486+
if idVariable is not None:
487+
if ids is None:
488+
warnings.warn(
489+
"`idVariable` is deprecated and will be removed in future. "
490+
"Use `ids` instead.",
491+
FutureWarning,
492+
stacklevel=2,
493+
)
494+
ids = idVariable
495+
else:
496+
warnings.warn(
497+
"Both `idVariable` and `ids` passed, using `ids`.",
498+
UserWarning,
499+
stacklevel=2,
500+
)
501+
502+
if ids is None:
503+
if use_index is None:
504+
warnings.warn(
505+
"`use_index` defaults to False but will default to True in future. "
506+
"Set True/False directly to control this behavior and silence this "
507+
"warning",
508+
FutureWarning,
509+
stacklevel=2,
510+
)
511+
use_index = False
512+
if use_index:
513+
ids = df.index.tolist()
514+
515+
else:
516+
if isinstance(ids, str):
517+
ids = df[ids]
518+
519+
if not isinstance(ids, list):
520+
ids = ids.tolist()
521+
522+
if len(ids) != len(df):
523+
raise ValueError("The length of `ids` does not match the length of df.")
524+
525+
if id_order is None:
526+
id_order = ids
527+
528+
return cls.from_iterable(
411529
df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs
412530
)
413-
return w
414531

415532
@classmethod
416533
def from_xarray(
@@ -457,7 +574,7 @@ def from_xarray(
457574
Returns
458575
-------
459576
w : libpysal.weights.W/libpysal.weights.WSP
460-
instance of spatial weights class W or WSP with an index attribute
577+
instance of spatial weights class W or WSP with an index attribute
461578
462579
Notes
463580
-----
@@ -526,17 +643,17 @@ def Voronoi(points, criterion="rook", clip="ahull", **kwargs):
526643

527644
def _from_dataframe(df, **kwargs):
528645
"""
529-
Construct a voronoi contiguity weight directly from a dataframe.
646+
Construct a voronoi contiguity weight directly from a dataframe.
530647
Note that if criterion='rook', this is identical to the delaunay
531-
graph for the points.
648+
graph for the points if no clipping of the voronoi cells is applied.
532649
533650
If the input dataframe is of any other geometry type than "Point",
534-
a value error is raised.
651+
a value error is raised.
535652
536653
Parameters
537654
----------
538655
df : pandas.DataFrame
539-
dataframe containing point geometries for a
656+
dataframe containing point geometries for a
540657
voronoi diagram.
541658
542659
Returns
@@ -561,27 +678,27 @@ def _from_dataframe(df, **kwargs):
561678

562679
def _build(polygons, criterion="rook", ids=None):
563680
"""
564-
This is a developer-facing function to construct a spatial weights object.
681+
This is a developer-facing function to construct a spatial weights object.
565682
566683
Parameters
567684
----------
568685
polygons : list
569686
list of pysal polygons to use to build contiguity
570687
criterion : string
571-
option of which kind of contiguity to build. Is either "rook" or "queen"
688+
option of which kind of contiguity to build. Is either "rook" or "queen"
572689
ids : list
573690
list of ids to use to index the neighbor dictionary
574691
575692
Returns
576693
-------
577694
tuple containing (neighbors, ids), where neighbors is a dictionary
578695
describing contiguity relations and ids is the list of ids used to index
579-
that dictionary.
696+
that dictionary.
580697
581698
NOTE: this is different from the prior behavior of buildContiguity, which
582699
returned an actual weights object. Since this just dispatches for the
583700
classes above, this returns the raw ingredients for a spatial weights
584-
object, not the object itself.
701+
object, not the object itself.
585702
"""
586703
if ids and len(ids) != len(set(ids)):
587704
raise ValueError(
@@ -621,7 +738,7 @@ def buildContiguity(polygons, criterion="rook", ids=None):
621738
This is a deprecated function.
622739
623740
It builds a contiguity W from the polygons provided. As such, it is now
624-
identical to calling the class constructors for Rook or Queen.
741+
identical to calling the class constructors for Rook or Queen.
625742
"""
626743
# Warn('This function is deprecated. Please use the Rook or Queen classes',
627744
# UserWarning)

0 commit comments

Comments
 (0)