Skip to content

Commit dc505a9

Browse files
committed
Add missing drop_duplicates keyword to read_csv
1 parent 2c58d7c commit dc505a9

File tree

7 files changed

+18
-11
lines changed

7 files changed

+18
-11
lines changed

bandicoot/io.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,8 @@ def load(name, records, antennas, attributes=None, recharges=None,
408408

409409

410410
def _read_network(user, records_path, attributes_path, read_function,
411-
antennas_path=None, warnings=True, extension=".csv"):
411+
antennas_path=None, warnings=True, extension=".csv",
412+
**kwargs):
412413
connections = {}
413414
correspondents = Counter([r.correspondent_id for r in user.records])
414415

@@ -419,7 +420,7 @@ def _read_network(user, records_path, attributes_path, read_function,
419420
connections[c_id] = read_function(c_id, records_path,
420421
antennas_path, attributes_path,
421422
describe=False, network=False,
422-
warnings=False)
423+
warnings=False, **kwargs)
423424
else:
424425
connections[c_id] = None
425426

@@ -479,7 +480,7 @@ def _load_recharges(path):
479480

480481
def read_csv(user_id, records_path, antennas_path=None, attributes_path=None,
481482
recharges_path=None, network=False, duration_format='seconds',
482-
describe=True, warnings=True, errors=False):
483+
describe=True, warnings=True, errors=False, drop_duplicates=False):
483484
"""
484485
Load user records from a CSV file.
485486
@@ -521,6 +522,10 @@ def read_csv(user_id, records_path, antennas_path=None, attributes_path=None,
521522
If errors is True, returns a tuple (user, errors), where user is the
522523
user object and errors are the records which could not be loaded.
523524
525+
drop_duplicates : boolean
526+
If drop_duplicates, remove “duplicated records” (same correspondents,
527+
direction, date and time). Not activated by default.
528+
524529
525530
Examples
526531
--------
@@ -574,12 +579,14 @@ def read_csv(user_id, records_path, antennas_path=None, attributes_path=None,
574579

575580
user, bad_records = load(user_id, records, antennas, attributes, recharges,
576581
antennas_path, attributes_path, recharges_path,
577-
describe=False, warnings=warnings)
582+
describe=False, warnings=warnings,
583+
drop_duplicates=drop_duplicates)
578584

579585
# Loads the network
580586
if network is True:
581587
user.network = _read_network(user, records_path, attributes_path,
582-
read_csv, antennas_path, warnings)
588+
read_csv, antennas_path, warnings,
589+
drop_duplicates=drop_duplicates)
583590
user.recompute_missing_neighbors()
584591

585592
if describe:

bandicoot/tests/samples/regressions/ego.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"reporting__attributes_path": "samples/attributes",
66
"reporting__recharges_path": "samples/attributes",
77
"reporting__version": "0.5.2",
8-
"reporting__code_signature": "cffc57cb9f8374de29b0fee6eb6147d2abd5d047",
8+
"reporting__code_signature": "13da1a1e7785230ea1e7fded3b43afefa76a0ece",
99
"reporting__groupby": "week",
1010
"reporting__split_week": true,
1111
"reporting__split_day": true,

bandicoot/tests/samples/regressions/empty_user.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"reporting__attributes_path": null,
66
"reporting__recharges_path": null,
77
"reporting__version": "0.5.2",
8-
"reporting__code_signature": "cffc57cb9f8374de29b0fee6eb6147d2abd5d047",
8+
"reporting__code_signature": "13da1a1e7785230ea1e7fded3b43afefa76a0ece",
99
"reporting__groupby": "week",
1010
"reporting__split_week": true,
1111
"reporting__split_day": true,

bandicoot/tests/samples/regressions/manual_a.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"reporting__attributes_path": null,
66
"reporting__recharges_path": null,
77
"reporting__version": "0.5.2",
8-
"reporting__code_signature": "cffc57cb9f8374de29b0fee6eb6147d2abd5d047",
8+
"reporting__code_signature": "13da1a1e7785230ea1e7fded3b43afefa76a0ece",
99
"reporting__groupby": "week",
1010
"reporting__split_week": true,
1111
"reporting__split_day": true,

bandicoot/tests/samples/regressions/manual_a_orange_network.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"reporting__attributes_path": "samples/attributes",
66
"reporting__recharges_path": "samples/attributes",
77
"reporting__version": "0.5.2",
8-
"reporting__code_signature": "cffc57cb9f8374de29b0fee6eb6147d2abd5d047",
8+
"reporting__code_signature": "13da1a1e7785230ea1e7fded3b43afefa76a0ece",
99
"reporting__groupby": "week",
1010
"reporting__split_week": true,
1111
"reporting__split_day": true,

bandicoot/tests/samples/regressions/sample_user.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"reporting__attributes_path": null,
66
"reporting__recharges_path": null,
77
"reporting__version": "0.5.2",
8-
"reporting__code_signature": "cffc57cb9f8374de29b0fee6eb6147d2abd5d047",
8+
"reporting__code_signature": "13da1a1e7785230ea1e7fded3b43afefa76a0ece",
99
"reporting__groupby": null,
1010
"reporting__split_week": true,
1111
"reporting__split_day": true,

docs/data_integrity.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ In this example, six records were removed:
5858

5959
bandicoot also offers the option to remove “duplicated records” (same
6060
correspondents, direction, date and time). The option ``drop_duplicates=True``
61-
in :meth:`~bandicoot.io.read_csv` is not activated by defaul, as one user
61+
in :meth:`~bandicoot.io.read_csv` is not activated by default, as one user
6262
might send multiple text messages within the same minute (or less, depending
6363
on the granularity of the data set).
6464

0 commit comments

Comments
 (0)