Skip to content

Commit 4d37173

Browse files
committed
Adapt docstrings as to appease pchs.
1 parent 90265e5 commit 4d37173

File tree

7 files changed

+2218
-3561
lines changed

7 files changed

+2218
-3561
lines changed

pixi.lock

+2,188-3,535
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/datajudge/__init__.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
"""datajudge allows to assess whether data from database complies with reference
2-
information.
3-
"""
1+
"""datajudge allows to assess whether data from database complies with reference information."""
42

53
from .constraints.base import Constraint
64
from .db_access import Condition, DataSource

src/datajudge/constraints/stats.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ def __init__(
2424

2525
@staticmethod
2626
def approximate_p_value(d: float, n_samples: int, m_samples: int) -> float | None:
27-
"""
28-
Calculates the approximate p-value according to
27+
"""Calculate the approximate p-value.
2928
29+
The computation is according to
3030
'A procedure to find exact critical values of Kolmogorov-Smirnov Test', Silvia Fachinetti, 2009
3131
3232
Note: For environments with `scipy` installed, this method will return a quasi-exact p-value.
@@ -57,8 +57,11 @@ def approximate_p_value(d: float, n_samples: int, m_samples: int) -> float | Non
5757
def check_acceptance(
5858
d_statistic: float, n_samples: int, m_samples: int, accepted_level: float
5959
) -> bool:
60-
"""For a given test statistic, d, and the respective sample sizes `n` and `m`, this function
61-
checks whether the null hypothesis can be rejected for an accepted significance level.
60+
"""
61+
Check whether the null hypothesis can be rejected for an accepted significance level.
62+
63+
`d_statistic` is the test statistic of interest, and `n_samples` and `m_samples`
64+
correspond to the respective sample sizes.
6265
6366
For more information, check out the `Wikipedia entry <https://w.wiki/5May>`_.
6467
"""

src/datajudge/constraints/uniques.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,9 @@ def test(self, engine: sa.engine.Engine) -> TestResult:
395395

396396

397397
class CategoricalBoundConstraint(Constraint):
398-
"""`CategoricalBoundConstraint` is a constraint class that checks if the share of specific values
399-
in a column falls within predefined bounds. It compares the actual distribution of values in a
398+
"""Constraint that checks if the share of specific values in a column falls within predefined bounds.
399+
400+
It compares the actual distribution of values in a
400401
`DataSource` column with a target distribution, supplied as a dictionary.
401402
402403
Example use cases include testing for consistency in columns with expected categorical values

src/datajudge/db_access.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,7 @@ def get_table_columns(
4444

4545

4646
def apply_patches(engine: sa.engine.Engine) -> None:
47-
"""
48-
Apply patches to e.g. specific dialect not implemented by sqlalchemy
49-
"""
47+
"""Apply patches, e.g. for dialect-specific features not implemented by sqlalchemy."""
5048
if is_bigquery(engine):
5149
# Patch for the EXCEPT operator (see BigQuery set operators
5250
# https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#set_operators)
@@ -499,7 +497,7 @@ def get_interval_overlaps_nd(
499497
end_columns: list[str],
500498
end_included: bool,
501499
) -> tuple[sa.sql.selectable.CompoundSelect, sa.sql.selectable.Select]:
502-
"""Create selectables for interval overlaps in n dimensions.
500+
r"""Create selectables for interval overlaps in n dimensions.
503501
504502
We define the presence of 'overlap' as presence of a non-empty intersection
505503
between two intervals.
@@ -938,7 +936,7 @@ def get_column(
938936
aggregate_operator: Callable | None = None,
939937
) -> tuple[Any, list[sa.Select]]:
940938
"""
941-
Queries the database for the values of the relevant column (as returned by `get_column(...)`).
939+
Query the database for the values of the relevant column (as returned by `get_column(...)`).
942940
943941
If an aggregation operation is passed, the results are aggregated accordingly
944942
and a single scalar value is returned.

src/datajudge/requirements.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -502,9 +502,7 @@ def add_categorical_bound_constraint(
502502
name: str | None = None,
503503
cache_size=None,
504504
) -> None:
505-
"""
506-
Check if the distribution of unique values in columns falls within the
507-
specified minimum and maximum bounds.
505+
"""Check if the distribution of unique values in columns falls within the specified minimum and maximum bounds.
508506
509507
The ``CategoricalBoundConstraint`` is added to ensure the distribution of unique values
510508
in the specified columns of a ``DataSource`` falls within the given minimum and maximum
@@ -2012,7 +2010,7 @@ def add_column_type_constraint(
20122010
name: str | None = None,
20132011
cache_size=None,
20142012
) -> None:
2015-
"Check that the columns have the same type."
2013+
"""Check that the columns have the same type."""
20162014
ref1 = DataReference(self.data_source, [column1])
20172015
ref2 = DataReference(self.data_source2, [column2])
20182016
self._constraints.append(

src/datajudge/utils.py

+14-8
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@ def format_difference(
1111
n1: float | int, n2: float | int, decimal_separator: bool = True
1212
) -> tuple[str, str]:
1313
"""
14+
Format and highlight how two numbers differ.
15+
1416
Given two numbers, n1 and n2, return a tuple of two strings,
1517
each representing one of the input numbers with the differing part highlighted.
1618
Highlighting is done using BBCode-like tags, which are replaced by the formatter.
1719
18-
Examples:
20+
Examples
21+
--------
1922
123, 123.0
2023
-> 123, 123[numDiff].0[/numDiff]
2124
122593859432, 122593859432347
@@ -26,7 +29,8 @@ def format_difference(
2629
- n2: The second number to compare.
2730
- decimal_separator: Whether to separate the decimal part of the numbers with commas.
2831
29-
Returns:
32+
Returns
33+
-------
3034
- A tuple of two strings, each representing one of the input numbers with the differing part highlighted.
3135
"""
3236
if decimal_separator:
@@ -58,8 +62,9 @@ def output_processor_sort(
5862
collection: Collection, counts: Collection | None = None
5963
) -> tuple[Collection, Collection | None]:
6064
"""
61-
Sorts a collection of tuple elements in descending order of their counts,
62-
and for ties, makes use of the ascending order of the elements themselves.
65+
Sorts a collection of tuple elements in descending order of their counts.
66+
67+
If ties exist, the ascending order of the elements themselves is used.
6368
6469
If the first element is not an instance of tuple,
6570
each element will be transparently packaged into a 1-tuple for processing;
@@ -93,11 +98,12 @@ def output_processor_limit(
9398
collection: Collection, counts: Collection | None = None, limit: int = 100
9499
) -> tuple[Collection, Collection | None]:
95100
"""
96-
Limits the collection to the first ``limit`` elements.
97-
If the list was shortened,
98-
will add a ``limit+1``-th string element,
101+
Limits the collection to the first `limit` elements.
102+
103+
If the list was shortened, will add a `limit+1`-th string element,
99104
informing the user of the truncation.
100-
The default limit of ``100`` can be adjusted using ``functools.partial``
105+
106+
The default limit of ``100`` can be adjusted using `functools.partial`.
101107
"""
102108
collection = list(collection)
103109

0 commit comments

Comments
 (0)