Skip to content

Commit f67374f

Browse files
committed
update error codes
1 parent 58bee19 commit f67374f

File tree

4 files changed

+59
-21
lines changed

4 files changed

+59
-21
lines changed

pyproject.toml

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -63,29 +63,69 @@ select = [
6363
]
6464

6565
ignore = [
66-
"D104", # Missing docstring in public package
67-
"D100", # Missing docstring in public module
68-
"D211", # No blank line before class
69-
"PD901", # Avoid using 'df' for pandas dataframes. Perfectly fine in functions with limited scope
66+
"D104", # Missing docstring in public package
67+
"D100", # Missing docstring in public module
68+
"D211", # No blank line before class
69+
"PD901", # Avoid using 'df' for pandas dataframes. Perfectly fine in functions with limited scope
7070
"ANN201", # Missing return type annotation for public function (makes no sense for NoneType return types...)
7171
"ANN101", # Missing type annotation for `self`
7272
"ANN204", # Missing return type annotation for special method
7373
"ANN002", # Missing type annotation for `*args`
7474
"ANN003", # Missing type annotation for `**kwargs`
75-
"D105", # Missing docstring in magic method
76-
"D203", # 1 blank line required before class docstring
77-
"D204", # 1 blank line required after class docstring
78-
"D413", # 1 blank line after parameters
75+
"D105", # Missing docstring in magic method
76+
"D203", # 1 blank line required before class docstring
77+
"D204", # 1 blank line required after class docstring
78+
"D413", # 1 blank line after parameters
7979
"SIM108", # Simplify if/else to one line; not always clearer
80-
"D206", # Docstrings should be indented with spaces; unnecessary when running ruff-format
81-
"E501", # Line length too long; unnecessary when running ruff-format
82-
"W191", # Indentation contains tabs; unnecessary when running ruff-format
80+
"D206", # Docstrings should be indented with spaces; unnecessary when running ruff-format
81+
"E501", # Line length too long; unnecessary when running ruff-format
82+
"W191", # Indentation contains tabs; unnecessary when running ruff-format
8383

8484
# REMOVE AFTER FIXING
8585
"ANN001", # Missing type annotation for function argument `args`
8686
"ANN202", # Missing return type annotation for private function
87-
"D103", # Missing docstring in public function
88-
"D101", # Missing docstring in public class
87+
"D103", # Missing docstring in public function
88+
"D101", # Missing docstring in public class
89+
"PT009", # Use a regular `assert` instead of unittest-style `assertEqual`
90+
"D102", # Missing docstring in public method
91+
"UP031", # Use format specifiers instead of percent format
92+
"D401", # First line of docstring should be in imperative mood: "Loads the vocabulary from the specified path."
93+
"RET505", # Unnecessary `elif` after `return` statement
94+
"D107", # Missing docstring in `__init__`
95+
"PT027", # Use `pytest.raises` instead of unittest-style `assertRaisesRegex`
96+
"SIM101", # Multiple `isinstance` calls for `maybe_collection`, merge into a single call
97+
"FIX002", # Line contains TODO, consider resolving the issue
98+
"SIM103", # Return the condition directly
99+
"UP008", # Use `super()` instead of `super(__class__, self)`
100+
"N802", # Function name should be lowercase
101+
"B008", # Do not perform function call in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable
102+
"E731", # Do not assign a `lambda` expression, use a `def`
103+
"ERA001", # Found commented-out code
104+
"B005", # Using `.strip()` with multi-character strings is misleading
105+
"SIM117", # Use a single `with` statement with multiple contexts instead of nested `with` statements
106+
"B904", # Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
107+
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed in `domain`
108+
"D417", # Missing argument descriptions in the docstring
109+
"NPY002", # Replace legacy `np.random` call with `np.random.Generator`
110+
"ARG001", # Unused function argument
111+
"D404", # First word of the docstring should not be "This"
112+
"SIM102", # Use a single `if` statement instead of nested `if` statements
113+
"UP028", # Replace `yield` over `for` loop with `yield from`
114+
"RET504", # Unnecessary assignment to variable before `return` statement
115+
"PD011", # Use `.to_numpy()` instead of `.values`
116+
"ANN206", # Missing return type annotation for classmethod
117+
"ANN102", # Missing type annotation for `cls` in classmethod
118+
"PD015", # Use `.merge` method instead of `pd.merge` function
119+
"PD003", # `.isna` is preferred to `.isnull`; functionality is equivalent
120+
"ANN205", # Missing return type annotation for staticmethod
121+
"B007", # Loop control variable not used within loop body
122+
"SIM211", # Use `not ...` instead of `False if ... else True`
123+
"ARG002", # Unused method argument
124+
"PD002", # `inplace=True` should be avoided; it has inconsistent behavior
125+
"F821", # Undefined name
126+
"SIM105", # Use `contextlib.suppress(...)` instead of `try`-`except`-`pass`
127+
"PT018", # Assertion should be broken down into multiple parts
128+
"E741", # Ambiguous variable name
89129
]
90130

91131

tensorflow_data_validation/utils/mutual_information_util_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ def testCategoricalOrdinal(self):
389389
# using whatever log base we're using, in this case base 2.
390390
a = np.array([i % 2 for i in range(1000)])
391391
b = np.array([np.random.random() * (1.0 + i % 2) for i in range(1000)])
392-
filt = np.array([True if i % 2 else False for i in range(1000)])
392+
filt = np.array([bool(i % 2) for i in range(1000)])
393393
for method in ["smaller_data", "larger_data"]:
394394
self.assertAlmostEqual(
395395
-0.75 * np.log2(0.75),

tensorflow_data_validation/utils/schema_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def set_domain(
177177

178178
for d_type, d_name in feature_domains.items():
179179
if isinstance(domain, d_type):
180-
if d_type == str:
180+
if d_type is str:
181181
found_domain = False
182182
for global_domain in schema.string_domain:
183183
if global_domain.name == domain:

tensorflow_data_validation/utils/slicing_util.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def feature_value_slicer(
166166
_PARENT_INDEX_COLUMN: value_parent_indices,
167167
}
168168
)
169-
df.drop_duplicates(inplace=True)
169+
df = df.drop_duplicates()
170170
# Filter based on slice values
171171
if values is not None:
172172
df = df.loc[df[feature_name].isin(values)]
@@ -183,8 +183,7 @@ def feature_value_slicer(
183183
# we expect the merged dataframe to have sorted parent indices per
184184
# slice key.
185185
merged_df = functools.reduce(
186-
lambda base, update: pd.merge(
187-
base,
186+
lambda base, update: base.merge(
188187
update,
189188
how="inner", # pylint: disable=g-long-lambda
190189
on=_PARENT_INDEX_COLUMN,
@@ -224,7 +223,7 @@ def feature_value_slicer(
224223
return feature_value_slicer
225224

226225

227-
def _to_slice_key(feature_value: Any):
226+
def _to_slice_key(feature_value: Any): # noqa: ANN401
228227
"""Decode slice key as UTF-8."""
229228
# For bytes features we try decoding it as utf-8 (and throw an error if
230229
# fails). This is because in stats proto the slice name (dataset name) is a
@@ -260,8 +259,7 @@ def generate_slices(
260259
"""
261260
for slice_fn in slice_functions:
262261
try:
263-
for sliced_record_batch in slice_fn(record_batch, **kwargs):
264-
yield sliced_record_batch
262+
yield from slice_fn(record_batch, **kwargs)
265263
except Exception as e:
266264
raise ValueError(
267265
"One of the slice_functions %s raised an exception: %s."

0 commit comments

Comments
 (0)