
Commit 6fd57b2

findepi and nrc authored
Fix clippy errors for Rust 1.80 (#11654)
* Fix some new warnings

Signed-off-by: Nick Cameron <[email protected]>

* Fix formatting errors reported by clippy

Newest version of clippy complains when list item continuation is not indented.

* Remove redundant reference

Reported by clippy

* Suppress non-trivial clippy suggestion

To be revisited

---------

Signed-off-by: Nick Cameron <[email protected]>
Co-authored-by: Nick Cameron <[email protected]>
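For illustration only (not part of the commit message): the "list item continuation is not indented" warnings come from the doc-comment lint clippy started emitting in Rust 1.80 (`doc_lazy_continuation`, if memory serves). A minimal before/after sketch of the fix applied throughout this commit:

```rust
// Before: clippy 1.80 warns because the second doc line lazily continues the
// numbered list item without being indented under it.
/// 1. "Inspecting" APIs to traverse a tree of references:
/// `apply`, `visit`, `exists`.
pub fn before_fix() {}

// After: indenting the continuation line under the list item satisfies the
// lint; this is the shape of most changes in this commit.
/// 1. "Inspecting" APIs to traverse a tree of references:
///    `apply`, `visit`, `exists`.
pub fn after_fix() {}
```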
1 parent 5b7e0aa commit 6fd57b2

File tree

59 files changed (+257 / -216 lines)


Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -154,4 +154,5 @@ rpath = false
 large_futures = "warn"
 
 [workspace.lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] }
 unused_imports = "deny"
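Background, not part of the diff: Rust 1.80 began validating `cfg` names against those Cargo knows about, so custom cfgs such as the `tarpaulin` cfg set by the cargo-tarpaulin coverage tool now trigger the new `unexpected_cfgs` lint unless they are declared via `check-cfg`, as done above. A hedged sketch of the kind of code this keeps warning-free (the test itself is hypothetical):

```rust
// Without the `check-cfg = ["cfg(tarpaulin)"]` entry in the workspace lints,
// Rust 1.80's `unexpected_cfgs` lint would warn that `tarpaulin` is not a
// recognized cfg name when it sees this attribute.
#[test]
#[cfg_attr(tarpaulin, ignore)] // skip this test under coverage instrumentation
fn hypothetical_slow_test() {
    assert_eq!(2 + 2, 4);
}
```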

datafusion/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@ path = "src/lib.rs"
 avro = ["apache-avro"]
 backtrace = []
 pyarrow = ["pyo3", "arrow/pyarrow", "parquet"]
+force_hash_collisions = []
 
 [dependencies]
 ahash = { workspace = true }

datafusion/common/src/config.rs

Lines changed: 1 addition & 1 deletion
@@ -1207,7 +1207,7 @@ impl ConfigField for TableOptions {
 /// # Parameters
 ///
 /// * `key`: The configuration key specifying which setting to adjust, prefixed with the format (e.g., "format.delimiter")
-/// for CSV format.
+///   for CSV format.
 /// * `value`: The value to set for the specified configuration key.
 ///
 /// # Returns

datafusion/common/src/hash_utils.rs

Lines changed: 18 additions & 2 deletions
@@ -17,22 +17,27 @@
 
 //! Functionality used both on logical and physical plans
 
+#[cfg(not(feature = "force_hash_collisions"))]
 use std::sync::Arc;
 
 use ahash::RandomState;
 use arrow::array::*;
 use arrow::datatypes::*;
 use arrow::row::Rows;
+#[cfg(not(feature = "force_hash_collisions"))]
 use arrow::{downcast_dictionary_array, downcast_primitive_array};
 use arrow_buffer::IntervalDayTime;
 use arrow_buffer::IntervalMonthDayNano;
 
+#[cfg(not(feature = "force_hash_collisions"))]
 use crate::cast::{
     as_boolean_array, as_fixed_size_list_array, as_generic_binary_array,
     as_large_list_array, as_list_array, as_map_array, as_primitive_array,
     as_string_array, as_struct_array,
 };
-use crate::error::{Result, _internal_err};
+use crate::error::Result;
+#[cfg(not(feature = "force_hash_collisions"))]
+use crate::error::_internal_err;
 
 // Combines two hashes into one hash
 #[inline]
@@ -41,6 +46,7 @@ pub fn combine_hashes(l: u64, r: u64) -> u64 {
     hash.wrapping_mul(37).wrapping_add(r)
 }
 
+#[cfg(not(feature = "force_hash_collisions"))]
 fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col: bool) {
     if mul_col {
         hashes_buffer.iter_mut().for_each(|hash| {
@@ -90,6 +96,7 @@ hash_float_value!((half::f16, u16), (f32, u32), (f64, u64));
 /// Builds hash values of PrimitiveArray and writes them into `hashes_buffer`
 /// If `rehash==true` this combines the previous hash value in the buffer
 /// with the new hash using `combine_hashes`
+#[cfg(not(feature = "force_hash_collisions"))]
 fn hash_array_primitive<T>(
     array: &PrimitiveArray<T>,
     random_state: &RandomState,
@@ -135,6 +142,7 @@ fn hash_array_primitive<T>(
 /// Hashes one array into the `hashes_buffer`
 /// If `rehash==true` this combines the previous hash value in the buffer
 /// with the new hash using `combine_hashes`
+#[cfg(not(feature = "force_hash_collisions"))]
 fn hash_array<T>(
     array: T,
     random_state: &RandomState,
@@ -180,6 +188,7 @@ fn hash_array<T>(
 }
 
 /// Hash the values in a dictionary array
+#[cfg(not(feature = "force_hash_collisions"))]
 fn hash_dictionary<K: ArrowDictionaryKeyType>(
     array: &DictionaryArray<K>,
     random_state: &RandomState,
@@ -210,6 +219,7 @@ fn hash_dictionary<K: ArrowDictionaryKeyType>(
     Ok(())
 }
 
+#[cfg(not(feature = "force_hash_collisions"))]
 fn hash_struct_array(
     array: &StructArray,
     random_state: &RandomState,
@@ -270,6 +280,7 @@ fn hash_map_array(
     Ok(())
 }
 
+#[cfg(not(feature = "force_hash_collisions"))]
 fn hash_list_array<OffsetSize>(
     array: &GenericListArray<OffsetSize>,
     random_state: &RandomState,
@@ -303,6 +314,7 @@ where
     Ok(())
 }
 
+#[cfg(not(feature = "force_hash_collisions"))]
 fn hash_fixed_list_array(
     array: &FixedSizeListArray,
     random_state: &RandomState,
@@ -488,7 +500,11 @@ pub fn create_row_hashes_v2<'a>(
 
 #[cfg(test)]
 mod tests {
-    use arrow::{array::*, datatypes::*};
+    use std::sync::Arc;
+
+    use arrow::array::*;
+    #[cfg(not(feature = "force_hash_collisions"))]
+    use arrow::datatypes::*;
 
     use super::*;
 
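Context for the wave of `#[cfg(not(feature = "force_hash_collisions"))]` attributes above: when the test-only `force_hash_collisions` feature is enabled, DataFusion's `create_hashes` gives every row the same hash, so the per-type hashing helpers are never used and Rust 1.80 would report them (and their imports) as dead code. A hedged, self-contained sketch of that pattern; the function name and bodies are illustrative rather than the actual implementation, and the feature must be declared in the crate's Cargo.toml as in the diff above:

```rust
use std::hash::BuildHasher;

use ahash::RandomState;
use arrow::array::ArrayRef;
use datafusion_common::Result;

/// Normal build: hash the input for real (details elided in this sketch).
#[cfg(not(feature = "force_hash_collisions"))]
fn create_hashes_sketch<'a>(
    arrays: &[ArrayRef],
    random_state: &RandomState,
    hashes_buffer: &'a mut Vec<u64>,
) -> Result<&'a mut Vec<u64>> {
    // The real code dispatches on each array's data type (see hash_utils.rs);
    // here we only hash the row index to keep the sketch short.
    for (i, hash) in hashes_buffer.iter_mut().enumerate() {
        *hash = random_state.hash_one(i as u64);
    }
    let _ = arrays; // the real implementation hashes the column values
    Ok(hashes_buffer)
}

/// Test-only build: every row hashes to the same value, forcing collisions so
/// that collision-handling paths in joins and aggregations get exercised.
#[cfg(feature = "force_hash_collisions")]
fn create_hashes_sketch<'a>(
    _arrays: &[ArrayRef],
    _random_state: &RandomState,
    hashes_buffer: &'a mut Vec<u64>,
) -> Result<&'a mut Vec<u64>> {
    for hash in hashes_buffer.iter_mut() {
        *hash = 0;
    }
    Ok(hashes_buffer)
}
```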

datafusion/common/src/tree_node.rs

Lines changed: 4 additions & 4 deletions
@@ -43,14 +43,14 @@ macro_rules! handle_transform_recursion {
 /// There are three categories of TreeNode APIs:
 ///
 /// 1. "Inspecting" APIs to traverse a tree of `&TreeNodes`:
-/// [`apply`], [`visit`], [`exists`].
+///    [`apply`], [`visit`], [`exists`].
 ///
 /// 2. "Transforming" APIs that traverse and consume a tree of `TreeNode`s
-/// producing possibly changed `TreeNode`s: [`transform`], [`transform_up`],
-/// [`transform_down`], [`transform_down_up`], and [`rewrite`].
+///    producing possibly changed `TreeNode`s: [`transform`], [`transform_up`],
+///    [`transform_down`], [`transform_down_up`], and [`rewrite`].
 ///
 /// 3. Internal APIs used to implement the `TreeNode` API: [`apply_children`],
-/// and [`map_children`].
+///    and [`map_children`].
 ///
 /// | Traversal Order | Inspecting | Transforming |
 /// | --- | --- | --- |
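To ground the first two categories listed above, here is a hedged usage sketch against `Expr` (which implements `TreeNode`); it assumes the closure-based signatures of the current API (`Result<TreeNodeRecursion>` for inspection, `Result<Transformed<_>>` for transformation) and is not taken from this commit:

```rust
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
use datafusion_common::Result;
use datafusion_expr::{col, lit, Expr};

fn tree_node_sketch() -> Result<()> {
    let expr = col("a") + lit(1);

    // "Inspecting" API: walk the tree by reference without consuming it.
    let mut columns = 0;
    expr.apply(|e| {
        if matches!(e, Expr::Column(_)) {
            columns += 1;
        }
        Ok(TreeNodeRecursion::Continue)
    })?;
    assert_eq!(columns, 1);

    // "Transforming" API: consume the tree, producing a possibly changed one.
    let rewritten = expr.transform(|e| {
        Ok(match e {
            Expr::Literal(_) => Transformed::yes(lit(42)), // rewrite every literal
            other => Transformed::no(other),
        })
    })?;
    assert!(rewritten.transformed);
    Ok(())
}
```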

datafusion/common/src/utils/memory.rs

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ use crate::{DataFusionError, Result};
 /// # Parameters
 /// - `num_elements`: The number of elements expected in the hash table.
 /// - `fixed_size`: A fixed overhead size associated with the collection
-/// (e.g., HashSet or HashTable).
+///   (e.g., HashSet or HashTable).
 /// - `T`: The type of elements stored in the hash table.
 ///
 /// # Details

datafusion/common/src/utils/mod.rs

Lines changed: 1 addition & 1 deletion
@@ -335,7 +335,7 @@ pub fn get_at_indices<T: Clone, I: Borrow<usize>>(
 /// This function finds the longest prefix of the form 0, 1, 2, ... within the
 /// collection `sequence`. Examples:
 /// - For 0, 1, 2, 4, 5; we would produce 3, meaning 0, 1, 2 is the longest satisfying
-/// prefix.
+///   prefix.
 /// - For 1, 2, 3, 4; we would produce 0, meaning there is no such prefix.
 pub fn longest_consecutive_prefix<T: Borrow<usize>>(
     sequence: impl IntoIterator<Item = T>,
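A hedged usage sketch of the documented behavior above (assuming the function is reachable as `datafusion_common::utils::longest_consecutive_prefix` and returns the prefix length as `usize`):

```rust
use datafusion_common::utils::longest_consecutive_prefix;

// Mirrors the doc examples: the longest 0, 1, 2, ... prefix has length 3 here,
// and there is no such prefix when the sequence does not start at 0.
assert_eq!(longest_consecutive_prefix([0usize, 1, 2, 4, 5]), 3);
assert_eq!(longest_consecutive_prefix([1usize, 2, 3, 4]), 0);
```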

datafusion/core/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ default = [
 ]
 encoding_expressions = ["datafusion-functions/encoding_expressions"]
 # Used for testing ONLY: causes all values to hash to the same value (test for collisions)
-force_hash_collisions = []
+force_hash_collisions = ["datafusion-physical-plan/force_hash_collisions", "datafusion-common/force_hash_collisions"]
 math_expressions = ["datafusion-functions/math_expressions"]
 parquet = ["datafusion-common/parquet", "dep:parquet"]
 pyarrow = ["datafusion-common/pyarrow", "parquet"]

datafusion/core/benches/sort.rs

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
 //! 1. Creates a list of tuples (sorted if necessary)
 //!
 //! 2. Divides those tuples across some number of streams of [`RecordBatch`]
-//! preserving any ordering
+//!    preserving any ordering
 //!
 //! 3. Times how long it takes for a given sort plan to process the input
 //!

datafusion/core/src/catalog/mod.rs

Lines changed: 2 additions & 2 deletions
@@ -141,12 +141,12 @@ pub trait CatalogList: CatalogProviderList {}
 /// Here are some examples of how to implement custom catalogs:
 ///
 /// * [`datafusion-cli`]: [`DynamicFileCatalogProvider`] catalog provider
-/// that treats files and directories on a filesystem as tables.
+///   that treats files and directories on a filesystem as tables.
 ///
 /// * The [`catalog.rs`]: a simple directory based catalog.
 ///
 /// * [delta-rs]: [`UnityCatalogProvider`] implementation that can
-/// read from Delta Lake tables
+///   read from Delta Lake tables
 ///
 /// [`datafusion-cli`]: https://datafusion.apache.org/user-guide/cli/index.html
 /// [`DynamicFileCatalogProvider`]: https://github.com/apache/datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75

datafusion/core/src/dataframe/mod.rs

Lines changed: 3 additions & 3 deletions
@@ -114,15 +114,15 @@ impl Default for DataFrameWriteOptions {
 /// The typical workflow using DataFrames looks like
 ///
 /// 1. Create a DataFrame via methods on [SessionContext], such as [`read_csv`]
-/// and [`read_parquet`].
+///    and [`read_parquet`].
 ///
 /// 2. Build a desired calculation by calling methods such as [`filter`],
-/// [`select`], [`aggregate`], and [`limit`]
+///    [`select`], [`aggregate`], and [`limit`]
 ///
 /// 3. Execute into [`RecordBatch`]es by calling [`collect`]
 ///
 /// A `DataFrame` is a wrapper around a [`LogicalPlan`] and the [`SessionState`]
-/// required for execution.
+///   required for execution.
 ///
 /// DataFrames are "lazy" in the sense that most methods do not actually compute
 /// anything, they just build up a plan. Calling [`collect`] executes the plan
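To make the three-step workflow described above concrete, here is a hedged sketch using the public `datafusion` crate; the CSV path and column names are placeholders, and a `tokio` runtime is assumed:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // 1. Create a DataFrame, e.g. by reading a CSV file.
    let df = ctx.read_csv("example.csv", CsvReadOptions::new()).await?;

    // 2. Lazily build up the desired computation; nothing runs yet.
    let df = df
        .filter(col("a").gt(lit(1)))?
        .select(vec![col("a"), col("b")])?
        .limit(0, Some(10))?;

    // 3. Execute the plan into RecordBatches.
    let batches = df.collect().await?;
    println!("collected {} batches", batches.len());
    Ok(())
}
```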

datafusion/core/src/datasource/listing/helpers.rs

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ use object_store::{ObjectMeta, ObjectStore};
 /// This means that if this function returns true:
 /// - the table provider can filter the table partition values with this expression
 /// - the expression can be marked as `TableProviderFilterPushDown::Exact` once this filtering
-/// was performed
+///   was performed
 pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool {
     let mut is_applicable = true;
     expr.apply(|expr| {

datafusion/core/src/datasource/listing/table.rs

Lines changed: 7 additions & 7 deletions
@@ -287,17 +287,17 @@ impl ListingOptions {
 ///# Notes
 ///
 /// - If only one level (e.g. `year` in the example above) is
-/// specified, the other levels are ignored but the files are
-/// still read.
+///   specified, the other levels are ignored but the files are
+///   still read.
 ///
 /// - Files that don't follow this partitioning scheme will be
-/// ignored.
+///   ignored.
 ///
 /// - Since the columns have the same value for all rows read from
-/// each individual file (such as dates), they are typically
-/// dictionary encoded for efficiency. You may use
-/// [`wrap_partition_type_in_dict`] to request a
-/// dictionary-encoded type.
+///   each individual file (such as dates), they are typically
+///   dictionary encoded for efficiency. You may use
+///   [`wrap_partition_type_in_dict`] to request a
+///   dictionary-encoded type.
 ///
 /// - The partition columns are solely extracted from the file path. Especially they are NOT part of the parquet files itself.
 ///
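A hedged sketch of the hive-style partitioning described above; the column names and `/year=.../month=.../` layout are hypothetical, and `wrap_partition_type_in_dict` could be used instead of plain `Utf8` to request dictionary-encoded partition columns as the doc suggests:

```rust
use std::sync::Arc;

use datafusion::arrow::datatypes::DataType;
use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::ListingOptions;

fn partitioned_listing_options() -> ListingOptions {
    ListingOptions::new(Arc::new(ParquetFormat::default()))
        .with_file_extension(".parquet")
        // Partition values are parsed from the directory path,
        // e.g. ".../year=2024/month=06/data.parquet"; they are not stored
        // in the Parquet files themselves.
        .with_table_partition_cols(vec![
            ("year".to_string(), DataType::Utf8),
            ("month".to_string(), DataType::Utf8),
        ])
}
```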

datafusion/core/src/datasource/physical_plan/parquet/mod.rs

Lines changed: 20 additions & 20 deletions
@@ -119,32 +119,32 @@ pub use writer::plan_to_parquet;
 /// Supports the following optimizations:
 ///
 /// * Concurrent reads: Can read from one or more files in parallel as multiple
-/// partitions, including concurrently reading multiple row groups from a single
-/// file.
+///   partitions, including concurrently reading multiple row groups from a single
+///   file.
 ///
 /// * Predicate push down: skips row groups and pages based on
-/// min/max/null_counts in the row group metadata, the page index and bloom
-/// filters.
+///   min/max/null_counts in the row group metadata, the page index and bloom
+///   filters.
 ///
 /// * Projection pushdown: reads and decodes only the columns required.
 ///
 /// * Limit pushdown: stop execution early after some number of rows are read.
 ///
 /// * Custom readers: customize reading parquet files, e.g. to cache metadata,
-/// coalesce I/O operations, etc. See [`ParquetFileReaderFactory`] for more
-/// details.
+///   coalesce I/O operations, etc. See [`ParquetFileReaderFactory`] for more
+///   details.
 ///
 /// * Schema adapters: read parquet files with different schemas into a unified
-/// table schema. This can be used to implement "schema evolution". See
-/// [`SchemaAdapterFactory`] for more details.
+///   table schema. This can be used to implement "schema evolution". See
+///   [`SchemaAdapterFactory`] for more details.
 ///
 /// * metadata_size_hint: controls the number of bytes read from the end of the
-/// file in the initial I/O when the default [`ParquetFileReaderFactory`]. If a
-/// custom reader is used, it supplies the metadata directly and this parameter
-/// is ignored. [`ParquetExecBuilder::with_metadata_size_hint`] for more details.
+///   file in the initial I/O when the default [`ParquetFileReaderFactory`]. If a
+///   custom reader is used, it supplies the metadata directly and this parameter
+///   is ignored. [`ParquetExecBuilder::with_metadata_size_hint`] for more details.
 ///
 /// * User provided [`ParquetAccessPlan`]s to skip row groups and/or pages
-/// based on external information. See "Implementing External Indexes" below
+///   based on external information. See "Implementing External Indexes" below
 ///
 /// # Implementing External Indexes
 ///
@@ -191,22 +191,22 @@ pub use writer::plan_to_parquet;
 /// # Execution Overview
 ///
 /// * Step 1: [`ParquetExec::execute`] is called, returning a [`FileStream`]
-/// configured to open parquet files with a [`ParquetOpener`].
+///   configured to open parquet files with a [`ParquetOpener`].
 ///
 /// * Step 2: When the stream is polled, the [`ParquetOpener`] is called to open
-/// the file.
+///   the file.
 ///
 /// * Step 3: The `ParquetOpener` gets the [`ParquetMetaData`] (file metadata)
-/// via [`ParquetFileReaderFactory`], creating a [`ParquetAccessPlan`] by
-/// applying predicates to metadata. The plan and projections are used to
-/// determine what pages must be read.
+///   via [`ParquetFileReaderFactory`], creating a [`ParquetAccessPlan`] by
+///   applying predicates to metadata. The plan and projections are used to
+///   determine what pages must be read.
 ///
 /// * Step 4: The stream begins reading data, fetching the required pages
-/// and incrementally decoding them.
+///   and incrementally decoding them.
 ///
 /// * Step 5: As each [`RecordBatch]` is read, it may be adapted by a
-/// [`SchemaAdapter`] to match the table schema. By default missing columns are
-/// filled with nulls, but this can be customized via [`SchemaAdapterFactory`].
+///   [`SchemaAdapter`] to match the table schema. By default missing columns are
+///   filled with nulls, but this can be customized via [`SchemaAdapterFactory`].
 ///
 /// [`RecordBatch`]: arrow::record_batch::RecordBatch
 /// [`SchemaAdapter`]: crate::datasource::schema_adapter::SchemaAdapter

datafusion/core/src/datasource/physical_plan/parquet/statistics.rs

Lines changed: 7 additions & 7 deletions
@@ -1358,14 +1358,14 @@ impl<'a> StatisticsConverter<'a> {
 /// # Parameters:
 ///
 /// * `column_page_index`: The parquet column page indices, read from
-/// `ParquetMetaData` column_index
+///   `ParquetMetaData` column_index
 ///
 /// * `column_offset_index`: The parquet column offset indices, read from
-/// `ParquetMetaData` offset_index
+///   `ParquetMetaData` offset_index
 ///
 /// * `row_group_indices`: The indices of the row groups, that are used to
-/// extract the column page index and offset index on a per row group
-/// per column basis.
+///   extract the column page index and offset index on a per row group
+///   per column basis.
 ///
 /// # Return Value
 ///
@@ -1486,13 +1486,13 @@ impl<'a> StatisticsConverter<'a> {
 /// # Parameters:
 ///
 /// * `column_offset_index`: The parquet column offset indices, read from
-/// `ParquetMetaData` offset_index
+///   `ParquetMetaData` offset_index
 ///
 /// * `row_group_metadatas`: The metadata slice of the row groups, read
-/// from `ParquetMetaData` row_groups
+///   from `ParquetMetaData` row_groups
 ///
 /// * `row_group_indices`: The indices of the row groups, that are used to
-/// extract the column offset index on a per row group per column basis.
+///   extract the column offset index on a per row group per column basis.
 ///
 /// See docs on [`Self::data_page_mins`] for details.
 pub fn data_page_row_counts<I>(
