16
16
// under the License.
17
17
18
18
use fnv:: FnvHashSet ;
19
-
19
+ use serde_bytes :: ByteBuf ;
20
20
use crate :: expr:: visitors:: bound_predicate_visitor:: { BoundPredicateVisitor , visit} ;
21
21
use crate :: expr:: { BoundPredicate , BoundReference } ;
22
22
use crate :: spec:: { Datum , FieldSummary , ManifestFile , PrimitiveLiteral , Type } ;
@@ -42,13 +42,13 @@ impl ManifestEvaluator {
42
42
/// see if this `ManifestFile` could possibly contain data that matches
43
43
/// the scan's filter.
44
44
///
/// In the added (`+`) form, returns `Ok(true)` without visiting the filter when
/// `manifest_file.partitions` is `None` or an empty list; otherwise returns the
/// result of visiting `partition_filter` with a `ManifestFilterVisitor` built
/// over the partition field summaries.
pub ( crate ) fn eval ( & self , manifest_file : & ManifestFile ) -> Result < bool > {
45
- if manifest_file. partitions . is_empty ( ) {
46
- return Ok ( true ) ;
45
// `partitions` is matched as an `Option` here, so the guard below only
// accepts a present, non-empty summary list.
+ match & manifest_file. partitions {
46
+ Some ( p) if !p. is_empty ( ) => {
47
+ let mut evaluator = ManifestFilterVisitor :: new ( p) ;
48
+ visit ( & mut evaluator, & self . partition_filter )
49
+ }
50
// No summaries to test the filter against, so the manifest cannot be
// ruled out and is reported as a possible match.
+ _ => Ok ( true )
47
51
}
48
-
49
- let mut evaluator = ManifestFilterVisitor :: new ( & manifest_file. partitions ) ;
50
-
51
- visit ( & mut evaluator, & self . partition_filter )
52
52
}
53
53
}
54
54
@@ -154,9 +154,16 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
154
154
_predicate : & BoundPredicate ,
155
155
) -> crate :: Result < bool > {
156
156
let field = self . field_summary_for_reference ( reference) ;
157
+
157
158
match & field. lower_bound {
158
- Some ( bound) if datum <= bound => ROWS_CANNOT_MATCH ,
159
- Some ( _) => ROWS_MIGHT_MATCH ,
159
+ Some ( bound_bytes) => {
160
+ let bound = ManifestFilterVisitor :: bytes_to_datum ( bound_bytes, reference. field ( ) . field_type . clone ( ) ) ;
161
+ if datum <= & bound {
162
+ ROWS_CANNOT_MATCH
163
+ } else {
164
+ ROWS_MIGHT_MATCH
165
+ }
166
+ }
160
167
None => ROWS_CANNOT_MATCH ,
161
168
}
162
169
}
@@ -169,8 +176,14 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
169
176
) -> crate :: Result < bool > {
170
177
let field = self . field_summary_for_reference ( reference) ;
171
178
match & field. lower_bound {
172
- Some ( bound) if datum < bound => ROWS_CANNOT_MATCH ,
173
- Some ( _) => ROWS_MIGHT_MATCH ,
179
+ Some ( bound_bytes) => {
180
+ let bound = ManifestFilterVisitor :: bytes_to_datum ( bound_bytes, reference. field ( ) . field_type . clone ( ) ) ;
181
+ if datum < & bound {
182
+ ROWS_CANNOT_MATCH
183
+ } else {
184
+ ROWS_MIGHT_MATCH
185
+ }
186
+ }
174
187
None => ROWS_CANNOT_MATCH ,
175
188
}
176
189
}
@@ -183,8 +196,14 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
183
196
) -> crate :: Result < bool > {
184
197
let field = self . field_summary_for_reference ( reference) ;
185
198
match & field. upper_bound {
186
- Some ( bound) if datum >= bound => ROWS_CANNOT_MATCH ,
187
- Some ( _) => ROWS_MIGHT_MATCH ,
199
+ Some ( bound_bytes) => {
200
+ let bound = ManifestFilterVisitor :: bytes_to_datum ( bound_bytes, reference. field ( ) . field_type . clone ( ) ) ;
201
+ if datum >= & bound {
202
+ ROWS_CANNOT_MATCH
203
+ } else {
204
+ ROWS_MIGHT_MATCH
205
+ }
206
+ }
188
207
None => ROWS_CANNOT_MATCH ,
189
208
}
190
209
}
@@ -197,8 +216,14 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
197
216
) -> crate :: Result < bool > {
198
217
let field = self . field_summary_for_reference ( reference) ;
199
218
match & field. upper_bound {
200
- Some ( bound) if datum > bound => ROWS_CANNOT_MATCH ,
201
- Some ( _) => ROWS_MIGHT_MATCH ,
219
+ Some ( bound_bytes) => {
220
+ let bound = ManifestFilterVisitor :: bytes_to_datum ( bound_bytes, reference. field ( ) . field_type . clone ( ) ) ;
221
+ if datum > & bound {
222
+ ROWS_CANNOT_MATCH
223
+ } else {
224
+ ROWS_MIGHT_MATCH
225
+ }
226
+ }
202
227
None => ROWS_CANNOT_MATCH ,
203
228
}
204
229
}
@@ -215,14 +240,16 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
215
240
return ROWS_CANNOT_MATCH ;
216
241
}
217
242
218
- if let Some ( lower_bound) = & field. lower_bound {
219
- if lower_bound > datum {
243
+ if let Some ( lower_bound_bytes) = & field. lower_bound {
244
+ let lower_bound = ManifestFilterVisitor :: bytes_to_datum ( lower_bound_bytes, reference. field ( ) . field_type . clone ( ) ) ;
245
+ if datum > & lower_bound {
220
246
return ROWS_CANNOT_MATCH ;
221
247
}
222
248
}
223
249
224
- if let Some ( upper_bound) = & field. upper_bound {
225
- if upper_bound < datum {
250
+ if let Some ( upper_bound_bytes) = & field. upper_bound {
251
+ let upper_bound = ManifestFilterVisitor :: bytes_to_datum ( upper_bound_bytes, reference. field ( ) . field_type . clone ( ) ) ;
252
+ if datum < & upper_bound {
226
253
return ROWS_CANNOT_MATCH ;
227
254
}
228
255
}
@@ -260,23 +287,15 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
260
287
let prefix_len = prefix. len ( ) ;
261
288
262
289
if let Some ( lower_bound) = & field. lower_bound {
263
- let lower_bound_str = ManifestFilterVisitor :: datum_as_str (
264
- lower_bound,
265
- "Cannot perform starts_with on non-string lower bound" ,
266
- ) ?;
267
- let min_len = lower_bound_str. len ( ) . min ( prefix_len) ;
268
- if prefix. as_bytes ( ) . lt ( & lower_bound_str. as_bytes ( ) [ ..min_len] ) {
290
+ let min_len = lower_bound. len ( ) . min ( prefix_len) ;
291
+ if prefix. as_bytes ( ) . lt ( & lower_bound[ ..min_len] ) {
269
292
return ROWS_CANNOT_MATCH ;
270
293
}
271
294
}
272
295
273
296
if let Some ( upper_bound) = & field. upper_bound {
274
- let upper_bound_str = ManifestFilterVisitor :: datum_as_str (
275
- upper_bound,
276
- "Cannot perform starts_with on non-string upper bound" ,
277
- ) ?;
278
- let min_len = upper_bound_str. len ( ) . min ( prefix_len) ;
279
- if prefix. as_bytes ( ) . gt ( & upper_bound_str. as_bytes ( ) [ ..min_len] ) {
297
+ let min_len = upper_bound. len ( ) . min ( prefix_len) ;
298
+ if prefix. as_bytes ( ) . gt ( & upper_bound[ ..min_len] ) {
280
299
return ROWS_CANNOT_MATCH ;
281
300
}
282
301
}
@@ -305,34 +324,24 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
305
324
// not_starts_with will match unless all values must start with the prefix. This happens when
306
325
// the lower and upper bounds both start with the prefix.
307
326
if let Some ( lower_bound) = & field. lower_bound {
308
- let lower_bound_str = ManifestFilterVisitor :: datum_as_str (
309
- lower_bound,
310
- "Cannot perform not_starts_with on non-string lower bound" ,
311
- ) ?;
312
-
313
327
// if lower is shorter than the prefix then lower doesn't start with the prefix
314
- if prefix_len > lower_bound_str . len ( ) {
328
+ if prefix_len > lower_bound . len ( ) {
315
329
return ROWS_MIGHT_MATCH ;
316
330
}
317
331
318
332
if prefix
319
333
. as_bytes ( )
320
- . eq ( & lower_bound_str . as_bytes ( ) [ ..prefix_len] )
334
+ . eq ( & lower_bound [ ..prefix_len] )
321
335
{
322
336
if let Some ( upper_bound) = & field. upper_bound {
323
- let upper_bound_str = ManifestFilterVisitor :: datum_as_str (
324
- upper_bound,
325
- "Cannot perform not_starts_with on non-string upper bound" ,
326
- ) ?;
327
-
328
337
// if upper is shorter than the prefix then upper can't start with the prefix
329
- if prefix_len > upper_bound_str . len ( ) {
338
+ if prefix_len > upper_bound . len ( ) {
330
339
return ROWS_MIGHT_MATCH ;
331
340
}
332
341
333
342
if prefix
334
343
. as_bytes ( )
335
- . eq ( & upper_bound_str . as_bytes ( ) [ ..prefix_len] )
344
+ . eq ( & upper_bound [ ..prefix_len] )
336
345
{
337
346
return ROWS_CANNOT_MATCH ;
338
347
}
@@ -359,13 +368,19 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
359
368
}
360
369
361
370
if let Some ( lower_bound) = & field. lower_bound {
362
- if literals. iter ( ) . all ( |datum| lower_bound > datum) {
371
+ let d = ManifestFilterVisitor :: bytes_to_datum ( lower_bound, reference. field ( ) . clone ( ) . field_type ) ;
372
+ if literals. iter ( ) . all ( |datum| {
373
+ & d < datum
374
+ } ) {
363
375
return ROWS_CANNOT_MATCH ;
364
376
}
365
377
}
366
378
367
379
if let Some ( upper_bound) = & field. upper_bound {
368
- if literals. iter ( ) . all ( |datum| upper_bound < datum) {
380
+ let d = ManifestFilterVisitor :: bytes_to_datum ( upper_bound, reference. field ( ) . clone ( ) . field_type ) ;
381
+ if literals. iter ( ) . all ( |datum| {
382
+ & d < datum
383
+ } ) {
369
384
return ROWS_CANNOT_MATCH ;
370
385
}
371
386
}
@@ -414,6 +429,11 @@ impl ManifestFilterVisitor<'_> {
414
429
} ;
415
430
Ok ( bound)
416
431
}
432
+
433
+ fn bytes_to_datum < ' a > ( bytes : & ByteBuf , t : Box < Type > ) -> Datum {
434
+ let p = t. as_primitive_type ( ) . unwrap ( ) ;
435
+ Datum :: try_from_bytes ( bytes, p. clone ( ) ) . unwrap ( )
436
+ }
417
437
}
418
438
419
439
#[ cfg( test) ]
@@ -520,8 +540,8 @@ mod test {
520
540
FieldSummary {
521
541
contains_null: false ,
522
542
contains_nan: None ,
523
- lower_bound: Some ( Datum :: int( INT_MIN_VALUE ) ) ,
524
- upper_bound: Some ( Datum :: int( INT_MAX_VALUE ) ) ,
543
+ lower_bound: Some ( Datum :: int( INT_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
544
+ upper_bound: Some ( Datum :: int( INT_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
525
545
} ,
526
546
// all_nulls_missing_nan
527
547
FieldSummary {
@@ -534,22 +554,22 @@ mod test {
534
554
FieldSummary {
535
555
contains_null: true ,
536
556
contains_nan: None ,
537
- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
538
- upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) ) ,
557
+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
558
+ upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
539
559
} ,
540
560
// no_nulls
541
561
FieldSummary {
542
562
contains_null: false ,
543
563
contains_nan: None ,
544
- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
545
- upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) ) ,
564
+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
565
+ upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
546
566
} ,
547
567
// float
548
568
FieldSummary {
549
569
contains_null: true ,
550
570
contains_nan: None ,
551
- lower_bound: Some ( Datum :: float( 0.0 ) ) ,
552
- upper_bound: Some ( Datum :: float( 20.0 ) ) ,
571
+ lower_bound: Some ( Datum :: float( 0.0 ) . to_bytes ( ) . unwrap ( ) ) ,
572
+ upper_bound: Some ( Datum :: float( 20.0 ) . to_bytes ( ) . unwrap ( ) ) ,
553
573
} ,
554
574
// all_nulls_double
555
575
FieldSummary {
@@ -583,8 +603,8 @@ mod test {
583
603
FieldSummary {
584
604
contains_null: false ,
585
605
contains_nan: Some ( false ) ,
586
- lower_bound: Some ( Datum :: float( 0.0 ) ) ,
587
- upper_bound: Some ( Datum :: float( 20.0 ) ) ,
606
+ lower_bound: Some ( Datum :: float( 0.0 ) . to_bytes ( ) . unwrap ( ) ) ,
607
+ upper_bound: Some ( Datum :: float( 20.0 ) . to_bytes ( ) . unwrap ( ) ) ,
588
608
} ,
589
609
// all_nulls_missing_nan_float
590
610
FieldSummary {
@@ -597,15 +617,15 @@ mod test {
597
617
FieldSummary {
598
618
contains_null: true ,
599
619
contains_nan: None ,
600
- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
601
- upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
620
+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
621
+ upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
602
622
} ,
603
623
// no_nulls_same_value_a
604
624
FieldSummary {
605
625
contains_null: false ,
606
626
contains_nan: None ,
607
- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
608
- upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
627
+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
628
+ upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
609
629
} ,
610
630
]
611
631
}
@@ -625,7 +645,7 @@ mod test {
625
645
added_rows_count : None ,
626
646
existing_rows_count : None ,
627
647
deleted_rows_count : None ,
628
- partitions,
648
+ partitions : Some ( partitions ) ,
629
649
key_metadata : vec ! [ ] ,
630
650
}
631
651
}
0 commit comments