16
16
// under the License.
17
17
18
18
use fnv:: FnvHashSet ;
19
+ use serde_bytes:: ByteBuf ;
19
20
20
21
use crate :: expr:: visitors:: bound_predicate_visitor:: { BoundPredicateVisitor , visit} ;
21
22
use crate :: expr:: { BoundPredicate , BoundReference } ;
@@ -42,13 +43,13 @@ impl ManifestEvaluator {
42
43
/// see if this `ManifestFile` could possibly contain data that matches
43
44
/// the scan's filter.
44
45
// Decides whether the given manifest file needs to be considered for the scan.
// Takes the `ManifestFile` by reference and yields `Ok(true)` when no partition
// summaries are available to test; otherwise the result comes from walking
// `self.partition_filter` with a `ManifestFilterVisitor` built over the summaries.
pub ( crate ) fn eval ( & self , manifest_file : & ManifestFile ) -> Result < bool > {
45
- if manifest_file. partitions . is_empty ( ) {
46
- return Ok ( true ) ;
46
// Added side of the diff: `partitions` is matched by reference as an `Option`;
// only a `Some` holding a non-empty collection is evaluated against the filter.
+ match & manifest_file. partitions {
47
+ Some ( p) if !p. is_empty ( ) => {
48
+ let mut evaluator = ManifestFilterVisitor :: new ( p) ;
49
+ visit ( & mut evaluator, & self . partition_filter )
50
+ }
51
// `None` and `Some` of an empty collection both fall through to `Ok(true)`.
+ _ => Ok ( true ) ,
47
52
}
48
-
49
- let mut evaluator = ManifestFilterVisitor :: new ( & manifest_file. partitions ) ;
50
-
51
- visit ( & mut evaluator, & self . partition_filter )
52
53
}
53
54
}
54
55
@@ -154,9 +155,19 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
154
155
_predicate : & BoundPredicate ,
155
156
) -> crate :: Result < bool > {
156
157
let field = self . field_summary_for_reference ( reference) ;
158
+
157
159
match & field. lower_bound {
158
- Some ( bound) if datum <= bound => ROWS_CANNOT_MATCH ,
159
- Some ( _) => ROWS_MIGHT_MATCH ,
160
+ Some ( bound_bytes) => {
161
+ let bound = ManifestFilterVisitor :: bytes_to_datum (
162
+ bound_bytes,
163
+ reference. field ( ) . field_type . clone ( ) ,
164
+ ) ;
165
+ if datum <= & bound {
166
+ ROWS_CANNOT_MATCH
167
+ } else {
168
+ ROWS_MIGHT_MATCH
169
+ }
170
+ }
160
171
None => ROWS_CANNOT_MATCH ,
161
172
}
162
173
}
@@ -169,8 +180,17 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
169
180
) -> crate :: Result < bool > {
170
181
let field = self . field_summary_for_reference ( reference) ;
171
182
match & field. lower_bound {
172
- Some ( bound) if datum < bound => ROWS_CANNOT_MATCH ,
173
- Some ( _) => ROWS_MIGHT_MATCH ,
183
+ Some ( bound_bytes) => {
184
+ let bound = ManifestFilterVisitor :: bytes_to_datum (
185
+ bound_bytes,
186
+ reference. field ( ) . field_type . clone ( ) ,
187
+ ) ;
188
+ if datum < & bound {
189
+ ROWS_CANNOT_MATCH
190
+ } else {
191
+ ROWS_MIGHT_MATCH
192
+ }
193
+ }
174
194
None => ROWS_CANNOT_MATCH ,
175
195
}
176
196
}
@@ -183,8 +203,17 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
183
203
) -> crate :: Result < bool > {
184
204
let field = self . field_summary_for_reference ( reference) ;
185
205
match & field. upper_bound {
186
- Some ( bound) if datum >= bound => ROWS_CANNOT_MATCH ,
187
- Some ( _) => ROWS_MIGHT_MATCH ,
206
+ Some ( bound_bytes) => {
207
+ let bound = ManifestFilterVisitor :: bytes_to_datum (
208
+ bound_bytes,
209
+ reference. field ( ) . field_type . clone ( ) ,
210
+ ) ;
211
+ if datum >= & bound {
212
+ ROWS_CANNOT_MATCH
213
+ } else {
214
+ ROWS_MIGHT_MATCH
215
+ }
216
+ }
188
217
None => ROWS_CANNOT_MATCH ,
189
218
}
190
219
}
@@ -197,8 +226,17 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
197
226
) -> crate :: Result < bool > {
198
227
let field = self . field_summary_for_reference ( reference) ;
199
228
match & field. upper_bound {
200
- Some ( bound) if datum > bound => ROWS_CANNOT_MATCH ,
201
- Some ( _) => ROWS_MIGHT_MATCH ,
229
+ Some ( bound_bytes) => {
230
+ let bound = ManifestFilterVisitor :: bytes_to_datum (
231
+ bound_bytes,
232
+ reference. field ( ) . field_type . clone ( ) ,
233
+ ) ;
234
+ if datum > & bound {
235
+ ROWS_CANNOT_MATCH
236
+ } else {
237
+ ROWS_MIGHT_MATCH
238
+ }
239
+ }
202
240
None => ROWS_CANNOT_MATCH ,
203
241
}
204
242
}
@@ -215,14 +253,22 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
215
253
return ROWS_CANNOT_MATCH ;
216
254
}
217
255
218
- if let Some ( lower_bound) = & field. lower_bound {
219
- if lower_bound > datum {
256
+ if let Some ( lower_bound_bytes) = & field. lower_bound {
257
+ let lower_bound = ManifestFilterVisitor :: bytes_to_datum (
258
+ lower_bound_bytes,
259
+ reference. field ( ) . field_type . clone ( ) ,
260
+ ) ;
261
+ if datum > & lower_bound {
220
262
return ROWS_CANNOT_MATCH ;
221
263
}
222
264
}
223
265
224
- if let Some ( upper_bound) = & field. upper_bound {
225
- if upper_bound < datum {
266
+ if let Some ( upper_bound_bytes) = & field. upper_bound {
267
+ let upper_bound = ManifestFilterVisitor :: bytes_to_datum (
268
+ upper_bound_bytes,
269
+ reference. field ( ) . field_type . clone ( ) ,
270
+ ) ;
271
+ if datum < & upper_bound {
226
272
return ROWS_CANNOT_MATCH ;
227
273
}
228
274
}
@@ -260,23 +306,15 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
260
306
let prefix_len = prefix. len ( ) ;
261
307
262
308
if let Some ( lower_bound) = & field. lower_bound {
263
- let lower_bound_str = ManifestFilterVisitor :: datum_as_str (
264
- lower_bound,
265
- "Cannot perform starts_with on non-string lower bound" ,
266
- ) ?;
267
- let min_len = lower_bound_str. len ( ) . min ( prefix_len) ;
268
- if prefix. as_bytes ( ) . lt ( & lower_bound_str. as_bytes ( ) [ ..min_len] ) {
309
+ let min_len = lower_bound. len ( ) . min ( prefix_len) ;
310
+ if prefix. as_bytes ( ) . lt ( & lower_bound[ ..min_len] ) {
269
311
return ROWS_CANNOT_MATCH ;
270
312
}
271
313
}
272
314
273
315
if let Some ( upper_bound) = & field. upper_bound {
274
- let upper_bound_str = ManifestFilterVisitor :: datum_as_str (
275
- upper_bound,
276
- "Cannot perform starts_with on non-string upper bound" ,
277
- ) ?;
278
- let min_len = upper_bound_str. len ( ) . min ( prefix_len) ;
279
- if prefix. as_bytes ( ) . gt ( & upper_bound_str. as_bytes ( ) [ ..min_len] ) {
316
+ let min_len = upper_bound. len ( ) . min ( prefix_len) ;
317
+ if prefix. as_bytes ( ) . gt ( & upper_bound[ ..min_len] ) {
280
318
return ROWS_CANNOT_MATCH ;
281
319
}
282
320
}
@@ -305,35 +343,19 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
305
343
// not_starts_with will match unless all values must start with the prefix. This happens when
306
344
// the lower and upper bounds both start with the prefix.
307
345
if let Some ( lower_bound) = & field. lower_bound {
308
- let lower_bound_str = ManifestFilterVisitor :: datum_as_str (
309
- lower_bound,
310
- "Cannot perform not_starts_with on non-string lower bound" ,
311
- ) ?;
312
-
313
346
// if lower is shorter than the prefix then lower doesn't start with the prefix
314
- if prefix_len > lower_bound_str . len ( ) {
347
+ if prefix_len > lower_bound . len ( ) {
315
348
return ROWS_MIGHT_MATCH ;
316
349
}
317
350
318
- if prefix
319
- . as_bytes ( )
320
- . eq ( & lower_bound_str. as_bytes ( ) [ ..prefix_len] )
321
- {
351
+ if prefix. as_bytes ( ) . eq ( & lower_bound[ ..prefix_len] ) {
322
352
if let Some ( upper_bound) = & field. upper_bound {
323
- let upper_bound_str = ManifestFilterVisitor :: datum_as_str (
324
- upper_bound,
325
- "Cannot perform not_starts_with on non-string upper bound" ,
326
- ) ?;
327
-
328
353
// if upper is shorter than the prefix then upper can't start with the prefix
329
- if prefix_len > upper_bound_str . len ( ) {
354
+ if prefix_len > upper_bound . len ( ) {
330
355
return ROWS_MIGHT_MATCH ;
331
356
}
332
357
333
- if prefix
334
- . as_bytes ( )
335
- . eq ( & upper_bound_str. as_bytes ( ) [ ..prefix_len] )
336
- {
358
+ if prefix. as_bytes ( ) . eq ( & upper_bound[ ..prefix_len] ) {
337
359
return ROWS_CANNOT_MATCH ;
338
360
}
339
361
}
@@ -359,13 +381,21 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> {
359
381
}
360
382
361
383
if let Some ( lower_bound) = & field. lower_bound {
362
- if literals. iter ( ) . all ( |datum| lower_bound > datum) {
384
+ let d = ManifestFilterVisitor :: bytes_to_datum (
385
+ lower_bound,
386
+ reference. field ( ) . clone ( ) . field_type ,
387
+ ) ;
388
+ if literals. iter ( ) . all ( |datum| & d < datum) {
363
389
return ROWS_CANNOT_MATCH ;
364
390
}
365
391
}
366
392
367
393
if let Some ( upper_bound) = & field. upper_bound {
368
- if literals. iter ( ) . all ( |datum| upper_bound < datum) {
394
+ let d = ManifestFilterVisitor :: bytes_to_datum (
395
+ upper_bound,
396
+ reference. field ( ) . clone ( ) . field_type ,
397
+ ) ;
398
+ if literals. iter ( ) . all ( |datum| & d < datum) {
369
399
return ROWS_CANNOT_MATCH ;
370
400
}
371
401
}
@@ -414,6 +444,11 @@ impl ManifestFilterVisitor<'_> {
414
444
} ;
415
445
Ok ( bound)
416
446
}
447
+
448
+ fn bytes_to_datum < ' a > ( bytes : & ByteBuf , t : Box < Type > ) -> Datum {
449
+ let p = t. as_primitive_type ( ) . unwrap ( ) ;
450
+ Datum :: try_from_bytes ( bytes, p. clone ( ) ) . unwrap ( )
451
+ }
417
452
}
418
453
419
454
#[ cfg( test) ]
@@ -520,8 +555,8 @@ mod test {
520
555
FieldSummary {
521
556
contains_null: false ,
522
557
contains_nan: None ,
523
- lower_bound: Some ( Datum :: int( INT_MIN_VALUE ) ) ,
524
- upper_bound: Some ( Datum :: int( INT_MAX_VALUE ) ) ,
558
+ lower_bound: Some ( Datum :: int( INT_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
559
+ upper_bound: Some ( Datum :: int( INT_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
525
560
} ,
526
561
// all_nulls_missing_nan
527
562
FieldSummary {
@@ -534,22 +569,22 @@ mod test {
534
569
FieldSummary {
535
570
contains_null: true ,
536
571
contains_nan: None ,
537
- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
538
- upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) ) ,
572
+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
573
+ upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
539
574
} ,
540
575
// no_nulls
541
576
FieldSummary {
542
577
contains_null: false ,
543
578
contains_nan: None ,
544
- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
545
- upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) ) ,
579
+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
580
+ upper_bound: Some ( Datum :: string( STRING_MAX_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
546
581
} ,
547
582
// float
548
583
FieldSummary {
549
584
contains_null: true ,
550
585
contains_nan: None ,
551
- lower_bound: Some ( Datum :: float( 0.0 ) ) ,
552
- upper_bound: Some ( Datum :: float( 20.0 ) ) ,
586
+ lower_bound: Some ( Datum :: float( 0.0 ) . to_bytes ( ) . unwrap ( ) ) ,
587
+ upper_bound: Some ( Datum :: float( 20.0 ) . to_bytes ( ) . unwrap ( ) ) ,
553
588
} ,
554
589
// all_nulls_double
555
590
FieldSummary {
@@ -583,8 +618,8 @@ mod test {
583
618
FieldSummary {
584
619
contains_null: false ,
585
620
contains_nan: Some ( false ) ,
586
- lower_bound: Some ( Datum :: float( 0.0 ) ) ,
587
- upper_bound: Some ( Datum :: float( 20.0 ) ) ,
621
+ lower_bound: Some ( Datum :: float( 0.0 ) . to_bytes ( ) . unwrap ( ) ) ,
622
+ upper_bound: Some ( Datum :: float( 20.0 ) . to_bytes ( ) . unwrap ( ) ) ,
588
623
} ,
589
624
// all_nulls_missing_nan_float
590
625
FieldSummary {
@@ -597,15 +632,15 @@ mod test {
597
632
FieldSummary {
598
633
contains_null: true ,
599
634
contains_nan: None ,
600
- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
601
- upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
635
+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
636
+ upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
602
637
} ,
603
638
// no_nulls_same_value_a
604
639
FieldSummary {
605
640
contains_null: false ,
606
641
contains_nan: None ,
607
- lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
608
- upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) ) ,
642
+ lower_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
643
+ upper_bound: Some ( Datum :: string( STRING_MIN_VALUE ) . to_bytes ( ) . unwrap ( ) ) ,
609
644
} ,
610
645
]
611
646
}
@@ -625,7 +660,7 @@ mod test {
625
660
added_rows_count : None ,
626
661
existing_rows_count : None ,
627
662
deleted_rows_count : None ,
628
- partitions,
663
+ partitions : Some ( partitions ) ,
629
664
key_metadata : vec ! [ ] ,
630
665
}
631
666
}
0 commit comments