@@ -79,7 +79,12 @@ impl std::fmt::Display for ObjectStoreUrl {
79
79
}
80
80
}
81
81
82
- /// Provides a mechanism for lazy, on-demand creation of [`ObjectStore`]
82
+ /// Provides a mechanism for lazy, on-demand creation of an [`ObjectStore`]
83
+ ///
84
+ /// For example, to support reading arbitrary buckets from AWS S3
85
+ /// without instantiating an [`ObjectStore`] for each possible bucket
86
+ /// up front, an [`ObjectStoreProvider`] can be used to create the
87
+ /// appropriate [`ObjectStore`] instance on demand.
83
88
///
84
89
/// See [`ObjectStoreRegistry::new_with_provider`]
85
90
pub trait ObjectStoreProvider : Send + Sync + ' static {
@@ -89,21 +94,29 @@ pub trait ObjectStoreProvider: Send + Sync + 'static {
89
94
fn get_by_url ( & self , url : & Url ) -> Result < Arc < dyn ObjectStore > > ;
90
95
}
91
96
92
- /// [`ObjectStoreRegistry`] stores [`ObjectStore`] keyed by url scheme and authority, that is
93
- /// the part of a URL preceding the path
97
+ /// [`ObjectStoreRegistry`] maps a URL to an [`ObjectStore`] instance,
98
+ /// and allows DataFusion to read from different [`ObjectStore`]
99
+ /// instances. For example, DataFusion might be configured so that:
100
+ ///
101
+ /// 1. `s3://my_bucket/lineitem/` is mapped to the `/lineitem` path on an
102
+ /// AWS S3 object store bound to `my_bucket`
94
103
///
95
- /// This is used by DataFusion to find an appropriate [`ObjectStore`] for a [`ListingTableUrl`]
96
- /// provided in a query such as
104
+ /// 2. `s3://my_other_bucket/lineitem/` is mapped to the (same)
105
+ /// `/lineitem` path on a *different* AWS S3 object store bound to
106
+ /// `my_other_bucket`
107
+ ///
108
+ /// When given a [`ListingTableUrl`], DataFusion tries to find an
109
+ /// appropriate [`ObjectStore`]. For example:
97
110
///
98
111
/// ```sql
99
112
/// create external table unicorns stored as parquet location 's3://my_bucket/lineitem/';
100
113
/// ```
101
114
///
102
- /// In this particular case the url `s3://my_bucket/lineitem/` will be provided to
115
+ /// In this particular case, the url `s3://my_bucket/lineitem/` will be provided to
103
116
/// [`ObjectStoreRegistry::get_by_url`] and one of three things will happen:
104
117
///
105
118
/// - If an [`ObjectStore`] has been registered with [`ObjectStoreRegistry::register_store`] with
106
- /// scheme `s3` and host `my_bucket`, this [`ObjectStore`] will be returned
119
+ /// scheme `s3` and host `my_bucket`, that [`ObjectStore`] will be returned
107
120
///
108
121
/// - If an [`ObjectStoreProvider`] has been associated with this [`ObjectStoreRegistry`] using
109
122
/// [`ObjectStoreRegistry::new_with_provider`], [`ObjectStoreProvider::get_by_url`] will be invoked,
@@ -115,9 +128,10 @@ pub trait ObjectStoreProvider: Send + Sync + 'static {
115
128
///
116
129
/// This allows for two different use-cases:
117
130
///
118
- /// * DBMS systems where object store buckets are explicitly created using DDL, can register these
131
+ /// 1. Systems where object store buckets are explicitly created using DDL can register these
119
132
/// buckets using [`ObjectStoreRegistry::register_store`]
120
- /// * DMBS systems relying on ad-hoc discovery, without corresponding DDL, can create [`ObjectStore`]
133
+ ///
134
+ /// 2. Systems relying on ad-hoc discovery, without corresponding DDL, can create [`ObjectStore`]
121
135
/// lazily, on-demand using [`ObjectStoreProvider`]
122
136
///
123
137
/// [`ListingTableUrl`]: crate::datasource::listing::ListingTableUrl
0 commit comments