Skip to content

Commit 8c0c0ad

Browse files
authored
Cleanup InformationSchema plumbing (#4740)
1 parent 9046a8a commit 8c0c0ad

File tree

3 files changed

+31
-152
lines changed

3 files changed

+31
-152
lines changed

datafusion/core/src/catalog/information_schema.rs

Lines changed: 17 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,14 @@
1919
//!
2020
//! [Information Schema]: https://en.wikipedia.org/wiki/Information_schema
2121
22-
use std::{
23-
any::Any,
24-
sync::{Arc, Weak},
25-
};
22+
use std::{any::Any, sync::Arc};
2623

2724
use arrow::{
2825
array::{StringBuilder, UInt64Builder},
2926
datatypes::{DataType, Field, Schema, SchemaRef},
3027
record_batch::RecordBatch,
3128
};
3229

33-
use datafusion_common::Result;
34-
3530
use crate::config::ConfigOptions;
3631
use crate::datasource::streaming::{PartitionStream, StreamingTable};
3732
use crate::datasource::TableProvider;
@@ -40,87 +35,33 @@ use crate::logical_expr::TableType;
4035
use crate::physical_plan::stream::RecordBatchStreamAdapter;
4136
use crate::physical_plan::SendableRecordBatchStream;
4237

43-
use super::{
44-
catalog::{CatalogList, CatalogProvider},
45-
schema::SchemaProvider,
46-
};
47-
48-
const INFORMATION_SCHEMA: &str = "information_schema";
49-
const TABLES: &str = "tables";
50-
const VIEWS: &str = "views";
51-
const COLUMNS: &str = "columns";
52-
const DF_SETTINGS: &str = "df_settings";
53-
54-
/// Wraps another [`CatalogProvider`] and adds an "information_schema"
55-
/// schema that can introspect on tables in the catalog_list
56-
pub(crate) struct CatalogWithInformationSchema {
57-
catalog_list: Weak<dyn CatalogList>,
58-
/// wrapped provider
59-
inner: Arc<dyn CatalogProvider>,
60-
}
38+
use super::{catalog::CatalogList, schema::SchemaProvider};
6139

62-
impl CatalogWithInformationSchema {
63-
pub(crate) fn new(
64-
catalog_list: Weak<dyn CatalogList>,
65-
inner: Arc<dyn CatalogProvider>,
66-
) -> Self {
67-
Self {
68-
catalog_list,
69-
inner,
70-
}
71-
}
72-
73-
/// Return a reference to the wrapped provider
74-
pub(crate) fn inner(&self) -> Arc<dyn CatalogProvider> {
75-
self.inner.clone()
76-
}
77-
}
78-
79-
impl CatalogProvider for CatalogWithInformationSchema {
80-
fn as_any(&self) -> &dyn Any {
81-
self
82-
}
83-
84-
fn schema_names(&self) -> Vec<String> {
85-
self.inner
86-
.schema_names()
87-
.into_iter()
88-
.chain(std::iter::once(INFORMATION_SCHEMA.to_string()))
89-
.collect::<Vec<String>>()
90-
}
91-
92-
fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
93-
if name.eq_ignore_ascii_case(INFORMATION_SCHEMA) {
94-
Weak::upgrade(&self.catalog_list).map(|catalog_list| {
95-
Arc::new(InformationSchemaProvider {
96-
config: InformationSchemaConfig { catalog_list },
97-
}) as Arc<dyn SchemaProvider>
98-
})
99-
} else {
100-
self.inner.schema(name)
101-
}
102-
}
103-
104-
fn register_schema(
105-
&self,
106-
name: &str,
107-
schema: Arc<dyn SchemaProvider>,
108-
) -> Result<Option<Arc<dyn SchemaProvider>>> {
109-
let catalog = &self.inner;
110-
catalog.register_schema(name, schema)
111-
}
112-
}
40+
pub const INFORMATION_SCHEMA: &str = "information_schema";
41+
pub const TABLES: &str = "tables";
42+
pub const VIEWS: &str = "views";
43+
pub const COLUMNS: &str = "columns";
44+
pub const DF_SETTINGS: &str = "df_settings";
11345

11446
/// Implements the `information_schema` virtual schema and tables
11547
///
11648
/// The underlying tables in the `information_schema` are created on
11749
/// demand. This means that if more tables are added to the underlying
11850
/// providers, they will appear the next time the `information_schema`
11951
/// table is queried.
120-
struct InformationSchemaProvider {
52+
pub struct InformationSchemaProvider {
12153
config: InformationSchemaConfig,
12254
}
12355

56+
impl InformationSchemaProvider {
57+
/// Creates a new [`InformationSchemaProvider`] for the provided `catalog_list`
58+
pub fn new(catalog_list: Arc<dyn CatalogList>) -> Self {
59+
Self {
60+
config: InformationSchemaConfig { catalog_list },
61+
}
62+
}
63+
}
64+
12465
#[derive(Clone)]
12566
struct InformationSchemaConfig {
12667
catalog_list: Arc<dyn CatalogList>,

datafusion/core/src/catalog/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
2121
#![allow(clippy::module_inception)]
2222
pub mod catalog;
23-
pub mod information_schema;
23+
pub(crate) mod information_schema;
2424
pub mod listing_schema;
2525
pub mod schema;
2626

datafusion/core/src/execution/context.rs

Lines changed: 13 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,7 @@
1717

1818
//! SessionContext contains methods for registering data sources and executing queries
1919
use crate::{
20-
catalog::{
21-
catalog::{CatalogList, MemoryCatalogList},
22-
information_schema::CatalogWithInformationSchema,
23-
},
20+
catalog::catalog::{CatalogList, MemoryCatalogList},
2421
config::{
2522
OPT_COLLECT_STATISTICS, OPT_CREATE_DEFAULT_CATALOG_AND_SCHEMA,
2623
OPT_INFORMATION_SCHEMA, OPT_PARQUET_ENABLE_PRUNING, OPT_REPARTITION_AGGREGATIONS,
@@ -97,6 +94,7 @@ use datafusion_sql::{
9794
use parquet::file::properties::WriterProperties;
9895
use url::Url;
9996

97+
use crate::catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA};
10098
use crate::catalog::listing_schema::ListingSchemaProvider;
10199
use crate::datasource::object_store::ObjectStoreUrl;
102100
use crate::execution::memory_pool::MemoryPool;
@@ -422,10 +420,6 @@ impl SessionContext {
422420
))
423421
}
424422
}
425-
// Since information_schema config may have changed, revalidate
426-
if variable == OPT_INFORMATION_SCHEMA {
427-
state.update_information_schema();
428-
}
429423
drop(state);
430424

431425
self.return_empty_dataframe()
@@ -877,18 +871,10 @@ impl SessionContext {
877871
catalog: Arc<dyn CatalogProvider>,
878872
) -> Option<Arc<dyn CatalogProvider>> {
879873
let name = name.into();
880-
let information_schema = self.copied_config().information_schema();
881-
let state = self.state.read();
882-
let catalog = if information_schema {
883-
Arc::new(CatalogWithInformationSchema::new(
884-
Arc::downgrade(&state.catalog_list),
885-
catalog,
886-
))
887-
} else {
888-
catalog
889-
};
890-
891-
state.catalog_list.register_catalog(name, catalog)
874+
self.state
875+
.read()
876+
.catalog_list
877+
.register_catalog(name, catalog)
892878
}
893879

894880
/// Retrieves the list of available catalog names.
@@ -1587,7 +1573,7 @@ impl SessionState {
15871573
// rule below performs this analysis and removes unnecessary `SortExec`s.
15881574
physical_optimizers.push(Arc::new(OptimizeSorts::new()));
15891575

1590-
let mut this = SessionState {
1576+
SessionState {
15911577
session_id,
15921578
optimizer: Optimizer::new(),
15931579
physical_optimizers,
@@ -1598,60 +1584,6 @@ impl SessionState {
15981584
config,
15991585
execution_props: ExecutionProps::new(),
16001586
runtime_env: runtime,
1601-
};
1602-
this.update_information_schema();
1603-
this
1604-
}
1605-
1606-
/// Enables/Disables information_schema support based on the value of
1607-
/// config.information_schema()
1608-
///
1609-
/// When enabled, all catalog providers are wrapped with
1610-
/// [`CatalogWithInformationSchema`] if needed
1611-
///
1612-
/// When disabled, any [`CatalogWithInformationSchema`] is unwrapped
1613-
fn update_information_schema(&mut self) {
1614-
let enabled = self.config.information_schema();
1615-
let catalog_list = &self.catalog_list;
1616-
1617-
let new_catalogs: Vec<_> = self
1618-
.catalog_list
1619-
.catalog_names()
1620-
.into_iter()
1621-
.map(|catalog_name| {
1622-
// unwrap because the list of names came from catalog
1623-
// list so it should still be there
1624-
let catalog = catalog_list.catalog(&catalog_name).unwrap();
1625-
1626-
let unwrapped = catalog
1627-
.as_any()
1628-
.downcast_ref::<CatalogWithInformationSchema>()
1629-
.map(|wrapped| wrapped.inner());
1630-
1631-
let new_catalog = match (enabled, unwrapped) {
1632-
// already wrapped, nothing needed
1633-
(true, Some(_)) => catalog,
1634-
(true, None) => {
1635-
// wrap the catalog in information schema
1636-
Arc::new(CatalogWithInformationSchema::new(
1637-
Arc::downgrade(catalog_list),
1638-
catalog,
1639-
))
1640-
}
1641-
// disabling, currently wrapped
1642-
(false, Some(unwrapped)) => unwrapped,
1643-
// disabling, currently unwrapped
1644-
(false, None) => catalog,
1645-
};
1646-
1647-
(catalog_name, new_catalog)
1648-
})
1649-
// collect to avoid concurrent modification
1650-
.collect();
1651-
1652-
// replace all catalogs
1653-
for (catalog_name, new_catalog) in new_catalogs {
1654-
catalog_list.register_catalog(catalog_name, new_catalog);
16551587
}
16561588
}
16571589

@@ -1721,6 +1653,12 @@ impl SessionState {
17211653
table_ref: impl Into<TableReference<'a>>,
17221654
) -> Result<Arc<dyn SchemaProvider>> {
17231655
let resolved_ref = self.resolve_table_ref(table_ref);
1656+
if self.config.information_schema() && resolved_ref.schema == INFORMATION_SCHEMA {
1657+
return Ok(Arc::new(InformationSchemaProvider::new(
1658+
self.catalog_list.clone(),
1659+
)));
1660+
}
1661+
17241662
self.catalog_list
17251663
.catalog(resolved_ref.catalog)
17261664
.ok_or_else(|| {

0 commit comments

Comments
 (0)