Skip to content

Commit 68e372f

Browse files
eliaperantonialamb
andauthored
Add related source code locations to errors (#13664)
* Reset branch to no changes other than SqlRel * feat: improve `Diagnostic` ergonomics * feat: 'table not found' diagnostic in `SqlToRel` * feat: unresolved fields * feat: union with different number of columns * feat: `Spans` * feat: adjust field not found error * feat: incompatible types in binary expr * fix: tests not compiling * feat: ambiguous column * feat: possible references * feat: group by * chore: fmt * feat: relay `Spans` in `create_col_from_scalar_expr` * chore: cargo check * chore: cargo fmt * feat: diagnostic can only be either error or warning * feat: simplify to one `Diagnostic` per error * test: add tests * chore: add license headers * chore: update CLI lockfile * chore: taplo format * fix: doctest * chore: configs.md * fix: test * fix: clippy, by removing smallvec * fix: update slt * feat: support all binary expressions * refactor: move diagnostic tests to datafusion-sql * chore: format * fix: tests * feat: pr feedback * fix: ci checks --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent e9c35a6 commit 68e372f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1187
-229
lines changed

datafusion-cli/Cargo.lock

Lines changed: 29 additions & 50 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common/src/column.rs

Lines changed: 75 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,33 @@
1717

1818
//! Column
1919
20-
use arrow_schema::{Field, FieldRef};
21-
2220
use crate::error::_schema_err;
2321
use crate::utils::{parse_identifiers_normalized, quote_identifier};
24-
use crate::{DFSchema, Result, SchemaError, TableReference};
22+
use crate::{DFSchema, Diagnostic, Result, SchemaError, Spans, TableReference};
23+
use arrow_schema::{Field, FieldRef};
2524
use std::collections::HashSet;
2625
use std::convert::Infallible;
2726
use std::fmt;
2827
use std::str::FromStr;
2928

3029
/// A named reference to a qualified field in a schema.
31-
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
30+
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
3231
pub struct Column {
3332
/// relation/table reference.
3433
pub relation: Option<TableReference>,
3534
/// field/column name.
3635
pub name: String,
36+
/// Original source code location, if known
37+
pub spans: Spans,
38+
}
39+
40+
impl fmt::Debug for Column {
41+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42+
f.debug_struct("Column")
43+
.field("relation", &self.relation)
44+
.field("name", &self.name)
45+
.finish()
46+
}
3747
}
3848

3949
impl Column {
@@ -50,6 +60,7 @@ impl Column {
5060
Self {
5161
relation: relation.map(|r| r.into()),
5262
name: name.into(),
63+
spans: Spans::new(),
5364
}
5465
}
5566

@@ -58,6 +69,7 @@ impl Column {
5869
Self {
5970
relation: None,
6071
name: name.into(),
72+
spans: Spans::new(),
6173
}
6274
}
6375

@@ -68,6 +80,7 @@ impl Column {
6880
Self {
6981
relation: None,
7082
name: name.into(),
83+
spans: Spans::new(),
7184
}
7285
}
7386

@@ -99,7 +112,11 @@ impl Column {
99112
// identifiers will be treated as an unqualified column name
100113
_ => return None,
101114
};
102-
Some(Self { relation, name })
115+
Some(Self {
116+
relation,
117+
name,
118+
spans: Spans::new(),
119+
})
103120
}
104121

105122
/// Deserialize a fully qualified name string into a column
@@ -113,6 +130,7 @@ impl Column {
113130
Self {
114131
relation: None,
115132
name: flat_name,
133+
spans: Spans::new(),
116134
},
117135
)
118136
}
@@ -124,6 +142,7 @@ impl Column {
124142
Self {
125143
relation: None,
126144
name: flat_name,
145+
spans: Spans::new(),
127146
},
128147
)
129148
}
@@ -239,7 +258,30 @@ impl Column {
239258

240259
// If not due to USING columns then due to ambiguous column name
241260
return _schema_err!(SchemaError::AmbiguousReference {
242-
field: Column::new_unqualified(self.name),
261+
field: Column::new_unqualified(&self.name),
262+
})
263+
.map_err(|err| {
264+
let mut diagnostic = Diagnostic::new_error(
265+
format!("column '{}' is ambiguous", &self.name),
266+
self.spans().first(),
267+
);
268+
// TODO If [`DFSchema`] had spans, we could show the
269+
// user which columns are candidates, or which table
270+
// they come from. For now, let's list the table names
271+
// only.
272+
for qualified_field in qualified_fields {
273+
let (Some(table), _) = qualified_field else {
274+
continue;
275+
};
276+
diagnostic.add_note(
277+
format!(
278+
"possible reference to '{}' in table '{}'",
279+
&self.name, table
280+
),
281+
None,
282+
);
283+
}
284+
err.with_diagnostic(diagnostic)
243285
});
244286
}
245287
}
@@ -254,6 +296,33 @@ impl Column {
254296
.collect(),
255297
})
256298
}
299+
300+
/// Returns a reference to the set of locations in the SQL query where this
301+
/// column appears, if known.
302+
pub fn spans(&self) -> &Spans {
303+
&self.spans
304+
}
305+
306+
/// Returns a mutable reference to the set of locations in the SQL query
307+
/// where this column appears, if known.
308+
pub fn spans_mut(&mut self) -> &mut Spans {
309+
&mut self.spans
310+
}
311+
312+
/// Replaces the set of locations in the SQL query where this column
313+
/// appears, if known.
314+
pub fn with_spans(mut self, spans: Spans) -> Self {
315+
self.spans = spans;
316+
self
317+
}
318+
319+
/// Qualifies the column with the given table reference.
320+
pub fn with_relation(&self, relation: TableReference) -> Self {
321+
Self {
322+
relation: Some(relation),
323+
..self.clone()
324+
}
325+
}
257326
}
258327

259328
impl From<&str> for Column {

datafusion/common/src/config.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,11 @@ config_namespace! {
250250
/// specified. The Arrow type system does not have a notion of maximum
251251
/// string length and thus DataFusion can not enforce such limits.
252252
pub support_varchar_with_length: bool, default = true
253+
254+
/// When set to true, the source locations relative to the original SQL
255+
/// query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected
256+
/// and recorded in the logical plan nodes.
257+
pub collect_spans: bool, default = false
253258
}
254259
}
255260

datafusion/common/src/dfschema.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -502,10 +502,7 @@ impl DFSchema {
502502
Ok((fields_without_qualifier[0].0, fields_without_qualifier[0].1))
503503
} else {
504504
_schema_err!(SchemaError::AmbiguousReference {
505-
field: Column {
506-
relation: None,
507-
name: name.to_string(),
508-
},
505+
field: Column::new_unqualified(name.to_string(),),
509506
})
510507
}
511508
}

0 commit comments

Comments
 (0)