Skip to content

Commit 832fcb2

Browse files
committed
databricks: RLIKE, raw strings, single quote escapes
1 parent 4e5efd1 commit 832fcb2

File tree

5 files changed

+71
-4
lines changed

5 files changed

+71
-4
lines changed

src/ast/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,13 @@ pub enum Expr {
555555
pattern: Box<Expr>,
556556
escape_char: Option<char>,
557557
},
558+
/// [NOT] RLIKE: <expr> [NOT] RLIKE <pattern> [ESCAPE '<char>']
559+
RLike {
560+
negated: bool,
561+
expr: Box<Expr>,
562+
pattern: Box<Expr>,
563+
escape_char: Option<char>,
564+
},
558565
/// REGEXP
559566
/// Snowflake: <subject> REGEXP <pattern>
560567
/// https://docs.snowflake.com/en/sql-reference/functions/regexp
@@ -878,6 +885,28 @@ impl fmt::Display for Expr {
878885
pattern
879886
),
880887
},
888+
Expr::RLike {
889+
negated,
890+
expr,
891+
pattern,
892+
escape_char,
893+
} => match escape_char {
894+
Some(ch) => write!(
895+
f,
896+
"{} {}RLIKE {} ESCAPE '{}'",
897+
expr,
898+
if *negated { "NOT " } else { "" },
899+
pattern,
900+
ch
901+
),
902+
_ => write!(
903+
f,
904+
"{} {}RLIKE {}",
905+
expr,
906+
if *negated { "NOT " } else { "" },
907+
pattern
908+
),
909+
},
881910
Expr::ILike {
882911
negated,
883912
expr,

src/keywords.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,7 @@ define_keywords!(
540540
RETURNS,
541541
REVOKE,
542542
RIGHT,
543+
RLIKE,
543544
ROLE,
544545
ROLLBACK,
545546
ROLLUP,

src/parser/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2056,6 +2056,7 @@ impl<'a> Parser<'a> {
20562056
| Keyword::BETWEEN
20572057
| Keyword::LIKE
20582058
| Keyword::ILIKE
2059+
| Keyword::RLIKE
20592060
| Keyword::SIMILAR
20602061
| Keyword::REGEXP => {
20612062
self.prev_token();
@@ -2084,6 +2085,13 @@ impl<'a> Parser<'a> {
20842085
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
20852086
escape_char: self.parse_escape_char()?,
20862087
})
2088+
} else if self.parse_keyword(Keyword::RLIKE) {
2089+
Ok(Expr::RLike {
2090+
negated,
2091+
expr: Box::new(expr),
2092+
pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
2093+
escape_char: self.parse_escape_char()?,
2094+
})
20872095
} else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) {
20882096
Ok(Expr::SimilarTo {
20892097
negated,
@@ -2377,6 +2385,7 @@ impl<'a> Parser<'a> {
23772385
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
23782386
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC),
23792387
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC),
2388+
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC),
23802389
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
23812390
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
23822391
_ => Ok(0),
@@ -2386,6 +2395,7 @@ impl<'a> Parser<'a> {
23862395
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
23872396
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC),
23882397
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC),
2398+
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC),
23892399
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
23902400
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
23912401
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC),

src/tokenizer.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ use sqlparser_derive::{Visit, VisitMut};
3838

3939
use crate::ast::DollarQuotedString;
4040
use crate::dialect::{
41-
BigQueryDialect, DuckDbDialect, GenericDialect, HiveDialect, RedshiftSqlDialect,
42-
SnowflakeDialect,
41+
BigQueryDialect, DatabricksDialect, DuckDbDialect, GenericDialect, HiveDialect,
42+
RedshiftSqlDialect, SnowflakeDialect,
4343
};
4444
use crate::dialect::{Dialect, MySqlDialect};
4545
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
@@ -731,7 +731,8 @@ impl<'a> Tokenizer<'a> {
731731
}
732732
}
733733
// BigQuery and Databricks use an r or R prefix for raw string literals
734-
b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
734+
b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect | DatabricksDialect) =>
735+
{
735736
chars.next(); // consume
736737
match chars.peek() {
737738
Some('\'') => {
@@ -1366,7 +1367,8 @@ impl<'a> Tokenizer<'a> {
13661367
// consume
13671368
chars.next();
13681369
// slash escaping is specific to the MySQL / BigQuery / Redshift / Databricks dialects.
1369-
if dialect_of!(self is MySqlDialect | BigQueryDialect | RedshiftSqlDialect) {
1370+
if dialect_of!(self is MySqlDialect | BigQueryDialect | RedshiftSqlDialect | DatabricksDialect)
1371+
{
13701372
if let Some(next) = chars.peek() {
13711373
if !self.unescape {
13721374
// In no-escape mode, the given query has to be saved completely including backslashes.

tests/sqlparser_databricks.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
use pretty_assertions::assert_eq;
1919
use sqlparser::ast::*;
2020
use sqlparser::dialect::{DatabricksDialect, GenericDialect};
21+
use sqlparser::parser::ParserOptions;
2122
use test_utils::*;
2223

2324
#[macro_use]
@@ -37,6 +38,13 @@ fn databricks_and_generic() -> TestedDialects {
3738
}
3839
}
3940

41+
fn databricks_unescaped() -> TestedDialects {
42+
TestedDialects {
43+
dialects: vec![Box::new(DatabricksDialect {})],
44+
options: Some(ParserOptions::new().with_unescape(false)),
45+
}
46+
}
47+
4048
#[test]
4149
fn test_databricks_create_table() {
4250
let sql = "CREATE TABLE main.dbt_lukasz.customers (customer_id BIGINT, customer_lifetime_value DOUBLE) USING delta TBLPROPERTIES ('delta.minReaderVersion' = '3', 'delta.minWriterVersion' = '7')";
@@ -53,3 +61,20 @@ fn test_identifiers() {
5361
let sql = "SELECT * FROM `main`.`dbt_lukasz`.`raw_orders`";
5462
databricks().verified_stmt(sql);
5563
}
64+
65+
#[test]
66+
fn test_string_escape() {
67+
databricks().one_statement_parses_to(r#"SELECT 'O\'Connell'"#, r#"SELECT 'O''Connell'"#);
68+
}
69+
70+
#[test]
71+
fn test_string_raw_literal() {
72+
let sql = r#"SELECT R'Some\nText'"#;
73+
databricks_unescaped().verified_stmt(sql);
74+
}
75+
76+
#[test]
77+
fn test_rlike() {
78+
let sql = r#"SELECT R'%SystemDrive%\Users\John' RLIKE R'%SystemDrive%\\Users.*'"#;
79+
databricks_unescaped().verified_stmt(sql);
80+
}

0 commit comments

Comments
 (0)