@@ -1855,28 +1855,33 @@ impl<'a> Tokenizer<'a> {
1855
1855
) -> Result < Option < Token > , TokenizerError > {
1856
1856
let mut s = String :: new ( ) ;
1857
1857
let mut nested = 1 ;
1858
- let mut last_ch = ' ' ;
1858
+ let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
1859
1859
1860
1860
loop {
1861
1861
match chars. next ( ) {
1862
- Some ( ch) => {
1863
- if last_ch == '/' && ch == '*' {
1864
- nested += 1 ;
1865
- } else if last_ch == '*' && ch == '/' {
1866
- nested -= 1 ;
1867
- if nested == 0 {
1868
- s. pop ( ) ;
1869
- break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
1870
- }
1862
+ Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
1863
+ chars. next ( ) ; // consume the '*'
1864
+ s. push ( '/' ) ;
1865
+ s. push ( '*' ) ;
1866
+ nested += 1 ;
1867
+ }
1868
+ Some ( '*' ) if matches ! ( chars. peek( ) , Some ( '/' ) ) => {
1869
+ chars. next ( ) ; // consume the '/'
1870
+ nested -= 1 ;
1871
+ if nested == 0 {
1872
+ break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
1871
1873
}
1874
+ s. push ( '*' ) ;
1875
+ s. push ( '/' ) ;
1876
+ }
1877
+ Some ( ch) => {
1872
1878
s. push ( ch) ;
1873
- last_ch = ch;
1874
1879
}
1875
1880
None => {
1876
1881
break self . tokenizer_error (
1877
1882
chars. location ( ) ,
1878
1883
"Unexpected EOF while in a multi-line comment" ,
1879
- )
1884
+ ) ;
1880
1885
}
1881
1886
}
1882
1887
}
@@ -2718,18 +2723,90 @@ mod tests {
2718
2723
2719
2724
// Table-driven test: with GenericDialect, block comments nest. Every inner
// comment opener must be matched by its own closer before the outer comment
// ends, and the nested delimiters are kept verbatim in the comment text.
#[ test]
2720
2725
fn tokenize_nested_multiline_comment ( ) {
2721
- let sql = String :: from ( "0/*multi-line\n * \n /* comment \n /*comment*/*/ */ /comment*/1" ) ;
2726
+ let dialect = GenericDialect { } ;
2727
+ let test_cases = vec ! [
2728
// Case 1: nesting is already balanced before the trailing "/comment" part,
// so that tail is expected as ordinary tokens (Space, Div, Word, Mul, Div).
+ (
2729
+ "0/*multi-line\n * \n /* comment \n /*comment*/*/ */ /comment*/1" ,
2730
+ vec![
2731
+ Token :: Number ( "0" . to_string( ) , false ) ,
2732
+ Token :: Whitespace ( Whitespace :: MultiLineComment (
2733
+ "multi-line\n * \n /* comment \n /*comment*/*/ " . into( ) ,
2734
+ ) ) ,
2735
+ Token :: Whitespace ( Whitespace :: Space ) ,
2736
+ Token :: Div ,
2737
+ Token :: Word ( Word {
2738
+ value: "comment" . to_string( ) ,
2739
+ quote_style: None ,
2740
+ keyword: Keyword :: COMMENT ,
2741
+ } ) ,
2742
+ Token :: Mul ,
2743
+ Token :: Div ,
2744
+ Token :: Number ( "1" . to_string( ) , false ) ,
2745
+ ] ,
2746
+ ) ,
2747
// Case 2: an extra empty nested comment adds one more level, so the outer
// comment only closes at the very last closer and swallows everything up to it.
+ (
2748
+ "0/*multi-line\n * \n /* comment \n /*comment/**/ */ /comment*/*/1" ,
2749
+ vec![
2750
+ Token :: Number ( "0" . to_string( ) , false ) ,
2751
+ Token :: Whitespace ( Whitespace :: MultiLineComment (
2752
+ "multi-line\n * \n /* comment \n /*comment/**/ */ /comment*/" . into( ) ,
2753
+ ) ) ,
2754
+ Token :: Number ( "1" . to_string( ) , false ) ,
2755
+ ] ,
2756
+ ) ,
2757
// Case 3: a single level of nesting between two numbers.
+ (
2758
+ "SELECT 1/* a /* b */ c */0" ,
2759
+ vec![
2760
+ Token :: make_keyword( "SELECT" ) ,
2761
+ Token :: Whitespace ( Whitespace :: Space ) ,
2762
+ Token :: Number ( "1" . to_string( ) , false ) ,
2763
+ Token :: Whitespace ( Whitespace :: MultiLineComment ( " a /* b */ c " . to_string( ) ) ) ,
2764
+ Token :: Number ( "0" . to_string( ) , false ) ,
2765
+ ] ,
2766
+ ) ,
2767
+ ] ;
2768
+
2769
// Tokenize each input and compare against its expected token list.
+ for ( sql, expected) in test_cases {
2770
+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) . unwrap ( ) ;
2771
+ compare ( expected, tokens) ;
2772
+ }
2773
+ }
2774
+
2775
// With GenericDialect, an empty comment nested immediately inside another
// comment must produce a single MultiLineComment token whose text is the
// inner (empty) comment, delimiters included.
+ #[ test]
2776
+ fn tokenize_nested_multiline_comment_empty ( ) {
2777
+ let sql = "select 1/*/**/*/0" ;
2722
2778
2723
2779
let dialect = GenericDialect { } ;
2724
- let tokens = Tokenizer :: new ( & dialect, & sql) . tokenize ( ) . unwrap ( ) ;
2780
+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) . unwrap ( ) ;
2725
2781
let expected = vec ! [
2782
+ Token :: make_keyword( "select" ) ,
2783
+ Token :: Whitespace ( Whitespace :: Space ) ,
2784
+ Token :: Number ( "1" . to_string( ) , false ) ,
2785
// The outer delimiters are stripped; the nested empty comment is kept verbatim.
+ Token :: Whitespace ( Whitespace :: MultiLineComment ( "/**/" . to_string( ) ) ) ,
2726
2786
Token :: Number ( "0" . to_string( ) , false ) ,
2787
+ ] ;
2788
+
2789
+ compare ( expected, tokens) ;
2790
+ }
2791
+
2792
// Tokenizes a doubly-opened comment with SQLiteDialect, which (per the test
// name) is presumably a dialect without nested-comment support -- confirm
// against the dialect definition. Expected outcome: the first closer ends the
// comment, the inner opener stays in the comment text, and the leftover
// closer is tokenized as Mul followed by Div.
+ #[ test]
2793
+ fn tokenize_nested_comments_if_not_supported ( ) {
2794
+ let dialect = SQLiteDialect { } ;
2795
+ let sql = "SELECT 1/*/* nested comment */*/0" ;
2796
// The Result is kept as-is here and only unwrapped at the final compare call.
+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) ;
2797
+ let expected = vec ! [
2798
+ Token :: make_keyword( "SELECT" ) ,
2799
+ Token :: Whitespace ( Whitespace :: Space ) ,
2800
+ Token :: Number ( "1" . to_string( ) , false ) ,
2727
2801
Token :: Whitespace ( Whitespace :: MultiLineComment (
2728
- "multi-line \n * \n /* comment \n /* comment*/*/ */ /comment " . to_string( ) ,
2802
+ "/* nested comment " . to_string( ) ,
2729
2803
) ) ,
2730
- Token :: Number ( "1" . to_string( ) , false ) ,
2804
+ Token :: Mul ,
2805
+ Token :: Div ,
2806
+ Token :: Number ( "0" . to_string( ) , false ) ,
2731
2807
] ;
2732
- compare ( expected, tokens) ;
2808
+
2809
+ compare ( expected, tokens. unwrap ( ) ) ;
2733
2810
}
2734
2811
2735
2812
#[ test]
0 commit comments