@@ -706,7 +706,9 @@ impl<'a> Tokenizer<'a> {
706706 // BigQuery uses b or B for byte string literal
707707 b @ 'B' | b @ 'b' if dialect_of ! ( self is BigQueryDialect | GenericDialect ) => {
708708 chars. next ( ) ; // consume
709- match chars. peek ( ) {
709+ match peeking_skip_whitespace_take_if ( chars, |ch| {
710+ matches ! ( ch, '\'' ) || matches ! ( ch, '\"' )
711+ } ) {
710712 Some ( '\'' ) => {
711713 if self . dialect . supports_triple_quoted_string ( ) {
712714 return self
@@ -745,7 +747,9 @@ impl<'a> Tokenizer<'a> {
745747 // BigQuery uses r or R for raw string literal
746748 b @ 'R' | b @ 'r' if dialect_of ! ( self is BigQueryDialect | GenericDialect ) => {
747749 chars. next ( ) ; // consume
748- match chars. peek ( ) {
750+ match peeking_skip_whitespace_take_if ( chars, |ch| {
751+ matches ! ( ch, '\'' ) || matches ! ( ch, '\"' )
752+ } ) {
749753 Some ( '\'' ) => self
750754 . tokenize_single_or_triple_quoted_string :: < fn ( String ) -> Token > (
751755 chars,
@@ -772,12 +776,19 @@ impl<'a> Tokenizer<'a> {
772776 // Redshift uses lower case n for national string literal
773777 n @ 'N' | n @ 'n' => {
774778 chars. next ( ) ; // consume, to check the next char
775- match chars. peek ( ) {
779+ match peeking_skip_whitespace_take_if ( chars, |ch| {
780+ matches ! ( ch, '\'' ) || matches ! ( ch, '\"' )
781+ } ) {
776782 Some ( '\'' ) => {
777783 // N'...' - a <national character string literal>
778784 let s = self . tokenize_single_quoted_string ( chars, '\'' , true ) ?;
779785 Ok ( Some ( Token :: NationalStringLiteral ( s) ) )
780786 }
787+ Some ( '\"' ) => {
788+ // N"..." - a <national character string literal>
789+ let s = self . tokenize_single_quoted_string ( chars, '\"' , true ) ?;
790+ Ok ( Some ( Token :: NationalStringLiteral ( s) ) )
791+ }
781792 _ => {
782793 // regular identifier starting with an "N"
783794 let s = self . tokenize_word ( n, chars) ;
@@ -789,7 +800,7 @@ impl<'a> Tokenizer<'a> {
789800 x @ 'e' | x @ 'E' => {
790801 let starting_loc = chars. location ( ) ;
791802 chars. next ( ) ; // consume, to check the next char
792- match chars. peek ( ) {
803+ match peeking_skip_whitespace_take_if ( chars, |ch| matches ! ( ch , '\'' ) ) {
793804 Some ( '\'' ) => {
794805 let s =
795806 self . tokenize_escaped_single_quoted_string ( starting_loc, chars) ?;
@@ -823,12 +834,19 @@ impl<'a> Tokenizer<'a> {
823834 // string, but PostgreSQL, at least, allows a lowercase 'x' too.
824835 x @ 'x' | x @ 'X' => {
825836 chars. next ( ) ; // consume, to check the next char
826- match chars. peek ( ) {
837+ match peeking_skip_whitespace_take_if ( chars, |ch| {
838+ matches ! ( ch, '\'' ) || matches ! ( ch, '\"' )
839+ } ) {
827840 Some ( '\'' ) => {
828841 // X'...' - a <binary string literal>
829842 let s = self . tokenize_single_quoted_string ( chars, '\'' , true ) ?;
830843 Ok ( Some ( Token :: HexStringLiteral ( s) ) )
831844 }
845+ Some ( '\"' ) => {
846+ // X"..." - a <binary string literal>
847+ let s = self . tokenize_single_quoted_string ( chars, '\"' , true ) ?;
848+ Ok ( Some ( Token :: HexStringLiteral ( s) ) )
849+ }
832850 _ => {
833851 // regular identifier starting with an "X"
834852 let s = self . tokenize_word ( x, chars) ;
@@ -1674,6 +1692,47 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool
16741692 s
16751693}
16761694
1695+ /// Peek ahead in a clone of `self.peekable`, skipping whitespace,
1696+ /// until `predicate` returns `true` or a non-whitespace character is encountered.
1697+ /// If a character matching the predicate is found:
1698+ /// - Advance the original iterator by the number of whitespace characters skipped
1699+ /// - Return the peeked character matching the predicate
1700+ ///
1701+ /// If a non-whitespace character not matching the predicate is encountered, or EOF is reached,
1702+ /// return `self.peek()` without advancing the iterator.
1703+ ///
1704+ /// Note: This function may advance the original iterator if a match is found after skipping whitespace.
1705+ fn peeking_skip_whitespace_take_if (
1706+ chars : & mut State ,
1707+ mut predicate : impl FnMut ( char ) -> bool ,
1708+ ) -> Option < char > {
1709+ // Check if the next character is a match to avoid unnecessary cloning.
1710+ if let Some ( & ch) = chars. peek ( ) {
1711+ if predicate ( ch) {
1712+ return Some ( ch) ;
1713+ }
1714+ }
1715+
1716+ let mut chars_clone = chars. peekable . clone ( ) ;
1717+ let mut next_count = 0 ;
1718+ loop {
1719+ match chars_clone. peek ( ) {
1720+ Some ( & ch) if predicate ( ch) => {
1721+ // Advance the original iterator
1722+ for _ in 0 ..next_count {
1723+ chars. next ( ) ;
1724+ }
1725+ return chars. peek ( ) . copied ( ) ;
1726+ }
1727+ Some ( ch) if ch. is_whitespace ( ) || matches ! ( ch, ' ' | '\t' | '\n' | '\r' ) => {
1728+ next_count += 1 ;
1729+ chars_clone. next ( ) ;
1730+ }
1731+ _ => return chars. peek ( ) . copied ( ) ,
1732+ }
1733+ }
1734+ }
1735+
16771736fn unescape_single_quoted_string ( chars : & mut State < ' _ > ) -> Option < String > {
16781737 Unescape :: new ( chars) . unescape ( )
16791738}
0 commit comments