From daee871ec22e8be10298119bc49c3f3dd6defcc4 Mon Sep 17 00:00:00 2001 From: Hiroshi Hatake Date: Mon, 6 Jan 2025 19:48:22 +0900 Subject: [PATCH] unescape: Try to fill out with \uFFFD for invalid sequence Signed-off-by: Hiroshi Hatake --- src/flb_unescape.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/flb_unescape.c b/src/flb_unescape.c index 71bd9ff4292..18df30e6d10 100644 --- a/src/flb_unescape.c +++ b/src/flb_unescape.c @@ -121,11 +121,13 @@ static int u8_read_escape_sequence(const char *str, int size, uint32_t *dest) } if (dno != 4) { /* Incomplete \u escape sequence */ + ch = L'\uFFFD'; goto invalid_sequence; } ch = strtol(digs, NULL, 16); if (u8_low_surrogate(ch)) { /* Invalid: low surrogate without preceding high surrogate */ + ch = L'\uFFFD'; goto invalid_sequence; } else if (u8_high_surrogate(ch)) { @@ -139,6 +141,7 @@ static int u8_read_escape_sequence(const char *str, int size, uint32_t *dest) } if (dno != 4) { /* Incomplete low surrogate */ + ch = L'\uFFFD'; goto invalid_sequence; } uint32_t low = strtol(digs, NULL, 16); @@ -147,11 +150,13 @@ static int u8_read_escape_sequence(const char *str, int size, uint32_t *dest) } else { /* Invalid: high surrogate not followed by low surrogate */ + ch = L'\uFFFD'; goto invalid_sequence; } } else { /* Invalid: high surrogate not followed by \u */ + ch = L'\uFFFD'; goto invalid_sequence; } } @@ -166,10 +171,9 @@ static int u8_read_escape_sequence(const char *str, int size, uint32_t *dest) } *dest = ch; - return i; - invalid_sequence: - return -1; + + return i; } int flb_unescape_string_utf8(const char *in_buf, int sz, char *out_buf) @@ -223,6 +227,10 @@ int flb_unescape_string_utf8(const char *in_buf, int sz, char *out_buf) size = end - next; if (size > 0) { esc_in = u8_read_escape_sequence(next, size, &ch) + 1; + if (esc_in == -1) { + flb_error("invalid sequence detected"); + break; + } } else { /* because char is unsigned char by default on arm, so we need to do a explicit conversion */