Skip to content

Commit 38324b2

Browse files
author
jan.nijtmans
committed
Merge 8.7
2 parents 53c25d7 + bf36d53 commit 38324b2

File tree

3 files changed

+9
-88
lines changed

3 files changed

+9
-88
lines changed

generic/tclEncoding.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2408,13 +2408,12 @@ UtfToUtfProc(
24082408
dst += Tcl_UniCharToUtf(ch, dst);
24092409
ch = low;
24102410
#endif
2411-
} else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch)
2412-
&& (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
2411+
} else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch & ~0x7FF) == 0xD800))) {
24132412
result = TCL_CONVERT_UNKNOWN;
24142413
src = saveSrc;
24152414
break;
24162415
} else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
2417-
&& (flags & TCL_ENCODING_MODIFIED) && !Tcl_UniCharIsUnicode(ch)) {
2416+
&& (flags & TCL_ENCODING_MODIFIED) && ((ch & ~0x7FF) == 0xD800)) {
24182417
result = TCL_CONVERT_SYNTAX;
24192418
src = saveSrc;
24202419
break;
@@ -2506,7 +2505,7 @@ Utf32ToUtfProc(
25062505
ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
25072506
}
25082507
if ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
2509-
&& !Tcl_UniCharIsUnicode(ch))) {
2508+
&& ((ch & ~0x7FF) == 0xD800))) {
25102509
if (STOPONERROR) {
25112510
result = TCL_CONVERT_SYNTAX;
25122511
break;
@@ -2602,7 +2601,7 @@ UtfToUtf32Proc(
26022601
break;
26032602
}
26042603
len = TclUtfToUCS4(src, &ch);
2605-
if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
2604+
if ((ch & ~0x7FF) == 0xD800) {
26062605
if (STOPONERROR) {
26072606
result = TCL_CONVERT_UNKNOWN;
26082607
break;
@@ -2804,7 +2803,7 @@ UtfToUtf16Proc(
28042803
break;
28052804
}
28062805
len = TclUtfToUCS4(src, &ch);
2807-
if (!Tcl_UniCharIsUnicode(ch) && (((ch & ~0x7FF) == 0xD800) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
2806+
if ((ch & ~0x7FF) == 0xD800) {
28082807
if (STOPONERROR) {
28092808
result = TCL_CONVERT_UNKNOWN;
28102809
break;

tests/encoding.test

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -704,18 +704,18 @@ test encoding-24.28 {Parse invalid utf-8 with -strict} -body {
704704
test encoding-24.29 {Parse invalid utf-8} -body {
705705
encoding convertfrom utf-8 \xEF\xBF\xBF
706706
} -result \uFFFF
707-
test encoding-24.30 {Parse invalid utf-8 with -strict} -body {
707+
test encoding-24.30 {Parse noncharacter with -strict} -body {
708708
encoding convertfrom -strict utf-8 \xEF\xBF\xBF
709-
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xEF'}
709+
} -result \uFFFF
710710
test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body {
711711
encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF
712712
} -result \uFFFF
713713
test encoding-24.32 {Try to generate invalid utf-8} -body {
714714
encoding convertto utf-8 \uFFFF
715715
} -result \xEF\xBF\xBF
716-
test encoding-24.33 {Try to generate invalid utf-8 with -strict} -body {
716+
test encoding-24.33 {Try to generate noncharacter with -strict} -body {
717717
encoding convertto -strict utf-8 \uFFFF
718-
} -returnCodes 1 -result {unexpected character at index 0: 'U+00FFFF'}
718+
} -result \xEF\xBF\xBF
719719
test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body {
720720
encoding convertto -nocomplain utf-8 \uFFFF
721721
} -result \xEF\xBF\xBF

tests/io.test

Lines changed: 0 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -9198,84 +9198,6 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -
91989198
removeFile io-75.13
91999199
} -match glob -result {41 1 {error reading "*": illegal byte sequence}}
92009200

9201-
# Testcase for Rolf's use-case (detecting Invalid byte sequence, but allowing noncharacter)
9202-
test io-75.14 {How to use -strict, but allow non-characters} -setup {
9203-
set fn [makeFile {} io-75.14]
9204-
set f [open $fn w+]
9205-
fconfigure $f -encoding binary
9206-
# Noncharacter (U+FFFE) followed by a single invalid continuation byte (\x81)
9207-
puts -nonewline $f pre\xEF\xBF\xBE\x81post
9208-
flush $f
9209-
seek $f 0
9210-
fconfigure stdout -nocomplainencoding 1
9211-
catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9
9212-
fconfigure $f -encoding utf-8 -buffering none -translation lf -strictencoding 1
9213-
} -body {
9214-
set hd {}
9215-
catch {
9216-
while {![eof $f]} {
9217-
if {[catch {
9218-
append hd [read $f]
9219-
}]} {
9220-
fconfigure $f -nocomplainencoding 1 -strictencoding 0
9221-
set char [read $f 1]
9222-
if {[string is unicode $char]} {
9223-
error "InvalidByteSequence"
9224-
} elseif {$char >= "\uD800" && $char < "\uE000"} {
9225-
error "Surrogate"
9226-
} else {
9227-
append hd $char
9228-
}
9229-
catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9
9230-
fconfigure $f -strictencoding 1 -encoding utf-8
9231-
}
9232-
}
9233-
} msg
9234-
close $f
9235-
append hd +$msg
9236-
} -cleanup {
9237-
removeFile io-75.14
9238-
} -result "pre\uFFFE+InvalidByteSequence"
9239-
9240-
# Testcase for Rolf's use-case (detecting Surrogate, but allowing noncharacter)
9241-
test io-75.15 {How to use -strict, but allow non-characters} -setup {
9242-
set fn [makeFile {} io-75.14]
9243-
set f [open $fn w+]
9244-
fconfigure $f -encoding utf-8 -nocomplainencoding 1
9245-
# Noncharacter (U+FFFE) followed by a single surrogate (U+D800)
9246-
puts -nonewline $f pre\uFFFE\uD800post
9247-
flush $f
9248-
seek $f 0
9249-
fconfigure stdout -nocomplainencoding 1
9250-
catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9
9251-
fconfigure $f -buffering none -translation lf -strictencoding 1
9252-
} -body {
9253-
set hd {}
9254-
catch {
9255-
while {![eof $f]} {
9256-
if {[catch {
9257-
append hd [read $f]
9258-
}]} {
9259-
fconfigure $f -nocomplainencoding 1 -strictencoding 0
9260-
set char [read $f 1]
9261-
if {[string is unicode $char]} {
9262-
error "Invalid Byte Sequence"
9263-
} elseif {$char >= "\uD800" && $char < "\uE000"} {
9264-
error "Surrogate"
9265-
} else {
9266-
append hd $char
9267-
}
9268-
catch {fconfigure $f -nocomplainencoding 0};# Only needed on Tcl 9
9269-
fconfigure $f -strictencoding 1
9270-
}
9271-
}
9272-
} msg
9273-
close $f
9274-
append hd +$msg
9275-
} -cleanup {
9276-
removeFile io-75.15
9277-
} -result "pre\uFFFE+Surrogate"
9278-
92799201
# ### ### ### ######### ######### #########
92809202

92819203

0 commit comments

Comments
 (0)