Skip to content

Commit 1043b73

Browse files
committed
Fix encoding detection regression for UTF-16 without BOM, issue #953.
1 parent cd377f0 commit 1043b73

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

src/EditEncoding.cpp

+13 -6
Original file line number | Diff line number | Diff line change
@@ -1035,7 +1035,7 @@ static constexpr bool IsC0ControlChar(uint8_t ch) noexcept {
10351035
#endif
10361036
}
10371037

1038-
bool MaybeBinaryFile(const uint8_t *ptr, DWORD length) noexcept {
1038+
bool MaybeBinaryFile(const uint8_t *ptr, DWORD length, int *encodingFlag) noexcept {
10391039
/* Test C0 Control Character
10401040
These characters are not reused in most text encodings, and do not appear in normal text files.
10411041
Most binary files have reserved fields (mostly zeros) or small values in the header.
@@ -1046,17 +1046,25 @@ bool MaybeBinaryFile(const uint8_t *ptr, DWORD length) noexcept {
10461046
const uint8_t * const end = ptr + length;
10471047
UINT count = 0;
10481048
UINT mask = 0; // find two different C0 control characters
1049+
int result = 0;
10491050
while (ptr < end) {
10501051
const uint8_t ch = *ptr++;
10511052
if (IsC0ControlChar(ch)) {
10521053
++count;
10531054
mask |= 1U << ch;
1054-
if (((mask & (mask - 1)) != 0) && ((count >= 8) || IsC0ControlChar(*ptr))) {
1055-
return true;
1055+
if ((mask & (mask - 1)) != 0) {
1056+
result |= 2;
1057+
}
1058+
if ((count >= 8) || IsC0ControlChar(*ptr)) {
1059+
result |= 1;
1060+
}
1061+
if (result == 3) {
1062+
*encodingFlag = EncodingFlag_Binary;
1063+
break;
10561064
}
10571065
}
10581066
}
1059-
return false;
1067+
return result & true;
10601068
}
10611069

10621070
static inline BOOL IsValidMultiByte(UINT codePage, const char *lpData, DWORD cbData) noexcept {
@@ -2439,9 +2447,8 @@ int EditDetermineEncoding(LPCWSTR pszFile, char *lpData, DWORD cbData, int *enco
24392447
*encodingFlag = EncodingFlag_Invalid;
24402448
}
24412449
// detect binary file
2442-
if (MaybeBinaryFile(reinterpret_cast<const uint8_t *>(lpData), cbData)) {
2450+
if (MaybeBinaryFile(reinterpret_cast<const uint8_t *>(lpData), cbData, encodingFlag)) {
24432451
tryUnicode = true;
2444-
*encodingFlag = EncodingFlag_Binary;
24452452
}
24462453
// check UTF-16 without BOM
24472454
if (tryUnicode && (cbData & 1) == 0 && fvCurFile.mask == 0) {

0 commit comments

Comments
 (0)