Skip to content

Commit 1cfe46e

Browse files
committed
Fix mod CSV encoding errors found in HPM and DoD
1 parent 1f785f0 commit 1cfe46e

File tree

2 files changed

+56
-1
lines changed

2 files changed

+56
-1
lines changed

src/openvic-dataloader/detail/Convert.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,11 @@ namespace ovdl::convert {
156 156

157 157
// Paradox-specific quirk: the byte 0x8F is not valid Windows-1252
158 158
// Used for (semantically incorrect) Polish localization TODOs
159-
.map<'\x8F'>("Ę");
159+
.map<'\x8F'>("Ę")
160+
// HPM (and mods derived from it) ship CSVs whose text permits reading the byte 0x90 as "é"
161+
.map<'\x90'>("é")
162+
// DoD 0_news.csv mixes Windows-1252 and UTF-8
163+
.map<'\x9D'>("");
160 164

161 165
static constexpr auto win1251_map = lexy::symbol_table<std::string_view> //
162 166
.map<'\x80'>("Ђ")

tests/src/csv/Parser.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,4 +934,55 @@ TEST_CASE("CSV Parse", "[csv-parse]") {
934 934
}
935 935
}
936 936
#endif
937+
938+
SECTION("EVTOPTA36918;\\xF3\\xED\\x90\\xE2\\x80\\x9C\\xE2\\x80\\x9D;;x") {
939+
static constexpr auto buffer = "EVTOPTA36918;\xF3\xED\x90\xE2\x80\x9C\xE2\x80\x9D;;x"sv;
940+
parser.load_from_string(buffer);
941+
942+
CHECK_PARSE();
943+
944+
const std::vector<LineObject>& line_list = parser.get_lines();
945+
CHECK_FALSE(line_list.empty());
946+
CHECK(ranges::size(line_list) == 1);
947+
948+
const LineObject& line = line_list.front();
949+
CHECK_FALSE(line.empty());
950+
CHECK(ranges::size(line) == 3);
951+
CHECK(line.value_count() == 4);
952+
CHECK(line.prefix_end() == 0);
953+
CHECK(line.suffix_end() == 4);
954+
955+
for (const auto [index, val] : line | ranges::views::enumerate) {
956+
CAPTURE(index);
957+
CHECK_FALSE_OR_CONTINUE(val.second.empty());
958+
switch (index) {
959+
case 0:
960+
CHECK_OR_CONTINUE(val.first == 0);
961+
CHECK_OR_CONTINUE(val.second == "EVTOPTA36918"sv);
962+
break;
963+
case 1:
964+
CHECK_OR_CONTINUE(val.first == 1);
965+
CHECK_OR_CONTINUE(val.second == "óíé“â€�"sv);
966+
break;
967+
case 2:
968+
CHECK_OR_CONTINUE(val.first == 3);
969+
CHECK_OR_CONTINUE(val.second == "x"sv);
970+
break;
971+
default: CHECK_OR_CONTINUE(false); break;
972+
}
973+
}
974+
975+
CHECK(line.value_count() == 4);
976+
977+
for (const auto index : ranges::views::iota(size_t(0), line.value_count())) {
978+
CAPTURE(index);
979+
switch (index) {
980+
case 0: CHECK_OR_CONTINUE(line.get_value_for(index) == "EVTOPTA36918"sv); break;
981+
case 1: CHECK_OR_CONTINUE(line.get_value_for(index) == "óíé“â€�"sv); break;
982+
case 2: CHECK_OR_CONTINUE(line.get_value_for(index) == ""sv); break;
983+
case 3: CHECK_OR_CONTINUE(line.get_value_for(index) == "x"sv); break;
984+
default: CHECK_OR_CONTINUE(false); break;
985+
}
986+
}
987+
}
937 988
}

0 commit comments

Comments
 (0)