diff --git a/src/game_interpreter.cpp b/src/game_interpreter.cpp
index d903697b8f..96e6cf0fb9 100644
--- a/src/game_interpreter.cpp
+++ b/src/game_interpreter.cpp
@@ -4823,7 +4823,7 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
 		int pos = 0;
 		std::string op_string;
 		for (int i = 0; i < 3; i++) {
-			op_string += ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[i], args[i], &pos, *Main_Data::game_variables));
+			op_string += Main_Data::game_strings->GetWithModeAndPos(str_param, modes[i], args[i], pos, *Main_Data::game_variables);
 		}
 		result = std::move(op_string);
 		break;
@@ -4834,10 +4834,10 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
 		std::string base, insert;
 
 		args[1] = ValueOrVariable(modes[1], args[1]);
-		base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
-		insert = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
+		base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
+		insert = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);
 
-		result = base.insert(args[1], insert);
+		result = Game_Strings::Insert(base, insert, args[1]);
 		break;
 	}
 	case 8: //Replace (rep)
@@ -4845,23 +4845,19 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
 		int pos = 0;
 		std::string base, search, replacement;
 
-		base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
-		search = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], &pos, *Main_Data::game_variables));
-		replacement = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
+		base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
+		search = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], pos, *Main_Data::game_variables);
+		replacement = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);
+		result = Utils::ReplaceAll(base, search, replacement);
 
-		std::size_t index = base.find(search);
-		while (index != std::string::npos) {
-			base.replace(index, search.length(), replacement);
-			index = base.find(search, index + replacement.length());
-		}
-
-		result = std::move(base);
 		break;
 	}
 	case 9: //Substring (subs)
 		args[1] = ValueOrVariable(modes[1], args[1]);
 		args[2] = ValueOrVariable(modes[2], args[2]);
-		result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables).substr(args[1], args[2]));
+
+		result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables));
+		result = Game_Strings::Substring(result, args[1], args[2]);
 		break;
 	case 10: //Join (join)
 	{
@@ -4906,21 +4902,24 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
 		args[1] = ValueOrVariable(modes[1], args[1]);
 		args[2] = ValueOrVariable(modes[2], args[2]);
 		result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables));
-		result = result.erase(args[1], args[2]);
+		result = Game_Strings::Erase(result, args[1], args[2]);
 		break;
 	case 14: //Replace Ex (exRep) , edge case: the arg "first" is at ((flags >> 19) & 1). Wtf BingShan
 	{
 		int pos = 0;
 		std::string base, search, replacement;
 
-		base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
-		search = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], &pos, *Main_Data::game_variables));
-		replacement = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
+		base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
+		search = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], pos, *Main_Data::game_variables);
+		replacement = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);
+
+		auto flags = std::regex_constants::match_default;
 
-		std::regex rexp(search);
+		if (first_flag) {
+			flags = std::regex_constants::format_first_only;
+		}
 
-		if (first_flag) result = std::regex_replace(base, rexp, replacement, std::regex_constants::format_first_only);
-		else result = std::regex_replace(base, rexp, replacement);
+		result = Game_Strings::RegExReplace(base, search, replacement, flags);
 		break;
 	}
 	default:
diff --git a/src/game_strings.cpp b/src/game_strings.cpp
index 0dfb4319ac..24495a9820 100644
--- a/src/game_strings.cpp
+++ b/src/game_strings.cpp
@@ -92,20 +92,22 @@ int Game_Strings::ToNum(Str_Params params, int var_id, Game_Variables& variables
 		num = static_cast<int>(std::strtol(it->second.c_str(), nullptr, 0));
 
 	variables.Set(var_id, num);
-	Game_Map::SetNeedRefresh(true);
+
+	Game_Map::SetNeedRefreshForVarChange(var_id);
+
 	return num;
 }
 
 int Game_Strings::GetLen(Str_Params params, int var_id, Game_Variables& variables) const {
-	// Note: The length differs between Maniac and EasyRPG due to different internal encoding (utf-8 vs. ansi)
-
 	if (params.string_id <= 0) {
 		return -1;
 	}
 
-	int len = Get(params.string_id).length();
+	int len = Utils::UTF8Length(Get(params.string_id));
 	variables.Set(var_id, len);
-	Game_Map::SetNeedRefresh(true);
+
+	Game_Map::SetNeedRefreshForVarChange(var_id);
+
 	return len;
 }
 
@@ -118,9 +120,14 @@ int Game_Strings::InStr(Str_Params params, std::string search, int var_id, int b
 		search = Extract(search, params.hex);
 	}
 
-	int index = Get(params.string_id).find(search, begin);
+	auto search32 = Utils::DecodeUTF32(search);
+	auto string32 = Utils::DecodeUTF32(Get(params.string_id));
+
+	int index = string32.find(search32, begin);
 	variables.Set(var_id, index);
-	Game_Map::SetNeedRefresh(true);
+
+	Game_Map::SetNeedRefreshForVarChange(var_id);
+
 	return index;
 }
 
@@ -161,6 +168,7 @@ int Game_Strings::Split(Str_Params params, const std::string& delimiter, int str
 	if (str.find(delimiter) == std::string::npos) {
 		// token not found
 	} else {
+		// This works for UTF-8
 		std::string token;
 		for (auto index = str.find(delimiter); index != std::string::npos; index = str.find(delimiter)) {
 			token = str.substr(0, index);
@@ -175,6 +183,9 @@ int Game_Strings::Split(Str_Params params, const std::string& delimiter, int str
 	// set the remaining string
 	Set(params, str);
 	variables.Set(var_id, components);
+
+	Game_Map::SetNeedRefreshForVarChange(var_id);
+
 	return components;
 }
 
@@ -277,24 +288,30 @@ StringView Game_Strings::PopLine(Str_Params params, int offset, int string_out_i
 }
 
 StringView Game_Strings::ExMatch(Str_Params params, std::string expr, int var_id, int begin, int string_out_id, Game_Variables& variables) {
+	// std::regex only works with char and wchar, not char32
+	// For full Unicode support requires the w-API, even on non-Windows systems
 	int var_result;
 	std::string str_result;
-	std::smatch match;
 
 	if (params.extract) {
 		expr = Extract(expr, params.hex);
 	}
 
-	std::string base = ToString(Get(params.string_id)).erase(0, begin);
-	std::regex r(expr);
+	std::string base = Substring(Get(params.string_id), begin);
 
-	std::regex_search(base, match, r);
+	std::wsmatch match;
+	auto wbase = Utils::ToWideString(base);
+	auto wexpr = Utils::ToWideString(expr);
+
+	std::wregex r(wexpr);
+
+	std::regex_search(wbase, match, r);
+	str_result = Utils::FromWideString(match.str());
 
 	var_result = match.position() + begin;
 	variables.Set(var_id, var_result);
-	Game_Map::SetNeedRefresh(true);
+	Game_Map::SetNeedRefreshForVarChange(var_id);
 
-	str_result = match.str();
 	if (string_out_id > 0) {
 		params.string_id = string_out_id;
 		Set(params, str_result);
@@ -337,8 +354,10 @@ const Game_Strings::Strings_t& Game_Strings::RangeOp(Str_Params params, int stri
 }
 
 std::string Game_Strings::PrependMin(StringView string, int min_size, char c) {
-	if (static_cast<int>(string.size()) < min_size) {
-		int s = min_size - string.size();
+	int len = Utils::UTF8Length(string);
+
+	if (len < min_size) {
+		int s = min_size - len;
 		return std::string(s, c) + ToString(string);
 	}
 	return ToString(string);
@@ -356,6 +375,67 @@ std::string Game_Strings::Extract(StringView string, bool as_hex) {
 	return PendingMessage::ApplyTextInsertingCommands(ToString(string), Player::escape_char, cmd_fn);
 }
 
+std::string Game_Strings::Substring(StringView source, int begin, int length) {
+	const char* iter = source.data();
+	const auto end = source.data() + source.size();
+
+	// Points at start of the substring (UTF8Skip keeps it inside the string)
+	auto left = Utils::UTF8Skip(iter, end, begin);
+
+	// Any negative length (including the default -1) selects everything up to the end
+	if (length < 0) {
+		return std::string(left.next, end);
+	}
+
+	// Points at end of the substring
+	auto right = Utils::UTF8Skip(left.next, end, length);
+
+	return std::string(left.next, right.next);
+}
+
+std::string Game_Strings::Insert(StringView source, StringView what, int where) {
+	const char* iter = source.data();
+	const auto end = source.data() + source.size();
+
+	// Points at insertion location
+	auto ret = Utils::UTF8Skip(iter, end, where);
+
+	return std::string(source.data(), ret.next) + ToString(what) + std::string(ret.next, end);
+}
+
+std::string Game_Strings::Erase(StringView source, int begin, int length) {
+	const char* iter = source.data();
+	const auto end = source.data() + source.size();
+
+	// Points at start of deletion
+	auto left = Utils::UTF8Skip(iter, end, begin);
+
+	std::string ret = std::string(source.data(), left.next);
+
+	// A negative length erases everything up to the end of the string
+	if (length >= 0) {
+		// Points at end of deletion
+		auto right = Utils::UTF8Skip(left.next, end, length);
+		ret.append(right.next, end);
+	}
+
+	return ret;
+}
+
+std::string Game_Strings::RegExReplace(StringView str, StringView search, StringView replace, std::regex_constants::match_flag_type flags) {
+	// std::regex only works with char and wchar, not char32
+	// For full Unicode support requires the w-API, even on non-Windows systems
+	auto wstr = Utils::ToWideString(str);
+	auto wsearch = Utils::ToWideString(search);
+	auto wreplace = Utils::ToWideString(replace);
+
+	std::wregex rexp(wsearch);
+
+	auto result = std::regex_replace(wstr, rexp, wreplace, flags);
+
+	return Utils::FromWideString(result);
+}
+
 std::optional<std::string> Game_Strings::ManiacsCommandInserter(char ch, const char** iter, const char* end, uint32_t escape_char) {
 	if (ch == 'S' || ch == 's') {
 		// \s in a normal message is the speed modifier
diff --git a/src/game_strings.h b/src/game_strings.h
index dbee88cb3c..4e8f1ca731 100644
--- a/src/game_strings.h
+++ b/src/game_strings.h
@@ -27,6 +27,9 @@
 #include "pending_message.h"
 #include "player.h"
 #include "string_view.h"
+#include "utils.h"
+
+#include <regex>
 
 #ifdef HAVE_NLOHMANN_JSON
 #include <nlohmann/json.hpp>
@@ -34,6 +37,13 @@
 
 /**
  * Game_Strings class.
+ * For all operations codepoints are used (instead of bytes).
+ * This way operations that use the length or the index work almost like in Maniac Patch.
+ * And using codepoints is better anyway because this is a single character.
+ * With bytes you have the risk to "chop" a character in half.
+ *
+ * Where simple to implement UTF8 is used directly.
+ * In other cases the code does a roundtrip through UTF32.
  */
 class Game_Strings {
 public:
@@ -62,7 +72,7 @@ class Game_Strings {
 	StringView Get(int id) const;
 	StringView GetIndirect(int id, const Game_Variables& variables) const;
 	StringView GetWithMode(StringView str_data, int mode, int arg, const Game_Variables& variables) const;
-	StringView GetWithModeAndPos(StringView str_data, int mode, int arg, int* pos, const Game_Variables& variables);
+	std::string GetWithModeAndPos(StringView str_data, int mode, int arg, int& pos, const Game_Variables& variables);
 
 #ifdef HAVE_NLOHMANN_JSON
 	nlohmann::json* ParseJson(int id);
@@ -83,6 +93,10 @@ class Game_Strings {
 
 	static std::string PrependMin(StringView string, int min_size, char c);
 	static std::string Extract(StringView string, bool as_hex);
+	static std::string Substring(StringView source, int begin, int length = -1);
+	static std::string Insert(StringView source, StringView what, int where);
+	static std::string Erase(StringView source, int begin, int length);
+	static std::string RegExReplace(StringView str, StringView search, StringView replace, std::regex_constants::match_flag_type flags = std::regex_constants::match_default);
 
 	static std::optional<std::string> ManiacsCommandInserter(char ch, const char** iter, const char* end, uint32_t escape_char);
 	static std::optional<std::string> ManiacsCommandInserterHex(char ch, const char** iter, const char* end, uint32_t escape_char);
@@ -197,18 +211,23 @@ inline StringView Game_Strings::GetWithMode(StringView str_data, int mode, int a
 	}
 }
 
-inline StringView Game_Strings::GetWithModeAndPos(StringView str_data, int mode, int arg, int* pos, const Game_Variables& variables) {
-	StringView ret;
+inline std::string Game_Strings::GetWithModeAndPos(StringView str_data, int mode, int arg, int& pos, const Game_Variables& variables) {
+	std::string ret;
 	switch (mode) {
-	case StringEvalMode::eStringEval_Text:
-		assert(pos);
-		ret = str_data.substr(*pos, arg);
-		*pos += arg;
+	case StringEvalMode::eStringEval_Text: {
+		const auto end = str_data.data() + str_data.size();
+
+		auto left = Utils::UTF8Skip(str_data.data(), end, pos);
+		auto right = Utils::UTF8Skip(left.next, end, arg);
+
+		ret = std::string(left.next, right.next);
+		pos += arg;
 		return ret;
+	}
 	case StringEvalMode::eStringEval_Direct:
-		return Get(arg);
+		return ToString(Get(arg));
 	case StringEvalMode::eStringEval_Indirect:
-		return GetIndirect(arg, variables);
+		return ToString(GetIndirect(arg, variables));
 	default:
 		return ret;
 	}
diff --git a/src/utils.cpp b/src/utils.cpp
index 640388e21c..d14b9073fc 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -377,6 +377,38 @@ Utils::UtfNextResult Utils::UTF8Next(const char* iter, const char* const end) {
 	return { iter, 0 };
 }
 
+Utils::UtfNextResult Utils::UTF8Skip(const char* iter, const char* end, int skip) {
+	// Start from a well-defined result so that "next" is never null or
+	// indeterminate when nothing is skipped (skip < 0 or iter >= end)
+	UtfNextResult ret = { iter, 0 };
+
+	if (skip == 0) {
+		ret = UTF8Next(iter, end);
+		return { iter, ret.ch };
+	}
+
+	for (; iter < end && skip > 0; --skip) {
+		ret = UTF8Next(iter, end);
+		iter = ret.next;
+	}
+
+	return ret;
+}
+
+int Utils::UTF8Length(StringView str) {
+	size_t len = 0;
+
+	const char* iter = str.data();
+	const char* const e = str.data() + str.size();
+	while (iter < e) {
+		auto ret = Utils::UTF8Next(iter, e);
+		iter = ret.next;
+		++len;
+	}
+
+	return len;
+}
+
 Utils::ExFontRet Utils::ExFontNext(const char* iter, const char* end) {
 	ExFontRet ret;
 	if (end - iter >= 2 && *iter == '$') {
diff --git a/src/utils.h b/src/utils.h
index e02defac3e..ce5cb925b5 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -127,10 +127,30 @@ namespace Utils {
 	 *
 	 * @param iter begginning of the range to convert from
 	 * @param end end of the range to convert from
-	 * @return the converted string.
+	 * @return iter to the next character and codepoint number
 	 */
 	UtfNextResult UTF8Next(const char* iter, const char* end);
 
+	/**
+	 * Like UTF8Next but skips "skip" characters.
+	 * When nothing can be skipped (skip is negative or iter is already at end)
+	 * the returned iter is the passed iter and the codepoint is 0.
+	 *
+	 * @param iter beginning of the range to convert from
+	 * @param end end of the range to convert from
+	 * @param skip how many characters to skip
+	 * @return iter to the next character and codepoint number
+	 */
+	UtfNextResult UTF8Skip(const char* iter, const char* end, int skip);
+
+	/**
+	 * Determines how many codepoints are in the passed string.
+	 *
+	 * @param str unicode string
+	 * @return amount of codepoints
+	 */
+	int UTF8Length(StringView str);
+
 	// Please report an issue when you get a compile error here because your toolchain is broken and lacks wchar_t
 	/**
 	 * Converts UTF-8 string to std::wstring.
diff --git a/tests/utf.cpp b/tests/utf.cpp
index 4a63d58ac2..c6f1d1994e 100644
--- a/tests/utf.cpp
+++ b/tests/utf.cpp
@@ -86,6 +86,37 @@ TEST_CASE("next") {
 	}
 }
 
+TEST_CASE("length") {
+	for (auto& ts: tests) {
+		REQUIRE_EQ(Utils::UTF8Length(ts.u8), ts.u32.length());
+	}
+}
+
+TEST_CASE("skip") {
+	auto test = tests[0];
+
+	// First 3 characters
+	auto res = Utils::UTF8Skip(test.u8.data(), test.u8.data() + test.u8.size(), 3);
+	REQUIRE_EQ(std::string((const char*)test.u8.data(), res.next), "κόσ");
+
+	// 2 - 4
+	auto beg = Utils::UTF8Skip(test.u8.data(), test.u8.data() + test.u8.size(), 1);
+	auto end = Utils::UTF8Skip(beg.next, test.u8.data() + test.u8.size(), 3);
+	REQUIRE_EQ(std::string(beg.next, end.next), "όσμ");
+
+	// 0 characters (return iter to start)
+	res = Utils::UTF8Skip(test.u8.data(), test.u8.data() + test.u8.size(), 0);
+	REQUIRE_EQ(res.ch, tests[0].u32[0]);
+
+	// Skipping past the end stops at the end
+	res = Utils::UTF8Skip(test.u8.data(), test.u8.data() + test.u8.size(), 1000);
+	REQUIRE(res.next == test.u8.data() + test.u8.size());
+
+	// A negative skip does not move (and never yields a null iter)
+	res = Utils::UTF8Skip(test.u8.data(), test.u8.data() + test.u8.size(), -1);
+	REQUIRE(res.next == test.u8.data());
+}
+
 TEST_CASE("TextNext") {
 	std::string text = u8"H $A$B\\\\\\^\\n\nぽ";
 	const auto* iter = text.data();