Skip to content

Commit

Permalink
String Variables: Work with codepoints instead of bytes
Browse files Browse the repository at this point in the history
This matches the Maniac Patch behaviour more closely and is more robust than using bytes, because a byte offset can split a multi-byte character.

For example, with Latin-1 (Western European) MP works with 1 byte per character, and with Shift-JIS (Japanese) MP works with multi-byte characters (2 bytes per character).

When using Unicode, a character maps to one codepoint in both cases, so we are very lucky here: it just works.

Fix EasyRPG#3298
  • Loading branch information
Ghabry committed Nov 24, 2024
1 parent cb936e6 commit 0697a61
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 47 deletions.
43 changes: 21 additions & 22 deletions src/game_interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4823,7 +4823,7 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
int pos = 0;
std::string op_string;
for (int i = 0; i < 3; i++) {
op_string += ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[i], args[i], &pos, *Main_Data::game_variables));
op_string += Main_Data::game_strings->GetWithModeAndPos(str_param, modes[i], args[i], pos, *Main_Data::game_variables);
}
result = std::move(op_string);
break;
Expand All @@ -4834,34 +4834,30 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
std::string base, insert;

args[1] = ValueOrVariable(modes[1], args[1]);
base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
insert = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
insert = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);

result = base.insert(args[1], insert);
result = Game_Strings::Insert(base, insert, args[1]);
break;
}
case 8: //Replace (rep) <fn(string base, string search, string replacement)>
{
int pos = 0;
std::string base, search, replacement;

base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
search = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], &pos, *Main_Data::game_variables));
replacement = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
search = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], pos, *Main_Data::game_variables);
replacement = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);
result = Utils::ReplaceAll(base, search, replacement);

std::size_t index = base.find(search);
while (index != std::string::npos) {
base.replace(index, search.length(), replacement);
index = base.find(search, index + replacement.length());
}

result = std::move(base);
break;
}
case 9: //Substring (subs) <fn(string base, int index, int size)>
args[1] = ValueOrVariable(modes[1], args[1]);
args[2] = ValueOrVariable(modes[2], args[2]);
result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables).substr(args[1], args[2]));

result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables));
result = Game_Strings::Substring(result, args[1], args[2]);
break;
case 10: //Join (join) <fn(string delimiter, int id, int size)>
{
Expand Down Expand Up @@ -4906,21 +4902,24 @@ bool Game_Interpreter::CommandManiacControlStrings(lcf::rpg::EventCommand const&
args[1] = ValueOrVariable(modes[1], args[1]);
args[2] = ValueOrVariable(modes[2], args[2]);
result = ToString(Main_Data::game_strings->GetWithMode(str_param, modes[0], args[0], *Main_Data::game_variables));
result = result.erase(args[1], args[2]);
result = Game_Strings::Erase(result, args[1], args[2]);
break;
case 14: //Replace Ex (exRep) <fn(string base, string search, string replacement, bool first)>, edge case: the arg "first" is at ((flags >> 19) & 1). Wtf BingShan
{
int pos = 0;
std::string base, search, replacement;

base = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], &pos, *Main_Data::game_variables));
search = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], &pos, *Main_Data::game_variables));
replacement = ToString(Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], &pos, *Main_Data::game_variables));
base = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[0], args[0], pos, *Main_Data::game_variables);
search = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[1], args[1], pos, *Main_Data::game_variables);
replacement = Main_Data::game_strings->GetWithModeAndPos(str_param, modes[2], args[2], pos, *Main_Data::game_variables);

auto flags = std::regex_constants::match_default;

std::regex rexp(search);
if (first_flag) {
flags = std::regex_constants::format_first_only;
}

if (first_flag) result = std::regex_replace(base, rexp, replacement, std::regex_constants::format_first_only);
else result = std::regex_replace(base, rexp, replacement);
result = Game_Strings::RegExReplace(base, search, replacement, flags);
break;
}
default:
Expand Down
112 changes: 97 additions & 15 deletions src/game_strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,22 @@ int Game_Strings::ToNum(Str_Params params, int var_id, Game_Variables& variables
num = static_cast<int>(std::strtol(it->second.c_str(), nullptr, 0));

variables.Set(var_id, num);
Game_Map::SetNeedRefresh(true);

Game_Map::SetNeedRefreshForVarChange(var_id);

return num;
}

int Game_Strings::GetLen(Str_Params params, int var_id, Game_Variables& variables) const {
// Note: The length differs between Maniac and EasyRPG due to different internal encoding (utf-8 vs. ansi)

if (params.string_id <= 0) {
return -1;
}

int len = Get(params.string_id).length();
int len = Utils::UTF8Length(Get(params.string_id));
variables.Set(var_id, len);
Game_Map::SetNeedRefresh(true);

Game_Map::SetNeedRefreshForVarChange(var_id);

return len;
}

Expand All @@ -118,9 +120,14 @@ int Game_Strings::InStr(Str_Params params, std::string search, int var_id, int b
search = Extract(search, params.hex);
}

int index = Get(params.string_id).find(search, begin);
auto search32 = Utils::DecodeUTF32(search);
auto string32 = Utils::DecodeUTF32(Get(params.string_id));

int index = string32.find(search32, begin);
variables.Set(var_id, index);
Game_Map::SetNeedRefresh(true);

Game_Map::SetNeedRefreshForVarChange(var_id);

return index;
}

Expand Down Expand Up @@ -161,6 +168,7 @@ int Game_Strings::Split(Str_Params params, const std::string& delimiter, int str
if (str.find(delimiter) == std::string::npos) {
// token not found
} else {
// This works for UTF-8
std::string token;
for (auto index = str.find(delimiter); index != std::string::npos; index = str.find(delimiter)) {
token = str.substr(0, index);
Expand All @@ -175,6 +183,9 @@ int Game_Strings::Split(Str_Params params, const std::string& delimiter, int str
// set the remaining string
Set(params, str);
variables.Set(var_id, components);

Game_Map::SetNeedRefreshForVarChange(var_id);

return components;
}

Expand Down Expand Up @@ -277,24 +288,30 @@ StringView Game_Strings::PopLine(Str_Params params, int offset, int string_out_i
}

StringView Game_Strings::ExMatch(Str_Params params, std::string expr, int var_id, int begin, int string_out_id, Game_Variables& variables) {
// std::regex only works with char and wchar, not char32
// For full Unicode support requires the w-API, even on non-Windows systems
int var_result;
std::string str_result;
std::smatch match;

if (params.extract) {
expr = Extract(expr, params.hex);
}

std::string base = ToString(Get(params.string_id)).erase(0, begin);
std::regex r(expr);
std::string base = Substring(Get(params.string_id), begin);

std::regex_search(base, match, r);
std::wsmatch match;
auto wbase = Utils::ToWideString(base);
auto wexpr = Utils::ToWideString(expr);

std::wregex r(wexpr);

std::regex_search(wbase, match, r);
str_result = Utils::FromWideString(match.str());

var_result = match.position() + begin;
variables.Set(var_id, var_result);
Game_Map::SetNeedRefresh(true);
Game_Map::SetNeedRefreshForVarChange(var_id);

str_result = match.str();
if (string_out_id > 0) {
params.string_id = string_out_id;
Set(params, str_result);
Expand Down Expand Up @@ -337,8 +354,10 @@ const Game_Strings::Strings_t& Game_Strings::RangeOp(Str_Params params, int stri
}

std::string Game_Strings::PrependMin(StringView string, int min_size, char c) {
if (static_cast<int>(string.size()) < min_size) {
int s = min_size - string.size();
int len = Utils::UTF8Length(string);

if (len < min_size) {
int s = min_size - len;
return std::string(s, c) + ToString(string);
}
return ToString(string);
Expand All @@ -356,6 +375,69 @@ std::string Game_Strings::Extract(StringView string, bool as_hex) {
return PendingMessage::ApplyTextInsertingCommands(ToString(string), Player::escape_char, cmd_fn);
}

/**
 * Extracts a range of codepoints from a UTF-8 encoded string.
 *
 * Positions are counted in codepoints, not bytes, so a multi-byte character
 * is never cut in half. Out-of-range arguments are clamped to the string
 * bounds instead of being passed on to the UTF-8 iteration helpers.
 *
 * @param source UTF-8 encoded input
 * @param begin index of the first codepoint to extract. Values below 0 are
 *              treated as 0, values behind the last codepoint yield an
 *              empty string.
 * @param length number of codepoints to extract. Any negative value
 *               (including the default of -1) extracts up to the end of
 *               the string.
 * @return the extracted substring
 */
std::string Game_Strings::Substring(StringView source, int begin, int length) {
	const char* const first = source.data();
	const char* const end = first + source.size();

	// Locate the first byte of the substring.
	// UTF8Skip is only invoked when it is guaranteed to advance at least one
	// codepoint: otherwise it never assigns its result and the returned
	// "next" pointer must not be used.
	const char* left = first;
	if (begin > 0 && left < end) {
		left = Utils::UTF8Skip(left, end, begin).next;
	}

	if (left >= end) {
		// Empty source or begin is at/behind the end: nothing to extract
		return {};
	}

	if (length < 0) {
		// Open-ended request: everything from begin to the end
		return std::string(left, end);
	}

	// left < end and length >= 0: UTF8Skip returns a valid position here
	// (left itself for length == 0, at most end otherwise)
	const char* const right = Utils::UTF8Skip(left, end, length).next;

	return std::string(left, right);
}

/**
 * Inserts a string into a UTF-8 encoded string at a codepoint position.
 *
 * @param source UTF-8 encoded string to insert into
 * @param what string that is inserted
 * @param where index of the codepoint in front of which "what" is inserted.
 *              The position is clamped to the string bounds: values below 0
 *              prepend, values behind the last codepoint append.
 * @return copy of source with "what" inserted
 */
std::string Game_Strings::Insert(StringView source, StringView what, int where) {
	const char* const first = source.data();
	const char* const end = first + source.size();

	// Locate the insertion point.
	// UTF8Skip is only invoked when it is guaranteed to advance at least one
	// codepoint: otherwise it never assigns its result and the returned
	// "next" pointer must not be used (empty source or negative position).
	const char* at = first;
	if (where > 0 && at < end) {
		at = Utils::UTF8Skip(at, end, where).next;
	}

	std::string result(first, at);
	result += ToString(what);
	result.append(at, end);

	return result;
}

/**
 * Removes a range of codepoints from a UTF-8 encoded string.
 *
 * @param source UTF-8 encoded input
 * @param begin index of the first codepoint to remove. Values below 0 are
 *              treated as 0. When begin is at or behind the end nothing is
 *              removed.
 * @param length number of codepoints to remove. A negative value removes
 *               everything from begin up to the end of the string.
 * @return copy of source without the removed range
 */
std::string Game_Strings::Erase(StringView source, int begin, int length) {
	const char* const first = source.data();
	const char* const end = first + source.size();

	// Locate the start of the deletion.
	// UTF8Skip is only invoked when it is guaranteed to advance at least one
	// codepoint: otherwise it never assigns its result and the returned
	// "next" pointer must not be used.
	const char* left = first;
	if (begin > 0 && left < end) {
		left = Utils::UTF8Skip(left, end, begin).next;
	}

	// Everything in front of the deletion is always kept
	std::string result(first, left);

	if (left >= end || length < 0) {
		// Either there is nothing behind "left" or the deletion is
		// open-ended: no tail to append
		return result;
	}

	// left < end and length >= 0: UTF8Skip returns a valid position here
	const char* const right = Utils::UTF8Skip(left, end, length).next;
	result.append(right, end);

	return result;
}

/**
 * Replaces regular expression matches inside a string.
 *
 * std::regex is only usable with char and wchar_t (not char32_t). To get
 * Unicode aware matching all arguments are converted to wide strings, the
 * replacement runs on the wide-character regex API (also on non-Windows
 * systems) and the result is converted back.
 *
 * @param str string to search in
 * @param search regular expression
 * @param replace replacement format string
 * @param flags flags forwarded to std::regex_replace
 * @return str with the matches replaced
 */
std::string Game_Strings::RegExReplace(StringView str, StringView search, StringView replace, std::regex_constants::match_flag_type flags) {
	const auto subject = Utils::ToWideString(str);
	const auto pattern_text = Utils::ToWideString(search);
	const auto substitute = Utils::ToWideString(replace);

	const std::wregex pattern(pattern_text);

	return Utils::FromWideString(std::regex_replace(subject, pattern, substitute, flags));
}

std::optional<std::string> Game_Strings::ManiacsCommandInserter(char ch, const char** iter, const char* end, uint32_t escape_char) {
if (ch == 'S' || ch == 's') {
// \s in a normal message is the speed modifier
Expand Down
37 changes: 28 additions & 9 deletions src/game_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,23 @@
#include "pending_message.h"
#include "player.h"
#include "string_view.h"
#include "utils.h"

#include <regex>

#ifdef HAVE_NLOHMANN_JSON
#include <nlohmann/json.hpp>
#endif

/**
* Game_Strings class.
* All operations work on codepoints (instead of bytes).
* This way operations that use a length or an index behave almost like in Maniac Patch.
* Codepoints are also the better unit because one codepoint represents a single character,
* whereas a byte offset risks "chopping" a multi-byte character in half.
*
* Where it is simple to implement, UTF-8 is processed directly.
* In other cases the code does a roundtrip through UTF-32 or wide strings.
*/
class Game_Strings {
public:
Expand Down Expand Up @@ -62,7 +72,7 @@ class Game_Strings {
StringView Get(int id) const;
StringView GetIndirect(int id, const Game_Variables& variables) const;
StringView GetWithMode(StringView str_data, int mode, int arg, const Game_Variables& variables) const;
StringView GetWithModeAndPos(StringView str_data, int mode, int arg, int* pos, const Game_Variables& variables);
std::string GetWithModeAndPos(StringView str_data, int mode, int arg, int& pos, const Game_Variables& variables);

#ifdef HAVE_NLOHMANN_JSON
nlohmann::json* ParseJson(int id);
Expand All @@ -83,6 +93,10 @@ class Game_Strings {

static std::string PrependMin(StringView string, int min_size, char c);
static std::string Extract(StringView string, bool as_hex);
static std::string Substring(StringView source, int begin, int length = -1);
static std::string Insert(StringView source, StringView what, int where);
static std::string Erase(StringView source, int begin, int length);
static std::string RegExReplace(StringView str, StringView search, StringView replace, std::regex_constants::match_flag_type flags = std::regex_constants::match_default);

static std::optional<std::string> ManiacsCommandInserter(char ch, const char** iter, const char* end, uint32_t escape_char);
static std::optional<std::string> ManiacsCommandInserterHex(char ch, const char** iter, const char* end, uint32_t escape_char);
Expand Down Expand Up @@ -197,18 +211,23 @@ inline StringView Game_Strings::GetWithMode(StringView str_data, int mode, int a
}
}

inline StringView Game_Strings::GetWithModeAndPos(StringView str_data, int mode, int arg, int* pos, const Game_Variables& variables) {
StringView ret;
inline std::string Game_Strings::GetWithModeAndPos(StringView str_data, int mode, int arg, int& pos, const Game_Variables& variables) {
std::string ret;
switch (mode) {
case StringEvalMode::eStringEval_Text:
assert(pos);
ret = str_data.substr(*pos, arg);
*pos += arg;
case StringEvalMode::eStringEval_Text: {
const auto end = str_data.data() + str_data.size();

auto left = Utils::UTF8Skip(str_data.begin(), end, pos);
auto right = Utils::UTF8Skip(left.next, end, arg);

ret = std::string(left.next, right.next);
pos += arg;
return ret;
}
case StringEvalMode::eStringEval_Direct:
return Get(arg);
return ToString(Get(arg));
case StringEvalMode::eStringEval_Indirect:
return GetIndirect(arg, variables);
return ToString(GetIndirect(arg, variables));
default:
return ret;
}
Expand Down
30 changes: 30 additions & 0 deletions src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,36 @@ Utils::UtfNextResult Utils::UTF8Next(const char* iter, const char* const end) {
return { iter, 0 };
}

/**
 * Steps over up to "skip" UTF-8 encoded codepoints.
 *
 * @param iter start of the range to walk
 * @param end one past the last byte of the range
 * @param skip number of codepoints to step over
 * @return For skip == 0: "iter" itself together with the codepoint that
 *         UTF8Next decodes at "iter".
 *         Otherwise the UTF8Next result of the last codepoint that was
 *         stepped over; iteration stops early when "end" is reached.
 *         When no codepoint could be stepped over (skip < 0 or iter >= end)
 *         a value-initialized result is returned. Callers must not
 *         dereference or build ranges from its "next" member.
 */
Utils::UtfNextResult Utils::UTF8Skip(const char* iter, const char* end, int skip) {
	// Value-initialized so that the result is well-defined when the loop
	// below does not execute a single iteration. Callers compare "next"
	// against nullptr to detect this case.
	UtfNextResult ret{};

	if (skip == 0) {
		ret = UTF8Next(iter, end);
		return { iter, ret.ch };
	}

	for (; iter < end && skip > 0; --skip) {
		ret = UTF8Next(iter, end);
		iter = ret.next;
	}

	return ret;
}

/**
 * Counts the codepoints of a UTF-8 encoded string.
 *
 * The string is walked with UTF8Next; every decode step counts as one
 * codepoint.
 *
 * @param str UTF-8 encoded string
 * @return number of codepoints in str
 */
int Utils::UTF8Length(StringView str) {
	const char* cursor = str.data();
	const char* const stop = cursor + str.size();

	size_t codepoints = 0;
	for (; cursor < stop; ++codepoints) {
		cursor = Utils::UTF8Next(cursor, stop).next;
	}

	return static_cast<int>(codepoints);
}

Utils::ExFontRet Utils::ExFontNext(const char* iter, const char* end) {
ExFontRet ret;
if (end - iter >= 2 && *iter == '$') {
Expand Down
Loading

0 comments on commit 0697a61

Please sign in to comment.