From f8d029183af65fd038b9be4e4fe41c75e80c0d2e Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Fri, 17 Feb 2023 11:20:05 -0500 Subject: [PATCH] src: remove icu usage from node_string.cc PR-URL: https://github.com/nodejs/node/pull/46548 Reviewed-By: Ben Noordhuis Reviewed-By: Michael Dawson Reviewed-By: James M Snell --- src/inspector/node_string.cc | 81 +++++++++++++++++++----------------- src/inspector/node_string.h | 21 ++++++---- 2 files changed, 55 insertions(+), 47 deletions(-) diff --git a/src/inspector/node_string.cc b/src/inspector/node_string.cc index c020130ae65c52..441d9a352eaca8 100644 --- a/src/inspector/node_string.cc +++ b/src/inspector/node_string.cc @@ -1,9 +1,8 @@ #include "node_string.h" #include "node/inspector/protocol/Protocol.h" +#include "node_util.h" #include "simdutf.h" -#include - namespace node { namespace inspector { namespace protocol { @@ -12,27 +11,34 @@ namespace StringUtil { size_t kNotFound = std::string::npos; // NOLINTNEXTLINE(runtime/references) V8 API requirement -void builderAppendQuotedString(StringBuilder& builder, const String& string) { +void builderAppendQuotedString(StringBuilder& builder, + const std::string_view string) { builder.put('"'); if (!string.empty()) { - icu::UnicodeString utf16 = icu::UnicodeString::fromUTF8( - icu::StringPiece(string.data(), string.length())); - escapeWideStringForJSON( - reinterpret_cast(utf16.getBuffer()), utf16.length(), - &builder); + size_t expected_utf16_length = + simdutf::utf16_length_from_utf8(string.data(), string.length()); + MaybeStackBuffer buffer(expected_utf16_length); + size_t utf16_length = simdutf::convert_utf8_to_utf16( + string.data(), string.length(), buffer.out()); + CHECK_EQ(expected_utf16_length, utf16_length); + escapeWideStringForJSON(reinterpret_cast(buffer.out()), + utf16_length, + &builder); } builder.put('"'); } -std::unique_ptr parseJSON(const String& string) { +std::unique_ptr parseJSON(const std::string_view string) { if (string.empty()) return nullptr; - - icu::UnicodeString utf16 = - icu::UnicodeString::fromUTF8(icu::StringPiece(string.data(), - string.length())); - return parseJSONCharacters( - reinterpret_cast(utf16.getBuffer()), utf16.length()); + size_t expected_utf16_length = + simdutf::utf16_length_from_utf8(string.data(), string.length()); + MaybeStackBuffer buffer(expected_utf16_length); + size_t utf16_length = simdutf::convert_utf8_to_utf16( + string.data(), string.length(), buffer.out()); + CHECK_EQ(expected_utf16_length, utf16_length); + return parseJSONCharacters(reinterpret_cast(buffer.out()), + utf16_length); } std::unique_ptr parseJSON(v8_inspector::StringView string) { @@ -50,24 +56,15 @@ String StringViewToUtf8(v8_inspector::StringView view) { return std::string(reinterpret_cast(view.characters8()), view.length()); } - const uint16_t* source = view.characters16(); - const UChar* unicodeSource = reinterpret_cast(source); - static_assert(sizeof(*source) == sizeof(*unicodeSource), - "sizeof(*source) == sizeof(*unicodeSource)"); - - size_t result_length = view.length() * sizeof(*source); - std::string result(result_length, '\0'); - icu::UnicodeString utf16(unicodeSource, view.length()); - // ICU components for std::string compatibility are not enabled in build... - bool done = false; - while (!done) { - icu::CheckedArrayByteSink sink(&result[0], result_length); - utf16.toUTF8(sink); - result_length = sink.NumberOfBytesAppended(); - result.resize(result_length); - done = !sink.Overflowed(); - } - return result; + const char16_t* source = + reinterpret_cast(view.characters16()); + size_t expected_utf8_length = + simdutf::utf8_length_from_utf16(source, view.length()); + MaybeStackBuffer buffer(expected_utf8_length); + size_t utf8_length = + simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out()); + CHECK_EQ(expected_utf8_length, utf8_length); + return String(buffer.out(), utf8_length); } String fromDouble(double d) { @@ -86,7 +83,8 @@ double toDouble(const char* buffer, size_t length, bool* ok) { return d; } -std::unique_ptr parseMessage(const std::string& message, bool binary) { +std::unique_ptr parseMessage(const std::string_view message, + bool binary) { if (binary) { return Value::parseBinary( reinterpret_cast(message.data()), @@ -109,16 +107,21 @@ String fromUTF8(const uint8_t* data, size_t length) { } String fromUTF16(const uint16_t* data, size_t length) { - icu::UnicodeString utf16(reinterpret_cast(data), length); - std::string result; - return utf16.toUTF8String(result); + auto casted_data = reinterpret_cast(data); + size_t expected_utf8_length = + simdutf::utf8_length_from_utf16(casted_data, length); + MaybeStackBuffer buffer(expected_utf8_length); + size_t utf8_length = + simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out()); + CHECK_EQ(expected_utf8_length, utf8_length); + return String(buffer.out(), utf8_length); } -const uint8_t* CharactersUTF8(const String& s) { +const uint8_t* CharactersUTF8(const std::string_view s) { return reinterpret_cast(s.data()); } -size_t CharacterCount(const String& s) { +size_t CharacterCount(const std::string_view s) { // TODO(@anonrig): Test to make sure CharacterCount returns correctly. return simdutf::utf32_length_from_utf8(s.data(), s.length()); } diff --git a/src/inspector/node_string.h b/src/inspector/node_string.h index 1b8560b6fa5642..e36da446248ef1 100644 --- a/src/inspector/node_string.h +++ b/src/inspector/node_string.h @@ -64,21 +64,26 @@ double toDouble(const char* buffer, size_t length, bool* ok); String StringViewToUtf8(v8_inspector::StringView view); // NOLINTNEXTLINE(runtime/references) -void builderAppendQuotedString(StringBuilder& builder, const String&); -std::unique_ptr parseJSON(const String&); +void builderAppendQuotedString(StringBuilder& builder, const std::string_view); +std::unique_ptr parseJSON(const std::string_view); std::unique_ptr parseJSON(v8_inspector::StringView view); -std::unique_ptr parseMessage(const std::string& message, bool binary); +std::unique_ptr parseMessage(const std::string_view message, + bool binary); ProtocolMessage jsonToMessage(String message); ProtocolMessage binaryToMessage(std::vector message); String fromUTF8(const uint8_t* data, size_t length); String fromUTF16(const uint16_t* data, size_t length); -const uint8_t* CharactersUTF8(const String& s); -size_t CharacterCount(const String& s); +const uint8_t* CharactersUTF8(const std::string_view s); +size_t CharacterCount(const std::string_view s); // Unimplemented. The generated code will fall back to CharactersUTF8(). -inline uint8_t* CharactersLatin1(const String& s) { return nullptr; } -inline const uint16_t* CharactersUTF16(const String& s) { return nullptr; } +inline uint8_t* CharactersLatin1(const std::string_view s) { + return nullptr; +} +inline const uint16_t* CharactersUTF16(const std::string_view s) { + return nullptr; +} extern size_t kNotFound; } // namespace StringUtil @@ -92,7 +97,7 @@ class Binary { const uint8_t* data() const { UNREACHABLE(); } size_t size() const { UNREACHABLE(); } String toBase64() const { UNREACHABLE(); } - static Binary fromBase64(const String& base64, bool* success) { + static Binary fromBase64(const std::string_view base64, bool* success) { UNREACHABLE(); } static Binary fromSpan(const uint8_t* data, size_t size) { UNREACHABLE(); }