From 75e4d0df8f4ea812772c3c621501df766cc4ef0f Mon Sep 17 00:00:00 2001 From: Mert Can Altin Date: Sat, 31 Aug 2024 21:24:53 +0300 Subject: [PATCH] node-api: add support for UTF-8 and Latin-1 property keys PR-URL: https://github.com/nodejs/node/pull/52984 Reviewed-By: James M Snell Reviewed-By: Chengzhong Wu Reviewed-By: Vladimir Morozov --- doc/api/n-api.md | 88 +++++++-- src/js_native_api.h | 4 + src/js_native_api_v8.cc | 24 +++ test/js-native-api/test_string/test.js | 182 +++++++------------ test/js-native-api/test_string/test_string.c | 55 ++++++ 5 files changed, 224 insertions(+), 129 deletions(-) diff --git a/doc/api/n-api.md b/doc/api/n-api.md index 0727ca74d2f986..6a725ccf70944e 100644 --- a/doc/api/n-api.md +++ b/doc/api/n-api.md @@ -3088,6 +3088,54 @@ The native string is copied. The JavaScript `string` type is described in [Section 6.1.4][] of the ECMAScript Language Specification. +### Functions to create optimized property keys + +Many JavaScript engines including V8 use internalized strings as keys +to set and get property values. They typically use a hash table to create +and lookup such strings. While it adds some cost per key creation, it improves +the performance after that by enabling comparison of string pointers instead +of the whole strings. + +If a new JavaScript string is intended to be used as a property key, then for +some JavaScript engines it will be more efficient to use the functions in this +section. Otherwise, use the `napi_create_string_utf8` or +`node_api_create_external_string_utf8` series functions as there may be +additional overhead in creating/storing strings with the property key +creation methods. + +#### `node_api_create_property_key_latin1` + + + +> Stability: 1 - Experimental + +```c +napi_status NAPI_CDECL node_api_create_property_key_latin1(napi_env env, + const char* str, + size_t length, + napi_value* result); +``` + +* `[in] env`: The environment that the API is invoked under. 
+* `[in] str`: Character buffer representing an ISO-8859-1-encoded string. +* `[in] length`: The length of the string in bytes, or `NAPI_AUTO_LENGTH` if it + is null-terminated. +* `[out] result`: A `napi_value` representing an optimized JavaScript `string` + to be used as a property key for objects. + +Returns `napi_ok` if the API succeeded. + +This API creates an optimized JavaScript `string` value from +an ISO-8859-1-encoded C string to be used as a property key for objects. +The native string is copied. In contrast with `napi_create_string_latin1`, +subsequent calls to this function with the same `str` pointer may benefit from a speedup +in the creation of the requested `napi_value`, depending on the engine. + +The JavaScript `string` type is described in +[Section 6.1.4][] of the ECMAScript Language Specification. + #### `node_api_create_property_key_utf16` + +> Stability: 1 - Experimental + +```c +napi_status NAPI_CDECL node_api_create_property_key_utf8(napi_env env, + const char* str, + size_t length, + napi_value* result); +``` + +* `[in] env`: The environment that the API is invoked under. +* `[in] str`: Character buffer representing a UTF8-encoded string. +* `[in] length`: The length of the string in bytes, or + `NAPI_AUTO_LENGTH` if it is null-terminated. +* `[out] result`: A `napi_value` representing an optimized JavaScript `string` + to be used as a property key for objects. + +Returns `napi_ok` if the API succeeded. + +This API creates an optimized JavaScript `string` value from +a UTF8-encoded C string to be used as a property key for objects. +The native string is copied. The JavaScript `string` type is described in [Section 6.1.4][] of the ECMAScript Language Specification. 
diff --git a/src/js_native_api.h b/src/js_native_api.h index 1558a9f996a069..07e3df13407030 100644 --- a/src/js_native_api.h +++ b/src/js_native_api.h @@ -114,6 +114,10 @@ node_api_create_external_string_utf16(napi_env env, #ifdef NAPI_EXPERIMENTAL #define NODE_API_EXPERIMENTAL_HAS_PROPERTY_KEYS +NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_latin1( + napi_env env, const char* str, size_t length, napi_value* result); +NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_utf8( + napi_env env, const char* str, size_t length, napi_value* result); NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_utf16( napi_env env, const char16_t* str, size_t length, napi_value* result); #endif // NAPI_EXPERIMENTAL diff --git a/src/js_native_api_v8.cc b/src/js_native_api_v8.cc index b6349b832acd10..bcb9e5ca8d2926 100644 --- a/src/js_native_api_v8.cc +++ b/src/js_native_api_v8.cc @@ -1704,6 +1704,30 @@ napi_status NAPI_CDECL node_api_create_external_string_utf16( }); } +napi_status node_api_create_property_key_latin1(napi_env env, + const char* str, + size_t length, + napi_value* result) { + return v8impl::NewString(env, str, length, result, [&](v8::Isolate* isolate) { + return v8::String::NewFromOneByte(isolate, + reinterpret_cast<const uint8_t*>(str), + v8::NewStringType::kInternalized, + length); + }); +} + +napi_status node_api_create_property_key_utf8(napi_env env, + const char* str, + size_t length, + napi_value* result) { + return v8impl::NewString(env, str, length, result, [&](v8::Isolate* isolate) { + return v8::String::NewFromUtf8(isolate, + str, + v8::NewStringType::kInternalized, + static_cast<int>(length)); + }); +} + napi_status NAPI_CDECL node_api_create_property_key_utf16(napi_env env, const char16_t* str, size_t length, diff --git a/test/js-native-api/test_string/test.js b/test/js-native-api/test_string/test.js index ffcf0fab754e7c..04515a286ce36a 100644 --- a/test/js-native-api/test_string/test.js +++ b/test/js-native-api/test_string/test.js @@ 
-4,131 +4,77 @@ const assert = require('assert'); // Testing api calls for string const test_string = require(`./build/${common.buildType}/test_string`); +// The insufficient buffer test case allocates a buffer of size 4, including +// the null terminator. +const kInsufficientIdx = 3; -const empty = ''; -assert.strictEqual(test_string.TestLatin1(empty), empty); -assert.strictEqual(test_string.TestUtf8(empty), empty); -assert.strictEqual(test_string.TestUtf16(empty), empty); -assert.strictEqual(test_string.TestLatin1AutoLength(empty), empty); -assert.strictEqual(test_string.TestUtf8AutoLength(empty), empty); -assert.strictEqual(test_string.TestUtf16AutoLength(empty), empty); -assert.strictEqual(test_string.TestLatin1External(empty), empty); -assert.strictEqual(test_string.TestUtf16External(empty), empty); -assert.strictEqual(test_string.TestLatin1ExternalAutoLength(empty), empty); -assert.strictEqual(test_string.TestUtf16ExternalAutoLength(empty), empty); -assert.strictEqual(test_string.TestPropertyKeyUtf16(empty), empty); -assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(empty), empty); -assert.strictEqual(test_string.Utf16Length(empty), 0); -assert.strictEqual(test_string.Utf8Length(empty), 0); +const asciiCases = [ + '', + 'hello world', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', + '?!@#$%^&*()_+-=[]{}/.,<>\'"\\', +]; -const str1 = 'hello world'; -assert.strictEqual(test_string.TestLatin1(str1), str1); -assert.strictEqual(test_string.TestUtf8(str1), str1); -assert.strictEqual(test_string.TestUtf16(str1), str1); -assert.strictEqual(test_string.TestLatin1AutoLength(str1), str1); -assert.strictEqual(test_string.TestUtf8AutoLength(str1), str1); -assert.strictEqual(test_string.TestUtf16AutoLength(str1), str1); -assert.strictEqual(test_string.TestLatin1External(str1), str1); -assert.strictEqual(test_string.TestUtf16External(str1), str1); -assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str1), str1); 
-assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str1), str1); -assert.strictEqual(test_string.TestLatin1Insufficient(str1), str1.slice(0, 3)); -assert.strictEqual(test_string.TestUtf8Insufficient(str1), str1.slice(0, 3)); -assert.strictEqual(test_string.TestUtf16Insufficient(str1), str1.slice(0, 3)); -assert.strictEqual(test_string.TestPropertyKeyUtf16(str1), str1); -assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str1), str1); -assert.strictEqual(test_string.Utf16Length(str1), 11); -assert.strictEqual(test_string.Utf8Length(str1), 11); +const latin1Cases = [ + { + str: '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿', + utf8Length: 62, + utf8InsufficientIdx: 1, + }, + { + str: 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ', + utf8Length: 126, + utf8InsufficientIdx: 1, + }, +]; -const str2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'; -assert.strictEqual(test_string.TestLatin1(str2), str2); -assert.strictEqual(test_string.TestUtf8(str2), str2); -assert.strictEqual(test_string.TestUtf16(str2), str2); -assert.strictEqual(test_string.TestLatin1AutoLength(str2), str2); -assert.strictEqual(test_string.TestUtf8AutoLength(str2), str2); -assert.strictEqual(test_string.TestUtf16AutoLength(str2), str2); -assert.strictEqual(test_string.TestLatin1External(str2), str2); -assert.strictEqual(test_string.TestUtf16External(str2), str2); -assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str2), str2); -assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str2), str2); -assert.strictEqual(test_string.TestLatin1Insufficient(str2), str2.slice(0, 3)); -assert.strictEqual(test_string.TestUtf8Insufficient(str2), str2.slice(0, 3)); -assert.strictEqual(test_string.TestUtf16Insufficient(str2), str2.slice(0, 3)); -assert.strictEqual(test_string.TestPropertyKeyUtf16(str2), str2); -assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str2), str2); -assert.strictEqual(test_string.Utf16Length(str2), 62); 
-assert.strictEqual(test_string.Utf8Length(str2), 62); +const unicodeCases = [ + { + str: '\u{2003}\u{2101}\u{2001}\u{202}\u{2011}', + utf8Length: 14, + utf8InsufficientIdx: 1, + }, +]; -const str3 = '?!@#$%^&*()_+-=[]{}/.,<>\'"\\'; -assert.strictEqual(test_string.TestLatin1(str3), str3); -assert.strictEqual(test_string.TestUtf8(str3), str3); -assert.strictEqual(test_string.TestUtf16(str3), str3); -assert.strictEqual(test_string.TestLatin1AutoLength(str3), str3); -assert.strictEqual(test_string.TestUtf8AutoLength(str3), str3); -assert.strictEqual(test_string.TestUtf16AutoLength(str3), str3); -assert.strictEqual(test_string.TestLatin1External(str3), str3); -assert.strictEqual(test_string.TestUtf16External(str3), str3); -assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str3), str3); -assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str3), str3); -assert.strictEqual(test_string.TestLatin1Insufficient(str3), str3.slice(0, 3)); -assert.strictEqual(test_string.TestUtf8Insufficient(str3), str3.slice(0, 3)); -assert.strictEqual(test_string.TestUtf16Insufficient(str3), str3.slice(0, 3)); -assert.strictEqual(test_string.TestPropertyKeyUtf16(str3), str3); -assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str3), str3); -assert.strictEqual(test_string.Utf16Length(str3), 27); -assert.strictEqual(test_string.Utf8Length(str3), 27); +function testLatin1Cases(str) { + assert.strictEqual(test_string.TestLatin1(str), str); + assert.strictEqual(test_string.TestLatin1AutoLength(str), str); + assert.strictEqual(test_string.TestLatin1External(str), str); + assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str), str); + assert.strictEqual(test_string.TestPropertyKeyLatin1(str), str); + assert.strictEqual(test_string.TestPropertyKeyLatin1AutoLength(str), str); + assert.strictEqual(test_string.Latin1Length(str), str.length); -const str4 = '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿'; -assert.strictEqual(test_string.TestLatin1(str4), str4); 
-assert.strictEqual(test_string.TestUtf8(str4), str4); -assert.strictEqual(test_string.TestUtf16(str4), str4); -assert.strictEqual(test_string.TestLatin1AutoLength(str4), str4); -assert.strictEqual(test_string.TestUtf8AutoLength(str4), str4); -assert.strictEqual(test_string.TestUtf16AutoLength(str4), str4); -assert.strictEqual(test_string.TestLatin1External(str4), str4); -assert.strictEqual(test_string.TestUtf16External(str4), str4); -assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str4), str4); -assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str4), str4); -assert.strictEqual(test_string.TestLatin1Insufficient(str4), str4.slice(0, 3)); -assert.strictEqual(test_string.TestUtf8Insufficient(str4), str4.slice(0, 1)); -assert.strictEqual(test_string.TestUtf16Insufficient(str4), str4.slice(0, 3)); -assert.strictEqual(test_string.TestPropertyKeyUtf16(str4), str4); -assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str4), str4); -assert.strictEqual(test_string.Utf16Length(str4), 31); -assert.strictEqual(test_string.Utf8Length(str4), 62); + if (str !== '') { + assert.strictEqual(test_string.TestLatin1Insufficient(str), str.slice(0, kInsufficientIdx)); + } +} -const str5 = 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ'; -assert.strictEqual(test_string.TestLatin1(str5), str5); -assert.strictEqual(test_string.TestUtf8(str5), str5); -assert.strictEqual(test_string.TestUtf16(str5), str5); -assert.strictEqual(test_string.TestLatin1AutoLength(str5), str5); -assert.strictEqual(test_string.TestUtf8AutoLength(str5), str5); -assert.strictEqual(test_string.TestUtf16AutoLength(str5), str5); -assert.strictEqual(test_string.TestLatin1External(str5), str5); -assert.strictEqual(test_string.TestUtf16External(str5), str5); -assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str5), str5); -assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str5), str5); -assert.strictEqual(test_string.TestLatin1Insufficient(str5), 
str5.slice(0, 3)); -assert.strictEqual(test_string.TestUtf8Insufficient(str5), str5.slice(0, 1)); -assert.strictEqual(test_string.TestUtf16Insufficient(str5), str5.slice(0, 3)); -assert.strictEqual(test_string.TestPropertyKeyUtf16(str5), str5); -assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str5), str5); -assert.strictEqual(test_string.Utf16Length(str5), 63); -assert.strictEqual(test_string.Utf8Length(str5), 126); +function testUnicodeCases(str, utf8Length, utf8InsufficientIdx) { + assert.strictEqual(test_string.TestUtf8(str), str); + assert.strictEqual(test_string.TestUtf16(str), str); + assert.strictEqual(test_string.TestUtf8AutoLength(str), str); + assert.strictEqual(test_string.TestUtf16AutoLength(str), str); + assert.strictEqual(test_string.TestUtf16External(str), str); + assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str), str); + assert.strictEqual(test_string.TestPropertyKeyUtf8(str), str); + assert.strictEqual(test_string.TestPropertyKeyUtf8AutoLength(str), str); + assert.strictEqual(test_string.TestPropertyKeyUtf16(str), str); + assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str), str); + assert.strictEqual(test_string.Utf8Length(str), utf8Length); + assert.strictEqual(test_string.Utf16Length(str), str.length); -const str6 = '\u{2003}\u{2101}\u{2001}\u{202}\u{2011}'; -assert.strictEqual(test_string.TestUtf8(str6), str6); -assert.strictEqual(test_string.TestUtf16(str6), str6); -assert.strictEqual(test_string.TestUtf8AutoLength(str6), str6); -assert.strictEqual(test_string.TestUtf16AutoLength(str6), str6); -assert.strictEqual(test_string.TestUtf16External(str6), str6); -assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str6), str6); -assert.strictEqual(test_string.TestUtf8Insufficient(str6), str6.slice(0, 1)); -assert.strictEqual(test_string.TestUtf16Insufficient(str6), str6.slice(0, 3)); -assert.strictEqual(test_string.TestPropertyKeyUtf16(str6), str6); 
-assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str6), str6); -assert.strictEqual(test_string.Utf16Length(str6), 5); -assert.strictEqual(test_string.Utf8Length(str6), 14); + if (str !== '') { + assert.strictEqual(test_string.TestUtf8Insufficient(str), str.slice(0, utf8InsufficientIdx)); + assert.strictEqual(test_string.TestUtf16Insufficient(str), str.slice(0, kInsufficientIdx)); + } +} + +asciiCases.forEach(testLatin1Cases); +asciiCases.forEach((str) => testUnicodeCases(str, str.length, kInsufficientIdx)); +latin1Cases.forEach((it) => testLatin1Cases(it.str)); +latin1Cases.forEach((it) => testUnicodeCases(it.str, it.utf8Length, it.utf8InsufficientIdx)); +unicodeCases.forEach((it) => testUnicodeCases(it.str, it.utf8Length, it.utf8InsufficientIdx)); assert.throws(() => { test_string.TestLargeUtf8(); diff --git a/test/js-native-api/test_string/test_string.c b/test/js-native-api/test_string/test_string.c index 57353b9f6303f2..01e5dbee3912d8 100644 --- a/test/js-native-api/test_string/test_string.c +++ b/test/js-native-api/test_string/test_string.c @@ -293,6 +293,40 @@ static napi_value TestUtf16Insufficient(napi_env env, napi_callback_info info) { return output; } +static napi_value TestPropertyKeyLatin1(napi_env env, napi_callback_info info) { + return TestOneByteImpl(env, + info, + napi_get_value_string_latin1, + node_api_create_property_key_latin1, + actual_length); +} + +static napi_value TestPropertyKeyLatin1AutoLength(napi_env env, + napi_callback_info info) { + return TestOneByteImpl(env, + info, + napi_get_value_string_latin1, + node_api_create_property_key_latin1, + auto_length); +} + +static napi_value TestPropertyKeyUtf8(napi_env env, napi_callback_info info) { + return TestOneByteImpl(env, + info, + napi_get_value_string_utf8, + node_api_create_property_key_utf8, + actual_length); +} + +static napi_value TestPropertyKeyUtf8AutoLength(napi_env env, + napi_callback_info info) { + return TestOneByteImpl(env, + info, + napi_get_value_string_utf8, 
+ node_api_create_property_key_utf8, + auto_length); +} + static napi_value TestPropertyKeyUtf16(napi_env env, napi_callback_info info) { return TestTwoByteImpl(env, info, @@ -310,6 +344,20 @@ static napi_value TestPropertyKeyUtf16AutoLength(napi_env env, auto_length); } +static napi_value Latin1Length(napi_env env, napi_callback_info info) { + napi_value args[1]; + NODE_API_CALL(env, validate_and_retrieve_single_string_arg(env, info, args)); + + size_t length; + NODE_API_CALL(env, + napi_get_value_string_latin1(env, args[0], NULL, 0, &length)); + + napi_value output; + NODE_API_CALL(env, napi_create_uint32(env, (uint32_t)length, &output)); + + return output; +} + static napi_value Utf16Length(napi_env env, napi_callback_info info) { napi_value args[1]; NODE_API_CALL(env, validate_and_retrieve_single_string_arg(env, info, args)); @@ -420,12 +468,19 @@ napi_value Init(napi_env env, napi_value exports) { DECLARE_NODE_API_PROPERTY("TestUtf16ExternalAutoLength", TestUtf16ExternalAutoLength), DECLARE_NODE_API_PROPERTY("TestUtf16Insufficient", TestUtf16Insufficient), + DECLARE_NODE_API_PROPERTY("Latin1Length", Latin1Length), DECLARE_NODE_API_PROPERTY("Utf16Length", Utf16Length), DECLARE_NODE_API_PROPERTY("Utf8Length", Utf8Length), DECLARE_NODE_API_PROPERTY("TestLargeUtf8", TestLargeUtf8), DECLARE_NODE_API_PROPERTY("TestLargeLatin1", TestLargeLatin1), DECLARE_NODE_API_PROPERTY("TestLargeUtf16", TestLargeUtf16), DECLARE_NODE_API_PROPERTY("TestMemoryCorruption", TestMemoryCorruption), + DECLARE_NODE_API_PROPERTY("TestPropertyKeyLatin1", TestPropertyKeyLatin1), + DECLARE_NODE_API_PROPERTY("TestPropertyKeyLatin1AutoLength", + TestPropertyKeyLatin1AutoLength), + DECLARE_NODE_API_PROPERTY("TestPropertyKeyUtf8", TestPropertyKeyUtf8), + DECLARE_NODE_API_PROPERTY("TestPropertyKeyUtf8AutoLength", + TestPropertyKeyUtf8AutoLength), DECLARE_NODE_API_PROPERTY("TestPropertyKeyUtf16", TestPropertyKeyUtf16), DECLARE_NODE_API_PROPERTY("TestPropertyKeyUtf16AutoLength", 
TestPropertyKeyUtf16AutoLength),