From e82c663564bfe73c705ada1c28bc21da6ff138cc Mon Sep 17 00:00:00 2001
From: Tobias Bachert
Date: Wed, 13 Mar 2024 23:30:50 +0100
Subject: [PATCH] Fix conversion of non-utf8 sequences to `AnyValue` (#1253)

String values which are not valid Unicode sequences SHOULD be converted to AnyValue's bytes_value with the bytes representing the string in the original order and format of the source string.
---
 AttributesConverter.php | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/AttributesConverter.php b/AttributesConverter.php
index 4a349ab..bca663f 100644
--- a/AttributesConverter.php
+++ b/AttributesConverter.php
@@ -46,12 +46,23 @@ public static function convertAnyValue($value): AnyValue
             $result->setDoubleValue($value);
         }
         if (is_string($value)) {
-            $result->setStringValue($value);
+            if (self::isUtf8($value)) {
+                $result->setStringValue($value);
+            } else {
+                $result->setBytesValue($value);
+            }
         }
 
         return $result;
     }
 
+    private static function isUtf8(string $value): bool
+    {
+        return \extension_loaded('mbstring')
+            ? \mb_check_encoding($value, 'UTF-8')
+            : (bool) \preg_match('//u', $value);
+    }
+
     /**
      * Test whether an array is simple (non-KeyValue)
      */