diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters
index 1faff8765d33..72f02de9cc3b 100644
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -1258,6 +1258,9 @@
strings
+
+ strings
+
strings
diff --git a/icu4c/source/common/unicode/utfiter.h b/icu4c/source/common/unicode/utfiter.h
new file mode 100644
index 000000000000..78252ddebc14
--- /dev/null
+++ b/icu4c/source/common/unicode/utfiter.h
@@ -0,0 +1,672 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// utfiter.h
+// created: 2024aug12 Markus W. Scherer
+
+#ifndef __UTF16CPPITER_H__
+#define __UTF16CPPITER_H__
+
+// TODO: For experimentation outside of ICU, comment out this include.
+// Experimentally conditional code below checks for UTYPES_H and
+// otherwise uses copies of bits of ICU.
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
+
+#include
+#ifdef UTYPES_H
+#include "unicode/utf16.h"
+#include "unicode/uversion.h"
+#else
+// TODO: Remove checks for UTYPES_H and replacement definitions.
+// unicode/utypes.h etc.
+#include
+typedef int32_t UChar32;  // signed 32-bit code point type
+constexpr UChar32 U_SENTINEL = -1;  // value reported for ill-formed input with U_BEHAVIOR_NEGATIVE
+// Replacement for unicode/uversion.h: header-only definitions go into namespace "header".
+#define U_HEADER_ONLY_NAMESPACE header
+namespace header {}
+// Replacement for unicode/utf.h: true for any surrogate value 0xd800..0xdfff.
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+// Replacements for unicode/utf16.h. The _SURROGATE_LEAD/_TRAIL forms only test bit 0x400 of c.
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+ (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+#endif
+
+/**
+ * \file
+ * \brief C++ header-only API: C++ iterators over Unicode 16-bit strings (=UTF-16 if well-formed).
+ */
+
+#ifndef U_HIDE_DRAFT_API
+
+// Defined behaviors for ill-formed Unicode strings: which code point value a validating iterator reports.
+// TODO: For 8-bit strings, the SURROGATE option does not have an equivalent -- static_assert.
+typedef enum UIllFormedBehavior {
+ U_BEHAVIOR_NEGATIVE,  // report U_SENTINEL (a negative value)
+ U_BEHAVIOR_FFFD,  // report U+FFFD
+ U_BEHAVIOR_SURROGATE  // report the unpaired surrogate itself
+} UIllFormedBehavior;
+
+namespace U_HEADER_ONLY_NAMESPACE {
+
+/**
+ * Result of validating and decoding a minimal Unicode code unit sequence
+ * (code point, start, length, well-formedness). Returned from validating code point iterators.
+ *
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ * UTF-8: char or char8_t or uint8_t;
+ * UTF-16: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ * should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template
+class CodeUnits {
+ using Unit = typename std::iterator_traits::value_type;  // code unit type (value_type of the unit iterator)
+public:
+ // @internal Stores the decoding result exactly as given; no validation happens here.
+ CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed, UnitIter data) :
+ c(codePoint), len(length), ok(wellFormed), p(data) {}
+
+ CodeUnits(const CodeUnits &other) = default;
+ CodeUnits &operator=(const CodeUnits &other) = default;
+
+ UChar32 codePoint() const { return c; }  // NOTE(review): returns UChar32 although the field is CP32 -- confirm intended
+
+ bool wellFormed() const { return ok; }  // false when the iterator found an ill-formed sequence
+
+ UnitIter data() const { return p; }  // iterator to the first code unit of the sequence
+
+ int32_t length() const { return len; }  // number of code units in the sequence
+
+ template
+ std::enable_if_t<
+ std::is_pointer_v,
+ std::basic_string_view>
+ stringView() const {  // enabled only via the is_pointer_v condition above; views len units starting at p
+ return std::basic_string_view(p, len);
+ }
+
+private:
+ // Order of fields with padding and access frequency in mind.
+ CP32 c;  // decoded code point, or the substitute value for ill-formed input
+ uint8_t len;  // length in code units
+ bool ok;  // well-formedness flag
+ UnitIter p;  // start of the sequence
+};
+
+/**
+ * Result of decoding a minimal Unicode code unit sequence which must be well-formed
+ * (code point, start, length). Returned from non-validating code point iterators.
+ *
+ * @tparam Unit Code unit type:
+ * UTF-8: char or char8_t or uint8_t;
+ * UTF-16: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ * should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template
+class UnsafeCodeUnits {
+public:
+ // @internal Stores the decoding result exactly as given.
+ UnsafeCodeUnits(CP32 codePoint, uint8_t length, const Unit *data) :
+ c(codePoint), len(length), p(data) {}
+
+ UnsafeCodeUnits(const UnsafeCodeUnits &other) = default;
+ UnsafeCodeUnits &operator=(const UnsafeCodeUnits &other) = default;
+
+ UChar32 codePoint() const { return c; }  // NOTE(review): returns UChar32 although the field is CP32 -- confirm intended
+
+ const Unit *data() const { return p; }  // pointer to the first code unit of the sequence
+
+ int32_t length() const { return len; }  // number of code units in the sequence
+
+ std::basic_string_view stringView() const {  // views len units starting at p
+ return std::basic_string_view(p, len);
+ }
+
+ // TODO: std::optional maybeCodePoint() const ? (nullopt if ill-formed) -- NOTE(review): may belong in CodeUnits
+
+private:
+ // Order of fields with padding and access frequency in mind.
+ CP32 c;  // decoded code point
+ uint8_t len;  // length in code units
+ const Unit *p;  // start of the sequence
+};
+
+/**
+ * Internal base class for public U16Iterator & U16ReverseIterator.
+ * Not intended for public subclassing.
+ *
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *     should be signed if U_BEHAVIOR_NEGATIVE
+ * @tparam UIllFormedBehavior Selects what sub() returns for an unpaired surrogate.
+ * @internal
+ */
+template
+class U16IteratorBase {
+protected:
+    // @internal
+    U16IteratorBase(UnitIter start, UnitIter p, UnitIter limit) :
+            start(start), current(p), limit(limit) {}
+    // TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
+    // Test pointers for == or != but not < or >.
+
+    // @internal
+    U16IteratorBase(const U16IteratorBase &other) = default;
+    // @internal
+    U16IteratorBase &operator=(const U16IteratorBase &other) = default;
+
+    // @internal
+    bool operator==(const U16IteratorBase &other) const { return current == other.current; }
+    // @internal
+    bool operator!=(const U16IteratorBase &other) const { return !operator==(other); }
+
+    // @internal Moves current back over one code point without decoding it.
+    void dec() {
+        // TODO: assert current != start -- more precisely: start < current <= limit
+        // Very similar to U16_BACK_1().
+        if (U16_IS_TRAIL(*(--current)) && current != start && U16_IS_LEAD(*(current - 1))) {
+            --current;
+        }
+    }
+
+    // @internal Decodes the code point that starts at p and advances p past it.
+    CodeUnits readAndInc(UnitIter &p) const {
+        // TODO: assert p != limit -- more precisely: start <= p < limit
+        // Very similar to U16_NEXT_OR_FFFD().
+        UnitIter p0 = p;
+        CP32 c = *p++;
+        if (!U16_IS_SURROGATE(c)) {
+            return {c, 1, true, p0};
+        } else {
+            uint16_t c2;
+            if (U16_IS_SURROGATE_LEAD(c) && p != limit && U16_IS_TRAIL(c2 = *p)) {
+                ++p;
+                c = U16_GET_SUPPLEMENTARY(c, c2);
+                return {c, 2, true, p0};
+            } else {
+                return {sub(c), 1, false, p0};
+            }
+        }
+    }
+
+    // @internal Decodes the code point that ends just before p and moves p to its first unit.
+    CodeUnits decAndRead(UnitIter &p) const {
+        // TODO: assert p != start -- more precisely: start < p <= limit
+        // Very similar to U16_PREV_OR_FFFD().
+        CP32 c = *--p;
+        if (!U16_IS_SURROGATE(c)) {
+            return {c, 1, true, p};
+        } else {
+            UnitIter p1;  // look-behind copy: p itself must move only if a lead precedes the trail
+            uint16_t c2;
+            if (U16_IS_SURROGATE_TRAIL(c) && p != start && (p1 = p, U16_IS_LEAD(c2 = *--p1))) {
+                p = p1;  // p1 was stepped back onto the lead surrogate
+                c = U16_GET_SUPPLEMENTARY(c2, c);
+                return {c, 2, true, p};
+            } else {
+                return {sub(c), 1, false, p};
+            }
+        }
+    }
+
+    // @internal Handle ill-formed UTF-16: One unpaired surrogate.
+    CP32 sub(CP32 surrogate) const {
+        switch (behavior) {
+            case U_BEHAVIOR_NEGATIVE: return U_SENTINEL;
+            case U_BEHAVIOR_FFFD: return 0xfffd;
+            case U_BEHAVIOR_SURROGATE: break;
+        }
+        return surrogate;  // explicit return so that control never falls off the end
+    }
+
+    // In a validating iterator, we need start & limit so that when we read a code point
+    // (forward or backward) we can test if there are enough code units.
+    // @internal Not const: const members would make the defaulted copy assignment deleted.
+    UnitIter start;
+    // @internal
+    UnitIter current;
+    // @internal
+    UnitIter limit;
+};
+
+/**
+ * Validating bidirectional iterator over the code points in a Unicode 16-bit string.
+ *
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *     should be signed if U_BEHAVIOR_NEGATIVE
+ * @tparam UIllFormedBehavior Selects the code point reported for an unpaired surrogate.
+ * @draft ICU 77
+ */
+template
+class U16Iterator : private U16IteratorBase {
+    // FYI: We need to qualify all accesses to super class members because of private inheritance.
+    using Super = U16IteratorBase;
+public:
+    // TODO: make private, make friends
+    U16Iterator(UnitIter start, UnitIter p, UnitIter limit) :
+            Super(start, p, limit) {}
+
+    U16Iterator(const U16Iterator &other) = default;
+    U16Iterator &operator=(const U16Iterator &other) = default;
+
+    bool operator==(const U16Iterator &other) const { return Super::operator==(other); }
+    bool operator!=(const U16Iterator &other) const { return !Super::operator==(other); }
+
+    CodeUnits operator*() const {
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        UnitIter p = Super::current;  // decode from a copy so that dereferencing does not move the iterator
+        return Super::readAndInc(p);
+    }
+
+    U16Iterator &operator++() { // pre-increment
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        Super::readAndInc(Super::current);
+        return *this;
+    }
+
+    U16Iterator operator++(int) { // post-increment
+        // Same readAndInc() call as in operator*() and pre-increment, for the same optimization reason.
+        U16Iterator result(*this);
+        Super::readAndInc(Super::current);
+        return result;
+    }
+
+    U16Iterator &operator--() { // pre-decrement
+        Super::dec();  // dec() returns void, so its result cannot be returned directly
+        return *this;
+    }
+
+    U16Iterator operator--(int) { // post-decrement
+        U16Iterator result(*this);
+        Super::dec();
+        return result;
+    }
+};
+
+/**
+ * Validating reverse iterator over the code points in a Unicode 16-bit string.
+ * Not bidirectional, but optimized for reverse iteration.
+ *
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ * UTF-16: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ * should be signed if U_BEHAVIOR_NEGATIVE
+ * @tparam UIllFormedBehavior Selects the code point reported for an unpaired surrogate.
+ * @draft ICU 77
+ */
+template
+class U16ReverseIterator : private U16IteratorBase {
+ using Super = U16IteratorBase;
+public:
+ // TODO: make private, make friends
+ U16ReverseIterator(UnitIter start, UnitIter p, UnitIter limit) :
+ Super(start, p, limit) {}
+
+ U16ReverseIterator(const U16ReverseIterator &other) = default;
+ U16ReverseIterator &operator=(const U16ReverseIterator &other) = default;
+
+ bool operator==(const U16ReverseIterator &other) const { return Super::operator==(other); }
+ bool operator!=(const U16ReverseIterator &other) const { return !Super::operator==(other); }
+
+ CodeUnits operator*() const {
+ // Call the same function in both operator*() and operator++() so that an
+ // optimizing compiler can easily eliminate redundant work when alternating between the two.
+ UnitIter p = Super::current;  // decode from a copy so that dereferencing does not move the iterator
+ return Super::decAndRead(p);  // reads the code point that ends just before current
+ }
+
+ U16ReverseIterator &operator++() { // pre-increment
+ // Call the same function in both operator*() and operator++() so that an
+ // optimizing compiler can easily eliminate redundant work when alternating between the two.
+ Super::decAndRead(Super::current);  // moves current toward start; the decoded result is discarded
+ return *this;
+ }
+
+ U16ReverseIterator operator++(int) { // post-increment
+ // Call the same function in both operator*() and operator++() so that an
+ // optimizing compiler can easily eliminate redundant work when alternating between the two.
+ U16ReverseIterator result(*this);  // copy of the iterator before it moves
+ Super::decAndRead(Super::current);
+ return result;
+ }
+};
+
+/**
+ * A C++ "range" for validating iteration over all of the code points of a 16-bit Unicode string.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ * should be signed if U_BEHAVIOR_NEGATIVE
+ * @tparam UIllFormedBehavior Selects the code point reported for an unpaired surrogate.
+ * @draft ICU 77
+ */
+template
+class U16StringCodePoints {
+public:
+ /**
+ * Constructs a C++ "range" object over the code points in the string.
+ * @draft ICU 77
+ */
+ U16StringCodePoints(std::basic_string_view s) : s(s) {}  // stores the view only
+
+ /** @draft ICU 77 */
+ U16StringCodePoints(const U16StringCodePoints &other) = default;
+
+ /** @draft ICU 77 */
+ U16StringCodePoints &operator=(const U16StringCodePoints &other) = default;
+
+ /** @draft ICU 77 */
+ U16Iterator begin() const {
+ return {s.data(), s.data(), s.data() + s.length()};  // {start, current, limit} with current at the start
+ }
+
+ /** @draft ICU 77 */
+ U16Iterator end() const {
+ const Unit16 *limit = s.data() + s.length();
+ return {s.data(), limit, limit};  // current at the limit
+ }
+
+ /** @draft ICU 77 */
+ U16ReverseIterator rbegin() const {
+ const Unit16 *limit = s.data() + s.length();
+ return {s.data(), limit, limit};  // reverse iteration begins with current at the limit
+ }
+
+ /** @draft ICU 77 */
+ U16ReverseIterator rend() const {
+ return {s.data(), s.data(), s.data() + s.length()};  // reverse iteration ends with current at the start
+ }
+
+private:
+ std::basic_string_view s;  // the string being iterated
+};
+
+// ------------------------------------------------------------------------- ***
+
+/**
+ * Internal base class for public U16UnsafeIterator & U16UnsafeReverseIterator.
+ * Not intended for public subclassing. Performs no bounds or well-formedness checks.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ * should be signed if U_BEHAVIOR_NEGATIVE
+ * @internal
+ */
+template
+class U16UnsafeIteratorBase {
+protected:
+ // @internal
+ U16UnsafeIteratorBase(const Unit16 *p) : current(p) {}
+ // Test pointers for == or != but not < or >.
+
+ // @internal
+ U16UnsafeIteratorBase(const U16UnsafeIteratorBase &other) = default;
+ // @internal
+ U16UnsafeIteratorBase &operator=(const U16UnsafeIteratorBase &other) = default;
+
+ // @internal
+ bool operator==(const U16UnsafeIteratorBase &other) const { return current == other.current; }
+ // @internal
+ bool operator!=(const U16UnsafeIteratorBase &other) const { return !operator==(other); }
+
+ // @internal Moves current back over one code point without decoding it.
+ void dec() {
+ // Very similar to U16_BACK_1_UNSAFE().
+ if (U16_IS_TRAIL(*(--current))) {
+ --current;  // a trail surrogate is taken to be preceded by its lead; no check is made
+ }
+ }
+
+ // @internal Decodes the code point that starts at p and advances p past it.
+ UnsafeCodeUnits readAndInc(const Unit16 *&p) const {
+ // Very similar to U16_NEXT_UNSAFE().
+ const Unit16 *p0 = p;  // remember the start of the sequence for the result
+ CP32 c = *p++;
+ if (!U16_IS_LEAD(c)) {
+ return {c, 1, p0};
+ } else {
+ c = U16_GET_SUPPLEMENTARY(c, *p++);  // the next unit is read as the trail without checking
+ return {c, 2, p0};
+ }
+ }
+
+ // @internal Decodes the code point that ends just before p and moves p to its first unit.
+ UnsafeCodeUnits decAndRead(const Unit16 *&p) const {
+ // Very similar to U16_PREV_UNSAFE().
+ CP32 c = *--p;
+ if (!U16_IS_TRAIL(c)) {
+ return {c, 1, p};
+ } else {
+ c = U16_GET_SUPPLEMENTARY(*--p, c);  // the preceding unit is read as the lead without checking
+ return {c, 2, p};
+ }
+ }
+
+ // @internal Current position in the string.
+ const Unit16 *current;
+};
+
+/**
+ * Non-validating bidirectional iterator over the code points in a UTF-16 string.
+ * The string must be well-formed.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *     should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template
+class U16UnsafeIterator : private U16UnsafeIteratorBase {
+    // FYI: We need to qualify all accesses to super class members because of private inheritance.
+    using Super = U16UnsafeIteratorBase;
+public:
+    // TODO: make private, make friends
+    U16UnsafeIterator(const Unit16 *p) : Super(p) {}
+
+    U16UnsafeIterator(const U16UnsafeIterator &other) = default;
+    U16UnsafeIterator &operator=(const U16UnsafeIterator &other) = default;
+
+    bool operator==(const U16UnsafeIterator &other) const { return Super::operator==(other); }
+    bool operator!=(const U16UnsafeIterator &other) const { return !Super::operator==(other); }
+
+    UnsafeCodeUnits operator*() const {
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        const Unit16 *p = Super::current;  // decode from a copy so that dereferencing does not move the iterator
+        return Super::readAndInc(p);
+    }
+
+    U16UnsafeIterator &operator++() { // pre-increment
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        Super::readAndInc(Super::current);
+        return *this;
+    }
+
+    U16UnsafeIterator operator++(int) { // post-increment
+        // Same readAndInc() call as in operator*() and pre-increment, for the same optimization reason.
+        U16UnsafeIterator result(*this);
+        Super::readAndInc(Super::current);
+        return result;
+    }
+
+    U16UnsafeIterator &operator--() { // pre-decrement
+        Super::dec();  // dec() returns void, so its result cannot be returned directly
+        return *this;
+    }
+
+    U16UnsafeIterator operator--(int) { // post-decrement
+        U16UnsafeIterator result(*this);
+        Super::dec();
+        return result;
+    }
+};
+
+/**
+ * Non-validating reverse iterator over the code points in a UTF-16 string.
+ * Not bidirectional, but optimized for reverse iteration.
+ * The string must be well-formed.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ * should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template
+class U16UnsafeReverseIterator : private U16UnsafeIteratorBase {
+ using Super = U16UnsafeIteratorBase;
+public:
+ // TODO: make private, make friends
+ U16UnsafeReverseIterator(const Unit16 *p) : Super(p) {}
+
+ U16UnsafeReverseIterator(const U16UnsafeReverseIterator &other) = default;
+ U16UnsafeReverseIterator &operator=(const U16UnsafeReverseIterator &other) = default;
+
+ bool operator==(const U16UnsafeReverseIterator &other) const { return Super::operator==(other); }
+ bool operator!=(const U16UnsafeReverseIterator &other) const { return !Super::operator==(other); }
+
+ UnsafeCodeUnits operator*() const {
+ // Call the same function in both operator*() and operator++() so that an
+ // optimizing compiler can easily eliminate redundant work when alternating between the two.
+ const Unit16 *p = Super::current;  // decode from a copy so that dereferencing does not move the iterator
+ return Super::decAndRead(p);  // reads the code point that ends just before current
+ }
+
+ U16UnsafeReverseIterator &operator++() { // pre-increment
+ // Call the same function in both operator*() and operator++() so that an
+ // optimizing compiler can easily eliminate redundant work when alternating between the two.
+ Super::decAndRead(Super::current);  // moves current toward the start; the decoded result is discarded
+ return *this;
+ }
+
+ U16UnsafeReverseIterator operator++(int) { // post-increment
+ // Call the same function in both operator*() and operator++() so that an
+ // optimizing compiler can easily eliminate redundant work when alternating between the two.
+ U16UnsafeReverseIterator result(*this);  // copy of the iterator before it moves
+ Super::decAndRead(Super::current);
+ return result;
+ }
+};
+
+/**
+ * A C++ "range" for non-validating iteration over all of the code points of a UTF-16 string.
+ * The string must be well-formed.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ * should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template
+class U16UnsafeStringCodePoints {
+public:
+ /**
+ * Constructs a C++ "range" object over the code points in the string.
+ * @draft ICU 77
+ */
+ U16UnsafeStringCodePoints(std::basic_string_view s) : s(s) {}  // stores the view only
+
+ /** @draft ICU 77 */
+ U16UnsafeStringCodePoints(const U16UnsafeStringCodePoints &other) = default;
+ U16UnsafeStringCodePoints &operator=(const U16UnsafeStringCodePoints &other) = default;
+
+ /** @draft ICU 77 */
+ U16UnsafeIterator begin() const {
+ return {s.data()};  // positioned at the first code unit
+ }
+
+ /** @draft ICU 77 */
+ U16UnsafeIterator end() const {
+ return {s.data() + s.length()};  // positioned just past the last code unit
+ }
+
+ /** @draft ICU 77 */
+ U16UnsafeReverseIterator rbegin() const {
+ return {s.data() + s.length()};  // reverse iteration begins just past the last code unit
+ }
+
+ /** @draft ICU 77 */
+ U16UnsafeReverseIterator rend() const {
+ return {s.data()};  // reverse iteration ends at the first code unit
+ }
+
+private:
+ std::basic_string_view s;  // the string being iterated
+};
+
+// ------------------------------------------------------------------------- ***
+
+// TODO: UTF-8
+
+// TODO: remove experimental sample code
+#ifndef UTYPES_H
+int32_t rangeLoop(std::u16string_view s) {  // sample: sums the code points of s with a range-based for loop
+ header::U16StringCodePoints range(s);
+ int32_t sum = 0;
+ for (auto units : range) {  // uses begin()/end(), operator!=, operator* and pre-increment
+ sum += units.codePoint();
+ }
+ return sum;
+}
+
+int32_t loopIterPlusPlus(std::u16string_view s) {  // sample: same sum, but with an explicit loop
+ header::U16StringCodePoints range(s);
+ int32_t sum = 0;
+ auto iter = range.begin();
+ auto limit = range.end();  // end iterator computed once, outside the loop
+ while (iter != limit) {
+ sum += (*iter++).codePoint();  // exercises post-increment followed by dereference of the copy
+ }
+ return sum;
+}
+
+int32_t reverseLoop(std::u16string_view s) {  // sample: sums the code points of s from the end to the start
+ header::U16StringCodePoints range(s);
+ int32_t sum = 0;
+ for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {  // rend() is re-evaluated on every pass
+ sum += (*iter).codePoint();
+ }
+ return sum;
+}
+
+int32_t unsafeRangeLoop(std::u16string_view s) {  // sample: non-validating sum; s must be well-formed UTF-16
+ header::U16UnsafeStringCodePoints range(s);
+ int32_t sum = 0;
+ for (auto units : range) {  // uses begin()/end(), operator!=, operator* and pre-increment
+ sum += units.codePoint();
+ }
+ return sum;
+}
+
+int32_t unsafeReverseLoop(std::u16string_view s) {  // sample: non-validating reverse sum; s must be well-formed
+ header::U16UnsafeStringCodePoints range(s);
+ int32_t sum = 0;
+ for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {  // rend() is re-evaluated on every pass
+ sum += (*iter).codePoint();
+ }
+ return sum;
+}
+#endif
+
+} // namespace U_HEADER_ONLY_NAMESPACE
+
+#endif // U_HIDE_DRAFT_API
+#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+#endif // __UTF16CPPITER_H__
diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index 81ad55578072..8a12daa2f5de 100644
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@@ -75,7 +75,7 @@ numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
static_unisets_test.o numfmtdatadriventest.o numbertest_range.o erarulestest.o \
formattedvaluetest.o formatted_string_builder_test.o numbertest_permutation.o \
units_data_test.o units_router_test.o units_test.o displayoptions_test.o \
-numbertest_simple.o uchar_type_build_test.o usetheaderonlytest.o
+numbertest_simple.o uchar_type_build_test.o usetheaderonlytest.o utfitertest.o
DEPS = $(OBJECTS:.o=.d)
diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj
index b58b29b3d4e7..476b4b3b5934 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj
+++ b/icu4c/source/test/intltest/intltest.vcxproj
@@ -223,6 +223,7 @@
+
diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters
index d5c23d5e4cb5..7fc0c646647a 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj.filters
+++ b/icu4c/source/test/intltest/intltest.vcxproj.filters
@@ -490,6 +490,9 @@
strings
+
+ strings
+
strings
diff --git a/icu4c/source/test/intltest/itutil.cpp b/icu4c/source/test/intltest/itutil.cpp
index 4585792126d6..b9df5935414c 100644
--- a/icu4c/source/test/intltest/itutil.cpp
+++ b/icu4c/source/test/intltest/itutil.cpp
@@ -48,6 +48,7 @@ extern IntlTest *createPluralMapTest();
extern IntlTest *createStaticUnicodeSetsTest();
#endif
static IntlTest *createUHashTest();
+extern IntlTest *createU16IteratorTest();
void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
{
@@ -84,6 +85,7 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &
TESTCASE_AUTO_CREATE_CLASS(LocaleMatcherTest);
TESTCASE_AUTO_CREATE_CLASS(UHashTest);
TESTCASE_AUTO_CREATE_CLASS(USetHeaderOnlyTest);
+ TESTCASE_AUTO_CREATE_CLASS(U16IteratorTest);
TESTCASE_AUTO_END;
}
diff --git a/icu4c/source/test/intltest/utfitertest.cpp b/icu4c/source/test/intltest/utfitertest.cpp
new file mode 100644
index 000000000000..a8bda260bc4d
--- /dev/null
+++ b/icu4c/source/test/intltest/utfitertest.cpp
@@ -0,0 +1,209 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// utfitertest.cpp
+// created: 2024aug12 Markus W. Scherer
+
+#include
+
+// Test header-only ICU C++ APIs. Do not use other ICU C++ APIs.
+// Non-default configuration:
+#define U_SHOW_CPLUSPLUS_API 0
+// Default configuration:
+// #define U_SHOW_CPLUSPLUS_HEADER_API 1
+
+#include "unicode/utypes.h"
+#include "unicode/utfiter.h"
+#include "intltest.h"
+
+// Makes u"literal"sv std::u16string_view literals possible.
+// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
+using namespace std::string_view_literals;
+
+using U_HEADER_ONLY_NAMESPACE::U16Iterator;
+using U_HEADER_ONLY_NAMESPACE::U16StringCodePoints;
+
+template
+class FwdIter {  // test helper: a non-pointer code unit iterator wrapping a const Unit *; offers only ==, !=, * and ++
+public:
+ typedef Unit value_type;  // read through std::iterator_traits by CodeUnits
+
+ FwdIter(const Unit *data) : p(data) {}
+
+ bool operator==(const FwdIter &other) const { return p == other.p; }
+ bool operator!=(const FwdIter &other) const { return !operator==(other); }
+
+ Unit operator*() const { return *p; }  // returns the code unit by value
+ FwdIter &operator++() { // pre-increment
+ ++p;
+ return *this;
+ }
+ FwdIter operator++(int) { // post-increment
+ FwdIter result(*this);  // copy of the iterator before it moves
+ ++p;
+ return result;
+ }
+
+private:
+ const Unit *p;  // current position in the wrapped array
+};
+
+class U16IteratorTest : public IntlTest {  // intltest suite for the code point iterators in unicode/utfiter.h
+public:
+ U16IteratorTest() {}
+
+ void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=nullptr) override;
+
+ void testGood();  // well-formed string
+ void testNegative();  // unpaired surrogates reported as -1
+ void testFFFD();  // unpaired surrogates reported as 0xfffd
+ void testSurrogate();  // unpaired surrogates reported as themselves
+ void testFwdIter();  // iteration via the non-pointer FwdIter
+};
+
+extern IntlTest *createU16IteratorTest() {  // factory declared extern in itutil.cpp
+ return new U16IteratorTest();  // returns a newly allocated test object
+}
+
+void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
+ if(exec) {  // log the suite header only when tests are actually executed
+ logln("TestSuite U16IteratorTest: ");
+ }
+ TESTCASE_AUTO_BEGIN;  // one TESTCASE_AUTO entry per test method
+ TESTCASE_AUTO(testGood);
+ TESTCASE_AUTO(testNegative);
+ TESTCASE_AUTO(testFFFD);
+ TESTCASE_AUTO(testSurrogate);
+ TESTCASE_AUTO(testFwdIter);
+ TESTCASE_AUTO_END;
+}
+
+void U16IteratorTest::testGood() {  // forward iteration over a well-formed string, mixing pre- and post-increment
+ IcuTestErrorCode errorCode(*this, "testGood");
+ std::u16string_view good(u"abçカ🚴"sv);  // four BMP characters followed by one supplementary code point
+ U16StringCodePoints range(good);
+ auto iter = range.begin();
+ assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
+ ++iter; // pre-increment
+ auto units = *iter;
+ assertEquals("iter[1] * codePoint", u'b', units.codePoint());
+ assertEquals("iter[1] * length", 1, units.length());
+ assertTrue("iter[1] * wellFormed", units.wellFormed());
+ assertTrue("iter[1] * stringView()", units.stringView() == u"b"sv);
+ ++iter;
+ assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint()); // post-increment
+ assertEquals("iter[3] * codePoint", u'カ', (*iter).codePoint());
+ ++iter;
+ units = *iter++;  // the surrogate pair is read as a single code point
+ assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
+ assertEquals("iter[4] * length", 2, units.length());
+ assertTrue("iter[4] * wellFormed", units.wellFormed());
+ assertTrue("iter[4] * stringView()", units.stringView() == u"🚴"sv);
+ assertTrue("iter == endIter", iter == range.end());
+}
+
+void U16IteratorTest::testNegative() {  // unpaired surrogates must be reported as -1 and flagged as ill-formed
+ IcuTestErrorCode errorCode(*this, "testNegative");
+ static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };  // unpaired lead, then unpaired trail
+ std::u16string_view bad(badChars, 5);
+ U16StringCodePoints range(bad);  // NOTE(review): presumably instantiated with U_BEHAVIOR_NEGATIVE -- template arguments not visible here
+ auto iter = range.begin();
+ assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
+ ++iter; // pre-increment
+ auto units = *iter;
+ assertEquals("iter[1] * codePoint", -1, units.codePoint());
+ assertEquals("iter[1] * length", 1, units.length());
+ assertFalse("iter[1] * wellFormed", units.wellFormed());
+ auto sv = units.stringView();  // the original code unit stays reachable through the view
+ assertEquals("iter[1] * stringView().length()", 1, sv.length());
+ assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
+ // TODO: test units.data()
+ ++iter;
+ assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint()); // post-increment
+ units = *iter++; // post-increment
+ assertEquals("iter[3] * codePoint", -1, units.codePoint());
+ assertFalse("iter[3] * wellFormed", units.wellFormed());
+ assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView()); // post-increment
+ assertTrue("iter == endIter", iter == range.end());
+}
+
+void U16IteratorTest::testFFFD() {  // unpaired surrogates must be reported as 0xfffd and flagged as ill-formed
+ IcuTestErrorCode errorCode(*this, "testFFFD");
+ static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };  // unpaired lead, then unpaired trail
+ std::u16string_view bad(badChars, 5);
+ U16StringCodePoints range(bad);  // NOTE(review): presumably instantiated with U_BEHAVIOR_FFFD -- template arguments not visible here
+ auto iter = range.begin();
+ assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
+ ++iter; // pre-increment
+ auto units = *iter;
+ assertEquals("iter[1] * codePoint", 0xfffd, units.codePoint());
+ assertEquals("iter[1] * length", 1, units.length());
+ assertFalse("iter[1] * wellFormed", units.wellFormed());
+ auto sv = units.stringView();  // the original code unit stays reachable through the view
+ assertEquals("iter[1] * stringView().length()", 1, sv.length());
+ assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
+ ++iter;
+ assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint()); // post-increment
+ units = *iter++; // post-increment
+ assertEquals("iter[3] * codePoint", 0xfffd, units.codePoint());
+ assertFalse("iter[3] * wellFormed", units.wellFormed());
+ assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView()); // post-increment
+ assertTrue("iter == endIter", iter == range.end());
+}
+
+void U16IteratorTest::testSurrogate() {  // unpaired surrogates must be reported as themselves and flagged as ill-formed
+ IcuTestErrorCode errorCode(*this, "testSurrogate");
+ static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };  // unpaired lead, then unpaired trail
+ std::u16string_view bad(badChars, 5);
+ U16StringCodePoints range(bad);  // NOTE(review): presumably instantiated with U_BEHAVIOR_SURROGATE -- template arguments not visible here
+ auto iter = range.begin();
+ assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
+ ++iter; // pre-increment
+ auto units = *iter;
+ assertEquals("iter[1] * codePoint", 0xd900, units.codePoint());
+ assertEquals("iter[1] * length", 1, units.length());
+ assertFalse("iter[1] * wellFormed", units.wellFormed());
+ auto sv = units.stringView();  // the original code unit stays reachable through the view
+ assertEquals("iter[1] * stringView().length()", 1, sv.length());
+ assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
+ ++iter;
+ assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint()); // post-increment
+ units = *iter++; // post-increment
+ assertEquals("iter[3] * codePoint", 0xdc05, units.codePoint());
+ assertFalse("iter[3] * wellFormed", units.wellFormed());
+ assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView()); // post-increment
+ assertTrue("iter == endIter", iter == range.end());
+}
+
+void U16IteratorTest::testFwdIter() {  // same checks as testGood, but through the non-pointer FwdIter
+ IcuTestErrorCode errorCode(*this, "testFwdIter");
+ std::u16string_view good(u"abçカ🚴"sv);
+ FwdIter goodBegin(good.data());
+ FwdIter goodLimit(good.data() + good.length());
+ U16Iterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeBegin(
+ goodBegin, goodBegin, goodLimit);  // {start, current, limit} with current at the start
+ U16Iterator, UChar32, U_BEHAVIOR_NEGATIVE> rangeLimit(
+ goodBegin, goodLimit, goodLimit);  // end iterator: current at the limit
+ // TODO: U16StringCodePoints range(good);
+ auto iter = rangeBegin;
+ assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
+ ++iter; // pre-increment
+ auto units = *iter;
+ assertEquals("iter[1] * codePoint", u'b', units.codePoint());
+ assertEquals("iter[1] * length", 1, units.length());
+ assertTrue("iter[1] * wellFormed", units.wellFormed());
+ // No units.stringView() when the unit iterator is not a pointer.
+ assertTrue("iter[1] * data()[0]", *units.data() == u'b');
+ ++iter;
+ assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint()); // post-increment
+ assertEquals("iter[3] * codePoint", u'カ', (*iter).codePoint());
+ ++iter;
+ units = *iter++;  // the surrogate pair is read as a single code point
+ assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
+ assertEquals("iter[4] * length", 2, units.length());
+ assertTrue("iter[4] * wellFormed", units.wellFormed());
+ FwdIter data = units.data();  // walk the two code units of the pair through the unit iterator
+ assertTrue("iter[4] * data()[0]", *data++ == u"🚴"[0]);
+ assertTrue("iter[4] * data()[1]", *data == u"🚴"[1]);
+ assertTrue("iter == endIter", iter == rangeLimit);
+}