From 74e9b6f7ca25bfad5bf645d1e8cb841bea2fedb5 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 12 Aug 2024 14:59:58 -0700
Subject: [PATCH 01/23] U16Iterator experiment

---
 icu4c/source/common/unicode/utf16cppiter.h    | 144 ++++++++++++++++++
 icu4c/source/test/intltest/Makefile.in        |   2 +-
 icu4c/source/test/intltest/itutil.cpp         |   2 +
 icu4c/source/test/intltest/utfcppitertest.cpp |  59 +++++++
 4 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100644 icu4c/source/common/unicode/utf16cppiter.h
 create mode 100644 icu4c/source/test/intltest/utfcppitertest.cpp
diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
new file mode 100644
index 000000000000..5fb0b87dae06
--- /dev/null
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -0,0 +1,144 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// utf16cppiter.h
+// created: 2024aug12 Markus W. Scherer
+
+#ifndef __UTF16CPPITER_H__
+#define __UTF16CPPITER_H__
+
+#include <string_view>
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/utf16.h"
+#include "unicode/uversion.h"
+
+/**
+ * \file
+ * \brief C++ API: C++ iterators over Unicode 16-bit strings (=UTF-16 if well-formed).
+ */
+
+namespace U_HEADER_ONLY_NAMESPACE {
+
+// Some defined behaviors for handling ill-formed 16-bit strings.
+// TODO: Maybe share with 8-bit strings, but the SURROGATE option does not have an equivalent there.
+//
+// TODO: A possible alternative to an enum might be some kind of function template
+// which would be fully customizable.
+// The operator*() return value might then want to be a template parameter as well.
+// For example, for a well-formed sequence, the return value could be
+// a tuple of (code point, well-formed), or a string view, or...
+// (And then the caller could choose between UChar32 and char32_t.)
+// However, all of that would make the API more complex and daunting.
+enum U16IllFormedBehavior {
+    U16_BEHAVIOR_NEGATIVE,
+    U16_BEHAVIOR_FFFD,
+    U16_BEHAVIOR_SURROGATE
+};
+
+// Validating iterator over the code points in a Unicode 16-bit string.
+// TODO: all @draft ICU 76
+template<typename Unit16, U16IllFormedBehavior behavior>
+class U16Iterator {
+public:
+    // TODO: make private, make friends
+    U16Iterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
+            start(start), p(p), limit(limit) {
+        if (p != limit) {
+            readOneForward();
+        }
+    }
+    // TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
+    // Test pointers for == or != but not < or >.
+
+    U16Iterator(const U16Iterator &other) = default;
+    U16Iterator(U16Iterator &&other) noexcept = default;
+
+    bool operator==(const U16Iterator &other) const { return p == other.p; }
+    bool operator!=(const U16Iterator &other) const { return !operator==(other); }
+
+    UChar32 operator*() const {
+        return c;
+    }
+
+    // TODO: good function names?
+    // It would be nice to avoid a prefix like "current", "one", "cp",
+    // but just length() on the iterator could be confusing.
+    int32_t currentLength() const { return len; }
+
+    std::basic_string_view<Unit16> currentView() const {
+        return std::basic_string_view<Unit16>(p, len);
+    }
+
+    bool currentIsWellFormed() const { return ok; }
+
+    U16Iterator &operator++() {  // pre-increment
+        // TODO: think about switching directions etc.
+        // Assume that readOneForward() was called and set `len`.
+        // Skip the current code point, then read the next one.
+        p += len;
+        if (p != limit) {
+            readOneForward();
+        }
+        return *this;
+    }
+
+    U16Iterator operator++(int) {  // post-increment
+        U16Iterator result(*this);
+        // TODO: think about switching directions etc.
+        // Assume that readOneForward() was called and set `len`.
+        // Skip the current code point, then read the next one.
+        p += len;
+        if (p != limit) {
+            readOneForward();
+        }
+        return result;
+    }
+
+private:
+    void readOneForward() {
+        // see U16_NEXT_OR_FFFD()
+        c = *p;
+        len = 1;
+        ok = true;
+        if (U16_IS_SURROGATE(c)) {
+            uint16_t c2;
+            if (U16_IS_SURROGATE_LEAD(c) && (p + 1) != limit && U16_IS_TRAIL(c2 = p[1])) {
+                c = U16_GET_SUPPLEMENTARY(c, c2);
+                len = 2;
+            } else {
+                // TODO: U16IllFormedBehavior
+                c = 0xfffd;
+                ok = false;
+            }
+        }
+    }
+
+    // In a validating iterator, we need start & limit so that when we read a code point
+    // (forward or backward) we can test if there are enough code units.
+    const Unit16 *start;
+    const Unit16 *p;
+    const Unit16 *limit;
+    UChar32 c = 0;
+    int8_t len = 0;
+    bool ok = false;
+};
+
+// ------------------------------------------------------------------------- ***
+
+// TODO: Non-validating iterator over the code points in a Unicode 16-bit string.
+// Assumes well-formed UTF-16. Otherwise the behavior is undefined.
+// TODO: all @draft ICU 76
+// template<typename Unit16>
+// class U16UnsafeIterator
+// TODO: only p, no start, no limit
+// TODO: can/should we read the code point only in operator*()?
+// if we read it in the constructor, then we would still need start/limit...
+
+}  // namespace U_HEADER_ONLY_NAMESPACE
+
+#endif  // U_SHOW_CPLUSPLUS_API
+#endif  // __UTF16CPPITER_H__
diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index 81ad55578072..64f36bd061f8 100644
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@@ -75,7 +75,7 @@ numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
 static_unisets_test.o numfmtdatadriventest.o numbertest_range.o erarulestest.o \
 formattedvaluetest.o formatted_string_builder_test.o numbertest_permutation.o \
 units_data_test.o units_router_test.o units_test.o displayoptions_test.o \
-numbertest_simple.o uchar_type_build_test.o usetheaderonlytest.o
+numbertest_simple.o uchar_type_build_test.o usetheaderonlytest.o utfcppitertest.o
 
 DEPS = $(OBJECTS:.o=.d)
 
diff --git a/icu4c/source/test/intltest/itutil.cpp b/icu4c/source/test/intltest/itutil.cpp
index 4585792126d6..b9df5935414c 100644
--- a/icu4c/source/test/intltest/itutil.cpp
+++ b/icu4c/source/test/intltest/itutil.cpp
@@ -48,6 +48,7 @@ extern IntlTest *createPluralMapTest();
 extern IntlTest *createStaticUnicodeSetsTest();
 #endif
 static IntlTest *createUHashTest();
+extern IntlTest *createU16IteratorTest();
 
 void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
 {
@@ -84,6 +85,7 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &
     TESTCASE_AUTO_CREATE_CLASS(LocaleMatcherTest);
     TESTCASE_AUTO_CREATE_CLASS(UHashTest);
     TESTCASE_AUTO_CREATE_CLASS(USetHeaderOnlyTest);
+    TESTCASE_AUTO_CREATE_CLASS(U16IteratorTest);
     TESTCASE_AUTO_END;
 }
 
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
new file mode 100644
index 000000000000..c0a914b579c1
--- /dev/null
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -0,0 +1,59 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// utfcppitertest.cpp
+// created: 2024aug12 Markus W. Scherer
+
+#include <string_view>
+
+#include "unicode/utypes.h"
+#include "unicode/utf16cppiter.h"
+#include "intltest.h"
+
+// Makes u"literal"sv std::u16string_view literals possible.
+// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
+using namespace std::string_view_literals;
+
+using U_HEADER_ONLY_NAMESPACE::U16Iterator;
+using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_NEGATIVE;
+
+class U16IteratorTest : public IntlTest {
+public:
+    U16IteratorTest() {}
+
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=nullptr) override;
+
+    void testExperiment();
+};
+
+extern IntlTest *createU16IteratorTest() {
+    return new U16IteratorTest();
+}
+
+void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
+    if(exec) {
+        logln("TestSuite U16IteratorTest: ");
+    }
+    TESTCASE_AUTO_BEGIN;
+    TESTCASE_AUTO(testExperiment);
+    TESTCASE_AUTO_END;
+}
+
+void U16IteratorTest::testExperiment() {
+    IcuTestErrorCode errorCode(*this, "testExperiment");
+    std::u16string_view good(u"abçカ🚴"sv);
+    const char16_t *goodLimit = good.data() + good.length();
+    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> goodIter(good.data(), good.data(), goodLimit);
+    assertEquals("goodIter[0] *", u'a', *goodIter);
+    ++goodIter;  // pre-increment
+    assertEquals("goodIter[1] *", u'b', *goodIter);
+    ++goodIter;
+    assertEquals("goodIter[2] *", u'ç', *goodIter++);  // post-increment
+    assertEquals("goodIter[3] *", u'カ', *goodIter);
+    ++goodIter;
+    assertEquals("goodIter[4] *", U'🚴', *goodIter++);
+    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> goodEndIter(good.data(), goodLimit, goodLimit);
+    assertTrue("goodIter == goodEndIter", goodIter == goodEndIter);
+
+    // TODO: test ill-formed, and much more...
+}

From 6568b04c7017473667dfb8d8b5de7bf07499d073 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 12 Aug 2024 19:17:23 -0700
Subject: [PATCH 02/23] U16Iterator op*() returns U16OneSeq

---
 icu4c/source/common/common.vcxproj.filters    |   3 +
 icu4c/source/common/unicode/utf16cppiter.h    | 136 +++++++++++-------
 icu4c/source/test/intltest/intltest.vcxproj   |   1 +
 .../test/intltest/intltest.vcxproj.filters    |   3 +
 icu4c/source/test/intltest/utfcppitertest.cpp |  16 ++-
 5 files changed, 100 insertions(+), 59 deletions(-)

diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters
index 1faff8765d33..72f02de9cc3b 100644
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -1258,6 +1258,9 @@
     <CustomBuild Include="unicode\utf16.h">
       <Filter>strings</Filter>
     </CustomBuild>
+    <CustomBuild Include="unicode\utf16cppiter.h">
+      <Filter>strings</Filter>
+    </CustomBuild>
     <CustomBuild Include="unicode\utf32.h">
       <Filter>strings</Filter>
     </CustomBuild>
diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 5fb0b87dae06..582ce1d1b6de 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -39,92 +39,122 @@ enum U16IllFormedBehavior {
     U16_BEHAVIOR_SURROGATE
 };
 
-// Validating iterator over the code points in a Unicode 16-bit string.
-// TODO: all @draft ICU 76
+/**
+ * A code unit sequence for one code point returned by U16Iterator.
+ *
+ * TODO: check doxygen syntax for template parameters
+ * @param Unit16 char16_t or uint16_t or (on Windows) wchar_t
+ * @draft ICU 76
+ */
+template<typename Unit16>
+class U16OneSeq {
+public:
+    U16OneSeq(const U16OneSeq &other) = default;
+
+    const Unit16 *data() { return p; }
+    int32_t length() const { return len; }
+
+    std::basic_string_view<Unit16> stringView() const {
+        return std::basic_string_view<Unit16>(p, len);
+    }
+
+    bool isWellFormed() const { return ok; }
+
+    UChar32 codePoint() const { return c; }
+
+    // TODO: std::optional<UChar32> maybeCodePoint() const ? (nullopt if !ok)
+
+private:
+    // TODO: Why can't we just use Unit16 here?
+    // error: declaration of 'Unit16' shadows template parameter
+    template<typename SomeOtherUnit16, U16IllFormedBehavior behavior>
+    friend class U16Iterator;
+
+    U16OneSeq(const Unit16 *p) : p(p) {}
+
+    void fwd1() { p += len; }
+
+    void readOneForward(const Unit16 *limit) {
+        if (p == limit) {
+            len = 0;
+            return;
+        }
+        // see U16_NEXT_OR_FFFD()
+        c = *p;
+        len = 1;
+        ok = true;
+        if (U16_IS_SURROGATE(c)) {
+            uint16_t c2;
+            if (U16_IS_SURROGATE_LEAD(c) && (p + 1) != limit && U16_IS_TRAIL(c2 = p[1])) {
+                c = U16_GET_SUPPLEMENTARY(c, c2);
+                len = 2;
+            } else {
+                // TODO: U16IllFormedBehavior
+                c = 0xfffd;
+                ok = false;
+            }
+        }
+    }
+
+    const Unit16 *p;
+    UChar32 c = 0;
+    int8_t len = 0;
+    bool ok = false;
+};
+
+/**
+ * Validating iterator over the code points in a Unicode 16-bit string.
+ *
+ * TODO: check doxygen syntax for template parameters
+ * @param Unit16 char16_t or uint16_t or (on Windows) wchar_t
+ * @param U16IllFormedBehavior TODO
+ * @draft ICU 76
+ */
 template<typename Unit16, U16IllFormedBehavior behavior>
 class U16Iterator {
 public:
     // TODO: make private, make friends
     U16Iterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
-            start(start), p(p), limit(limit) {
-        if (p != limit) {
-            readOneForward();
-        }
+            start(start), limit(limit), seq(p) {
+        seq.readOneForward(limit);
     }
     // TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
     // Test pointers for == or != but not < or >.
 
     U16Iterator(const U16Iterator &other) = default;
-    U16Iterator(U16Iterator &&other) noexcept = default;
 
-    bool operator==(const U16Iterator &other) const { return p == other.p; }
+    bool operator==(const U16Iterator &other) const { return seq.p == other.seq.p; }
     bool operator!=(const U16Iterator &other) const { return !operator==(other); }
 
-    UChar32 operator*() const {
-        return c;
-    }
-
-    // TODO: good function names?
-    // It would be nice to avoid a prefix like "current", "one", "cp",
-    // but just length() on the iterator could be confusing.
-    int32_t currentLength() const { return len; }
-
-    std::basic_string_view<Unit16> currentView() const {
-        return std::basic_string_view<Unit16>(p, len);
+    const U16OneSeq<Unit16> &operator*() const {
+        return seq;
     }
 
-    bool currentIsWellFormed() const { return ok; }
-
     U16Iterator &operator++() {  // pre-increment
         // TODO: think about switching directions etc.
-        // Assume that readOneForward() was called and set `len`.
+        // Assume that readOneForward() was called and set seq.len.
         // Skip the current code point, then read the next one.
-        p += len;
-        if (p != limit) {
-            readOneForward();
-        }
+        seq.fwd1();
+        seq.readOneForward(limit);
         return *this;
     }
 
     U16Iterator operator++(int) {  // post-increment
         U16Iterator result(*this);
         // TODO: think about switching directions etc.
-        // Assume that readOneForward() was called and set `len`.
+        // Assume that readOneForward() was called and set seq.len.
         // Skip the current code point, then read the next one.
-        p += len;
-        if (p != limit) {
-            readOneForward();
-        }
+        seq.fwd1();
+        seq.readOneForward(limit);
         return result;
     }
 
 private:
-    void readOneForward() {
-        // see U16_NEXT_OR_FFFD()
-        c = *p;
-        len = 1;
-        ok = true;
-        if (U16_IS_SURROGATE(c)) {
-            uint16_t c2;
-            if (U16_IS_SURROGATE_LEAD(c) && (p + 1) != limit && U16_IS_TRAIL(c2 = p[1])) {
-                c = U16_GET_SUPPLEMENTARY(c, c2);
-                len = 2;
-            } else {
-                // TODO: U16IllFormedBehavior
-                c = 0xfffd;
-                ok = false;
-            }
-        }
-    }
-
     // In a validating iterator, we need start & limit so that when we read a code point
     // (forward or backward) we can test if there are enough code units.
     const Unit16 *start;
-    const Unit16 *p;
     const Unit16 *limit;
-    UChar32 c = 0;
-    int8_t len = 0;
-    bool ok = false;
+    U16OneSeq<Unit16> seq;
 };
 
 // ------------------------------------------------------------------------- ***
diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj
index b58b29b3d4e7..8d9bba021508 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj
+++ b/icu4c/source/test/intltest/intltest.vcxproj
@@ -223,6 +223,7 @@
     <ClCompile Include="sfwdchit.cpp" />
     <ClCompile Include="strcase.cpp" />
     <ClCompile Include="ustrtest.cpp" />
+    <ClCompile Include="utfcppitertest.cpp" />
     <ClCompile Include="utxttest.cpp" />
     <ClCompile Include="cpdtrtst.cpp" />
     <ClCompile Include="ittrans.cpp" />
diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters
index d5c23d5e4cb5..0abc4608d1a6 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj.filters
+++ b/icu4c/source/test/intltest/intltest.vcxproj.filters
@@ -490,6 +490,9 @@
     <ClCompile Include="ustrtest.cpp">
       <Filter>strings</Filter>
     </ClCompile>
+    <ClCompile Include="utfcppitertest.cpp">
+      <Filter>strings</Filter>
+    </ClCompile>
     <ClCompile Include="utxttest.cpp">
       <Filter>strings</Filter>
     </ClCompile>
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index c0a914b579c1..f71f23327386 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -14,8 +14,9 @@
 // https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
 using namespace std::string_view_literals;
 
-using U_HEADER_ONLY_NAMESPACE::U16Iterator;
 using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_NEGATIVE;
+using U_HEADER_ONLY_NAMESPACE::U16Iterator;
+using U_HEADER_ONLY_NAMESPACE::U16OneSeq;
 
 class U16IteratorTest : public IntlTest {
 public:
@@ -44,14 +45,17 @@ void U16IteratorTest::testExperiment() {
     std::u16string_view good(u"abçカ🚴"sv);
     const char16_t *goodLimit = good.data() + good.length();
     U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> goodIter(good.data(), good.data(), goodLimit);
-    assertEquals("goodIter[0] *", u'a', *goodIter);
+    assertEquals("goodIter[0] * codePoint()", u'a', (*goodIter).codePoint());
     ++goodIter;  // pre-increment
-    assertEquals("goodIter[1] *", u'b', *goodIter);
+    assertEquals("goodIter[1] * codePoint()", u'b', (*goodIter).codePoint());
     ++goodIter;
-    assertEquals("goodIter[2] *", u'ç', *goodIter++);  // post-increment
-    assertEquals("goodIter[3] *", u'カ', *goodIter);
+    assertEquals("goodIter[2] * codePoint()", u'ç', (*goodIter++).codePoint());  // post-increment
+    assertEquals("goodIter[3] * codePoint()", u'カ', (*goodIter).codePoint());
     ++goodIter;
-    assertEquals("goodIter[4] *", U'🚴', *goodIter++);
+    const U16OneSeq<char16_t> &seq = *goodIter++;
+    assertEquals("goodIter[4] * codePoint()", U'🚴', seq.codePoint());
+    assertEquals("goodIter[4] * length()", 2, seq.length());
+    assertTrue("goodIter[4] * stringView()", seq.stringView() == u"🚴"sv);
     U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> goodEndIter(good.data(), goodLimit, goodLimit);
     assertTrue("goodIter == goodEndIter", goodIter == goodEndIter);
 

From 1bcd5ee309172aaeed8467fde4dda82f7244bdc6 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 23 Dec 2024 14:26:16 -0800
Subject: [PATCH 03/23] header-only

---
 icu4c/source/common/unicode/utf16cppiter.h    | 15 +++++++++------
 icu4c/source/test/intltest/utfcppitertest.cpp |  6 ++++++
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 582ce1d1b6de..0050b60fee49 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -11,16 +11,18 @@
 
 #include "unicode/utypes.h"
 
-#if U_SHOW_CPLUSPLUS_API
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
 
 #include "unicode/utf16.h"
 #include "unicode/uversion.h"
 
 /**
  * \file
- * \brief C++ API: C++ iterators over Unicode 16-bit strings (=UTF-16 if well-formed).
+ * \brief C++ header-only API: C++ iterators over Unicode 16-bit strings (=UTF-16 if well-formed).
  */
 
+#ifndef U_HIDE_DRAFT_API
+
 namespace U_HEADER_ONLY_NAMESPACE {
 
 // Some defined behaviors for handling ill-formed 16-bit strings.
@@ -44,7 +46,7 @@ enum U16IllFormedBehavior {
  *
  * TODO: check doxygen syntax for template parameters
  * @param Unit16 char16_t or uint16_t or (on Windows) wchar_t
- * @draft ICU 76
+ * @draft ICU 77
  */
 template<typename Unit16>
 class U16OneSeq {
@@ -108,7 +110,7 @@ class U16OneSeq {
  * TODO: check doxygen syntax for template parameters
  * @param Unit16 char16_t or uint16_t or (on Windows) wchar_t
  * @param U16IllFormedBehavior TODO
- * @draft ICU 76
+ * @draft ICU 77
  */
 template<typename Unit16, U16IllFormedBehavior behavior>
 class U16Iterator {
@@ -161,7 +163,7 @@ class U16Iterator {
 
 // TODO: Non-validating iterator over the code points in a Unicode 16-bit string.
 // Assumes well-formed UTF-16. Otherwise the behavior is undefined.
-// TODO: all @draft ICU 76
+// TODO: all @draft ICU 77
 // template<typename Unit16>
 // class U16UnsafeIterator
 // TODO: only p, no start, no limit
@@ -170,5 +172,6 @@ class U16Iterator {
 
 }  // namespace U_HEADER_ONLY_NAMESPACE
 
-#endif  // U_SHOW_CPLUSPLUS_API
+#endif  // U_HIDE_DRAFT_API
+#endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
 #endif  // __UTF16CPPITER_H__
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index f71f23327386..c1162dde1594 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -6,6 +6,12 @@
 
 #include <string_view>
 
+// Test header-only ICU C++ APIs. Do not use other ICU C++ APIs.
+// Non-default configuration:
+#define U_SHOW_CPLUSPLUS_API 0
+// Default configuration:
+// #define U_SHOW_CPLUSPLUS_HEADER_API 1
+
 #include "unicode/utypes.h"
 #include "unicode/utf16cppiter.h"
 #include "intltest.h"

From 20f890be31682c7467bbc4c03bd0b05aa92a5aa2 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 23 Dec 2024 16:55:31 -0800
Subject: [PATCH 04/23] operator* read on the fly

---
 icu4c/source/common/unicode/utf16cppiter.h    | 114 +++++++-----------
 icu4c/source/test/intltest/utfcppitertest.cpp |  12 +-
 2 files changed, 51 insertions(+), 75 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 0050b60fee49..9b03ab4486bc 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -41,6 +41,8 @@ enum U16IllFormedBehavior {
     U16_BEHAVIOR_SURROGATE
 };
 
+// TODO: Consider a template parameter for UChar32 vs. char32_t vs. uint32_t.
+
 /**
  * A code unit sequence for one code point returned by U16Iterator.
  *
@@ -49,59 +51,18 @@ enum U16IllFormedBehavior {
  * @draft ICU 77
  */
 template<typename Unit16>
-class U16OneSeq {
-public:
-    U16OneSeq(const U16OneSeq &other) = default;
-
-    const Unit16 *data() { return p; }
-    int32_t length() const { return len; }
+struct U16OneSeq {
+    // Order of fields with padding and access frequency in mind.
+    UChar32 codePoint = 0;
+    uint8_t length = 0;
+    bool isWellFormed = false;
+    const Unit16 *data;
 
     std::basic_string_view<Unit16> stringView() const {
-        return std::basic_string_view<Unit16>(p, len);
-    }
-
-    bool isWellFormed() const { return ok; }
-
-    UChar32 codePoint() const { return c; }
-
-    // TODO: std::optional<UChar32> maybeCodePoint() const ? (nullopt if !ok)
-
-private:
-    // TODO: Why can't we just use Unit16 here?
-    // error: declaration of 'Unit16' shadows template parameter
-    template<typename SomeOtherUnit16, U16IllFormedBehavior behavior>
-    friend class U16Iterator;
-
-    U16OneSeq(const Unit16 *p) : p(p) {}
-
-    void fwd1() { p += len; }
-
-    void readOneForward(const Unit16 *limit) {
-        if (p == limit) {
-            len = 0;
-            return;
-        }
-        // see U16_NEXT_OR_FFFD()
-        c = *p;
-        len = 1;
-        ok = true;
-        if (U16_IS_SURROGATE(c)) {
-            uint16_t c2;
-            if (U16_IS_SURROGATE_LEAD(c) && (p + 1) != limit && U16_IS_TRAIL(c2 = p[1])) {
-                c = U16_GET_SUPPLEMENTARY(c, c2);
-                len = 2;
-            } else {
-                // TODO: U16IllFormedBehavior
-                c = 0xfffd;
-                ok = false;
-            }
-        }
+        return std::basic_string_view<Unit16>(data, length);
     }
 
-    const Unit16 *p;
-    UChar32 c = 0;
-    int8_t len = 0;
-    bool ok = false;
+    // TODO: std::optional<UChar32> maybeCodePoint() const ? (nullopt if !isWellFormed)
 };
 
 /**
@@ -117,53 +78,68 @@ class U16Iterator {
 public:
     // TODO: make private, make friends
     U16Iterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
-            start(start), limit(limit), seq(p) {
-        seq.readOneForward(limit);
-    }
+            start(start), p(p), limit(limit) {}
     // TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
     // Test pointers for == or != but not < or >.
 
     U16Iterator(const U16Iterator &other) = default;
 
-    bool operator==(const U16Iterator &other) const { return seq.p == other.seq.p; }
+    bool operator==(const U16Iterator &other) const { return p == other.p; }
     bool operator!=(const U16Iterator &other) const { return !operator==(other); }
 
-    const U16OneSeq<Unit16> &operator*() const {
-        return seq;
+    const U16OneSeq<Unit16> operator*() const {
+        // TODO: assert p != limit -- more precisely: start <= p < limit
+        // Similar to U16_NEXT_OR_FFFD().
+        UChar32 c = *p;
+        if (!U16_IS_SURROGATE(c)) {
+            return {c, 1, true, p};
+        } else {
+            uint16_t c2;
+            if (U16_IS_SURROGATE_LEAD(c) && (p + 1) != limit && U16_IS_TRAIL(c2 = p[1])) {
+                c = U16_GET_SUPPLEMENTARY(c, c2);
+                return {c, 2, true, p};
+            } else {
+                // TODO: U16IllFormedBehavior
+                return {0xfffd, 1, false, p};
+            }
+        }
     }
 
     U16Iterator &operator++() {  // pre-increment
-        // TODO: think about switching directions etc.
-        // Assume that readOneForward() was called and set seq.len.
-        // Skip the current code point, then read the next one.
-        seq.fwd1();
-        seq.readOneForward(limit);
+        // TODO: assert p != limit -- more precisely: start <= p < limit
+        // Similar to U16_FWD_1().
+        if (U16_IS_LEAD(*p++) && p != limit && U16_IS_TRAIL(*p)) {
+            ++p;
+        }
         return *this;
     }
 
     U16Iterator operator++(int) {  // post-increment
+        // TODO: assert p != limit -- more precisely: start <= p < limit
         U16Iterator result(*this);
-        // TODO: think about switching directions etc.
-        // Assume that readOneForward() was called and set seq.len.
-        // Skip the current code point, then read the next one.
-        seq.fwd1();
-        seq.readOneForward(limit);
+        // More similar to U16_NEXT_OR_FFFD() than U16_FWD_1() to try to help the compiler
+        // amortize work between operator*() and operator++(int) in typical *it++ usage.
+        // Otherwise this is slightly less efficient because it tests a lead surrogate twice.
+        UChar32 c = *p++;
+        if (U16_IS_SURROGATE(c) &&
+                U16_IS_SURROGATE_LEAD(c) && p != limit && U16_IS_TRAIL(*p)) {
+            ++p;
+        }
         return result;
     }
 
 private:
     // In a validating iterator, we need start & limit so that when we read a code point
     // (forward or backward) we can test if there are enough code units.
-    const Unit16 *start;
-    const Unit16 *limit;
-    U16OneSeq<Unit16> seq;
+    const Unit16 *const start;
+    const Unit16 *p;
+    const Unit16 *const limit;
 };
 
 // ------------------------------------------------------------------------- ***
 
 // TODO: Non-validating iterator over the code points in a Unicode 16-bit string.
 // Assumes well-formed UTF-16. Otherwise the behavior is undefined.
-// TODO: all @draft ICU 77
 // template<typename Unit16>
 // class U16UnsafeIterator
 // TODO: only p, no start, no limit
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index c1162dde1594..0ae44937d294 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -51,16 +51,16 @@ void U16IteratorTest::testExperiment() {
     std::u16string_view good(u"abçカ🚴"sv);
     const char16_t *goodLimit = good.data() + good.length();
     U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> goodIter(good.data(), good.data(), goodLimit);
-    assertEquals("goodIter[0] * codePoint()", u'a', (*goodIter).codePoint());
+    assertEquals("goodIter[0] * codePoint", u'a', (*goodIter).codePoint);
     ++goodIter;  // pre-increment
-    assertEquals("goodIter[1] * codePoint()", u'b', (*goodIter).codePoint());
+    assertEquals("goodIter[1] * codePoint", u'b', (*goodIter).codePoint);
     ++goodIter;
-    assertEquals("goodIter[2] * codePoint()", u'ç', (*goodIter++).codePoint());  // post-increment
-    assertEquals("goodIter[3] * codePoint()", u'カ', (*goodIter).codePoint());
+    assertEquals("goodIter[2] * codePoint", u'ç', (*goodIter++).codePoint);  // post-increment
+    assertEquals("goodIter[3] * codePoint", u'カ', (*goodIter).codePoint);
     ++goodIter;
     const U16OneSeq<char16_t> &seq = *goodIter++;
-    assertEquals("goodIter[4] * codePoint()", U'🚴', seq.codePoint());
-    assertEquals("goodIter[4] * length()", 2, seq.length());
+    assertEquals("goodIter[4] * codePoint", U'🚴', seq.codePoint);
+    assertEquals("goodIter[4] * length", 2, seq.length);
     assertTrue("goodIter[4] * stringView()", seq.stringView() == u"🚴"sv);
     U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> goodEndIter(good.data(), goodLimit, goodLimit);
     assertTrue("goodIter == goodEndIter", goodIter == goodEndIter);

From 7dc31d2e5961f63dbfe6c1352a535b4d1d21459b Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 26 Dec 2024 11:18:07 -0800
Subject: [PATCH 05/23] fix hdrtest

---
 icu4c/source/common/unicode/utf16cppiter.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 9b03ab4486bc..0967904e574d 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -7,12 +7,11 @@
 #ifndef __UTF16CPPITER_H__
 #define __UTF16CPPITER_H__
 
-#include <string_view>
-
 #include "unicode/utypes.h"
 
 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
 
+#include <string_view>
 #include "unicode/utf16.h"
 #include "unicode/uversion.h"
 

From b381c2b569d051e7e765027c086e77028268fbd4 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 26 Dec 2024 12:07:24 -0800
Subject: [PATCH 06/23] U16IllFormedBehavior

---
 icu4c/source/common/unicode/utf16cppiter.h    |  14 +-
 icu4c/source/test/intltest/utfcppitertest.cpp | 126 +++++++++++++++---
 2 files changed, 117 insertions(+), 23 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 0967904e574d..4205f2d5a0fe 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -86,7 +86,7 @@ class U16Iterator {
     bool operator==(const U16Iterator &other) const { return p == other.p; }
     bool operator!=(const U16Iterator &other) const { return !operator==(other); }
 
-    const U16OneSeq<Unit16> operator*() const {
+    U16OneSeq<Unit16> operator*() const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Similar to U16_NEXT_OR_FFFD().
         UChar32 c = *p;
@@ -98,8 +98,7 @@ class U16Iterator {
                 c = U16_GET_SUPPLEMENTARY(c, c2);
                 return {c, 2, true, p};
             } else {
-                // TODO: U16IllFormedBehavior
-                return {0xfffd, 1, false, p};
+                return {sub(c), 1, false, p};
             }
         }
     }
@@ -128,6 +127,15 @@ class U16Iterator {
     }
 
 private:
+    // Handle ill-formed UTF-16: One unpaired surrogate.
+    UChar32 sub(UChar32 surrogate) const {
+        switch (behavior) {
+            case U16_BEHAVIOR_NEGATIVE: return U_SENTINEL;
+            case U16_BEHAVIOR_FFFD: return 0xfffd;
+            case U16_BEHAVIOR_SURROGATE: return surrogate;
+        }
+    }
+
     // In a validating iterator, we need start & limit so that when we read a code point
     // (forward or backward) we can test if there are enough code units.
     const Unit16 *const start;
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index 0ae44937d294..eb698bc9f699 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -21,6 +21,8 @@
 using namespace std::string_view_literals;
 
 using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_NEGATIVE;
+using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_FFFD;
+using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_SURROGATE;
 using U_HEADER_ONLY_NAMESPACE::U16Iterator;
 using U_HEADER_ONLY_NAMESPACE::U16OneSeq;
 
@@ -30,7 +32,10 @@ class U16IteratorTest : public IntlTest {
 
     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=nullptr) override;
 
-    void testExperiment();
+    void testGood();
+    void testNegative();
+    void testFFFD();
+    void testSurrogate();
 };
 
 extern IntlTest *createU16IteratorTest() {
@@ -42,28 +47,109 @@ void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
         logln("TestSuite U16IteratorTest: ");
     }
     TESTCASE_AUTO_BEGIN;
-    TESTCASE_AUTO(testExperiment);
+    TESTCASE_AUTO(testGood);
+    TESTCASE_AUTO(testNegative);
+    TESTCASE_AUTO(testFFFD);
+    TESTCASE_AUTO(testSurrogate);
     TESTCASE_AUTO_END;
 }
 
-void U16IteratorTest::testExperiment() {
-    IcuTestErrorCode errorCode(*this, "testExperiment");
+void U16IteratorTest::testGood() {
+    IcuTestErrorCode errorCode(*this, "testGood");
     std::u16string_view good(u"abçカ🚴"sv);
-    const char16_t *goodLimit = good.data() + good.length();
-    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> goodIter(good.data(), good.data(), goodLimit);
-    assertEquals("goodIter[0] * codePoint", u'a', (*goodIter).codePoint);
-    ++goodIter;  // pre-increment
-    assertEquals("goodIter[1] * codePoint", u'b', (*goodIter).codePoint);
-    ++goodIter;
-    assertEquals("goodIter[2] * codePoint", u'ç', (*goodIter++).codePoint);  // post-increment
-    assertEquals("goodIter[3] * codePoint", u'カ', (*goodIter).codePoint);
-    ++goodIter;
-    const U16OneSeq<char16_t> &seq = *goodIter++;
-    assertEquals("goodIter[4] * codePoint", U'🚴', seq.codePoint);
-    assertEquals("goodIter[4] * length", 2, seq.length);
-    assertTrue("goodIter[4] * stringView()", seq.stringView() == u"🚴"sv);
-    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> goodEndIter(good.data(), goodLimit, goodLimit);
-    assertTrue("goodIter == goodEndIter", goodIter == goodEndIter);
+    const char16_t *limit = good.data() + good.length();
+    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> iter(good.data(), good.data(), limit);
+    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
+    ++iter;  // pre-increment
+    U16OneSeq<char16_t> seq = *iter;
+    assertEquals("iter[1] * codePoint", u'b', seq.codePoint);
+    assertEquals("iter[1] * length", 1, seq.length);
+    assertTrue("iter[1] * isWellFormed", seq.isWellFormed);
+    assertTrue("iter[1] * stringView()", seq.stringView() == u"b"sv);
+    ++iter;
+    assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint);  // post-increment
+    assertEquals("iter[3] * codePoint", u'カ', (*iter).codePoint);
+    ++iter;
+    seq = *iter++;
+    assertEquals("iter[4] * codePoint", U'🚴', seq.codePoint);
+    assertEquals("iter[4] * length", 2, seq.length);
+    assertTrue("iter[4] * isWellFormed", seq.isWellFormed);
+    assertTrue("iter[4] * stringView()", seq.stringView() == u"🚴"sv);
+    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> endIter(good.data(), limit, limit);
+    assertTrue("iter == endIter", iter == endIter);
+}
+
+void U16IteratorTest::testNegative() {
+    IcuTestErrorCode errorCode(*this, "testNegative");
+    static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
+    std::u16string_view bad(badChars, 5);
+    const char16_t *limit = bad.data() + bad.length();
+    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> iter(bad.data(), bad.data(), limit);
+    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
+    ++iter;  // pre-increment
+    U16OneSeq<char16_t> seq = *iter;
+    assertEquals("iter[1] * codePoint", -1, seq.codePoint);
+    assertEquals("iter[1] * length", 1, seq.length);
+    assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
+    auto sv = seq.stringView();
+    assertEquals("iter[1] * stringView().length()", 1, sv.length());
+    assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
+    ++iter;
+    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint);  // post-increment
+    seq = *iter++;  // post-increment
+    assertEquals("iter[3] * codePoint", -1, seq.codePoint);
+    assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
+    assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
+    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> endIter(bad.data(), limit, limit);
+    assertTrue("iter == endIter", iter == endIter);
+}
+
+void U16IteratorTest::testFFFD() {
+    IcuTestErrorCode errorCode(*this, "testFFFD");
+    static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
+    std::u16string_view bad(badChars, 5);
+    const char16_t *limit = bad.data() + bad.length();
+    U16Iterator<char16_t, U16_BEHAVIOR_FFFD> iter(bad.data(), bad.data(), limit);
+    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
+    ++iter;  // pre-increment
+    U16OneSeq<char16_t> seq = *iter;
+    assertEquals("iter[1] * codePoint", 0xfffd, seq.codePoint);
+    assertEquals("iter[1] * length", 1, seq.length);
+    assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
+    auto sv = seq.stringView();
+    assertEquals("iter[1] * stringView().length()", 1, sv.length());
+    assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
+    ++iter;
+    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint);  // post-increment
+    seq = *iter++;  // post-increment
+    assertEquals("iter[3] * codePoint", 0xfffd, seq.codePoint);
+    assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
+    assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
+    U16Iterator<char16_t, U16_BEHAVIOR_FFFD> endIter(bad.data(), limit, limit);
+    assertTrue("iter == endIter", iter == endIter);
+}
 
-    // TODO: test ill-formed, and much more...
+void U16IteratorTest::testSurrogate() {
+    IcuTestErrorCode errorCode(*this, "testSurrogate");
+    static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
+    std::u16string_view bad(badChars, 5);
+    const char16_t *limit = bad.data() + bad.length();
+    U16Iterator<char16_t, U16_BEHAVIOR_SURROGATE> iter(bad.data(), bad.data(), limit);
+    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
+    ++iter;  // pre-increment
+    U16OneSeq<char16_t> seq = *iter;
+    assertEquals("iter[1] * codePoint", 0xd900, seq.codePoint);
+    assertEquals("iter[1] * length", 1, seq.length);
+    assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
+    auto sv = seq.stringView();
+    assertEquals("iter[1] * stringView().length()", 1, sv.length());
+    assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
+    ++iter;
+    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint);  // post-increment
+    seq = *iter++;  // post-increment
+    assertEquals("iter[3] * codePoint", 0xdc05, seq.codePoint);
+    assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
+    assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
+    U16Iterator<char16_t, U16_BEHAVIOR_SURROGATE> endIter(bad.data(), limit, limit);
+    assertTrue("iter == endIter", iter == endIter);
 }

From 6851e8db32692469db22a4d5a440f1b4883b4aed Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 26 Dec 2024 12:24:04 -0800
Subject: [PATCH 07/23] C++ range: U16StringCodePoints

---
 icu4c/source/common/unicode/utf16cppiter.h    | 33 +++++++++++++++++++
 icu4c/source/test/intltest/utfcppitertest.cpp | 29 ++++++++--------
 2 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 4205f2d5a0fe..a6603203dd32 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -143,6 +143,39 @@ class U16Iterator {
     const Unit16 *const limit;
 };
 
+/**
+ * A C++ "range" for iterating over all of the code points of a 16-bit Unicode string.
+ *
+ * @return a code point iterator.
+ * @draft ICU 77
+ */
+template<typename Unit16, U16IllFormedBehavior behavior>
+class U16StringCodePoints {
+public:
+    /**
+     * Constructs a C++ "range" object over the code points in the string.
+     * @draft ICU 77
+     */
+    U16StringCodePoints(std::basic_string_view<Unit16> s) : s(s) {}
+
+    /** @draft ICU 77 */
+    U16StringCodePoints(const U16StringCodePoints &other) = default;
+
+    /** @draft ICU 77 */
+    U16Iterator<Unit16, behavior> begin() const {
+        return {s.data(), s.data(), s.data() + s.length()};
+    }
+
+    /** @draft ICU 77 */
+    U16Iterator<Unit16, behavior> end() const {
+        const Unit16 *limit = s.data() + s.length();
+        return {s.data(), limit, limit};
+    }
+
+private:
+    std::basic_string_view<Unit16> s;
+};
+
 // ------------------------------------------------------------------------- ***
 
 // TODO: Non-validating iterator over the code points in a Unicode 16-bit string.
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index eb698bc9f699..8e429d8a74b2 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -25,6 +25,7 @@ using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_FFFD;
 using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_SURROGATE;
 using U_HEADER_ONLY_NAMESPACE::U16Iterator;
 using U_HEADER_ONLY_NAMESPACE::U16OneSeq;
+using U_HEADER_ONLY_NAMESPACE::U16StringCodePoints;
 
 class U16IteratorTest : public IntlTest {
 public:
@@ -57,8 +58,8 @@ void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
 void U16IteratorTest::testGood() {
     IcuTestErrorCode errorCode(*this, "testGood");
     std::u16string_view good(u"abçカ🚴"sv);
-    const char16_t *limit = good.data() + good.length();
-    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> iter(good.data(), good.data(), limit);
+    U16StringCodePoints<char16_t, U16_BEHAVIOR_NEGATIVE> range(good);
+    auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
     U16OneSeq<char16_t> seq = *iter;
@@ -75,16 +76,15 @@ void U16IteratorTest::testGood() {
     assertEquals("iter[4] * length", 2, seq.length);
     assertTrue("iter[4] * isWellFormed", seq.isWellFormed);
     assertTrue("iter[4] * stringView()", seq.stringView() == u"🚴"sv);
-    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> endIter(good.data(), limit, limit);
-    assertTrue("iter == endIter", iter == endIter);
+    assertTrue("iter == endIter", iter == range.end());
 }
 
 void U16IteratorTest::testNegative() {
     IcuTestErrorCode errorCode(*this, "testNegative");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    const char16_t *limit = bad.data() + bad.length();
-    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> iter(bad.data(), bad.data(), limit);
+    U16StringCodePoints<char16_t, U16_BEHAVIOR_NEGATIVE> range(bad);
+    auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
     U16OneSeq<char16_t> seq = *iter;
@@ -100,16 +100,15 @@ void U16IteratorTest::testNegative() {
     assertEquals("iter[3] * codePoint", -1, seq.codePoint);
     assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
     assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
-    U16Iterator<char16_t, U16_BEHAVIOR_NEGATIVE> endIter(bad.data(), limit, limit);
-    assertTrue("iter == endIter", iter == endIter);
+    assertTrue("iter == endIter", iter == range.end());
 }
 
 void U16IteratorTest::testFFFD() {
     IcuTestErrorCode errorCode(*this, "testFFFD");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    const char16_t *limit = bad.data() + bad.length();
-    U16Iterator<char16_t, U16_BEHAVIOR_FFFD> iter(bad.data(), bad.data(), limit);
+    U16StringCodePoints<char16_t, U16_BEHAVIOR_FFFD> range(bad);
+    auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
     U16OneSeq<char16_t> seq = *iter;
@@ -125,16 +124,15 @@ void U16IteratorTest::testFFFD() {
     assertEquals("iter[3] * codePoint", 0xfffd, seq.codePoint);
     assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
     assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
-    U16Iterator<char16_t, U16_BEHAVIOR_FFFD> endIter(bad.data(), limit, limit);
-    assertTrue("iter == endIter", iter == endIter);
+    assertTrue("iter == endIter", iter == range.end());
 }
 
 void U16IteratorTest::testSurrogate() {
     IcuTestErrorCode errorCode(*this, "testSurrogate");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    const char16_t *limit = bad.data() + bad.length();
-    U16Iterator<char16_t, U16_BEHAVIOR_SURROGATE> iter(bad.data(), bad.data(), limit);
+    U16StringCodePoints<char16_t, U16_BEHAVIOR_SURROGATE> range(bad);
+    auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
     U16OneSeq<char16_t> seq = *iter;
@@ -150,6 +148,5 @@ void U16IteratorTest::testSurrogate() {
     assertEquals("iter[3] * codePoint", 0xdc05, seq.codePoint);
     assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
     assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
-    U16Iterator<char16_t, U16_BEHAVIOR_SURROGATE> endIter(bad.data(), limit, limit);
-    assertTrue("iter == endIter", iter == endIter);
+    assertTrue("iter == endIter", iter == range.end());
 }

From 64ea1100ea4d956036316b5140ee97bd305330af Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 26 Dec 2024 12:45:17 -0800
Subject: [PATCH 08/23] template param: code point type

---
 icu4c/source/common/unicode/utf16cppiter.h    | 44 +++++++++----------
 icu4c/source/test/intltest/utfcppitertest.cpp | 16 +++----
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index a6603203dd32..56116378fef2 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -29,30 +29,26 @@ namespace U_HEADER_ONLY_NAMESPACE {
 //
 // TODO: A possible alternative to an enum might be some kind of function template
 // which would be fully customizable.
-// The operator*() return value might then want to be a template parameter as well.
-// For example, for a well-formed sequence, the return value could be
-// a tuple of (code point, well-formed), or a string view, or...
-// (And then the caller could choose between UChar32 and char32_t.)
-// However, all of that would make the API more complex and daunting.
 enum U16IllFormedBehavior {
     U16_BEHAVIOR_NEGATIVE,
     U16_BEHAVIOR_FFFD,
     U16_BEHAVIOR_SURROGATE
 };
 
-// TODO: Consider a template parameter for UChar32 vs. char32_t vs. uint32_t.
-
 /**
  * A code unit sequence for one code point returned by U16Iterator.
+ * TODO: Share with UTF-8?
  *
  * TODO: check doxygen syntax for template parameters
- * @param Unit16 char16_t or uint16_t or (on Windows) wchar_t
+ * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @param CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *             should be signed if U16_BEHAVIOR_NEGATIVE
  * @draft ICU 77
  */
-template<typename Unit16>
+template<typename Unit16, typename CP32>
 struct U16OneSeq {
     // Order of fields with padding and access frequency in mind.
-    UChar32 codePoint = 0;
+    CP32 codePoint = 0;
     uint8_t length = 0;
     bool isWellFormed = false;
     const Unit16 *data;
@@ -61,18 +57,20 @@ struct U16OneSeq {
         return std::basic_string_view<Unit16>(data, length);
     }
 
-    // TODO: std::optional<UChar32> maybeCodePoint() const ? (nullopt if !isWellFormed)
+    // TODO: std::optional<CP32> maybeCodePoint() const ? (nullopt if !isWellFormed)
 };
 
 /**
  * Validating iterator over the code points in a Unicode 16-bit string.
  *
  * TODO: check doxygen syntax for template parameters
- * @param Unit16 char16_t or uint16_t or (on Windows) wchar_t
+ * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @param CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *             should be signed if U16_BEHAVIOR_NEGATIVE
  * @param U16IllFormedBehavior TODO
  * @draft ICU 77
  */
-template<typename Unit16, U16IllFormedBehavior behavior>
+template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
 class U16Iterator {
 public:
     // TODO: make private, make friends
@@ -86,10 +84,10 @@ class U16Iterator {
     bool operator==(const U16Iterator &other) const { return p == other.p; }
     bool operator!=(const U16Iterator &other) const { return !operator==(other); }
 
-    U16OneSeq<Unit16> operator*() const {
+    U16OneSeq<Unit16, CP32> operator*() const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Similar to U16_NEXT_OR_FFFD().
-        UChar32 c = *p;
+        CP32 c = *p;
         if (!U16_IS_SURROGATE(c)) {
             return {c, 1, true, p};
         } else {
@@ -118,7 +116,7 @@ class U16Iterator {
         // More similar to U16_NEXT_OR_FFFD() than U16_FWD_1() to try to help the compiler
         // amortize work between operator*() and operator++(int) in typical *it++ usage.
         // Otherwise this is slightly less efficient because it tests a lead surrogate twice.
-        UChar32 c = *p++;
+        CP32 c = *p++;
         if (U16_IS_SURROGATE(c) &&
                 U16_IS_SURROGATE_LEAD(c) && p != limit && U16_IS_TRAIL(*p)) {
             ++p;
@@ -126,9 +124,13 @@ class U16Iterator {
         return result;
     }
 
+    // TODO: operator--()
+    // TODO: maybe fused readAndInc()?
+    // TODO: maybe fused decAndRead()?
+
 private:
     // Handle ill-formed UTF-16: One unpaired surrogate.
-    UChar32 sub(UChar32 surrogate) const {
+    CP32 sub(CP32 surrogate) const {
         switch (behavior) {
             case U16_BEHAVIOR_NEGATIVE: return U_SENTINEL;
             case U16_BEHAVIOR_FFFD: return 0xfffd;
@@ -149,7 +151,7 @@ class U16Iterator {
  * @return a code point iterator.
  * @draft ICU 77
  */
-template<typename Unit16, U16IllFormedBehavior behavior>
+template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
 class U16StringCodePoints {
 public:
     /**
@@ -162,12 +164,12 @@ class U16StringCodePoints {
     U16StringCodePoints(const U16StringCodePoints &other) = default;
 
     /** @draft ICU 77 */
-    U16Iterator<Unit16, behavior> begin() const {
+    U16Iterator<Unit16, CP32, behavior> begin() const {
         return {s.data(), s.data(), s.data() + s.length()};
     }
 
     /** @draft ICU 77 */
-    U16Iterator<Unit16, behavior> end() const {
+    U16Iterator<Unit16, CP32, behavior> end() const {
         const Unit16 *limit = s.data() + s.length();
         return {s.data(), limit, limit};
     }
@@ -183,8 +185,6 @@ class U16StringCodePoints {
 // template<typename Unit16>
 // class U16UnsafeIterator
 // TODO: only p, no start, no limit
-// TODO: can/should we read the code point only in operator*()?
-// if we read it in the constructor, then we would still need start/limit...
 
 }  // namespace U_HEADER_ONLY_NAMESPACE
 
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index 8e429d8a74b2..15db94e613b0 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -58,11 +58,11 @@ void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
 void U16IteratorTest::testGood() {
     IcuTestErrorCode errorCode(*this, "testGood");
     std::u16string_view good(u"abçカ🚴"sv);
-    U16StringCodePoints<char16_t, U16_BEHAVIOR_NEGATIVE> range(good);
+    U16StringCodePoints<char16_t, UChar32, U16_BEHAVIOR_NEGATIVE> range(good);
     auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
-    U16OneSeq<char16_t> seq = *iter;
+    auto seq = *iter;
     assertEquals("iter[1] * codePoint", u'b', seq.codePoint);
     assertEquals("iter[1] * length", 1, seq.length);
     assertTrue("iter[1] * isWellFormed", seq.isWellFormed);
@@ -83,11 +83,11 @@ void U16IteratorTest::testNegative() {
     IcuTestErrorCode errorCode(*this, "testNegative");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    U16StringCodePoints<char16_t, U16_BEHAVIOR_NEGATIVE> range(bad);
+    U16StringCodePoints<char16_t, UChar32, U16_BEHAVIOR_NEGATIVE> range(bad);
     auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
-    U16OneSeq<char16_t> seq = *iter;
+    auto seq = *iter;
     assertEquals("iter[1] * codePoint", -1, seq.codePoint);
     assertEquals("iter[1] * length", 1, seq.length);
     assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
@@ -107,11 +107,11 @@ void U16IteratorTest::testFFFD() {
     IcuTestErrorCode errorCode(*this, "testFFFD");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    U16StringCodePoints<char16_t, U16_BEHAVIOR_FFFD> range(bad);
+    U16StringCodePoints<char16_t, char32_t, U16_BEHAVIOR_FFFD> range(bad);
     auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
-    U16OneSeq<char16_t> seq = *iter;
+    auto seq = *iter;
     assertEquals("iter[1] * codePoint", 0xfffd, seq.codePoint);
     assertEquals("iter[1] * length", 1, seq.length);
     assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
@@ -131,11 +131,11 @@ void U16IteratorTest::testSurrogate() {
     IcuTestErrorCode errorCode(*this, "testSurrogate");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    U16StringCodePoints<char16_t, U16_BEHAVIOR_SURROGATE> range(bad);
+    U16StringCodePoints<char16_t, uint32_t, U16_BEHAVIOR_SURROGATE> range(bad);
     auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
-    U16OneSeq<char16_t> seq = *iter;
+    auto seq = *iter;
     assertEquals("iter[1] * codePoint", 0xd900, seq.codePoint);
     assertEquals("iter[1] * length", 1, seq.length);
     assertFalse("iter[1] * isWellFormed", seq.isWellFormed);

From 7bbeefcdd35426b279a2f58b6ff49cc5ae4cd1d7 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 26 Dec 2024 17:19:08 -0800
Subject: [PATCH 09/23] make it work outside of ICU

---
 icu4c/source/common/unicode/utf16cppiter.h | 26 +++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 56116378fef2..41bc9cf4e230 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -7,13 +7,37 @@
 #ifndef __UTF16CPPITER_H__
 #define __UTF16CPPITER_H__
 
+// TODO: For experimentation outside of ICU, comment out this include.
+// Experimentally conditional code below checks for UTYPES_H and
+// otherwise uses copies of bits of ICU.
 #include "unicode/utypes.h"
 
-#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
 
 #include <string_view>
+#ifdef UTYPES_H
 #include "unicode/utf16.h"
 #include "unicode/uversion.h"
+#else
+// TODO: Remove checks for UTYPES_H and replacement definitions.
+// unicode/utypes.h etc.
+#include <inttypes.h>
+typedef int32_t UChar32;
+constexpr UChar32 U_SENTINEL = -1;
+// unicode/uversion.h
+#define U_HEADER_ONLY_NAMESPACE header
+namespace header {}
+// unicode/utf.h
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+// unicode/utf16.h
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+#endif
 
 /**
  * \file

From 43e99e082ef3015df1481e900e8ca2c5de3e8247 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 26 Dec 2024 17:24:02 -0800
Subject: [PATCH 10/23] experimental sample code

---
 icu4c/source/common/unicode/utf16cppiter.h | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 41bc9cf4e230..96bc0ad1cbef 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -210,6 +210,29 @@ class U16StringCodePoints {
 // class U16UnsafeIterator
 // TODO: only p, no start, no limit
 
+// TODO: remove experimental sample code
+#ifndef UTYPES_H
+int32_t rangeLoop(std::u16string_view s) {
+   header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
+   int32_t sum = 0;
+   for (auto seq : range) {
+       sum += seq.codePoint;
+   }
+   return sum;
+}
+
+int32_t loopIterPlusPlus(std::u16string_view s) {
+   header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
+   int32_t sum = 0;
+   auto iter = range.begin();
+   auto limit = range.end();
+   while (iter != limit) {
+       sum += (*iter++).codePoint;
+   }
+   return sum;
+}
+#endif
+
 }  // namespace U_HEADER_ONLY_NAMESPACE
 
 #endif  // U_HIDE_DRAFT_API

From bfc722e398c53a7e82e1f32e4937af87efc5b625 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 26 Dec 2024 17:38:51 -0800
Subject: [PATCH 11/23] pre=post-inc, fused readAndInc()

---
 icu4c/source/common/unicode/utf16cppiter.h | 51 +++++++++++++++++-----
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 96bc0ad1cbef..14f04c0111a3 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -127,16 +127,42 @@ class U16Iterator {
 
     U16Iterator &operator++() {  // pre-increment
         // TODO: assert p != limit -- more precisely: start <= p < limit
-        // Similar to U16_FWD_1().
-        if (U16_IS_LEAD(*p++) && p != limit && U16_IS_TRAIL(*p)) {
-            ++p;
-        }
+        inc();
         return *this;
     }
 
     U16Iterator operator++(int) {  // post-increment
         // TODO: assert p != limit -- more precisely: start <= p < limit
         U16Iterator result(*this);
+        inc();
+        return result;
+    }
+
+    // Fused/optimized *iter++
+    U16OneSeq<Unit16, CP32> readAndInc() {
+        // TODO: assert p != limit -- more precisely: start <= p < limit
+        // Very similar to U16_NEXT_OR_FFFD().
+        const Unit16 *p0 = p;
+        CP32 c = *p++;
+        if (!U16_IS_SURROGATE(c)) {
+            return {c, 1, true, p0};
+        } else {
+            uint16_t c2;
+            if (U16_IS_SURROGATE_LEAD(c) && p != limit && U16_IS_TRAIL(c2 = *p)) {
+                ++p;
+                c = U16_GET_SUPPLEMENTARY(c, c2);
+                return {c, 2, true, p0};
+            } else {
+                return {sub(c), 1, false, p0};
+            }
+        }
+    }
+
+    // TODO: operator--()
+    // TODO: maybe fused decAndRead()?
+
+private:
+    void inc() {
         // More similar to U16_NEXT_OR_FFFD() than U16_FWD_1() to try to help the compiler
         // amortize work between operator*() and operator++(int) in typical *it++ usage.
         // Otherwise this is slightly less efficient because it tests a lead surrogate twice.
@@ -145,14 +171,8 @@ class U16Iterator {
                 U16_IS_SURROGATE_LEAD(c) && p != limit && U16_IS_TRAIL(*p)) {
             ++p;
         }
-        return result;
     }
 
-    // TODO: operator--()
-    // TODO: maybe fused readAndInc()?
-    // TODO: maybe fused decAndRead()?
-
-private:
     // Handle ill-formed UTF-16: One unpaired surrogate.
     CP32 sub(CP32 surrogate) const {
         switch (behavior) {
@@ -231,6 +251,17 @@ int32_t loopIterPlusPlus(std::u16string_view s) {
    }
    return sum;
 }
+
+int32_t loopReadAndInc(std::u16string_view s) {
+   header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
+   int32_t sum = 0;
+   auto iter = range.begin();
+   auto limit = range.end();
+   while (iter != limit) {
+       sum += iter.readAndInc().codePoint;
+   }
+   return sum;
+}
 #endif
 
 }  // namespace U_HEADER_ONLY_NAMESPACE

From c156434b2caf446af0d0ba1d09b70332388112a1 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 2 Jan 2025 13:59:10 -0800
Subject: [PATCH 12/23] readAndInc() for all

---
 icu4c/source/common/unicode/utf16cppiter.h | 70 ++++++++++------------
 1 file changed, 33 insertions(+), 37 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 14f04c0111a3..ae513a3d5b27 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -99,47 +99,58 @@ class U16Iterator {
 public:
     // TODO: make private, make friends
     U16Iterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
-            start(start), p(p), limit(limit) {}
+            start(start), current(p), limit(limit) {}
     // TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
     // Test pointers for == or != but not < or >.
 
     U16Iterator(const U16Iterator &other) = default;
 
-    bool operator==(const U16Iterator &other) const { return p == other.p; }
+    bool operator==(const U16Iterator &other) const { return current == other.current; }
     bool operator!=(const U16Iterator &other) const { return !operator==(other); }
 
     U16OneSeq<Unit16, CP32> operator*() const {
-        // TODO: assert p != limit -- more precisely: start <= p < limit
-        // Similar to U16_NEXT_OR_FFFD().
-        CP32 c = *p;
-        if (!U16_IS_SURROGATE(c)) {
-            return {c, 1, true, p};
-        } else {
-            uint16_t c2;
-            if (U16_IS_SURROGATE_LEAD(c) && (p + 1) != limit && U16_IS_TRAIL(c2 = p[1])) {
-                c = U16_GET_SUPPLEMENTARY(c, c2);
-                return {c, 2, true, p};
-            } else {
-                return {sub(c), 1, false, p};
-            }
-        }
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        const Unit16 *p = current;
+        return readAndInc(p);
     }
 
     U16Iterator &operator++() {  // pre-increment
-        // TODO: assert p != limit -- more precisely: start <= p < limit
-        inc();
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        readAndInc(current);
         return *this;
     }
 
     U16Iterator operator++(int) {  // post-increment
-        // TODO: assert p != limit -- more precisely: start <= p < limit
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
         U16Iterator result(*this);
-        inc();
+        readAndInc(current);
         return result;
     }
 
-    // Fused/optimized *iter++
+    // Explicitly fused/optimized *iter++
     U16OneSeq<Unit16, CP32> readAndInc() {
+        return readAndInc(current);
+    }
+
+    // Same as pre-increment operator++() but slightly faster if used by itself.
+    // operator++() should be used together with operator*() for best compiler optimization.
+    U16Iterator &inc() {
+        // TODO: assert current != limit -- more precisely: start <= current < limit
+        // Very similar to U16_FWD_1().
+        if (U16_IS_LEAD(*current++) && current != limit && U16_IS_TRAIL(*current)) {
+            ++current;
+        }
+        return *this;
+    }
+
+    // TODO: operator--()
+    // TODO: maybe fused decAndRead()?
+
+private:
+    U16OneSeq<Unit16, CP32> readAndInc(const Unit16 *&p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Very similar to U16_NEXT_OR_FFFD().
         const Unit16 *p0 = p;
@@ -158,21 +169,6 @@ class U16Iterator {
         }
     }
 
-    // TODO: operator--()
-    // TODO: maybe fused decAndRead()?
-
-private:
-    void inc() {
-        // More similar to U16_NEXT_OR_FFFD() than U16_FWD_1() to try to help the compiler
-        // amortize work between operator*() and operator++(int) in typical *it++ usage.
-        // Otherwise this is slightly less efficient because it tests a lead surrogate twice.
-        CP32 c = *p++;
-        if (U16_IS_SURROGATE(c) &&
-                U16_IS_SURROGATE_LEAD(c) && p != limit && U16_IS_TRAIL(*p)) {
-            ++p;
-        }
-    }
-
     // Handle ill-formed UTF-16: One unpaired surrogate.
     CP32 sub(CP32 surrogate) const {
         switch (behavior) {
@@ -185,7 +181,7 @@ class U16Iterator {
     // In a validating iterator, we need start & limit so that when we read a code point
     // (forward or backward) we can test if there are enough code units.
     const Unit16 *const start;
-    const Unit16 *p;
+    const Unit16 *current;
     const Unit16 *const limit;
 };
 

From e0cf8f7a6bce488b1f104d00cdd251b82356cf1f Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 2 Jan 2025 14:55:05 -0800
Subject: [PATCH 13/23] bidirectional

---
 icu4c/source/common/unicode/utf16cppiter.h | 46 ++++++++++++++++++++--
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index ae513a3d5b27..dea877887ab0 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -85,7 +85,7 @@ struct U16OneSeq {
 };
 
 /**
- * Validating iterator over the code points in a Unicode 16-bit string.
+ * Validating, bidirectional iterator over the code points in a Unicode 16-bit string.
  *
  * TODO: check doxygen syntax for template parameters
  * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
@@ -130,6 +130,16 @@ class U16Iterator {
         return result;
     }
 
+    U16Iterator &operator--() {  // pre-decrement
+        return dec();
+    }
+
+    U16Iterator operator--(int) {  // post-decrement
+        U16Iterator result(*this);
+        dec();
+        return result;
+    }
+
     // Explicitly fused/optimized *iter++
     U16OneSeq<Unit16, CP32> readAndInc() {
         return readAndInc(current);
@@ -146,8 +156,20 @@ class U16Iterator {
         return *this;
     }
 
-    // TODO: operator--()
-    // TODO: maybe fused decAndRead()?
+    // Explicitly fused/optimized *--iter
+    U16OneSeq<Unit16, CP32> decAndRead() {
+        return decAndRead(current);
+    }
+
+    // Same as pre-decrement operator--(), for API symmetry.
+    U16Iterator &dec() {
+        // TODO: assert current != limit -- more precisely: start <= current < limit
+        // Very similar to U16_BACK_1().
+        if (U16_IS_TRAIL(*(--current)) && current != start && U16_IS_LEAD(*(current - 1))) {
+            --current;
+        }
+        return *this;
+    }
 
 private:
     U16OneSeq<Unit16, CP32> readAndInc(const Unit16 *&p) const {
@@ -169,6 +191,24 @@ class U16Iterator {
         }
     }
 
+    U16OneSeq<Unit16, CP32> decAndRead(const Unit16 *&p) const {
+        // TODO: assert p != limit -- more precisely: start <= p < limit
+        // Very similar to U16_PREV_OR_FFFD().
+        CP32 c = *--p;
+        if (!U16_IS_SURROGATE(c)) {
+            return {c, 1, true, p};
+        } else {
+            uint16_t c2;
+            if (U16_IS_SURROGATE_TRAIL(c) && p != start && U16_IS_LEAD(c2 = *(p - 1))) {
+                --p;
+                c = U16_GET_SUPPLEMENTARY(c2, c);
+                return {c, 2, true, p};
+            } else {
+                return {sub(c), 1, false, p};
+            }
+        }
+    }
+
     // Handle ill-formed UTF-16: One unpaired surrogate.
     CP32 sub(CP32 surrogate) const {
         switch (behavior) {

From ca4787eeffa2a6c248d126a36295c16999e0f8d0 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 2 Jan 2025 15:54:08 -0800
Subject: [PATCH 14/23] efficient rbegin() & rend()

---
 icu4c/source/common/unicode/utf16cppiter.h | 235 +++++++++++++++------
 1 file changed, 169 insertions(+), 66 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index dea877887ab0..c42c4f1823a3 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -34,6 +34,7 @@ namespace header {}
 #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
 #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
 #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
 #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
 #define U16_GET_SUPPLEMENTARY(lead, trail) \
     (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
@@ -85,93 +86,43 @@ struct U16OneSeq {
 };
 
 /**
- * Validating, bidirectional iterator over the code points in a Unicode 16-bit string.
- *
- * TODO: check doxygen syntax for template parameters
- * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
- * @param CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
- *             should be signed if U16_BEHAVIOR_NEGATIVE
- * @param U16IllFormedBehavior TODO
- * @draft ICU 77
+ * Internal base class for public U16Iterator & U16ReverseIterator.
+ * Not intended for public subclassing.
+ * @internal
  */
 template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
-class U16Iterator {
-public:
-    // TODO: make private, make friends
-    U16Iterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
+class U16IteratorBase {
+protected:
+    // @internal
+    U16IteratorBase(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
             start(start), current(p), limit(limit) {}
     // TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
     // Test pointers for == or != but not < or >.
 
-    U16Iterator(const U16Iterator &other) = default;
-
-    bool operator==(const U16Iterator &other) const { return current == other.current; }
-    bool operator!=(const U16Iterator &other) const { return !operator==(other); }
-
-    U16OneSeq<Unit16, CP32> operator*() const {
-        // Call the same function in both operator*() and operator++() so that an
-        // optimizing compiler can easily eliminate redundant work when alternating between the two.
-        const Unit16 *p = current;
-        return readAndInc(p);
-    }
-
-    U16Iterator &operator++() {  // pre-increment
-        // Call the same function in both operator*() and operator++() so that an
-        // optimizing compiler can easily eliminate redundant work when alternating between the two.
-        readAndInc(current);
-        return *this;
-    }
-
-    U16Iterator operator++(int) {  // post-increment
-        // Call the same function in both operator*() and operator++() so that an
-        // optimizing compiler can easily eliminate redundant work when alternating between the two.
-        U16Iterator result(*this);
-        readAndInc(current);
-        return result;
-    }
-
-    U16Iterator &operator--() {  // pre-decrement
-        return dec();
-    }
-
-    U16Iterator operator--(int) {  // post-decrement
-        U16Iterator result(*this);
-        dec();
-        return result;
-    }
-
-    // Explicitly fused/optimized *iter++
-    U16OneSeq<Unit16, CP32> readAndInc() {
-        return readAndInc(current);
-    }
+    // @internal
+    bool operator==(const U16IteratorBase &other) const { return current == other.current; }
+    // @internal
+    bool operator!=(const U16IteratorBase &other) const { return !operator==(other); }
 
-    // Same as pre-increment operator++() but slightly faster if used by itself.
-    // operator++() should be used together with operator*() for best compiler optimization.
-    U16Iterator &inc() {
+    // @internal
+    void inc() {
         // TODO: assert current != limit -- more precisely: start <= current < limit
         // Very similar to U16_FWD_1().
         if (U16_IS_LEAD(*current++) && current != limit && U16_IS_TRAIL(*current)) {
             ++current;
         }
-        return *this;
     }
 
-    // Explicitly fused/optimized *--iter
-    U16OneSeq<Unit16, CP32> decAndRead() {
-        return decAndRead(current);
-    }
-
-    // Same as pre-decrement operator--(), for API symmetry.
-    U16Iterator &dec() {
+    // @internal
+    void dec() {
         // TODO: assert current != limit -- more precisely: start <= current < limit
         // Very similar to U16_BACK_1().
         if (U16_IS_TRAIL(*(--current)) && current != start && U16_IS_LEAD(*(current - 1))) {
             --current;
         }
-        return *this;
     }
 
-private:
+    // @internal
     U16OneSeq<Unit16, CP32> readAndInc(const Unit16 *&p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Very similar to U16_NEXT_OR_FFFD().
@@ -191,6 +142,7 @@ class U16Iterator {
         }
     }
 
+    // @internal
     U16OneSeq<Unit16, CP32> decAndRead(const Unit16 *&p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Very similar to U16_PREV_OR_FFFD().
@@ -210,6 +162,7 @@ class U16Iterator {
     }
 
     // Handle ill-formed UTF-16: One unpaired surrogate.
+    // @internal
     CP32 sub(CP32 surrogate) const {
         switch (behavior) {
             case U16_BEHAVIOR_NEGATIVE: return U_SENTINEL;
@@ -220,11 +173,141 @@ class U16Iterator {
 
     // In a validating iterator, we need start & limit so that when we read a code point
     // (forward or backward) we can test if there are enough code units.
+    // @internal
     const Unit16 *const start;
+    // @internal
     const Unit16 *current;
+    // @internal
     const Unit16 *const limit;
 };
 
+/**
+ * Validating bidirectional iterator over the code points in a Unicode 16-bit string.
+ *
+ * TODO: check doxygen syntax for template parameters
+ * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @param CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *             should be signed if U16_BEHAVIOR_NEGATIVE
+ * @param U16IllFormedBehavior TODO
+ * @draft ICU 77
+ */
+template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
+class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
+    // FYI: We need to qualify all accesses to super class members because of private inheritance.
+    using Super = U16IteratorBase<Unit16, CP32, behavior>;
+public:
+    // TODO: make private, make friends
+    U16Iterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
+            Super(start, p, limit) {}
+
+    U16Iterator(const U16Iterator &other) = default;
+
+    bool operator==(const U16Iterator &other) const { return Super::operator==(other); }
+    bool operator!=(const U16Iterator &other) const { return !Super::operator==(other); }
+
+    U16OneSeq<Unit16, CP32> operator*() const {
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        const Unit16 *p = Super::current;
+        return Super::readAndInc(p);
+    }
+
+    U16Iterator &operator++() {  // pre-increment
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        Super::readAndInc(Super::current);
+        return *this;
+    }
+
+    U16Iterator operator++(int) {  // post-increment
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        U16Iterator result(*this);
+        Super::readAndInc(Super::current);
+        return result;
+    }
+
+    U16Iterator &operator--() {  // pre-decrement: step back over one code point
+        Super::dec(); return *this;  // base dec() returns void; one line keeps the hunk line count
+    }
+
+    U16Iterator operator--(int) {  // post-decrement
+        U16Iterator result(*this);
+        Super::dec();
+        return result;
+    }
+
+    // Same as pre-increment operator++() but slightly faster if used by itself.
+    // operator++() should be used together with operator*() for best compiler optimization.
+    U16Iterator &inc() {
+        Super::inc();
+        return *this;
+    }
+
+    // Same as pre-decrement operator--(), for API symmetry.
+    U16Iterator &dec() {
+        Super::dec();
+        return *this;
+    }
+
+    // Explicitly fused/optimized *iter++
+    U16OneSeq<Unit16, CP32> readAndInc() {
+        return Super::readAndInc(Super::current);
+    }
+
+    // Explicitly fused/optimized *--iter
+    U16OneSeq<Unit16, CP32> decAndRead() {
+        return Super::decAndRead(Super::current);
+    }
+};
+
+/**
+ * Validating reverse iterator over the code points in a Unicode 16-bit string.
+ * Not bidirectional, but optimized for reverse iteration.
+ *
+ * TODO: check doxygen syntax for template parameters
+ * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @param CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *             should be signed if U16_BEHAVIOR_NEGATIVE
+ * @param U16IllFormedBehavior TODO
+ * @draft ICU 77
+ */
+template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
+class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
+    using Super = U16IteratorBase<Unit16, CP32, behavior>;
+public:
+    // TODO: make private, make friends
+    U16ReverseIterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
+            Super(start, p, limit) {}
+
+    U16ReverseIterator(const U16ReverseIterator &other) = default;
+
+    bool operator==(const U16ReverseIterator &other) const { return Super::operator==(other); }
+    bool operator!=(const U16ReverseIterator &other) const { return !Super::operator==(other); }
+
+    U16OneSeq<Unit16, CP32> operator*() const {
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        const Unit16 *p = Super::current;
+        return Super::decAndRead(p);
+    }
+
+    U16ReverseIterator &operator++() {  // pre-increment
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        Super::decAndRead(Super::current);
+        return *this;
+    }
+
+    U16ReverseIterator operator++(int) {  // post-increment
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        U16ReverseIterator result(*this);
+        Super::decAndRead(Super::current);
+        return result;
+    }
+};
+
 /**
  * A C++ "range" for iterating over all of the code points of a 16-bit Unicode string.
  *
@@ -254,6 +337,17 @@ class U16StringCodePoints {
         return {s.data(), limit, limit};
     }
 
+    /** @return a reverse iterator that starts at the end of the string. @draft ICU 77 */
+    U16ReverseIterator<Unit16, CP32, behavior> rbegin() const {
+        const Unit16 *limit = s.data() + s.length();
+        return {s.data(), limit, limit};  // {start, current, limit}
+    }
+
+    /** @return the reverse-iteration end marker, positioned at the start of the string. @draft ICU 77 */
+    U16ReverseIterator<Unit16, CP32, behavior> rend() const {
+        return {s.data(), s.data(), s.data() + s.length()};  // {start, current, limit}
+    }
+
 private:
     std::basic_string_view<Unit16> s;
 };
@@ -298,6 +392,15 @@ int32_t loopReadAndInc(std::u16string_view s) {
    }
    return sum;
 }
+
+int32_t reverseLoop(std::u16string_view s) {
+   header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
+   int32_t sum = 0;
+   for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
+       sum += (*iter).codePoint;
+   }
+   return sum;
+}
 #endif
 
 }  // namespace U_HEADER_ONLY_NAMESPACE

From a24b710f336243852c21495e74ea97ceba53f4db Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 2 Jan 2025 16:11:26 -0800
Subject: [PATCH 15/23] doxygen tparam

---
 icu4c/source/common/unicode/utf16cppiter.h | 30 +++++++++++-----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index c42c4f1823a3..7b0870d00d08 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -64,10 +64,9 @@ enum U16IllFormedBehavior {
  * A code unit sequence for one code point returned by U16Iterator.
  * TODO: Share with UTF-8?
  *
- * TODO: check doxygen syntax for template parameters
- * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
- * @param CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
- *             should be signed if U16_BEHAVIOR_NEGATIVE
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U16_BEHAVIOR_NEGATIVE
  * @draft ICU 77
  */
 template<typename Unit16, typename CP32>
@@ -184,11 +183,10 @@ class U16IteratorBase {
 /**
  * Validating bidirectional iterator over the code points in a Unicode 16-bit string.
  *
- * TODO: check doxygen syntax for template parameters
- * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
- * @param CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
- *             should be signed if U16_BEHAVIOR_NEGATIVE
- * @param U16IllFormedBehavior TODO
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U16_BEHAVIOR_NEGATIVE
+ * @tparam U16IllFormedBehavior TODO
  * @draft ICU 77
  */
 template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
@@ -265,11 +263,10 @@ class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
  * Validating reverse iterator over the code points in a Unicode 16-bit string.
  * Not bidirectional, but optimized for reverse iteration.
  *
- * TODO: check doxygen syntax for template parameters
- * @param Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
- * @param CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
- *             should be signed if U16_BEHAVIOR_NEGATIVE
- * @param U16IllFormedBehavior TODO
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U16_BEHAVIOR_NEGATIVE
+ * @tparam U16IllFormedBehavior TODO
  * @draft ICU 77
  */
 template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
@@ -311,7 +308,10 @@ class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
 /**
  * A C++ "range" for iterating over all of the code points of a 16-bit Unicode string.
  *
- * @return a code point iterator.
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U16_BEHAVIOR_NEGATIVE
+ * @tparam U16IllFormedBehavior TODO
  * @draft ICU 77
  */
 template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>

From 633fafafda122c9d49f226a6052f65bd246b704c Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 6 Jan 2025 14:39:37 -0800
Subject: [PATCH 16/23] remove non-standard iter API

---
 icu4c/source/common/unicode/utf16cppiter.h | 52 ----------------------
 1 file changed, 52 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 7b0870d00d08..e9c58d2ab0b0 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -103,24 +103,6 @@ class U16IteratorBase {
     // @internal
     bool operator!=(const U16IteratorBase &other) const { return !operator==(other); }
 
-    // @internal
-    void inc() {
-        // TODO: assert current != limit -- more precisely: start <= current < limit
-        // Very similar to U16_FWD_1().
-        if (U16_IS_LEAD(*current++) && current != limit && U16_IS_TRAIL(*current)) {
-            ++current;
-        }
-    }
-
-    // @internal
-    void dec() {
-        // TODO: assert current != limit -- more precisely: start <= current < limit
-        // Very similar to U16_BACK_1().
-        if (U16_IS_TRAIL(*(--current)) && current != start && U16_IS_LEAD(*(current - 1))) {
-            --current;
-        }
-    }
-
     // @internal
     U16OneSeq<Unit16, CP32> readAndInc(const Unit16 *&p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
@@ -234,29 +216,6 @@ class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
         Super::dec();
         return result;
     }
-
-    // Same as pre-increment operator++() but slightly faster if used by itself.
-    // operator++() should be used together with operator*() for best compiler optimization.
-    U16Iterator &inc() {
-        Super::inc();
-        return *this;
-    }
-
-    // Same as pre-decrement operator--(), for API symmetry.
-    U16Iterator &dec() {
-        Super::dec();
-        return *this;
-    }
-
-    // Explicitly fused/optimized *iter++
-    U16OneSeq<Unit16, CP32> readAndInc() {
-        return Super::readAndInc(Super::current);
-    }
-
-    // Explicitly fused/optimized *--iter
-    U16OneSeq<Unit16, CP32> decAndRead() {
-        return Super::decAndRead(Super::current);
-    }
 };
 
 /**
@@ -382,17 +341,6 @@ int32_t loopIterPlusPlus(std::u16string_view s) {
    return sum;
 }
 
-int32_t loopReadAndInc(std::u16string_view s) {
-   header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
-   int32_t sum = 0;
-   auto iter = range.begin();
-   auto limit = range.end();
-   while (iter != limit) {
-       sum += iter.readAndInc().codePoint;
-   }
-   return sum;
-}
-
 int32_t reverseLoop(std::u16string_view s) {
    header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
    int32_t sum = 0;

From 70ef2fa0d37f4a75315d78fab8248d45cca8a4cf Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 6 Jan 2025 14:48:30 -0800
Subject: [PATCH 17/23] C enum UIllFormedBehavior will be shared with 8-bit

---
 icu4c/source/common/unicode/utf16cppiter.h    | 53 +++++++++----------
 icu4c/source/test/intltest/utfcppitertest.cpp | 11 ++--
 2 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index e9c58d2ab0b0..3c60f461b954 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -47,18 +47,15 @@ namespace header {}
 
 #ifndef U_HIDE_DRAFT_API
 
-namespace U_HEADER_ONLY_NAMESPACE {
+// Some defined behaviors for handling ill-formed Unicode strings.
+// TODO: For 8-bit strings, the SURROGATE option does not have an equivalent -- static_assert.
+typedef enum UIllFormedBehavior {
+    U_BEHAVIOR_NEGATIVE,
+    U_BEHAVIOR_FFFD,
+    U_BEHAVIOR_SURROGATE
+} UIllFormedBehavior;
 
-// Some defined behaviors for handling ill-formed 16-bit strings.
-// TODO: Maybe share with 8-bit strings, but the SURROGATE option does not have an equivalent there.
-//
-// TODO: A possible alternative to an enum might be some kind of function template
-// which would be fully customizable.
-enum U16IllFormedBehavior {
-    U16_BEHAVIOR_NEGATIVE,
-    U16_BEHAVIOR_FFFD,
-    U16_BEHAVIOR_SURROGATE
-};
+namespace U_HEADER_ONLY_NAMESPACE {
 
 /**
  * A code unit sequence for one code point returned by U16Iterator.
@@ -66,7 +63,7 @@ enum U16IllFormedBehavior {
  *
  * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
- *              should be signed if U16_BEHAVIOR_NEGATIVE
+ *              should be signed if U_BEHAVIOR_NEGATIVE
  * @draft ICU 77
  */
 template<typename Unit16, typename CP32>
@@ -89,7 +86,7 @@ struct U16OneSeq {
  * Not intended for public subclassing.
  * @internal
  */
-template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
+template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
 class U16IteratorBase {
 protected:
     // @internal
@@ -146,9 +143,9 @@ class U16IteratorBase {
     // @internal
     CP32 sub(CP32 surrogate) const {
         switch (behavior) {
-            case U16_BEHAVIOR_NEGATIVE: return U_SENTINEL;
-            case U16_BEHAVIOR_FFFD: return 0xfffd;
-            case U16_BEHAVIOR_SURROGATE: return surrogate;
+            case U_BEHAVIOR_NEGATIVE: return U_SENTINEL;
+            case U_BEHAVIOR_FFFD: return 0xfffd;
+            case U_BEHAVIOR_SURROGATE: return surrogate;
         }
     }
 
@@ -167,11 +164,11 @@ class U16IteratorBase {
  *
  * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
- *              should be signed if U16_BEHAVIOR_NEGATIVE
- * @tparam U16IllFormedBehavior TODO
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @tparam UIllFormedBehavior TODO
  * @draft ICU 77
  */
-template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
+template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
 class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
     // FYI: We need to qualify all accesses to super class members because of private inheritance.
     using Super = U16IteratorBase<Unit16, CP32, behavior>;
@@ -224,11 +221,11 @@ class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
  *
  * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
- *              should be signed if U16_BEHAVIOR_NEGATIVE
- * @tparam U16IllFormedBehavior TODO
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @tparam UIllFormedBehavior TODO
  * @draft ICU 77
  */
-template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
+template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
 class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
     using Super = U16IteratorBase<Unit16, CP32, behavior>;
 public:
@@ -269,11 +266,11 @@ class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
  *
  * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
- *              should be signed if U16_BEHAVIOR_NEGATIVE
- * @tparam U16IllFormedBehavior TODO
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @tparam UIllFormedBehavior TODO
  * @draft ICU 77
  */
-template<typename Unit16, typename CP32, U16IllFormedBehavior behavior>
+template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
 class U16StringCodePoints {
 public:
     /**
@@ -322,7 +319,7 @@ class U16StringCodePoints {
 // TODO: remove experimental sample code
 #ifndef UTYPES_H
 int32_t rangeLoop(std::u16string_view s) {
-   header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
+   header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
    int32_t sum = 0;
    for (auto seq : range) {
        sum += seq.codePoint;
@@ -331,7 +328,7 @@ int32_t rangeLoop(std::u16string_view s) {
 }
 
 int32_t loopIterPlusPlus(std::u16string_view s) {
-   header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
+   header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
    int32_t sum = 0;
    auto iter = range.begin();
    auto limit = range.end();
@@ -342,7 +339,7 @@ int32_t loopIterPlusPlus(std::u16string_view s) {
 }
 
 int32_t reverseLoop(std::u16string_view s) {
-   header::U16StringCodePoints<char16_t, UChar32, header::U16_BEHAVIOR_NEGATIVE> range(s);
+   header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
    int32_t sum = 0;
    for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
        sum += (*iter).codePoint;
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index 15db94e613b0..9fc7c1ab5969 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -20,9 +20,6 @@
 // https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
 using namespace std::string_view_literals;
 
-using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_NEGATIVE;
-using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_FFFD;
-using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_SURROGATE;
 using U_HEADER_ONLY_NAMESPACE::U16Iterator;
 using U_HEADER_ONLY_NAMESPACE::U16OneSeq;
 using U_HEADER_ONLY_NAMESPACE::U16StringCodePoints;
@@ -58,7 +55,7 @@ void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
 void U16IteratorTest::testGood() {
     IcuTestErrorCode errorCode(*this, "testGood");
     std::u16string_view good(u"abçカ🚴"sv);
-    U16StringCodePoints<char16_t, UChar32, U16_BEHAVIOR_NEGATIVE> range(good);
+    U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
     auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
@@ -83,7 +80,7 @@ void U16IteratorTest::testNegative() {
     IcuTestErrorCode errorCode(*this, "testNegative");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    U16StringCodePoints<char16_t, UChar32, U16_BEHAVIOR_NEGATIVE> range(bad);
+    U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(bad);
     auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
@@ -107,7 +104,7 @@ void U16IteratorTest::testFFFD() {
     IcuTestErrorCode errorCode(*this, "testFFFD");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    U16StringCodePoints<char16_t, char32_t, U16_BEHAVIOR_FFFD> range(bad);
+    U16StringCodePoints<char16_t, char32_t, U_BEHAVIOR_FFFD> range(bad);
     auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment
@@ -131,7 +128,7 @@ void U16IteratorTest::testSurrogate() {
     IcuTestErrorCode errorCode(*this, "testSurrogate");
     static const char16_t badChars[] = { u'a', 0xd900, u'b', 0xdc05, u'ç' };
     std::u16string_view bad(badChars, 5);
-    U16StringCodePoints<char16_t, uint32_t, U16_BEHAVIOR_SURROGATE> range(bad);
+    U16StringCodePoints<char16_t, uint32_t, U_BEHAVIOR_SURROGATE> range(bad);
     auto iter = range.begin();
     assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
     ++iter;  // pre-increment

From da93999f6d0bac632e0910c033299be37b510f6d Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 6 Jan 2025 14:56:42 -0800
Subject: [PATCH 18/23] CodeUnits result will be shared with 8-bit

---
 icu4c/source/common/unicode/utf16cppiter.h    | 30 +++++++++++--------
 icu4c/source/test/intltest/utfcppitertest.cpp |  1 -
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 3c60f461b954..94d217abdb11 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -58,24 +58,25 @@ typedef enum UIllFormedBehavior {
 namespace U_HEADER_ONLY_NAMESPACE {
 
 /**
- * A code unit sequence for one code point returned by U16Iterator.
- * TODO: Share with UTF-8?
+ * Result of decoding a minimal Unicode code unit sequence.
  *
- * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam Unit Code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
  *              should be signed if U_BEHAVIOR_NEGATIVE
  * @draft ICU 77
  */
-template<typename Unit16, typename CP32>
-struct U16OneSeq {
+template<typename Unit, typename CP32>
+struct CodeUnits {
     // Order of fields with padding and access frequency in mind.
     CP32 codePoint = 0;
     uint8_t length = 0;
     bool isWellFormed = false;
-    const Unit16 *data;
+    const Unit *data;
 
-    std::basic_string_view<Unit16> stringView() const {
-        return std::basic_string_view<Unit16>(data, length);
+    std::basic_string_view<Unit> stringView() const {
+        return std::basic_string_view<Unit>(data, length);
     }
 
     // TODO: std::optional<CP32> maybeCodePoint() const ? (nullopt if !isWellFormed)
@@ -84,6 +85,11 @@ struct U16OneSeq {
 /**
  * Internal base class for public U16Iterator & U16ReverseIterator.
  * Not intended for public subclassing.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @tparam UIllFormedBehavior TODO
  * @internal
  */
 template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
@@ -101,7 +107,7 @@ class U16IteratorBase {
     bool operator!=(const U16IteratorBase &other) const { return !operator==(other); }
 
     // @internal
-    U16OneSeq<Unit16, CP32> readAndInc(const Unit16 *&p) const {
+    CodeUnits<Unit16, CP32> readAndInc(const Unit16 *&p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Very similar to U16_NEXT_OR_FFFD().
         const Unit16 *p0 = p;
@@ -121,7 +127,7 @@ class U16IteratorBase {
     }
 
     // @internal
-    U16OneSeq<Unit16, CP32> decAndRead(const Unit16 *&p) const {
+    CodeUnits<Unit16, CP32> decAndRead(const Unit16 *&p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Very similar to U16_PREV_OR_FFFD().
         CP32 c = *--p;
@@ -182,7 +188,7 @@ class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
     bool operator==(const U16Iterator &other) const { return Super::operator==(other); }
     bool operator!=(const U16Iterator &other) const { return !Super::operator==(other); }
 
-    U16OneSeq<Unit16, CP32> operator*() const {
+    CodeUnits<Unit16, CP32> operator*() const {
         // Call the same function in both operator*() and operator++() so that an
         // optimizing compiler can easily eliminate redundant work when alternating between the two.
         const Unit16 *p = Super::current;
@@ -238,7 +244,7 @@ class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
     bool operator==(const U16ReverseIterator &other) const { return Super::operator==(other); }
     bool operator!=(const U16ReverseIterator &other) const { return !Super::operator==(other); }
 
-    U16OneSeq<Unit16, CP32> operator*() const {
+    CodeUnits<Unit16, CP32> operator*() const {
         // Call the same function in both operator*() and operator++() so that an
         // optimizing compiler can easily eliminate redundant work when alternating between the two.
         const Unit16 *p = Super::current;
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index 9fc7c1ab5969..cbb9c9728ba7 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -21,7 +21,6 @@
 using namespace std::string_view_literals;
 
 using U_HEADER_ONLY_NAMESPACE::U16Iterator;
-using U_HEADER_ONLY_NAMESPACE::U16OneSeq;
 using U_HEADER_ONLY_NAMESPACE::U16StringCodePoints;
 
 class U16IteratorTest : public IntlTest {

From 5c6e1a6a76cf8da739dc5a54daf9a59d042c8555 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 6 Jan 2025 15:16:35 -0800
Subject: [PATCH 19/23] CodeUnits: getters / private fields

---
 icu4c/source/common/unicode/utf16cppiter.h    | 41 ++++++---
 icu4c/source/test/intltest/utfcppitertest.cpp | 86 +++++++++----------
 2 files changed, 72 insertions(+), 55 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 94d217abdb11..529bc844d70d 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -68,18 +68,35 @@ namespace U_HEADER_ONLY_NAMESPACE {
  * @draft ICU 77
  */
 template<typename Unit, typename CP32>
-struct CodeUnits {
-    // Order of fields with padding and access frequency in mind.
-    CP32 codePoint = 0;
-    uint8_t length = 0;
-    bool isWellFormed = false;
-    const Unit *data;
+class CodeUnits {
+public:
+    // @internal
+    CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed, const Unit *data) :
+            c(codePoint), len(length), ok(wellFormed), p(data) {}
+
+    CodeUnits(const CodeUnits &other) = default;
+    CodeUnits &operator=(const CodeUnits &other) = default;
+
+    CP32 codePoint() const { return c; }  // in the caller-selected CP32 type, not always UChar32
+
+    bool wellFormed() const { return ok; }
+
+    const Unit *data() const { return p; }  // first code unit of the sequence
+
+    int32_t length() const { return len; }  // number of code units
 
     std::basic_string_view<Unit> stringView() const {
-        return std::basic_string_view<Unit>(data, length);
+        return std::basic_string_view<Unit>(p, len);
     }
 
-    // TODO: std::optional<CP32> maybeCodePoint() const ? (nullopt if !isWellFormed)
+    // TODO: std::optional<CP32> maybeCodePoint() const ? (nullopt if ill-formed)
+
+private:
+    // Order of fields with padding and access frequency in mind.
+    CP32 c;
+    uint8_t len;
+    bool ok;
+    const Unit *p;
 };
 
 /**
@@ -327,8 +344,8 @@ class U16StringCodePoints {
 int32_t rangeLoop(std::u16string_view s) {
    header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
    int32_t sum = 0;
-   for (auto seq : range) {
-       sum += seq.codePoint;
+   for (auto units : range) {
+       sum += units.codePoint();
    }
    return sum;
 }
@@ -339,7 +356,7 @@ int32_t loopIterPlusPlus(std::u16string_view s) {
    auto iter = range.begin();
    auto limit = range.end();
    while (iter != limit) {
-       sum += (*iter++).codePoint;
+       sum += (*iter++).codePoint();
    }
    return sum;
 }
@@ -348,7 +365,7 @@ int32_t reverseLoop(std::u16string_view s) {
    header::U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(s);
    int32_t sum = 0;
    for (auto iter = range.rbegin(); iter != range.rend(); ++iter) {
-       sum += (*iter).codePoint;
+       sum += (*iter).codePoint();
    }
    return sum;
 }
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index cbb9c9728ba7..16ed3d0c627a 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -56,22 +56,22 @@ void U16IteratorTest::testGood() {
     std::u16string_view good(u"abçカ🚴"sv);
     U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
     auto iter = range.begin();
-    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
+    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
     ++iter;  // pre-increment
-    auto seq = *iter;
-    assertEquals("iter[1] * codePoint", u'b', seq.codePoint);
-    assertEquals("iter[1] * length", 1, seq.length);
-    assertTrue("iter[1] * isWellFormed", seq.isWellFormed);
-    assertTrue("iter[1] * stringView()", seq.stringView() == u"b"sv);
+    auto units = *iter;
+    assertEquals("iter[1] * codePoint", u'b', units.codePoint());
+    assertEquals("iter[1] * length", 1, units.length());
+    assertTrue("iter[1] * wellFormed", units.wellFormed());
+    assertTrue("iter[1] * stringView()", units.stringView() == u"b"sv);
     ++iter;
-    assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint);  // post-increment
-    assertEquals("iter[3] * codePoint", u'カ', (*iter).codePoint);
+    assertEquals("iter[2] * codePoint", u'ç', (*iter++).codePoint());  // post-increment
+    assertEquals("iter[3] * codePoint", u'カ', (*iter).codePoint());
     ++iter;
-    seq = *iter++;
-    assertEquals("iter[4] * codePoint", U'🚴', seq.codePoint);
-    assertEquals("iter[4] * length", 2, seq.length);
-    assertTrue("iter[4] * isWellFormed", seq.isWellFormed);
-    assertTrue("iter[4] * stringView()", seq.stringView() == u"🚴"sv);
+    units = *iter++;
+    assertEquals("iter[4] * codePoint", U'🚴', units.codePoint());
+    assertEquals("iter[4] * length", 2, units.length());
+    assertTrue("iter[4] * wellFormed", units.wellFormed());
+    assertTrue("iter[4] * stringView()", units.stringView() == u"🚴"sv);
     assertTrue("iter == endIter", iter == range.end());
 }
 
@@ -81,20 +81,20 @@ void U16IteratorTest::testNegative() {
     std::u16string_view bad(badChars, 5);
     U16StringCodePoints<char16_t, UChar32, U_BEHAVIOR_NEGATIVE> range(bad);
     auto iter = range.begin();
-    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
+    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
     ++iter;  // pre-increment
-    auto seq = *iter;
-    assertEquals("iter[1] * codePoint", -1, seq.codePoint);
-    assertEquals("iter[1] * length", 1, seq.length);
-    assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
-    auto sv = seq.stringView();
+    auto units = *iter;
+    assertEquals("iter[1] * codePoint", -1, units.codePoint());
+    assertEquals("iter[1] * length", 1, units.length());
+    assertFalse("iter[1] * wellFormed", units.wellFormed());
+    auto sv = units.stringView();
     assertEquals("iter[1] * stringView().length()", 1, sv.length());
     assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
     ++iter;
-    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint);  // post-increment
-    seq = *iter++;  // post-increment
-    assertEquals("iter[3] * codePoint", -1, seq.codePoint);
-    assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
+    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint());  // post-increment
+    units = *iter++;  // post-increment
+    assertEquals("iter[3] * codePoint", -1, units.codePoint());
+    assertFalse("iter[3] * wellFormed", units.wellFormed());
     assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
     assertTrue("iter == endIter", iter == range.end());
 }
@@ -105,20 +105,20 @@ void U16IteratorTest::testFFFD() {
     std::u16string_view bad(badChars, 5);
     U16StringCodePoints<char16_t, char32_t, U_BEHAVIOR_FFFD> range(bad);
     auto iter = range.begin();
-    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
+    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
     ++iter;  // pre-increment
-    auto seq = *iter;
-    assertEquals("iter[1] * codePoint", 0xfffd, seq.codePoint);
-    assertEquals("iter[1] * length", 1, seq.length);
-    assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
-    auto sv = seq.stringView();
+    auto units = *iter;
+    assertEquals("iter[1] * codePoint", 0xfffd, units.codePoint());
+    assertEquals("iter[1] * length", 1, units.length());
+    assertFalse("iter[1] * wellFormed", units.wellFormed());
+    auto sv = units.stringView();
     assertEquals("iter[1] * stringView().length()", 1, sv.length());
     assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
     ++iter;
-    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint);  // post-increment
-    seq = *iter++;  // post-increment
-    assertEquals("iter[3] * codePoint", 0xfffd, seq.codePoint);
-    assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
+    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint());  // post-increment
+    units = *iter++;  // post-increment
+    assertEquals("iter[3] * codePoint", 0xfffd, units.codePoint());
+    assertFalse("iter[3] * wellFormed", units.wellFormed());
     assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
     assertTrue("iter == endIter", iter == range.end());
 }
@@ -129,20 +129,20 @@ void U16IteratorTest::testSurrogate() {
     std::u16string_view bad(badChars, 5);
     U16StringCodePoints<char16_t, uint32_t, U_BEHAVIOR_SURROGATE> range(bad);
     auto iter = range.begin();
-    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint);
+    assertEquals("iter[0] * codePoint", u'a', (*iter).codePoint());
     ++iter;  // pre-increment
-    auto seq = *iter;
-    assertEquals("iter[1] * codePoint", 0xd900, seq.codePoint);
-    assertEquals("iter[1] * length", 1, seq.length);
-    assertFalse("iter[1] * isWellFormed", seq.isWellFormed);
-    auto sv = seq.stringView();
+    auto units = *iter;
+    assertEquals("iter[1] * codePoint", 0xd900, units.codePoint());
+    assertEquals("iter[1] * length", 1, units.length());
+    assertFalse("iter[1] * wellFormed", units.wellFormed());
+    auto sv = units.stringView();
     assertEquals("iter[1] * stringView().length()", 1, sv.length());
     assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
     ++iter;
-    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint);  // post-increment
-    seq = *iter++;  // post-increment
-    assertEquals("iter[3] * codePoint", 0xdc05, seq.codePoint);
-    assertFalse("iter[3] * isWellFormed", seq.isWellFormed);
+    assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint());  // post-increment
+    units = *iter++;  // post-increment
+    assertEquals("iter[3] * codePoint", 0xdc05, units.codePoint());
+    assertFalse("iter[3] * wellFormed", units.wellFormed());
     assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
     assertTrue("iter == endIter", iter == range.end());
 }

From 84dc5f46195f9e22bb0fe307fa197aa16ef78d63 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 6 Jan 2025 16:05:05 -0800
Subject: [PATCH 20/23] unsafe=well-formed iterators

---
 icu4c/source/common/unicode/utf16cppiter.h | 284 ++++++++++++++++++++-
 1 file changed, 277 insertions(+), 7 deletions(-)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 529bc844d70d..777cea6174e4 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -58,7 +58,8 @@ typedef enum UIllFormedBehavior {
 namespace U_HEADER_ONLY_NAMESPACE {
 
 /**
- * Result of decoding a minimal Unicode code unit sequence.
+ * Result of validating and decoding a minimal Unicode code unit sequence.
+ * Returned from validating Unicode string code point iterators.
  *
  * @tparam Unit Code unit type:
  *     UTF-8: char or char8_t or uint8_t;
@@ -99,6 +100,46 @@ class CodeUnits {
     const Unit *p;
 };
 
+/**
+ * Result of decoding a minimal Unicode code unit sequence which must be well-formed.
+ * Returned from non-validating Unicode string code point iterators.
+ *
+ * @tparam Unit Code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template<typename Unit, typename CP32>
+class UnsafeCodeUnits {
+public:
+    // @internal
+    UnsafeCodeUnits(CP32 codePoint, uint8_t length, const Unit *data) :
+            c(codePoint), len(length), p(data) {}
+
+    UnsafeCodeUnits(const UnsafeCodeUnits &other) = default;
+    UnsafeCodeUnits &operator=(const UnsafeCodeUnits &other) = default;
+
+    CP32 codePoint() const { return c; }  // in the caller-selected CP32 type, not always UChar32
+
+    const Unit *data() const { return p; }  // first code unit of the sequence
+
+    int32_t length() const { return len; }  // number of code units
+
+    std::basic_string_view<Unit> stringView() const {
+        return std::basic_string_view<Unit>(p, len);
+    }
+
+    // No wellFormed() or maybeCodePoint() here: the sequence is assumed to be well-formed.
+
+private:
+    // Order of fields with padding and access frequency in mind.
+    CP32 c;
+    uint8_t len;
+    const Unit *p;
+};
+
 /**
  * Internal base class for public U16Iterator & U16ReverseIterator.
  * Not intended for public subclassing.
@@ -118,6 +159,11 @@ class U16IteratorBase {
     // TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
     // Test pointers for == or != but not < or >.
 
+    // @internal
+    U16IteratorBase(const U16IteratorBase &other) = default;
+    // @internal
+    U16IteratorBase &operator=(const U16IteratorBase &other) = default;
+
     // @internal
     bool operator==(const U16IteratorBase &other) const { return current == other.current; }
     // @internal
@@ -201,6 +247,7 @@ class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
             Super(start, p, limit) {}
 
     U16Iterator(const U16Iterator &other) = default;
+    U16Iterator &operator=(const U16Iterator &other) = default;
 
     bool operator==(const U16Iterator &other) const { return Super::operator==(other); }
     bool operator!=(const U16Iterator &other) const { return !Super::operator==(other); }
@@ -257,6 +304,7 @@ class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
             Super(start, p, limit) {}
 
     U16ReverseIterator(const U16ReverseIterator &other) = default;
+    U16ReverseIterator &operator=(const U16ReverseIterator &other) = default;
 
     bool operator==(const U16ReverseIterator &other) const { return Super::operator==(other); }
     bool operator!=(const U16ReverseIterator &other) const { return !Super::operator==(other); }
@@ -285,7 +333,7 @@ class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
 };
 
 /**
- * A C++ "range" for iterating over all of the code points of a 16-bit Unicode string.
+ * A C++ "range" for validating iteration over all of the code points of a 16-bit Unicode string.
  *
  * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
@@ -305,6 +353,9 @@ class U16StringCodePoints {
     /** @draft ICU 77 */
     U16StringCodePoints(const U16StringCodePoints &other) = default;
 
+    /** @draft ICU 77 */
+    U16StringCodePoints &operator=(const U16StringCodePoints &other) = default;
+
     /** @draft ICU 77 */
     U16Iterator<Unit16, CP32, behavior> begin() const {
         return {s.data(), s.data(), s.data() + s.length()};
@@ -333,11 +384,212 @@ class U16StringCodePoints {
 
 // ------------------------------------------------------------------------- ***
 
-// TODO: Non-validating iterator over the code points in a Unicode 16-bit string.
-// Assumes well-formed UTF-16. Otherwise the behavior is undefined.
-// template<typename Unit16>
-// class U16UnsafeIterator
-// TODO: only p, no start, no limit
+/**
+ * Internal base class for public U16UnsafeIterator & U16UnsafeReverseIterator.
+ * Not intended for public subclassing.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @internal
+ */
+template<typename Unit16, typename CP32>
+class U16UnsafeIteratorBase {
+protected:
+    // @internal
+    U16UnsafeIteratorBase(const Unit16 *p) : current(p) {}
+    // Test pointers for == or != but not < or >.
+
+    // @internal
+    U16UnsafeIteratorBase(const U16UnsafeIteratorBase &other) = default;
+    // @internal
+    U16UnsafeIteratorBase &operator=(const U16UnsafeIteratorBase &other) = default;
+
+    // @internal
+    bool operator==(const U16UnsafeIteratorBase &other) const { return current == other.current; }
+    // @internal
+    bool operator!=(const U16UnsafeIteratorBase &other) const { return !operator==(other); }
+
+    // @internal
+    UnsafeCodeUnits<Unit16, CP32> readAndInc(const Unit16 *&p) const {
+        // Very similar to U16_NEXT_UNSAFE().
+        const Unit16 *p0 = p;  // start of the sequence, passed on as the result's data pointer
+        CP32 c = *p++;
+        if (!U16_IS_LEAD(c)) {
+            return {c, 1, p0};  // single code unit
+        } else {
+            c = U16_GET_SUPPLEMENTARY(c, *p++);  // reads the next unit without checking it
+            return {c, 2, p0};  // two code units
+        }
+    }
+
+    // @internal
+    UnsafeCodeUnits<Unit16, CP32> decAndRead(const Unit16 *&p) const {
+        // Very similar to U16_PREV_UNSAFE().
+        CP32 c = *--p;  // p now points at the unit that was just read
+        if (!U16_IS_TRAIL(c)) {
+            return {c, 1, p};  // single code unit
+        } else {
+            c = U16_GET_SUPPLEMENTARY(*--p, c);  // reads the preceding unit without checking it
+            return {c, 2, p};  // p is now the start of the two-unit sequence
+        }
+    }
+
+    // @internal
+    const Unit16 *current;  // iteration position; the only data member of this base class
+};
+
+/**
+ * Non-validating bidirectional iterator over the code points in a UTF-16 string.
+ * The string must be well-formed.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template<typename Unit16, typename CP32>
+class U16UnsafeIterator : private U16UnsafeIteratorBase<Unit16, CP32> {
+    // FYI: We need to qualify all accesses to super class members because of private inheritance.
+    using Super = U16UnsafeIteratorBase<Unit16, CP32>;
+public:
+    // TODO: make private, make friends
+    U16UnsafeIterator(const Unit16 *p) : Super(p) {}
+
+    U16UnsafeIterator(const U16UnsafeIterator &other) = default;
+    U16UnsafeIterator &operator=(const U16UnsafeIterator &other) = default;
+
+    bool operator==(const U16UnsafeIterator &other) const { return Super::operator==(other); }
+    bool operator!=(const U16UnsafeIterator &other) const { return !Super::operator==(other); }
+
+    UnsafeCodeUnits<Unit16, CP32> operator*() const {
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        const Unit16 *p = Super::current;  // decode from a copy so that dereferencing does not move the iterator
+        return Super::readAndInc(p);
+    }
+
+    U16UnsafeIterator &operator++() {  // pre-increment
+        // Call the same function in both operator*() and operator++() so that an
+        // optimizing compiler can easily eliminate redundant work when alternating between the two.
+        Super::readAndInc(Super::current);
+        return *this;
+    }
+
+    U16UnsafeIterator operator++(int) {  // post-increment
+        // Same readAndInc() call as in operator*() and pre-increment, for the same reason.
+        U16UnsafeIterator result(*this);
+        Super::readAndInc(Super::current);
+        return result;
+    }
+
+    U16UnsafeIterator &operator--() {  // pre-decrement
+        Super::dec();  // moves current back by one code point
+        return *this;
+    }
+
+    U16UnsafeIterator operator--(int) {  // post-decrement
+        U16UnsafeIterator result(*this);
+        Super::dec();
+        return result;
+    }
+};
+
+/**
+ * Non-validating reverse iterator over the code points in a UTF-16 string.
+ * Not bidirectional, but optimized for reverse iteration.
+ * The string must be well-formed.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template<typename Unit16, typename CP32>
+class U16UnsafeReverseIterator : private U16UnsafeIteratorBase<Unit16, CP32> {
+    using Super = U16UnsafeIteratorBase<Unit16, CP32>;
+public:
+    // TODO: make private, make friends
+    U16UnsafeReverseIterator(const Unit16 *p) : Super(p) {}
+
+    U16UnsafeReverseIterator(const U16UnsafeReverseIterator &other) = default;
+    U16UnsafeReverseIterator &operator=(const U16UnsafeReverseIterator &other) = default;
+
+    bool operator==(const U16UnsafeReverseIterator &other) const { return Super::operator==(other); }
+    bool operator!=(const U16UnsafeReverseIterator &other) const { return Super::operator!=(other); }
+
+    // Decodes the code point that ends just before the current position.
+    // Uses decAndRead(), like operator++(), so that an optimizing compiler
+    // can merge the work when a caller alternates between the two operators.
+    UnsafeCodeUnits<Unit16, CP32> operator*() const {
+        const Unit16 *pos = Super::current;  // scratch copy; the iterator itself does not move
+        return Super::decAndRead(pos);
+    }
+
+    // Pre-increment: moves to the previous code point in the string.
+    // The decoded result of decAndRead() is not needed here.
+    U16UnsafeReverseIterator &operator++() {
+        Super::decAndRead(Super::current);
+        return *this;
+    }
+
+    // Post-increment: returns the iterator as it was before moving.
+    U16UnsafeReverseIterator operator++(int) {
+        U16UnsafeReverseIterator before(*this);
+        ++*this;  // same decAndRead() step as pre-increment
+        return before;
+    }
+};
+
+/**
+ * A C++ "range" for non-validating iteration over all of the code points of a UTF-16 string.
+ * The string must be well-formed.
+ *
+ * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if U_BEHAVIOR_NEGATIVE
+ * @draft ICU 77
+ */
+template<typename Unit16, typename CP32>
+class U16UnsafeStringCodePoints {
+public:
+    /**
+     * Constructs a C++ "range" object over the code points in the string.
+     * @draft ICU 77
+     */
+    U16UnsafeStringCodePoints(std::basic_string_view<Unit16> s) : s(s) {}
+
+    /** @draft ICU 77 */
+    U16UnsafeStringCodePoints(const U16UnsafeStringCodePoints &other) = default;
+
+    /** @draft ICU 77 */
+    U16UnsafeStringCodePoints &operator=(const U16UnsafeStringCodePoints &other) = default;
+
+    /**
+     * Forward iteration runs from the first code unit of the string to its end.
+     * @draft ICU 77
+     */
+    U16UnsafeIterator<Unit16, CP32> begin() const { return {s.data()}; }
+
+    /** @draft ICU 77 */
+    U16UnsafeIterator<Unit16, CP32> end() const { return {s.data() + s.size()}; }
+
+    /**
+     * Reverse iteration runs from the end of the string back to its first code unit.
+     * @draft ICU 77
+     */
+    U16UnsafeReverseIterator<Unit16, CP32> rbegin() const { return {s.data() + s.size()}; }
+
+    /** @draft ICU 77 */
+    U16UnsafeReverseIterator<Unit16, CP32> rend() const { return {s.data()}; }
+
+private:
+    std::basic_string_view<Unit16> s;
+};
+
+// ------------------------------------------------------------------------- ***
+
+// TODO: UTF-8
 
 // TODO: remove experimental sample code
 #ifndef UTYPES_H
@@ -369,6 +621,24 @@ int32_t reverseLoop(std::u16string_view s) {
    }
    return sum;
 }
+
+int32_t unsafeRangeLoop(std::u16string_view s) {
+   // Sample: adds up all code points of a well-formed string with a range-based for loop.
+   int32_t total = 0;
+   for (const auto &units : header::U16UnsafeStringCodePoints<char16_t, UChar32>(s)) {
+       total += units.codePoint();
+   }
+   return total;
+}
+
+int32_t unsafeReverseLoop(std::u16string_view s) {
+   // Sample: adds up all code points of a well-formed string, walking it back to front.
+   header::U16UnsafeStringCodePoints<char16_t, UChar32> codePoints(s);
+   int32_t total = 0;
+   const auto stop = codePoints.rend();
+   for (auto it = codePoints.rbegin(); it != stop; ++it) { total += (*it).codePoint(); }
+   return total;
+}
 #endif
 
 }  // namespace U_HEADER_ONLY_NAMESPACE

From 8bea75e17ba91277e5c023aaebbfa43de38688da Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 6 Jan 2025 16:10:05 -0800
Subject: [PATCH 21/23] restore base dec() (oops)

---
 icu4c/source/common/unicode/utf16cppiter.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 777cea6174e4..bcb7221fd5b3 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -169,6 +169,15 @@ class U16IteratorBase {
     // @internal
     bool operator!=(const U16IteratorBase &other) const { return !operator==(other); }
 
+    // @internal
+    void dec() {
+        // TODO: assert current != start -- more precisely: start < current <= limit
+        --current;  // Very similar to U16_BACK_1().
+        if (U16_IS_TRAIL(*current) && current != start && U16_IS_LEAD(*(current - 1))) {
+            --current;  // the trail has a matching lead: step back over the whole pair
+        }
+    }
+
     // @internal
     CodeUnits<Unit16, CP32> readAndInc(const Unit16 *&p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
@@ -410,6 +419,14 @@ class U16UnsafeIteratorBase {
     // @internal
     bool operator!=(const U16UnsafeIteratorBase &other) const { return !operator==(other); }
 
+    // @internal
+    void dec() {
+        --current;  // Very similar to U16_BACK_1_UNSAFE(): no bounds or pairing checks.
+        if (U16_IS_TRAIL(*current)) {
+            --current;  // a trail is taken to be preceded by its lead; skip that too
+        }
+    }
+
     // @internal
     UnsafeCodeUnits<Unit16, CP32> readAndInc(const Unit16 *&p) const {
         // Very similar to U16_NEXT_UNSAFE().

From 5281d61f6d0c2315792c5e82d91eb650f61eaa2f Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Mon, 6 Jan 2025 17:37:42 -0800
Subject: [PATCH 22/23] rename to utfiter.h, also test

---
 icu4c/source/common/unicode/{utf16cppiter.h => utfiter.h}     | 0
 icu4c/source/test/intltest/Makefile.in                        | 2 +-
 icu4c/source/test/intltest/intltest.vcxproj                   | 2 +-
 icu4c/source/test/intltest/intltest.vcxproj.filters           | 2 +-
 .../test/intltest/{utfcppitertest.cpp => utfitertest.cpp}     | 4 ++--
 5 files changed, 5 insertions(+), 5 deletions(-)
 rename icu4c/source/common/unicode/{utf16cppiter.h => utfiter.h} (100%)
 rename icu4c/source/test/intltest/{utfcppitertest.cpp => utfitertest.cpp} (99%)

diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utfiter.h
similarity index 100%
rename from icu4c/source/common/unicode/utf16cppiter.h
rename to icu4c/source/common/unicode/utfiter.h
diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index 64f36bd061f8..8a12daa2f5de 100644
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@@ -75,7 +75,7 @@ numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
 static_unisets_test.o numfmtdatadriventest.o numbertest_range.o erarulestest.o \
 formattedvaluetest.o formatted_string_builder_test.o numbertest_permutation.o \
 units_data_test.o units_router_test.o units_test.o displayoptions_test.o \
-numbertest_simple.o uchar_type_build_test.o usetheaderonlytest.o utfcppitertest.o
+numbertest_simple.o uchar_type_build_test.o usetheaderonlytest.o utfitertest.o
 
 DEPS = $(OBJECTS:.o=.d)
 
diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj
index 8d9bba021508..476b4b3b5934 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj
+++ b/icu4c/source/test/intltest/intltest.vcxproj
@@ -223,7 +223,7 @@
     <ClCompile Include="sfwdchit.cpp" />
     <ClCompile Include="strcase.cpp" />
     <ClCompile Include="ustrtest.cpp" />
-    <ClCompile Include="utfcppitertest.cpp" />
+    <ClCompile Include="utfitertest.cpp" />
     <ClCompile Include="utxttest.cpp" />
     <ClCompile Include="cpdtrtst.cpp" />
     <ClCompile Include="ittrans.cpp" />
diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters
index 0abc4608d1a6..7fc0c646647a 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj.filters
+++ b/icu4c/source/test/intltest/intltest.vcxproj.filters
@@ -490,7 +490,7 @@
     <ClCompile Include="ustrtest.cpp">
       <Filter>strings</Filter>
     </ClCompile>
-    <ClCompile Include="utfcppitertest.cpp">
+    <ClCompile Include="utfitertest.cpp">
       <Filter>strings</Filter>
     </ClCompile>
     <ClCompile Include="utxttest.cpp">
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfitertest.cpp
similarity index 99%
rename from icu4c/source/test/intltest/utfcppitertest.cpp
rename to icu4c/source/test/intltest/utfitertest.cpp
index 16ed3d0c627a..8f40229abe16 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfitertest.cpp
@@ -1,7 +1,7 @@
 // © 2024 and later: Unicode, Inc. and others.
 // License & terms of use: https://www.unicode.org/copyright.html
 
-// utfcppitertest.cpp
+// utfitertest.cpp
 // created: 2024aug12 Markus W. Scherer
 
 #include <string_view>
@@ -13,7 +13,7 @@
 // #define U_SHOW_CPLUSPLUS_HEADER_API 1
 
 #include "unicode/utypes.h"
-#include "unicode/utf16cppiter.h"
+#include "unicode/utfiter.h"
 #include "intltest.h"
 
 // Makes u"literal"sv std::u16string_view literals possible.

From 035c2c1fba9b106dc166a2273dff371705fb93c8 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Tue, 7 Jan 2025 16:32:35 -0800
Subject: [PATCH 23/23] validating iter based on other iter

---
 icu4c/source/common/unicode/utfiter.h      | 81 ++++++++++++----------
 icu4c/source/test/intltest/utfitertest.cpp | 61 ++++++++++++++++
 2 files changed, 105 insertions(+), 37 deletions(-)

diff --git a/icu4c/source/common/unicode/utfiter.h b/icu4c/source/common/unicode/utfiter.h
index bcb7221fd5b3..78252ddebc14 100644
--- a/icu4c/source/common/unicode/utfiter.h
+++ b/icu4c/source/common/unicode/utfiter.h
@@ -61,18 +61,19 @@ namespace U_HEADER_ONLY_NAMESPACE {
  * Result of validating and decoding a minimal Unicode code unit sequence.
  * Returned from validating Unicode string code point iterators.
  *
- * @tparam Unit Code unit type:
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
  *     UTF-8: char or char8_t or uint8_t;
  *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
  *              should be signed if U_BEHAVIOR_NEGATIVE
  * @draft ICU 77
  */
-template<typename Unit, typename CP32>
+template<typename UnitIter, typename CP32>
 class CodeUnits {
+    using Unit = typename std::iterator_traits<UnitIter>::value_type;
 public:
     // @internal
-    CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed, const Unit *data) :
+    CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed, UnitIter data) :
             c(codePoint), len(length), ok(wellFormed), p(data) {}
 
     CodeUnits(const CodeUnits &other) = default;
@@ -82,22 +83,24 @@ class CodeUnits {
 
     bool wellFormed() const { return ok; }
 
-    const Unit *data() const { return p; }
+    UnitIter data() const { return p; }
 
     int32_t length() const { return len; }
 
-    std::basic_string_view<Unit> stringView() const {
+    template<typename Iter = UnitIter>
+    std::enable_if_t<
+        std::is_pointer_v<Iter>,
+        std::basic_string_view<Unit>>
+    stringView() const {
         return std::basic_string_view<Unit>(p, len);
     }
 
-    // TODO: std::optional<CP32> maybeCodePoint() const ? (nullopt if ill-formed)
-
 private:
     // Order of fields with padding and access frequency in mind.
     CP32 c;
     uint8_t len;
     bool ok;
-    const Unit *p;
+    UnitIter p;
 };
 
 /**
@@ -144,17 +147,18 @@ class UnsafeCodeUnits {
  * Internal base class for public U16Iterator & U16ReverseIterator.
  * Not intended for public subclassing.
  *
- * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
  *              should be signed if U_BEHAVIOR_NEGATIVE
  * @tparam UIllFormedBehavior TODO
  * @internal
  */
-template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
+template<typename UnitIter, typename CP32, UIllFormedBehavior behavior>
 class U16IteratorBase {
 protected:
     // @internal
-    U16IteratorBase(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
+    U16IteratorBase(UnitIter start, UnitIter p, UnitIter limit) :
             start(start), current(p), limit(limit) {}
     // TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
     // Test pointers for == or != but not < or >.
@@ -179,10 +183,10 @@ class U16IteratorBase {
     }
 
     // @internal
-    CodeUnits<Unit16, CP32> readAndInc(const Unit16 *&p) const {
+    CodeUnits<UnitIter, CP32> readAndInc(UnitIter &p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Very similar to U16_NEXT_OR_FFFD().
-        const Unit16 *p0 = p;
+        UnitIter p0 = p;
         CP32 c = *p++;
         if (!U16_IS_SURROGATE(c)) {
             return {c, 1, true, p0};
@@ -199,16 +203,17 @@ class U16IteratorBase {
     }
 
     // @internal
-    CodeUnits<Unit16, CP32> decAndRead(const Unit16 *&p) const {
+    CodeUnits<UnitIter, CP32> decAndRead(UnitIter &p) const {
         // TODO: assert p != limit -- more precisely: start <= p < limit
         // Very similar to U16_PREV_OR_FFFD().
         CP32 c = *--p;
         if (!U16_IS_SURROGATE(c)) {
             return {c, 1, true, p};
         } else {
+            UnitIter p1 = p;  // scratch copy: peek at the preceding unit without moving p yet
             uint16_t c2;
-            if (U16_IS_SURROGATE_TRAIL(c) && p != start && U16_IS_LEAD(c2 = *(p - 1))) {
-                --p;
+            if (U16_IS_SURROGATE_TRAIL(c) && p != start && U16_IS_LEAD(c2 = *--p1)) {
+                p = p1;  // commit only on success: p now points at the lead of the pair
                 c = U16_GET_SUPPLEMENTARY(c2, c);
                 return {c, 2, true, p};
             } else {
@@ -230,29 +235,30 @@ class U16IteratorBase {
     // In a validating iterator, we need start & limit so that when we read a code point
     // (forward or backward) we can test if there are enough code units.
     // @internal
-    const Unit16 *const start;
+    const UnitIter start;
     // @internal
-    const Unit16 *current;
+    UnitIter current;
     // @internal
-    const Unit16 *const limit;
+    const UnitIter limit;
 };
 
 /**
  * Validating bidirectional iterator over the code points in a Unicode 16-bit string.
  *
- * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
  *              should be signed if U_BEHAVIOR_NEGATIVE
  * @tparam UIllFormedBehavior TODO
  * @draft ICU 77
  */
-template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
-class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
+template<typename UnitIter, typename CP32, UIllFormedBehavior behavior>
+class U16Iterator : private U16IteratorBase<UnitIter, CP32, behavior> {
     // FYI: We need to qualify all accesses to super class members because of private inheritance.
-    using Super = U16IteratorBase<Unit16, CP32, behavior>;
+    using Super = U16IteratorBase<UnitIter, CP32, behavior>;
 public:
     // TODO: make private, make friends
-    U16Iterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
+    U16Iterator(UnitIter start, UnitIter p, UnitIter limit) :
             Super(start, p, limit) {}
 
     U16Iterator(const U16Iterator &other) = default;
@@ -261,10 +267,10 @@ class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
     bool operator==(const U16Iterator &other) const { return Super::operator==(other); }
     bool operator!=(const U16Iterator &other) const { return !Super::operator==(other); }
 
-    CodeUnits<Unit16, CP32> operator*() const {
+    CodeUnits<UnitIter, CP32> operator*() const {
         // Call the same function in both operator*() and operator++() so that an
         // optimizing compiler can easily eliminate redundant work when alternating between the two.
-        const Unit16 *p = Super::current;
+        UnitIter p = Super::current;
         return Super::readAndInc(p);
     }
 
@@ -298,18 +304,19 @@ class U16Iterator : private U16IteratorBase<Unit16, CP32, behavior> {
  * Validating reverse iterator over the code points in a Unicode 16-bit string.
  * Not bidirectional, but optimized for reverse iteration.
  *
- * @tparam Unit16 Code unit type: char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t
  * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
  *              should be signed if U_BEHAVIOR_NEGATIVE
  * @tparam UIllFormedBehavior TODO
  * @draft ICU 77
  */
-template<typename Unit16, typename CP32, UIllFormedBehavior behavior>
-class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
-    using Super = U16IteratorBase<Unit16, CP32, behavior>;
+template<typename UnitIter, typename CP32, UIllFormedBehavior behavior>
+class U16ReverseIterator : private U16IteratorBase<UnitIter, CP32, behavior> {
+    using Super = U16IteratorBase<UnitIter, CP32, behavior>;
 public:
     // TODO: make private, make friends
-    U16ReverseIterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
+    U16ReverseIterator(UnitIter start, UnitIter p, UnitIter limit) :
             Super(start, p, limit) {}
 
     U16ReverseIterator(const U16ReverseIterator &other) = default;
@@ -318,10 +325,10 @@ class U16ReverseIterator : private U16IteratorBase<Unit16, CP32, behavior> {
     bool operator==(const U16ReverseIterator &other) const { return Super::operator==(other); }
     bool operator!=(const U16ReverseIterator &other) const { return !Super::operator==(other); }
 
-    CodeUnits<Unit16, CP32> operator*() const {
+    CodeUnits<UnitIter, CP32> operator*() const {
         // Call the same function in both operator*() and operator++() so that an
         // optimizing compiler can easily eliminate redundant work when alternating between the two.
-        const Unit16 *p = Super::current;
+        UnitIter p = Super::current;
         return Super::decAndRead(p);
     }
 
@@ -366,24 +373,24 @@ class U16StringCodePoints {
     U16StringCodePoints &operator=(const U16StringCodePoints &other) = default;
 
     /** @draft ICU 77 */
-    U16Iterator<Unit16, CP32, behavior> begin() const {
+    U16Iterator<const Unit16 *, CP32, behavior> begin() const {
         return {s.data(), s.data(), s.data() + s.length()};
     }
 
     /** @draft ICU 77 */
-    U16Iterator<Unit16, CP32, behavior> end() const {
+    U16Iterator<const Unit16 *, CP32, behavior> end() const {
         const Unit16 *limit = s.data() + s.length();
         return {s.data(), limit, limit};
     }
 
     /** @draft ICU 77 */
-    U16ReverseIterator<Unit16, CP32, behavior> rbegin() const {
+    U16ReverseIterator<const Unit16 *, CP32, behavior> rbegin() const {
         const Unit16 *limit = s.data() + s.length();
         return {s.data(), limit, limit};
     }
 
     /** @draft ICU 77 */
-    U16ReverseIterator<Unit16, CP32, behavior> rend() const {
+    U16ReverseIterator<const Unit16 *, CP32, behavior> rend() const {
         return {s.data(), s.data(), s.data() + s.length()};
     }
 
diff --git a/icu4c/source/test/intltest/utfitertest.cpp b/icu4c/source/test/intltest/utfitertest.cpp
index 8f40229abe16..a8bda260bc4d 100644
--- a/icu4c/source/test/intltest/utfitertest.cpp
+++ b/icu4c/source/test/intltest/utfitertest.cpp
@@ -23,6 +23,31 @@ using namespace std::string_view_literals;
 using U_HEADER_ONLY_NAMESPACE::U16Iterator;
 using U_HEADER_ONLY_NAMESPACE::U16StringCodePoints;
 
+// Minimal forward-only code unit iterator over an array, for testing a non-pointer UnitIter.
+template<typename Unit>
+class FwdIter {
+public:
+    // std::iterator_traits<FwdIter> exposes value_type only if all five member types exist.
+    typedef Unit value_type;
+    typedef Unit reference;  // operator*() returns the code unit by value
+    typedef const Unit *pointer;
+    typedef typename std::iterator_traits<const Unit *>::difference_type difference_type;
+    typedef std::forward_iterator_tag iterator_category;  // copies iterate independently
+
+    FwdIter(const Unit *data) : p(data) {}
+
+    bool operator==(const FwdIter &other) const { return p == other.p; }
+    bool operator!=(const FwdIter &other) const { return !operator==(other); }
+
+    Unit operator*() const { return *p; }
+    FwdIter &operator++() { ++p; return *this; }  // pre-increment
+    FwdIter operator++(int) { return FwdIter(p++); }  // post-increment: returns the old position
+
+private:
+    // Current position in the underlying code unit array.
+    const Unit *p;
+};
+
 class U16IteratorTest : public IntlTest {
 public:
     U16IteratorTest() {}
@@ -33,6 +58,7 @@ class U16IteratorTest : public IntlTest {
     void testNegative();
     void testFFFD();
     void testSurrogate();
+    void testFwdIter();
 };
 
 extern IntlTest *createU16IteratorTest() {
@@ -48,6 +74,7 @@ void U16IteratorTest::runIndexedTest(int32_t index, UBool exec, const char *&nam
     TESTCASE_AUTO(testNegative);
     TESTCASE_AUTO(testFFFD);
     TESTCASE_AUTO(testSurrogate);
+    TESTCASE_AUTO(testFwdIter);
     TESTCASE_AUTO_END;
 }
 
@@ -90,6 +117,7 @@ void U16IteratorTest::testNegative() {
     auto sv = units.stringView();
     assertEquals("iter[1] * stringView().length()", 1, sv.length());
     assertEquals("iter[1] * stringView()[0]", 0xd900, sv[0]);
+    // TODO: test units.data()
     ++iter;
     assertEquals("iter[2] * codePoint", u'b', (*iter++).codePoint());  // post-increment
     units = *iter++;  // post-increment
@@ -146,3 +174,36 @@ void U16IteratorTest::testSurrogate() {
     assertEquals("iter[4] * stringView()", u"ç", (*iter++).stringView());  // post-increment
     assertTrue("iter == endIter", iter == range.end());
 }
+
+void U16IteratorTest::testFwdIter() {
+    IcuTestErrorCode errorCode(*this, "testFwdIter");
+    using UnitIter = FwdIter<char16_t>;
+    using CodePointIter = U16Iterator<UnitIter, UChar32, U_BEHAVIOR_NEGATIVE>;
+    std::u16string_view good(u"abçカ🚴"sv);
+    // Wrap the raw pointers so that the code point iterator cannot use pointer arithmetic.
+    const UnitIter first(good.data());
+    const UnitIter limit(good.data() + good.length());
+    // TODO: U16StringCodePoints<FwdIter, UChar32, U_BEHAVIOR_NEGATIVE> range(good);
+    CodePointIter cpIter(first, first, limit);
+    const CodePointIter cpLimit(first, limit, limit);
+    assertEquals("iter[0] * codePoint", u'a', (*cpIter).codePoint());
+    ++cpIter;  // pre-increment
+    auto cu = *cpIter;
+    assertEquals("iter[1] * codePoint", u'b', cu.codePoint());
+    assertEquals("iter[1] * length", 1, cu.length());
+    assertTrue("iter[1] * wellFormed", cu.wellFormed());
+    // No cu.stringView() when the unit iterator is not a pointer.
+    assertTrue("iter[1] * data()[0]", *cu.data() == u'b');
+    ++cpIter;
+    assertEquals("iter[2] * codePoint", u'ç', (*cpIter++).codePoint());  // post-increment
+    assertEquals("iter[3] * codePoint", u'カ', (*cpIter).codePoint());
+    ++cpIter;
+    cu = *cpIter++;
+    assertEquals("iter[4] * codePoint", U'🚴', cu.codePoint());
+    assertEquals("iter[4] * length", 2, cu.length());
+    assertTrue("iter[4] * wellFormed", cu.wellFormed());
+    UnitIter unit = cu.data();
+    assertTrue("iter[4] * data()[0]", *unit++ == u"🚴"[0]);
+    assertTrue("iter[4] * data()[1]", *unit == u"🚴"[1]);
+    assertTrue("iter == endIter", cpIter == cpLimit);
+}