<charconv>: Fix from_chars for floating-point types (#3670)

Co-authored-by: Stephan T. Lavavej <[email protected]>
microsoft · Apr 28, 2023 · 091cad2 · 091cad2
1 parent c5a5efa
commit 091cad2
Show file tree

Hide file tree

Showing 3 changed files with 75 additions and 19 deletions.
diff --git a/stl/inc/charconv b/stl/inc/charconv
@@ -1579,6 +1579,8 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
     }
     const char* const _Leading_zero_end = _Next;
 
+    bool _Has_zero_tail = true;
+
     // Scan the integer part of the mantissa:
     for (; _Next != _Last; ++_Next) {
         const unsigned char _Digit_value = _Digit_from_char(*_Next);
@@ -1589,20 +1591,18 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
 
         if (_Mantissa_it != _Mantissa_last) {
             *_Mantissa_it++ = _Digit_value;
+        } else {
+            _Has_zero_tail = _Has_zero_tail && _Digit_value == 0;
         }
     }
     const char* const _Whole_end = _Next;
 
-    // Defend against _Exponent_adjustment integer overflow. (These values don't need to be strict.)
-    constexpr ptrdiff_t _Maximum_adjustment = 1'000'000;
-    constexpr ptrdiff_t _Minimum_adjustment = -1'000'000;
-
     // The exponent adjustment holds the number of digits in the mantissa buffer that appeared before the radix point.
     // It can be negative, and leading zeroes in the integer part are ignored. Examples:
     // For "03333.111", it is 4.
     // For "00000.111", it is 0.
     // For "00000.001", it is -2.
-    int _Exponent_adjustment = static_cast<int>((_STD min)(_Whole_end - _Leading_zero_end, _Maximum_adjustment));
+    ptrdiff_t _Exponent_adjustment = _Whole_end - _Leading_zero_end;
 
     // [_Whole_end, _Dot_end) will contain 0 or 1 '.' characters
     if (_Next != _Last && *_Next == '.') {
@@ -1618,12 +1618,10 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
         for (; _Next != _Last && *_Next == '0'; ++_Next) {
         }
 
-        _Exponent_adjustment = static_cast<int>((_STD max)(_Dot_end - _Next, _Minimum_adjustment));
+        _Exponent_adjustment = _Dot_end - _Next;
     }
 
     // Scan the fractional part of the mantissa:
-    bool _Has_zero_tail = true;
-
     for (; _Next != _Last; ++_Next) {
         const unsigned char _Digit_value = _Digit_from_char(*_Next);
 
@@ -1647,7 +1645,8 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
     const char _Exponent_prefix{_Is_hexadecimal ? 'p' : 'e'};
 
     bool _Exponent_is_negative = false;
-    int _Exponent              = 0;
+    bool _Exp_abs_too_large    = false;
+    ptrdiff_t _Exponent        = 0;
 
     constexpr int _Maximum_temporary_decimal_exponent = 5200;
     constexpr int _Minimum_temporary_decimal_exponent = -5200;
@@ -1672,8 +1671,10 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
 
             // found decimal digit
 
-            if (_Exponent <= _Maximum_temporary_decimal_exponent) {
+            if (_Exponent < PTRDIFF_MAX / 10 || (_Exponent == PTRDIFF_MAX / 10 && _Digit_value <= PTRDIFF_MAX % 10)) {
                 _Exponent = _Exponent * 10 + _Digit_value;
+            } else {
+                _Exp_abs_too_large = true;
             }
 
             ++_Unread;
@@ -1710,23 +1711,59 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
         return {_Next, errc{}};
     }
 
-    // Before we adjust the exponent, handle the case where we detected a wildly
-    // out of range exponent during parsing and clamped the value:
-    if (_Exponent > _Maximum_temporary_decimal_exponent) {
-        _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
-        return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999"
+    // Handle exponent of an overly large absolute value.
+    if (_Exp_abs_too_large) {
+        if (_Exponent > 0) {
+            _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
+            return {_Next, errc::result_out_of_range};
+        } else {
+            _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
+            return {_Next, errc::result_out_of_range};
+        }
     }
 
-    if (_Exponent < _Minimum_temporary_decimal_exponent) {
-        _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
-        return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999"
+    // Adjust _Exponent and _Exponent_adjustment when they have different signedness to avoid overflow.
+    if (_Exponent > 0 && _Exponent_adjustment < 0) {
+        if (_Is_hexadecimal) {
+            const ptrdiff_t _Further_adjustment = (_STD max)(-((_Exponent - 1) / 4 + 1), _Exponent_adjustment);
+            _Exponent += _Further_adjustment * 4;
+            _Exponent_adjustment -= _Further_adjustment;
+        } else {
+            const ptrdiff_t _Further_adjustment = (_STD max)(-_Exponent, _Exponent_adjustment);
+            _Exponent += _Further_adjustment;
+            _Exponent_adjustment -= _Further_adjustment;
+        }
+    } else if (_Exponent < 0 && _Exponent_adjustment > 0) {
+        if (_Is_hexadecimal) {
+            const ptrdiff_t _Further_adjustment = (_STD min)((-_Exponent - 1) / 4 + 1, _Exponent_adjustment);
+            _Exponent += _Further_adjustment * 4;
+            _Exponent_adjustment -= _Further_adjustment;
+        } else {
+            const ptrdiff_t _Further_adjustment = (_STD min)(-_Exponent, _Exponent_adjustment);
+            _Exponent += _Further_adjustment;
+            _Exponent_adjustment -= _Further_adjustment;
+        }
     }
 
     // In hexadecimal floating constants, the exponent is a base 2 exponent. The exponent adjustment computed during
     // parsing has the same base as the mantissa (so, 16 for hexadecimal floating constants).
     // We therefore need to scale the base 16 multiplier to base 2 by multiplying by log2(16):
     const int _Exponent_adjustment_multiplier{_Is_hexadecimal ? 4 : 1};
 
+    // And then _Exponent and _Exponent_adjustment are either both non-negative or both non-positive.
+    // So we can detect out-of-range cases directly.
+    if (_Exponent > _Maximum_temporary_decimal_exponent
+        || _Exponent_adjustment > _Maximum_temporary_decimal_exponent / _Exponent_adjustment_multiplier) {
+        _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
+        return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999"
+    }
+
+    if (_Exponent < _Minimum_temporary_decimal_exponent
+        || _Exponent_adjustment < _Minimum_temporary_decimal_exponent / _Exponent_adjustment_multiplier) {
+        _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
+        return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999"
+    }
+
     _Exponent += _Exponent_adjustment * _Exponent_adjustment_multiplier;
 
     // Verify that after adjustment the exponent isn't wildly out of range (if it is, it isn't representable
@@ -1741,7 +1778,7 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
         return {_Next, errc::result_out_of_range}; // Underflow example: "0.001e-5199"
     }
 
-    _Fp_string._Myexponent       = _Exponent;
+    _Fp_string._Myexponent       = static_cast<int32_t>(_Exponent);
     _Fp_string._Mymantissa_count = static_cast<uint32_t>(_Mantissa_it - _Mantissa_first);
 
     if (_Is_hexadecimal) {

diff --git a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp
@@ -1526,6 +1526,17 @@ inline constexpr DoubleFromCharsTestCase double_from_chars_test_cases[] = {
      "315290680984617210924625396728515625e-308",
         chars_format::scientific, 721, errc{}, 0x1.0000000000000p-1022},
 
+    // GH-3161: "<charconv>: from_chars, incorrect conversion of tiny doubles, when specified without a fractional part"
+    // slightly above the midpoint between 0x1.4258265642fe8p-1022 and 0x1.4258265642fe9p-1022, without decimal point
+    {"28017185671564702625986967801367508381305145856029502167789836829722124560807078866183829589255835985468748869481"
+     "86583142362517164634520243360501584015713807233124871337198558567758800235976556111488605578858880932554217965863"
+     "07051360968462063659891899462054247537745976596872767714180852945844484408053399155613871082139550145579799707241"
+     "02739077746853707044081281775302910700845395388393628022004086658191413817026937499382994523889310476633341355125"
+     "36970161512201336865537906990405470959087815470627336860267082642705818724890923485099076407427813196992821890396"
+     "81414673574614863062987306855746291767811978264371471321046227200851894843139416687953473478884075454592166055730"
+     "14774895683738184268219531683594714240155669493243656420489173797250259667634963989257812500000001e-1083",
+        chars_format::scientific, 782, errc{}, 0x1.4258265642fe9p-1022},
+
     // (1 + 1 - 2 * 2^-53) * 2^1023 exactly
     {"17976931348623157081452742373170435679807056752584499659891747680315726078002853876058955863276687817154045895351"
      "43824642343213268894641827684675467035375169860499105765512820762454900903893289440758685084551339423045832369032"

diff --git a/tests/std/tests/P0067R5_charconv/test.cpp b/tests/std/tests/P0067R5_charconv/test.cpp
@@ -936,6 +936,10 @@ void test_floating_from_chars(const chars_format fmt) {
         test_from_chars<T>("-000", fmt, 4, errc{}, T{-0.0});
         test_from_chars<T>("-0e9999", fmt, 7, errc{}, T{-0.0});
         test_from_chars<T>("-0e-9999", fmt, 8, errc{}, T{-0.0});
+        test_from_chars<T>("1" + string(10000, '0') + "e-10000", fmt, 10008, errc{}, T{1.0});
+        test_from_chars<T>("0." + string(9999, '0') + "1e10000", fmt, 10008, errc{}, T{1.0});
+        test_from_chars<T>("1" + string(1280000, '0') + "e-1280000", fmt, 1280010, errc{}, T{1.0});
+        test_from_chars<T>("0." + string(1279999, '0') + "1e1280000", fmt, 1280010, errc{}, T{1.0});
         test_from_chars<T>("1e9999", fmt, 6, errc::result_out_of_range, inf);
         test_from_chars<T>("-1e9999", fmt, 7, errc::result_out_of_range, -inf);
         test_from_chars<T>("1e-9999", fmt, 7, errc::result_out_of_range, T{0});
@@ -978,6 +982,10 @@ void test_floating_from_chars(const chars_format fmt) {
         test_from_chars<T>("-000", fmt, 4, errc{}, T{-0.0});
         test_from_chars<T>("-0p9999", fmt, 7, errc{}, T{-0.0});
         test_from_chars<T>("-0p-9999", fmt, 8, errc{}, T{-0.0});
+        test_from_chars<T>("1" + string(2500, '0') + "p-10000", fmt, 2508, errc{}, T{1.0});
+        test_from_chars<T>("0." + string(2499, '0') + "1p10000", fmt, 2508, errc{}, T{1.0});
+        test_from_chars<T>("1" + string(320000, '0') + "p-1280000", fmt, 320010, errc{}, T{1.0});
+        test_from_chars<T>("0." + string(319999, '0') + "1p1280000", fmt, 320010, errc{}, T{1.0});
         test_from_chars<T>("1p9999", fmt, 6, errc::result_out_of_range, inf);
         test_from_chars<T>("-1p9999", fmt, 7, errc::result_out_of_range, -inf);
         test_from_chars<T>("1p-9999", fmt, 7, errc::result_out_of_range, T{0});