Skip to content

Commit

Permalink
<charconv>: Fix from_chars for floating-point types (#3670)
Browse files Browse the repository at this point in the history
Co-authored-by: Stephan T. Lavavej <[email protected]>
  • Loading branch information
frederick-vs-ja and StephanTLavavej authored Apr 28, 2023
1 parent c5a5efa commit 091cad2
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 19 deletions.
75 changes: 56 additions & 19 deletions stl/inc/charconv
Original file line number Diff line number Diff line change
Expand Up @@ -1579,6 +1579,8 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
}
const char* const _Leading_zero_end = _Next;

bool _Has_zero_tail = true;

// Scan the integer part of the mantissa:
for (; _Next != _Last; ++_Next) {
const unsigned char _Digit_value = _Digit_from_char(*_Next);
Expand All @@ -1589,20 +1591,18 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi

if (_Mantissa_it != _Mantissa_last) {
*_Mantissa_it++ = _Digit_value;
} else {
_Has_zero_tail = _Has_zero_tail && _Digit_value == 0;
}
}
const char* const _Whole_end = _Next;

// Defend against _Exponent_adjustment integer overflow. (These values don't need to be strict.)
constexpr ptrdiff_t _Maximum_adjustment = 1'000'000;
constexpr ptrdiff_t _Minimum_adjustment = -1'000'000;

// The exponent adjustment holds the number of digits in the mantissa buffer that appeared before the radix point.
// It can be negative, and leading zeroes in the integer part are ignored. Examples:
// For "03333.111", it is 4.
// For "00000.111", it is 0.
// For "00000.001", it is -2.
int _Exponent_adjustment = static_cast<int>((_STD min)(_Whole_end - _Leading_zero_end, _Maximum_adjustment));
ptrdiff_t _Exponent_adjustment = _Whole_end - _Leading_zero_end;

// [_Whole_end, _Dot_end) will contain 0 or 1 '.' characters
if (_Next != _Last && *_Next == '.') {
Expand All @@ -1618,12 +1618,10 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
for (; _Next != _Last && *_Next == '0'; ++_Next) {
}

_Exponent_adjustment = static_cast<int>((_STD max)(_Dot_end - _Next, _Minimum_adjustment));
_Exponent_adjustment = _Dot_end - _Next;
}

// Scan the fractional part of the mantissa:
bool _Has_zero_tail = true;

for (; _Next != _Last; ++_Next) {
const unsigned char _Digit_value = _Digit_from_char(*_Next);

Expand All @@ -1647,7 +1645,8 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
const char _Exponent_prefix{_Is_hexadecimal ? 'p' : 'e'};

bool _Exponent_is_negative = false;
int _Exponent = 0;
bool _Exp_abs_too_large = false;
ptrdiff_t _Exponent = 0;

constexpr int _Maximum_temporary_decimal_exponent = 5200;
constexpr int _Minimum_temporary_decimal_exponent = -5200;
Expand All @@ -1672,8 +1671,10 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi

// found decimal digit

if (_Exponent <= _Maximum_temporary_decimal_exponent) {
if (_Exponent < PTRDIFF_MAX / 10 || (_Exponent == PTRDIFF_MAX / 10 && _Digit_value <= PTRDIFF_MAX % 10)) {
_Exponent = _Exponent * 10 + _Digit_value;
} else {
_Exp_abs_too_large = true;
}

++_Unread;
Expand Down Expand Up @@ -1710,23 +1711,59 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
return {_Next, errc{}};
}

// Before we adjust the exponent, handle the case where we detected a wildly
// out of range exponent during parsing and clamped the value:
if (_Exponent > _Maximum_temporary_decimal_exponent) {
_Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999"
// Handle exponent of an overly large absolute value.
if (_Exp_abs_too_large) {
if (_Exponent > 0) {
_Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
return {_Next, errc::result_out_of_range};
} else {
_Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
return {_Next, errc::result_out_of_range};
}
}

if (_Exponent < _Minimum_temporary_decimal_exponent) {
_Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999"
// Adjust _Exponent and _Exponent_adjustment when they have different signedness to avoid overflow.
if (_Exponent > 0 && _Exponent_adjustment < 0) {
if (_Is_hexadecimal) {
const ptrdiff_t _Further_adjustment = (_STD max)(-((_Exponent - 1) / 4 + 1), _Exponent_adjustment);
_Exponent += _Further_adjustment * 4;
_Exponent_adjustment -= _Further_adjustment;
} else {
const ptrdiff_t _Further_adjustment = (_STD max)(-_Exponent, _Exponent_adjustment);
_Exponent += _Further_adjustment;
_Exponent_adjustment -= _Further_adjustment;
}
} else if (_Exponent < 0 && _Exponent_adjustment > 0) {
if (_Is_hexadecimal) {
const ptrdiff_t _Further_adjustment = (_STD min)((-_Exponent - 1) / 4 + 1, _Exponent_adjustment);
_Exponent += _Further_adjustment * 4;
_Exponent_adjustment -= _Further_adjustment;
} else {
const ptrdiff_t _Further_adjustment = (_STD min)(-_Exponent, _Exponent_adjustment);
_Exponent += _Further_adjustment;
_Exponent_adjustment -= _Further_adjustment;
}
}

// In hexadecimal floating constants, the exponent is a base 2 exponent. The exponent adjustment computed during
// parsing has the same base as the mantissa (so, 16 for hexadecimal floating constants).
// We therefore need to scale the base 16 multiplier to base 2 by multiplying by log2(16):
const int _Exponent_adjustment_multiplier{_Is_hexadecimal ? 4 : 1};

// And then _Exponent and _Exponent_adjustment are either both non-negative or both non-positive.
// So we can detect out-of-range cases directly.
if (_Exponent > _Maximum_temporary_decimal_exponent
|| _Exponent_adjustment > _Maximum_temporary_decimal_exponent / _Exponent_adjustment_multiplier) {
_Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999"
}

if (_Exponent < _Minimum_temporary_decimal_exponent
|| _Exponent_adjustment < _Minimum_temporary_decimal_exponent / _Exponent_adjustment_multiplier) {
_Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999"
}

_Exponent += _Exponent_adjustment * _Exponent_adjustment_multiplier;

// Verify that after adjustment the exponent isn't wildly out of range (if it is, it isn't representable
Expand All @@ -1741,7 +1778,7 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
return {_Next, errc::result_out_of_range}; // Underflow example: "0.001e-5199"
}

_Fp_string._Myexponent = _Exponent;
_Fp_string._Myexponent = static_cast<int32_t>(_Exponent);
_Fp_string._Mymantissa_count = static_cast<uint32_t>(_Mantissa_it - _Mantissa_first);

if (_Is_hexadecimal) {
Expand Down
11 changes: 11 additions & 0 deletions tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1526,6 +1526,17 @@ inline constexpr DoubleFromCharsTestCase double_from_chars_test_cases[] = {
"315290680984617210924625396728515625e-308",
chars_format::scientific, 721, errc{}, 0x1.0000000000000p-1022},

// GH-3161: "<charconv>: from_chars, incorrect conversion of tiny doubles, when specified without a fractional part"
// slightly above the midpoint between 0x1.4258265642fe8p-1022 and 0x1.4258265642fe9p-1022, without decimal point
{"28017185671564702625986967801367508381305145856029502167789836829722124560807078866183829589255835985468748869481"
"86583142362517164634520243360501584015713807233124871337198558567758800235976556111488605578858880932554217965863"
"07051360968462063659891899462054247537745976596872767714180852945844484408053399155613871082139550145579799707241"
"02739077746853707044081281775302910700845395388393628022004086658191413817026937499382994523889310476633341355125"
"36970161512201336865537906990405470959087815470627336860267082642705818724890923485099076407427813196992821890396"
"81414673574614863062987306855746291767811978264371471321046227200851894843139416687953473478884075454592166055730"
"14774895683738184268219531683594714240155669493243656420489173797250259667634963989257812500000001e-1083",
chars_format::scientific, 782, errc{}, 0x1.4258265642fe9p-1022},

// (1 + 1 - 2 * 2^-53) * 2^1023 exactly
{"17976931348623157081452742373170435679807056752584499659891747680315726078002853876058955863276687817154045895351"
"43824642343213268894641827684675467035375169860499105765512820762454900903893289440758685084551339423045832369032"
Expand Down
8 changes: 8 additions & 0 deletions tests/std/tests/P0067R5_charconv/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,10 @@ void test_floating_from_chars(const chars_format fmt) {
test_from_chars<T>("-000", fmt, 4, errc{}, T{-0.0});
test_from_chars<T>("-0e9999", fmt, 7, errc{}, T{-0.0});
test_from_chars<T>("-0e-9999", fmt, 8, errc{}, T{-0.0});
test_from_chars<T>("1" + string(10000, '0') + "e-10000", fmt, 10008, errc{}, T{1.0});
test_from_chars<T>("0." + string(9999, '0') + "1e10000", fmt, 10008, errc{}, T{1.0});
test_from_chars<T>("1" + string(1280000, '0') + "e-1280000", fmt, 1280010, errc{}, T{1.0});
test_from_chars<T>("0." + string(1279999, '0') + "1e1280000", fmt, 1280010, errc{}, T{1.0});
test_from_chars<T>("1e9999", fmt, 6, errc::result_out_of_range, inf);
test_from_chars<T>("-1e9999", fmt, 7, errc::result_out_of_range, -inf);
test_from_chars<T>("1e-9999", fmt, 7, errc::result_out_of_range, T{0});
Expand Down Expand Up @@ -978,6 +982,10 @@ void test_floating_from_chars(const chars_format fmt) {
test_from_chars<T>("-000", fmt, 4, errc{}, T{-0.0});
test_from_chars<T>("-0p9999", fmt, 7, errc{}, T{-0.0});
test_from_chars<T>("-0p-9999", fmt, 8, errc{}, T{-0.0});
test_from_chars<T>("1" + string(2500, '0') + "p-10000", fmt, 2508, errc{}, T{1.0});
test_from_chars<T>("0." + string(2499, '0') + "1p10000", fmt, 2508, errc{}, T{1.0});
test_from_chars<T>("1" + string(320000, '0') + "p-1280000", fmt, 320010, errc{}, T{1.0});
test_from_chars<T>("0." + string(319999, '0') + "1p1280000", fmt, 320010, errc{}, T{1.0});
test_from_chars<T>("1p9999", fmt, 6, errc::result_out_of_range, inf);
test_from_chars<T>("-1p9999", fmt, 7, errc::result_out_of_range, -inf);
test_from_chars<T>("1p-9999", fmt, 7, errc::result_out_of_range, T{0});
Expand Down

0 comments on commit 091cad2

Please sign in to comment.