From 1a45abace1d4535a0c5edeb9bf822bc1c1c57589 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Thu, 11 Jul 2024 14:54:16 +0300 Subject: [PATCH] Support list format fallbacks --- babel/lists.py | 51 +++++++++++++++++++++++++++++++++++---------- tests/test_lists.py | 14 ++++++++++++- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/babel/lists.py b/babel/lists.py index 376bc963e..6c34cb099 100644 --- a/babel/lists.py +++ b/babel/lists.py @@ -26,9 +26,11 @@ DEFAULT_LOCALE = default_locale() -def format_list(lst: Sequence[str], - style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard', - locale: Locale | str | None = DEFAULT_LOCALE) -> str: +def format_list( + lst: Sequence[str], + style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard', + locale: Locale | str | None = DEFAULT_LOCALE, +) -> str: """ Format the items in `lst` as a list. @@ -39,7 +41,11 @@ def format_list(lst: Sequence[str], >>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi') u'omena, peruna tai aplari' - These styles are defined, but not all are necessarily available in all locales. + Not all styles are necessarily available in all locales. + The function will attempt to fall back to replacement styles according to the rules + set forth in the CLDR root XML file, and raise a ValueError if no suitable replacement + can be found. + The following text is verbatim from the Unicode TR35-49 spec [1]. 
* standard: @@ -76,14 +82,9 @@ def format_list(lst: Sequence[str], if len(lst) == 1: return lst[0] - if style not in locale.list_patterns: - raise ValueError( - f'Locale {locale} does not support list formatting style {style!r} ' - f'(supported are {sorted(locale.list_patterns)})', - ) - patterns = locale.list_patterns[style] + patterns = _resolve_list_style(locale, style) - if len(lst) == 2: + if len(lst) == 2 and '2' in patterns: return patterns['2'].format(*lst) result = patterns['start'].format(lst[0], lst[1]) @@ -92,3 +93,31 @@ def format_list(lst: Sequence[str], result = patterns['end'].format(result, lst[-1]) return result + + +# Based on CLDR 45's root.xml file's `<alias>`es. +# The root file defines both `standard` and `or`, +# so they're always available. +# TODO: It would likely be better to use the +# babel.localedata.Alias mechanism for this, +# but I'm not quite sure how it's supposed to +# work with inheritance and data in the root. +_style_fallbacks = { + "or-narrow": ["or-short", "or"], + "or-short": ["or"], + "standard-narrow": ["standard-short", "standard"], + "standard-short": ["standard"], + "unit": ["unit-short", "standard"], + "unit-narrow": ["unit-short", "unit", "standard"], + "unit-short": ["standard"], +} + + +def _resolve_list_style(locale: Locale, style: str): + for style in (style, *(_style_fallbacks.get(style, []))): # noqa: B020 + if style in locale.list_patterns: + return locale.list_patterns[style] + raise ValueError( + f"Locale {locale} does not support list formatting style {style!r} " + f"(supported are {sorted(locale.list_patterns)})", + ) diff --git a/tests/test_lists.py b/tests/test_lists.py index 2b2453bb8..46ca10d02 100644 --- a/tests/test_lists.py +++ b/tests/test_lists.py @@ -1,6 +1,6 @@ import pytest -from babel import lists +from babel import lists, units @pytest.mark.parametrize(('list', 'locale', 'expected'), [ @@ -18,3 +18,15 @@ def test_format_list(list, locale, expected): def test_format_list_error(): with 
pytest.raises(ValueError): lists.format_list(['a', 'b', 'c'], style='orange', locale='en') + + +def test_issue_1098(): + one_foot = units.format_unit(1, "length-foot", length="short", locale="zh_CN") + five_inches = units.format_unit(5, "length-inch", length="short", locale="zh_CN") + # zh-CN does not specify the "unit" style, so we fall back to "unit-short" style. + assert ( + lists.format_list([one_foot, five_inches], style="unit", locale="zh_CN") == + lists.format_list([one_foot, five_inches], style="unit-short", locale="zh_CN") == + # Translation verified using Google Translate. It would add more spacing, but the glyphs are correct. + "1英尺5英寸" + )