# Source code for toolforge_i18n._formatters
"""A collection of formatters for i18n messages.
You usually want to use I18nFormatter, which combines all the other
formatters in this module. Create it like this::
formatter = I18nFormatter(locale_identifier='en',
get_gender=lambda arg: 'n')
And then use format strings such as::
formatter.format('You have {count!p:one=one message:other={count} messages} from {users!l}.',
count=len(messages),
users=set(users))
formatter.format('Reply to {user!g:m=him:f=her:n=them}?',
user=user)
See the documentation of the other formatter classes for details on
the supported conversions.
If the ``markupsafe`` module is available, then all the formatters defined
in this module become MarkupSafe-aware: if the format string is
``Markup``, then the returned formatted value will also be ``Markup``, and any
non-``Markup`` arguments will be escaped.
"""
import string
from collections.abc import Callable, Mapping, Sequence
from decimal import Decimal
from typing import Literal, TypeAlias, cast
import babel
import babel.lists
try:
import markupsafe
except ModuleNotFoundError:
markup_type = str
class BaseI18nFormatter(string.Formatter):
"""Base class of our i18n formatters."""
type_of_format_string: type[str] | None = str
def __init__(self, **_kwargs: object):
"""Strip away all constructor arguments.
``string.Formatter`` complains if it gets any constructor
arguments, but our individual constructors need to pass
through their constructor arguments so the classes can be
combined freely, so we throw away the arguments here.
"""
super().__init__()
def escape(self, s: str) -> str:
"""No-op function with a signature matching the escape below.
Used by :py:class:`~HyperlinkFormatter` / :py:class:`~_Hyperlink`.
"""
return s
else:
markup_type = markupsafe.Markup # type: ignore
class BaseI18nFormatter(markupsafe.EscapeFormatter): # type: ignore
"""Base class of our i18n formatters."""
type_of_format_string: type[str] | None = None
def __init__(self, **_kwargs: object):
"""Use a context-sensitive escape function.
``EscapeFormatter`` calls the escape function on all fields,
since it expects to be reached through ``Markup.format()``,
i.e. we know we’ll want to produce ``Markup`` and escape
strings. We change this so that fields are only escaped if
the format string was ``Markup`` in the first place.
Also, throw away other constructor arguments, so that
``string.Formatter`` doesn’t complain about them.
"""
def escape(s: object) -> markupsafe.Markup:
assert self.type_of_format_string is not None
if issubclass(self.type_of_format_string, markupsafe.Markup):
return markupsafe.Markup.escape(s)
else:
# this cast is a lie but returning str seems to work in practice
return cast(markupsafe.Markup, s)
super().__init__(escape=escape)
def vformat(self, format_string: str, args: Sequence[object], kwargs: Mapping[str, object]) -> str:
"""Store the type of the format string and apply it to the result.
``string``.Formatter is partially implemented in C, so the
format spec when formatting individual fields is always a
string, never a ``Markup`` object. To still allow fields to
properly format themselves as markup, store the type of
the format string here. Also, apply it to the final result
of string formatting.
"""
self.type_of_format_string = type(format_string)
try:
ret = super().vformat(format_string, args, kwargs)
return self.type_of_format_string(ret)
finally:
self.type_of_format_string = None
# [docs]
class PluralFormatter(BaseI18nFormatter):
    """A string formatter supporting a ``!p`` plural conversion.

    Format string examples::

        "I ate {count!p:0=no apples:one={count} apple:other={count} apples}."
        "{size} (0x{size:04X}) {size!p:one=bajt:two=bajtaj:few=bajty:other=bajtow}"

    When a numeric value is converted with ``!p``, the format spec is
    not a regular format spec: it is a colon-separated list of
    ``key=text`` entries. Each key is either one of the CLDR plural rule
    tags (currently “zero”, “one”, “two”, “few”, “many”, or “other”)
    or an explicit value. The text belonging to the matching value or
    tag, determined by the plural rules of the locale given to the
    constructor, is substituted into the message. Converting
    non-numeric values with ``!p`` is an error.

    Most languages do not use every tag, and a format string should
    contain exactly the tags its language uses. For example, although
    a “zero” tag exists, English only uses “one” and “other”; to
    special-case a value of zero with a ``PluralFormatter('en')``, use
    the key “0”, not “zero”. Conversely, leaving out a tag that the
    language does use may make the formatter raise a KeyError: if the
    first example above used the key “1” instead of “one”, it would
    fail for a count of ``-1`` or ``1.0``.

    Value keys always win over tag keys, regardless of their order in
    the format spec. A value key only matches if it is identical to
    the ``str()`` of the value: a “1” key will not match a ``1.0``
    value or vice versa.
    """

    def __init__(self, *, locale_identifier: str, **kwargs: object):
        """The locale identifier must be understood by ``Locale.parse``."""
        parsed_locale = babel.Locale.parse(locale_identifier)
        self.locale = parsed_locale
        super().__init__(locale_identifier=locale_identifier, **kwargs)

    def convert_field(self, value: float | Decimal, conversion: str | None) -> object:
        """Wrap the value in a ``_Plural`` for ``!p``; defer otherwise."""
        if conversion != 'p':
            return super().convert_field(value, conversion)
        format_type = self.type_of_format_string
        assert format_type is not None
        return _Plural(value, self.locale, format_type)
class _Plural:
    """Wrapper around a numeric value with special formatting.

    This class formats itself as described in :py:class:`~PluralFormatter`.
    """

    def __init__(self, value: float | Decimal, locale: babel.Locale, type_of_format_spec: type[str]):
        self.value = value
        self.locale = locale
        self.type_of_format_spec = type_of_format_spec

    def __format__(self, format_spec: str) -> str:
        """Pick the text of the ``key=text`` entry matching the value.

        Lookup order: the explicit value, then the locale's plural tag
        for the value, then ``other``. Raises ``KeyError`` if none of
        them is present in the format spec.
        """
        format_spec = self.type_of_format_spec(format_spec)
        options = format_spec.split(':')

        def text_for(key: str) -> str | None:
            # first entry starting with "key=" wins; its text may be empty
            prefix = key + '='
            for option in options:
                if option.startswith(prefix):
                    return option[len(prefix) :]
            return None

        text = text_for(str(self.value))
        if text is not None:
            return text

        # the plural tag is only computed once the explicit value did not match
        tag = self.locale.plural_form(self.value)
        for key in (tag, 'other'):
            text = text_for(key)
            if text is not None:
                return text

        raise KeyError(f'No plurals for tag "{tag}" or "other" found in format spec "{format_spec}"!')
# [docs]
class CommaSeparatedListFormatter(BaseI18nFormatter):
    """A string formatter supporting a ``!l`` list conversion.

    Format string example::

        "We went to {cities!l}."

    When an iterable value is converted with ``!l``, the format spec is
    applied to every element of the list, and the formatted elements
    are then joined into a standard list for the locale given to the
    constructor. (In English, most items are separated by an ASCII
    comma plus a space and the final two by an extra “and”; Chinese
    and Japanese, for instance, use a fullwidth comma instead.)
    Converting non-iterable values with ``!l`` is an error.
    """

    def __init__(self, *, locale_identifier: str, **kwargs: object):
        """The locale identifier must be understood by ``Locale.parse``."""
        self.locale = babel.Locale.parse(locale_identifier)
        # Wrap each of Babel's list patterns in markup_type (Markup when
        # MarkupSafe is available, plain str otherwise).
        # NOTE(review): this assigns into the mapping exposed by the Locale
        # object; if Babel shares that data between Locale instances, the
        # change affects other users of the same locale - confirm.
        list_patterns = self.locale.list_patterns
        for style in list_patterns:
            style_patterns = list_patterns[style]
            for key, pattern in style_patterns.items():
                style_patterns[key] = markup_type(pattern)
        super().__init__(locale_identifier=locale_identifier, **kwargs)

    def convert_field(self, value: Sequence[object], conversion: str | None) -> object:
        """Wrap the value in a ``_CommaSeparatedList`` for ``!l``; defer otherwise."""
        if conversion != 'l':
            return super().convert_field(value, conversion)
        format_type = self.type_of_format_string
        assert format_type is not None
        return _CommaSeparatedList(value, self.locale, format_type, self.format_field)
class _CommaSeparatedList:
    """Wrapper around a list with special formatting.

    This class formats itself as described in :py:class:`~CommaSeparatedListFormatter`.
    """

    def __init__(
        self,
        value: Sequence[object],
        locale: babel.Locale,
        type_of_format_spec: type[str],
        format_function: Callable[[object, str], object],
    ):
        self.value = value
        self.locale = locale
        self.type_of_format_spec = type_of_format_spec
        self.format_function = format_function

    def __format__(self, format_spec: str) -> str:
        """Format each item with the format spec, then join them for the locale."""
        typed_spec = self.type_of_format_spec(format_spec)
        formatted_items = []
        for item in self.value:
            formatted_item = self.format_function(item, typed_spec)
            formatted_items.append(markup_type(formatted_item))
        return babel.lists.format_list(formatted_items, locale=self.locale)
# Type of the callback passed to GenderFormatter: it receives the value
# being formatted and returns its grammatical gender as 'm', 'f' or 'n'.
GetGender: TypeAlias = Callable[[object], Literal['m', 'f', 'n']]
# [docs]
class GenderFormatter(BaseI18nFormatter):
    """A string formatter supporting a ``!g`` grammatical gender conversion.

    Format string examples::

        "Leave a message on {user!g:m=his:f=her:n=their} talk page."
        "Ci dispiace, ma non sei {user!g:m=autorizzato:f=autorizzata:n=autorizzato/a} a usare il caricamento di massa."

    The value being formatted can be anything as far as this formatter
    is concerned. It is handed to the function given to the
    constructor, which should return ``"m"``, ``"f"``, or ``"n"`` to
    select the grammatically masculine, feminine, or neutral
    replacement, respectively. The format spec lists these three
    replacements, separated by colons. Gender values not specified in
    the format spec fall back to ``"m"``.
    """

    def __init__(self, *, get_gender: GetGender, **kwargs: object):
        """Remember the gender callback and pass all arguments on."""
        self.get_gender = get_gender
        super().__init__(get_gender=get_gender, **kwargs)

    def convert_field(self, value: object, conversion: str | None) -> object:
        """Wrap the value in a ``_Gender`` for ``!g``; defer otherwise."""
        if conversion != 'g':
            return super().convert_field(value, conversion)
        format_type = self.type_of_format_string
        assert format_type is not None
        return _Gender(value, self.get_gender, format_type)
class _Gender:
    """Wrapper around a value with special formatting.

    This class formats itself as described in :py:class:`~GenderFormatter`.
    """

    def __init__(self, value: object, get_gender: GetGender, type_of_format_spec: type[str]):
        self.value = value
        self.get_gender = get_gender
        self.type_of_format_spec = type_of_format_spec

    def __format__(self, format_spec: str) -> str:
        """Pick the replacement matching the gender of the wrapped value.

        The format spec is a colon-separated list of ``key=text``
        entries. If it has only one entry, that entry must be the
        ``m=`` one and is used without calling ``get_gender`` at all.
        Otherwise the entry for the gender returned by ``get_gender``
        is used, falling back to the ``m=`` entry.

        :raises KeyError: if neither the entry for the gender nor the
            ``m=`` fallback is present in the format spec
        """
        format_spec = self.type_of_format_spec(format_spec)
        replacements = format_spec.split(':')

        if len(replacements) == 1:
            # Only one choice: skip the get_gender call.
            # Validate explicitly rather than with ``assert``, which is
            # stripped under ``python -O`` and would then silently return
            # a truncated replacement.
            replacement = replacements[0]
            if not replacement.startswith('m='):
                raise KeyError(f'No replacement for gender "m" found in format spec "{format_spec}"!')
            return replacement[len('m=') :]

        gender = self.get_gender(self.value)
        # try the actual gender first, then fall back to "m"
        for key in (gender, 'm'):
            key_eq = key + '='
            for replacement in replacements:
                if replacement.startswith(key_eq):
                    return replacement[len(key_eq) :]

        raise KeyError(f'No replacement for gender "{gender}" or "m" found in format spec "{format_spec}"!')
# [docs]
class HyperlinkFormatter(BaseI18nFormatter):
    """A string formatter supporting an ``!h`` hyperlink conversion.

    Format string example::

        "You need to {url!h:log in} before you can edit."

    The value being formatted becomes the ``href`` attribute of an
    HTML ``<a>`` element, and the format spec becomes its inner HTML.
    """

    def convert_field(self, value: str, conversion: str | None) -> object:
        """Wrap the value in a ``_Hyperlink`` for ``!h``; defer otherwise."""
        if conversion != 'h':
            return super().convert_field(value, conversion)
        format_type = self.type_of_format_string
        assert format_type is not None
        return _Hyperlink(value, format_type, self.escape)
class _Hyperlink:
"""Wrapper around a URL with special formatting.
This class formats itself as described in :py:class`~HyperlinkFormatter`.
"""
def __init__(self, value: str, type_of_format_spec: type[str], escape: Callable[[str], str]):
self.value = value
self.type_of_format_spec = type_of_format_spec
self.escape = escape
def __format__(self, format_spec: str) -> str:
format_spec = self.type_of_format_spec(format_spec)
# turn the value into the type of the format spec,
# so that if format spec is str and value is Markup,
# the value doesn’t escape everything around it –
# but first escape the value, in case it’s str
# and the format spec is Markup
value = self.type_of_format_spec(self.escape(self.value))
return (
self.type_of_format_spec(r'<a href="')
+ value
+ self.type_of_format_spec(r'">')
+ format_spec
+ self.type_of_format_spec(r'</a>')
)
# [docs]
class I18nFormatter(PluralFormatter, CommaSeparatedListFormatter, GenderFormatter, HyperlinkFormatter): # type: ignore[misc]
    """A string formatter supporting ``!p`` (plural), ``!l`` (list),
    ``!g`` (gender) and ``!h`` (hyperlink) conversions.

    See :py:class:`~PluralFormatter`, :py:class:`~CommaSeparatedListFormatter`,
    :py:class:`~GenderFormatter` and :py:class:`~HyperlinkFormatter` for details.

    The constructor takes the keyword-only arguments of the combined
    classes: ``locale_identifier`` (for the plural and list conversions)
    and ``get_gender`` (for the gender conversion).

    Flask-based tools don’t need to use this class directly
    (it’s used by :py:class:`~message`).
    """