Skip to content

Commit

Permalink
Add language support
Browse files Browse the repository at this point in the history
  • Loading branch information
ChristopherSpelt authored and uittenbroekrobbert committed Jun 27, 2024
1 parent d540737 commit 43b59cc
Show file tree
Hide file tree
Showing 24 changed files with 663 additions and 168 deletions.
4 changes: 4 additions & 0 deletions BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ To run the application you use this command:
docker compose up
```

## Language support

We use Babel for translations and a custom YAML file for dynamic translations. Babel does not support Frisian, so we added a custom piece of code to support this. To generate, update or compile the language files, use the script ./script/translate.

## Testing, Linting etc

For testing, linting and other features we use several tools. You can look up the documentation on how to use these:
Expand Down
3 changes: 3 additions & 0 deletions babel-mapping.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[jinja2: **/templates/**.jinja]
extensions=jinja2.ext.i18n
silent=False
294 changes: 154 additions & 140 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pydantic-settings = "^2.3.3"
psycopg2-binary = "^2.9.9"
uvicorn = {extras = ["standard"], version = "^0.30.1"}
pyyaml = "^6.0.1"
babel = "^2.15.0"


[tool.poetry.group.test.dependencies]
Expand Down
62 changes: 62 additions & 0 deletions script/pybabel-for-tad
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python

import re
import sys

from babel.core import get_global
from babel.messages.frontend import main


def hack_babel(custom_locales: dict):
    """Hack Babel core to make it support custom locale names.

    Copied from https://github.com/python-babel/babel/issues/454

    Args:
        custom_locales: Mapping from { custom name => ordinary name }

    Raises:
        ValueError: If one of the ordinary names is not known to Babel. This is
            checked before anything is patched, so a bad mapping leaves Babel
            untouched.
    """
    # Local import: only this function needs the module object to patch it.
    import babel.localedata

    # Originals
    o_exists, o_load = babel.localedata.exists, babel.localedata.load

    # See that the ordinary names actually exist. An explicit exception is used
    # instead of `assert`, because asserts are removed when Python runs with -O.
    unknown = [name for name in custom_locales.values() if not o_exists(name)]
    if unknown:
        raise ValueError(f"Babel has no locale data for: {', '.join(unknown)}")

    # In order for Babel to know "en_CUSTOM", we have to hack its database and put our custom
    # locale names there.
    # This database is pickle-loaded from a .dat file and cached, so we only have to do it once.
    db = get_global("likely_subtags")
    for custom_name in custom_locales:
        db[custom_name] = custom_name

    # Make sure we do not patch twice: once patched, the installed functions are
    # defined in this module.
    if o_exists.__module__ == __name__:
        return

    # Also, monkey-patch the exists() and load() functions that load locale data from 'babel/locale-data'
    def exists(name):
        # Convert custom names to normalized names
        name = custom_locales.get(name, name)
        return o_exists(name)

    def load(name, merge_inherited=True):
        # Convert custom names to normalized names
        name = custom_locales.get(name, name)
        return o_load(name, merge_inherited)

    # Patch
    babel.localedata.exists = exists
    babel.localedata.load = load


# Register the custom locale name at import time, before Babel's command-line
# entry point below is invoked.
hack_babel(
    {
        "nl_FY": "fy",  # custom => original
    }
)


if __name__ == "__main__":
    # Drop a trailing "-script.pyw" or ".exe" from the program name, then hand
    # control to Babel's command-line main() and exit with its result.
    sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0])
    sys.exit(main())
27 changes: 27 additions & 0 deletions script/translate
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
#
# Manage the Babel translation catalogs under tad/locale.
#
# Usage: ./script/translate <command>
#   init     extract messages and create empty catalogs for en_US, nl_NL and nl_FY
#            (caution: only needed once)
#   update   extract messages and merge them into the existing catalogs
#   compile  compile the catalogs

# Stop at the first failing step so a failed extract is never followed by an
# init/update that works from a stale or missing base.pot.
set -euo pipefail

if [ "$#" -ne 1 ]
then
    echo "Usage: ./script/translate command" >&2
    echo "command is init (caution: only needed once, it creates empty translation files), update, compile" >&2
    exit 1
fi

# Every path below is relative to the repository root (the parent of this
# script's directory), so switch there regardless of the caller's directory.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# Extract all translatable messages into the shared template file.
extract_messages() {
    ./script/pybabel-for-tad extract -F babel-mapping.ini . -o tad/locale/base.pot
}

case "$1" in
    compile)
        ./script/pybabel-for-tad compile -d tad/locale
        ;;
    init)
        extract_messages
        for locale in en_US nl_NL nl_FY
        do
            ./script/pybabel-for-tad init -l "$locale" -i tad/locale/base.pot -d tad/locale
        done
        ;;
    update)
        extract_messages
        ./script/pybabel-for-tad update -i tad/locale/base.pot -d tad/locale
        ;;
    *)
        echo "Unknown command" >&2
        exit 1
        ;;
esac
50 changes: 44 additions & 6 deletions tad/api/deps.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,53 @@
import logging
import typing
from os import PathLike

from fastapi import Request
from fastapi.templating import Jinja2Templates
from jinja2 import Environment
from starlette.templating import _TemplateResponse # type: ignore

from tad.core.config import VERSION
from tad.core.internationalization import (
format_datetime,
get_dynamic_field_translations,
get_requested_language,
get_supported_translation,
get_translation,
supported_translations,
)

logger = logging.getLogger(__name__)

def version_context_processor(request: Request):
    """Return the template context entry that exposes the application version.

    The request argument is accepted but not used.
    """
    return {"version": VERSION}

def custom_context_processor(request: Request) -> dict[str, str | list[str] | dict[str, str]]:
    """Build the context values that are added to every rendered template.

    Args:
        request: The incoming request; the requested language is read from it.

    Returns:
        A dict with the application version, the list of supported language
        codes, the active language code and the dynamic field translations for
        that language.
    """
    # The requested language comes from a client-controlled cookie. Normalize it
    # to a supported code up front, so templates never see an arbitrary value in
    # "language" and the cached translation lookup below is only ever keyed on
    # supported codes.
    lang = get_supported_translation(get_requested_language(request))
    return {
        "version": VERSION,
        "available_translations": list(supported_translations),
        "language": lang,
        "translations": get_dynamic_field_translations(lang),
    }

# Jinja environment with autoescaping switched on.
env = Environment(
    autoescape=True,
)
# Template renderer for tad/site/templates/ that uses the environment above and
# registers version_context_processor as its only context processor.
templates = Jinja2Templates(directory="tad/site/templates/", context_processors=[version_context_processor], env=env)

# we use a custom override so we can add the translation per request, which is parsed in the Request object in kwargs
class LocaleJinja2Templates(Jinja2Templates):
    """Jinja2Templates that installs the request's gettext translations before rendering.

    The Jinja environment is created with the ``jinja2.ext.i18n`` extension, and
    every call to ``TemplateResponse`` installs the translations that belong to
    the language requested by the client.

    NOTE(review): translations are installed on the single shared environment
    for each response; confirm this is safe if responses for different
    languages can be rendered concurrently.
    """

    def _create_env(
        self,
        directory: str | PathLike[typing.AnyStr] | typing.Sequence[str | PathLike[typing.AnyStr]],
        **env_options: typing.Any,
    ) -> Environment:
        """Create the environment via the parent class and add the i18n extension."""
        env: Environment = super()._create_env(directory, **env_options)  # type: ignore
        env.add_extension("jinja2.ext.i18n")  # type: ignore
        return env  # type: ignore

    @staticmethod
    def _find_request(args: tuple[typing.Any, ...], kwargs: dict[str, typing.Any]) -> typing.Any:
        """Locate the request in the arguments given to TemplateResponse.

        The request may be passed as the ``request`` keyword, as the first
        positional argument, or inside the template context (second positional
        argument or the ``context`` keyword).

        Raises:
            KeyError: If no request can be found in any of those places.
        """
        if "request" in kwargs:
            return kwargs["request"]
        # A first positional argument that is not a string is the request itself;
        # a string there is the template name.
        if args and not isinstance(args[0], str):
            return args[0]
        context = args[1] if len(args) > 1 else kwargs.get("context")
        if context and "request" in context:
            return context["request"]
        raise KeyError("request")

    def TemplateResponse(self, *args: typing.Any, **kwargs: typing.Any) -> _TemplateResponse:
        """Render a template response using the translations for the requested language.

        Accepts the same arguments as the parent ``TemplateResponse``. The
        ``Content-Language`` header is always set by this method; any other
        headers supplied by the caller are kept.

        Raises:
            KeyError: If the call does not carry a request.
        """
        request = self._find_request(args, kwargs)
        content_language = get_supported_translation(get_requested_language(request))
        translations = get_translation(content_language)
        # Merge instead of replace, so headers passed by the caller are not lost.
        # NOTE(review): the header lists every supported language rather than
        # content_language; kept as is, confirm this is intended.
        kwargs["headers"] = {
            **(kwargs.get("headers") or {}),
            "Content-Language": ",".join(supported_translations),
        }
        self.env.install_gettext_translations(translations, newstyle=True)  # type: ignore
        return super().TemplateResponse(*args, **kwargs)


# Template renderer for tad/site/templates/ with custom_context_processor
# registered as its context processor.
templates = LocaleJinja2Templates(directory="tad/site/templates/", context_processors=[custom_context_processor])
# Make format_datetime available in templates as the "format_datetime" filter.
templates.env.filters["format_datetime"] = format_datetime  # type: ignore
57 changes: 57 additions & 0 deletions tad/core/internationalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import logging
from datetime import datetime
from functools import lru_cache

import yaml
from babel import dates
from babel.support import NullTranslations, Translations
from starlette.requests import Request

logger = logging.getLogger(__name__)

# Language code used when no language, or an unsupported one, is requested.
_default_language_fallback = "en"
# Language codes the application accepts.
supported_translations: tuple[str, ...] = ("en", "nl", "fy")
# Babel does not support Frisian; to be able to load the right MO file, we need to 'map' it ourselves.
_translations_to_locale: dict[str, str] = {"en": "en", "nl": "nl", "fy": "nl_FY"}


@lru_cache(maxsize=len(supported_translations))
def get_dynamic_field_translations(lang: str) -> dict[str, str]:
    """Load the dynamic field translations for a language from its YAML file.

    Unsupported language codes fall back to the default language. Results are
    cached per requested code, so every caller receives the same dict object and
    must not modify it.

    Args:
        lang: Requested language code.

    Returns:
        The parsed content of ``tad/languages/<lang>.yaml``.
    """
    lang = get_supported_translation(lang)
    # Read as UTF-8 explicitly: without it the platform's default encoding is
    # used, which is not UTF-8 everywhere.
    with open(f"tad/languages/{lang}.yaml", encoding="utf-8") as stream:
        return yaml.safe_load(stream)


def get_supported_translation(lang: str) -> str:
    """Return ``lang`` if it is supported, otherwise the default fallback language.

    A warning is logged whenever the fallback is used.

    Args:
        lang: Requested language code.

    Returns:
        A language code that is guaranteed to be in ``supported_translations``.
    """
    if lang in supported_translations:
        return lang
    logger.warning("Requested translation does not exist: %s, using fallback %s", lang, _default_language_fallback)
    return _default_language_fallback


@lru_cache(maxsize=len(supported_translations))
def get_translation(lang: str) -> NullTranslations:
    """Load the gettext catalog for a language from ``tad/locale``.

    The language is first reduced to a supported code and then mapped to the
    locale name whose catalog should be loaded. Results are cached per
    requested language code.
    """
    locale_name = _translations_to_locale[get_supported_translation(lang)]
    return Translations.load("tad/locale", locales=locale_name)


def format_datetime(value: datetime, locale: str, format: str = "medium") -> str:
    """Format a datetime for display in the given language.

    For Frisian ("fy") the "full" and "medium" formats are built by hand from
    the weekday and month names in the dynamic field translations. Every other
    combination is delegated to Babel with a fixed pattern.

    Args:
        value: The datetime to format.
        locale: Language code to format for.
        format: "full", "medium", or anything else for the short numeric form.

    Returns:
        The formatted date and time.
    """
    if locale == "fy" and format in ("full", "medium"):
        names = get_dynamic_field_translations("fy")
        # The weekday list starts at Sunday; isoweekday() is 1 (Monday) to 7 (Sunday).
        weekday = names["weekdays"][value.isoweekday() % 7]
        # Day and month come straight from the datetime attributes rather than
        # the "%-d" / "%-m" strftime directives, which only exist on some
        # platforms. The translated names are placed outside the strftime
        # pattern so they are never interpreted as directives.
        if format == "full":
            month = names["months"][value.month - 1]
            return f"{weekday}, {value.day} {month} {value.strftime('%Y %H:%M')}"
        return f"{weekday} {value.strftime('%d-%m-%Y %H:%M')}"

    if format == "full":
        pattern = "EEEE, d MMMM y HH:mm"
    elif format == "medium":
        pattern = "EE dd/MM/y HH:mm"
    else:
        pattern = "dd/MM/y HH:mm"
    return dates.format_datetime(value, pattern, locale=locale)


def get_requested_language(request: Request) -> str:
    """Return the language from the request's "lang" cookie, or the default fallback.

    The cookie value is returned as is; it is not checked against the supported
    languages here.
    """
    # TODO(robbert): nice to have, default support based on accept lang of browser
    cookies = request.cookies
    return cookies.get("lang", _default_language_fallback)
4 changes: 4 additions & 0 deletions tad/languages/en.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
todo: ToDo
review: Review
done: Done
in_progress: In Progress
26 changes: 26 additions & 0 deletions tad/languages/fy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
todo: Dwaan
review: Resinsje
in_progress: Dwaande
done: Dien
# Please note: we have custom translations for weekdays and months because Babel does not support Frisian
weekdays: # note, the order corresponds to the weekdays (number) used by date formatting
- snein
- moandei
- tiisdei
- woansdei
- tongersdei
- freed
- sneon
months:
- jannewaris
- febrewaris
- maart
- april
- maaie
- juny
- july
- augustus
- septimber
- oktober
- novimber
- desimber
4 changes: 4 additions & 0 deletions tad/languages/nl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
todo: Te doen
review: Beoordelen
done: Afgerond
in_progress: Onderhanden
30 changes: 30 additions & 0 deletions tad/locale/base.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Translations template for PROJECT.
# Copyright (C) 2024 ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-06-24 10:53+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.15.0\n"

#: tad/site/templates/default_layout.jinja:19
msgid "Transparency of Algorithmic Decision making (TAD)"
msgstr ""

#: tad/site/templates/default_layout.jinja:49
msgid "Transparency of Algorithmic Decision making"
msgstr ""

#: tad/site/templates/default_layout.jinja:55
msgid "Language"
msgstr ""
Binary file added tad/locale/en_US/LC_MESSAGES/messages.mo
Binary file not shown.
31 changes: 31 additions & 0 deletions tad/locale/en_US/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# English (United States) translations for PROJECT.
# Copyright (C) 2024 ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-06-24 10:53+0200\n"
"PO-Revision-Date: 2024-06-24 09:48+0200\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: en_US\n"
"Language-Team: en_US <[email protected]>\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.15.0\n"

#: tad/site/templates/default_layout.jinja:19
msgid "Transparency of Algorithmic Decision making (TAD)"
msgstr ""

#: tad/site/templates/default_layout.jinja:49
msgid "Transparency of Algorithmic Decision making"
msgstr ""

#: tad/site/templates/default_layout.jinja:55
msgid "Language"
msgstr ""
Binary file added tad/locale/nl_FY/LC_MESSAGES/messages.mo
Binary file not shown.
31 changes: 31 additions & 0 deletions tad/locale/nl_FY/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Dutch translations for PROJECT.
# Copyright (C) 2024 ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-06-24 10:53+0200\n"
"PO-Revision-Date: 2024-06-24 09:48+0200\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: nl_FY\n"
"Language-Team: nl_FY <[email protected]>\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.15.0\n"

#: tad/site/templates/default_layout.jinja:19
msgid "Transparency of Algorithmic Decision making (TAD)"
msgstr "Transparânsje fan algoritmyske beslútfoarming (TAD)"

#: tad/site/templates/default_layout.jinja:49
msgid "Transparency of Algorithmic Decision making"
msgstr "Transparânsje fan algoritmyske beslútfoarming"

#: tad/site/templates/default_layout.jinja:55
msgid "Language"
msgstr "Taal"
Binary file added tad/locale/nl_NL/LC_MESSAGES/messages.mo
Binary file not shown.
31 changes: 31 additions & 0 deletions tad/locale/nl_NL/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Dutch (Netherlands) translations for PROJECT.
# Copyright (C) 2024 ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2024.
#
msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-06-24 10:53+0200\n"
"PO-Revision-Date: 2024-06-24 09:48+0200\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: nl_NL\n"
"Language-Team: nl_NL <[email protected]>\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.15.0\n"

#: tad/site/templates/default_layout.jinja:19
msgid "Transparency of Algorithmic Decision making (TAD)"
msgstr "Transparantie voor Algoritmische Besluitvorming (TAD)"

#: tad/site/templates/default_layout.jinja:49
msgid "Transparency of Algorithmic Decision making"
msgstr "Transparantie voor Algoritmische Besluitvorming"

#: tad/site/templates/default_layout.jinja:55
msgid "Language"
msgstr "Taal"
Loading

0 comments on commit 43b59cc

Please sign in to comment.