Skip to content

Commit 53f1c10

Browse files
committed
Convert ISO 3166-1 to SDMX
1 parent 16bc6d3 commit 53f1c10

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

transport_data/iso/__init__.py

+90
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
"""International Organization for Standardization (ISO)."""
22

33
import logging
4+
from functools import lru_cache
5+
from pathlib import Path
6+
from typing import TYPE_CHECKING, Literal, Mapping
47

58
from sdmx.model import common
69

710
from transport_data.util.pluggy import hookimpl
811

12+
if TYPE_CHECKING:
13+
import gettext
14+
915
log = logging.getLogger(__name__)
1016

1117

@@ -18,3 +24,87 @@ def get_agencies():
1824
contact=[common.Contact(uri=["https://iso.org"])],
1925
)
2026
return (a,)
27+
28+
29+
def get_cl_iso_3166_1(
    id_field: Literal["alpha_2", "alpha_3", "numeric"] = "alpha_2",
) -> common.Codelist:
    """Build a :class:`~sdmx.model.common.Codelist` from the ISO 3166-1 database.

    One :class:`~sdmx.model.common.Code` is created for every entry in the
    :mod:`pycountry` countries database. Each code has:

    - An ID read from the `id_field` attribute of the database entry.
    - A :attr:`~sdmx.model.common.IdentifiableArtefact.name` localized to the
      languages available in the upstream database.

    The code list takes its version from the installed :mod:`pycountry`
    distribution and is written to the local store before it is returned.

    Parameters
    ----------
    id_field :
        Field from the database to use for the IDs of generated Codes.
    """
    from importlib.metadata import version

    import sdmx.urn
    from pycountry import countries as db

    from transport_data import STORE

    # Start from an empty code list, maintained by the first agency returned by
    # get_agencies() and versioned like the installed pycountry package
    codelist: common.Codelist = common.Codelist(
        id=f"{db.root_key}_{id_field}",
        maintainer=get_agencies()[0],
        version=version("pycountry"),
    )

    # Message catalogs used to localize the country names
    catalogs = load_translations(f"iso{db.root_key}")

    for country in db:
        # Country name in the default locale plus every available translation
        # TODO Collect localizations of other fields; add as annotations
        # TODO Annotate with other non-localized fields
        labels = localize_all(country.name, catalogs)

        code = common.Code(id=getattr(country, id_field), name=labels)

        # Add the code to the list first, then generate its URN, keeping the
        # original ordering of these two steps
        codelist.append(code)
        code.urn = sdmx.urn.make(code)

    # Persist to the local store
    STORE.set(codelist)

    return codelist
81+
82+
83+
def localize_all(value: str, translations, *, default_locale="en") -> dict[str, str]:
    """Return `value` translated into every language in `translations`.

    The result maps language identifiers to strings. The first key is always
    `default_locale`, mapped to `value`. A language from `translations` is
    included only when its ``gettext()`` returns something different from
    `value`.
    """
    # Lazily pair each language with its (possibly unchanged) translation
    candidates = ((lang, tr.gettext(value)) for lang, tr in translations.items())

    # The default locale goes in first so that it leads the mapping
    localized = {default_locale: value}
    localized.update((lang, text) for lang, text in candidates if text != value)

    return localized
94+
95+
96+
@lru_cache
def load_translations(domain: str) -> Mapping[str, "gettext.NullTranslations"]:
    """Load every :mod:`pycountry` translation catalog available for `domain`.

    Each entry of pycountry's locales directory is treated as a language name.
    Languages with no catalog for `domain` are skipped. The returned mapping is
    keyed by language name, and results are memoized per `domain`.
    """
    from gettext import translation

    from pycountry import LOCALES_DIR

    catalogs = {}
    for entry in Path(LOCALES_DIR).iterdir():
        language = entry.name
        try:
            catalog = translation(domain, LOCALES_DIR, languages=[language])
        except FileNotFoundError:
            # No catalog for `domain` in this language; leave it out
            continue
        catalogs[language] = catalog

    return catalogs

transport_data/tests/test_iso.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import pytest
2+
3+
from transport_data.iso import get_cl_iso_3166_1
4+
5+
6+
@pytest.mark.parametrize(
    "id_field, entries",
    (
        # First and last entries of the code list: Aruba and Zimbabwe
        ("alpha_2", ("AW", "ZW")),
        ("alpha_3", ("ABW", "ZWE")),
        ("numeric", ("533", "716")),
    ),
)
def test_get_cl_iso_3166_1(id_field, entries) -> None:
    """The ISO 3166-1 code list can be generated with each kind of code ID."""
    result = get_cl_iso_3166_1(id_field=id_field)

    # Result has the expected number of codes
    assert 249 == len(result)

    # Every expected entry can be looked up by its ID
    for entry in entries:
        assert entry == result[entry].id

    # The name of Aruba is localized in 58 languages
    assert 58 == len(result[entries[0]].name.localizations)

0 commit comments

Comments
 (0)