OpenEnergyPlatform · jh-RLI · Mar 22, 2025 · Jan 27, 2025 · Jan 27, 2025 · Feb 19, 2025
diff --git a/README.rst b/README.rst
@@ -86,6 +86,9 @@ of the oemetadata-specification to help users stick with the latest enhancements
 To ease the conversion of oemetadata from any outdated version to the latest version, we provide a
 conversion functionality. The following example shows how to convert the oemetadata from v1.6 to v2.0.
 
+Starting from v2, we do not support conversions for patch versions. This means you can convert from v1.6 to v2.0 but not from v2.0.0 to v2.0.1.
+The oemetadata release procedure requires that breaking changes are only introduced in major or minor versions. Only these changes will require a conversion.
+
 CLI - oemetadata conversion::
 
     # Not implemented yet
@@ -112,7 +115,7 @@ Module usage - In python scripts you can use the conversion::
     meta = read_json_file(file_path)
 
     # use omi to convert it to the latest release
-    converted = convert_metadata(meta, "OEMetadata-2.0.1")
+    converted = convert_metadata(meta, "OEMetadata-2.0")
 
     # now you can store the result as json file
     with open("result.json", "w", encoding="utf-8") as json_file:
@@ -129,7 +132,7 @@ two arguments the first one is the metadata and the second optional one is the s
 the validation will try to get the matching schema for the current metadata.
 
 
-CLI - oemetadata conversion::
+CLI - oemetadata validation::
 
     # Not implemented yet
 

diff --git a/poetry.lock b/poetry.lock
diff --git a/src/omi/base.py b/src/omi/base.py
@@ -4,15 +4,17 @@
 
 import json
 import pathlib
+import re
 from dataclasses import dataclass
 
 import requests
-from metadata import v20, v152, v160
+from oemetadata.v1 import v152, v160
+from oemetadata.v2 import v20
 
 from .settings import OEP_URL
 
 # Order matters! First entry equals latest version of metadata format
-METADATA_FORMATS = {"OEP": ["OEMetadata-2.0.1", "OEP-1.6.0", "OEP-1.5.2"], "INSPIRE": []}
+METADATA_FORMATS = {"OEP": ["OEMetadata-2.0", "OEP-1.6.0", "OEP-1.5.2"], "INSPIRE": []}
 METADATA_VERSIONS = {version: md_format for md_format, versions in METADATA_FORMATS.items() for version in versions}
 
 
@@ -70,13 +72,28 @@ def get_metadata_version(metadata: dict) -> str:
     """
     # For OEP metadata
     try:
-        return metadata["metaMetadata"]["metadataVersion"]
+        return __normalize_metadata_version(metadata["metaMetadata"]["metadataVersion"])
     except KeyError:
         pass
     msg = "Could not extract metadata version from metadata."
     raise MetadataError(msg)
 
 
+def __normalize_metadata_version(version: str) -> str:
+    """
+    Normalize a metadata version string by stripping patch numbers.
+
+    For example, "OEMetadata-2.0.4" becomes "OEMetadata-2.0".
+    """
+    if not isinstance(version, str):
+        raise MetadataError(f"Metadata version must be a string, not {type(version)}.")
+    # This regex captures "OEMetadata-2.0" from "OEMetadata-2.0.4" or similar
+    m = re.match(r"^(OEMetadata-2\.\d+)(?:\.\d+)?$", version)
+    if m:
+        return m.group(1)
+    return version
+
+
 def get_latest_metadata_version(metadata_format: str) -> str:
     """
     Return the latest metadata version of a given metadata format.
@@ -148,7 +165,7 @@ def __get_metadata_specs_for_oep(metadata_version: str) -> MetadataSpecification
     MetadataSpecification
         Metadata schema for given metadata version including template and example.
     """
-    metadata_modules = {"OEP-1.5.2": v152, "OEP-1.6.0": v160, "OEMetadata-2.0.1": v20}
+    metadata_modules = {"OEP-1.5.2": v152, "OEP-1.6.0": v160, "OEMetadata-2.0": v20}
     metadata_module = metadata_modules[metadata_version]
     module_path = pathlib.Path(metadata_module.__file__).parent
     specs = {}

diff --git a/src/omi/conversion.py b/src/omi/conversion.py
@@ -4,7 +4,9 @@
 
 from copy import deepcopy
 
-from omi.base import get_metadata_specification, get_metadata_version
+from omi.base import get_metadata_version
+from omi.conversions.v152_to_v160 import convert_oep_152_to_160
+from omi.conversions.v160_to_v20 import convert_oep_160_to_20
 
 
 class ConversionError(Exception):
@@ -77,181 +79,7 @@ def get_chain(current_version: str) -> list[str] | None:
     raise ConversionError(f"No conversion chain found from {source_version} to {target_version}.")
 
 
-def __convert_oep_152_to_160(metadata: dict) -> dict:
-    """
-    Convert metadata with version "OEP-1.5.2" to "OEP-1.6.0".
-
-    Parameters
-    ----------
-    metadata: dict
-        Metadata
-
-    Returns
-    -------
-    dict
-        Updated metadata
-    """
-    # No changes in metadata fields
-    metadata["metaMetadata"]["metadataVersion"] = "OEP-1.6.0"
-    return metadata
-
-
-def __convert_oep_160_to_200(metadata: dict) -> dict:
-    """
-    Convert metadata with version "OEP-1.6.0" to "OEMetadata-2.0.1" using the v2.0 template.
-
-    Parameters
-    ----------
-    metadata: dict
-        Metadata dictionary in v1.6 format
-
-    Returns
-    -------
-    dict
-        Updated metadata dictionary in v2.0 format
-    """
-    metadata_v2 = deepcopy(get_metadata_specification("OEMetadata-2.0.1").template)
-    metadata_v2["name"] = metadata_v2["title"] = metadata_v2["id"] = metadata_v2["description"] = None
-
-    # Populate metadata v2 resources
-    for i, resource in enumerate(metadata.get("resources", [])):
-        resource_v2 = ___v2_ensure_resource_entry(metadata_v2, i)
-        ___v2_populate_resource_v2(resource_v2, metadata, resource)
-
-    # Update metaMetadata section
-    metadata_v2["metaMetadata"]["metadataVersion"] = "OEMetadata-2.0.1"
-    metadata_v2["metaMetadata"]["metadataLicense"] = metadata.get("metaMetadata", {}).get("metadataLicense")
-
-    return metadata_v2
-
-
-def ___v2_ensure_resource_entry(metadata_v2: dict, index: int) -> dict:
-    """Ensure a resource entry exists in metadata_v2 resources for the given index."""
-    if index >= len(metadata_v2["resources"]):
-        metadata_v2["resources"].append(deepcopy(metadata_v2["resources"][0]))
-    return metadata_v2["resources"][index]
-
-
-def ___v2_populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> None:
-    """Populate resource_v2 fields based on metadata and resource from v1.6."""
-    # Bulk update keys without
-    resource_v2.update(
-        {
-            "@id": metadata.get("@id"),
-            "@context": metadata.get("@context"),
-            "name": resource.get("name").split(".")[1],
-            "topics": [resource.get("name", "").split(".")[0]],
-            "title": metadata.get("title"),
-            "path": metadata.get("id"),
-            "description": metadata.get("description"),
-            "languages": metadata.get("language", []),
-            "subject": metadata.get("subject", []),
-            "keywords": metadata.get("keywords", []),
-            "publicationDate": metadata.get("publicationDate"),
-            "context": metadata.get("context", {}),
-            "temporal": metadata.get("temporal", {}),
-            "type": None,
-            "format": resource.get("format"),
-            "encoding": resource.get("encoding"),
-            "schema": {
-                "fields": resource.get("schema", {}).get("fields", []),
-                "primaryKey": resource.get("schema", {}).get("primaryKey", []),
-                "foreignKeys": resource.get("schema", {}).get("foreignKeys", []),
-            },
-            "dialect": resource.get("dialect", {}),
-            "review": metadata.get("review", {}),
-        },
-    )
-
-    resource_v2["context"]["publisher"] = None
-
-    resource_v2["embargoPeriod"]["start"] = None
-    resource_v2["embargoPeriod"]["end"] = None
-
-    # Set to null to avoid validation errors: URI
-    resource_v2["spatial"]["location"]["@id"] = None
-    resource_v2["spatial"]["location"]["address"] = metadata.get("spatial", {}).get("location")
-    resource_v2["spatial"]["location"]["latitude"] = None
-    resource_v2["spatial"]["location"]["longitude"] = None
-    # Set to null to avoid validation errors: URI
-    resource_v2["spatial"]["extent"]["name"] = metadata.get("spatial", {}).get("extent")
-    resource_v2["spatial"]["extent"]["@id"] = None
-    resource_v2["spatial"]["extent"]["resolutionValue"], resource_v2["spatial"]["extent"]["resolutionUnit"] = (
-        metadata.get("spatial", {}).get("resolution", "").split(" ", 1)
-    )
-    resource_v2["spatial"]["extent"]["crs"] = None
-
-    ___v2_populate_sources(resource_v2, metadata.get("sources", []))
-    ___v2_populate_contributors(resource_v2, metadata.get("contributors", []))
-    ___v2_populate_licenses(resource_v2, metadata.get("licenses", []))
-    ___v2_populate_schema_fields(resource_v2, resource)
-
-
-def ___v2_populate_sources(resource_v2: dict, sources: list) -> None:
-    """Populate sources in resource_v2 from sources in v1.6."""
-    for i_source, source in enumerate(sources):
-        if i_source >= len(resource_v2["sources"]):
-            resource_v2["sources"].append(deepcopy(resource_v2["sources"][0]))
-        source_v2 = resource_v2["sources"][i_source]
-        source_v2.update(
-            {
-                "title": source.get("title"),
-                "description": source.get("description"),
-                "path": source.get("path"),
-                "publicationYear": None,
-                "authors": [],
-            },
-        )
-        ___v2_populate_source_licenses(source_v2, source.get("licenses", []))
-
-
-def ___v2_populate_source_licenses(source_v2: dict, licenses: list) -> None:
-    """Populate licenses in source_v2 from licenses in v1.6."""
-    for i_license, license_entry in enumerate(licenses):
-        if i_license >= len(source_v2["licenses"]):
-            source_v2["licenses"].append(deepcopy(source_v2["licenses"][0]))
-        source_v2["licenses"][i_license].update(license_entry)
-        source_v2["licenses"][i_license]["copyrightStatement"] = None
-
-
-def ___v2_populate_contributors(resource_v2: dict, contributors: list) -> None:
-    """Populate contributors in resource_v2 from contributors in v1.6."""
-    for i_contribution, contributor in enumerate(contributors):
-        if i_contribution >= len(resource_v2["contributors"]):
-            resource_v2["contributors"].append(deepcopy(resource_v2["contributors"][0]))
-        contributor_v2 = resource_v2["contributors"][i_contribution]
-        contributor_v2.update(
-            {
-                "title": contributor.get("title"),
-                "path": contributor.get("path"),
-                "organization": contributor.get("organization"),
-                "date": contributor.get("date"),
-                "object": contributor.get("object"),
-                "comment": contributor.get("comment"),
-            },
-        )
-
-
-def ___v2_populate_licenses(resource_v2: dict, licenses: list) -> None:
-    """Populate licenses in resource_v2 from licenses in v1.6."""
-    for i_license, license_entry in enumerate(licenses):
-        if i_license >= len(resource_v2["licenses"]):
-            resource_v2["licenses"].append(deepcopy(resource_v2["licenses"][0]))
-        resource_v2["licenses"][i_license].update(license_entry)
-        resource_v2["licenses"][i_license]["copyrightStatement"] = None
-
-
-def ___v2_populate_schema_fields(resource_v2: dict, resource: dict) -> None:
-    """Populate schema fields in resource_v2 from resource in v1.6."""
-    for i_field, field in enumerate(resource.get("schema", {}).get("fields", [])):
-        if i_field >= len(resource_v2["schema"]["fields"]):
-            resource_v2["schema"]["fields"].append(deepcopy(resource_v2["schema"]["fields"][0]))
-        schema_field_v2 = resource_v2["schema"]["fields"][i_field]
-        schema_field_v2.update(field)
-        schema_field_v2["nullable"] = None
-
-
 METADATA_CONVERSIONS = {
-    ("OEP-1.5.2", "OEP-1.6.0"): __convert_oep_152_to_160,
-    ("OEP-1.6.0", "OEMetadata-2.0.1"): __convert_oep_160_to_200,
+    ("OEP-1.5.2", "OEP-1.6.0"): convert_oep_152_to_160,
+    ("OEP-1.6.0", "OEMetadata-2.0"): convert_oep_160_to_20,
 }
diff --git a/src/omi/conversions/README.md b/src/omi/conversions/README.md
@@ -0,0 +1,5 @@
+# Conversions
+
+This module collects all existing OEMetaData version conversions. Each step in the conversion chain is stored in its own submodule. OMI supports OEMetaData starting from v1.5.2; previous versions are only supported by omi version > v1.0.0.
+
+Since OEMetaData version 2, we decided to use patch versions only to update content or documentation parts of the metadata specification. Therefore, OMI will only implement conversion steps for minor versions, since they include all minor structural changes such as changing JSON key names or adding new key:value pairs. More substantial changes to the JSON structure will be reflected in a major version change; this would include changing the nested structure of the metadata.
diff --git a/src/omi/conversions/__init__.py b/src/omi/conversions/__init__.py
@@ -0,0 +1 @@
+__version__ = "1.0.0"
diff --git a/src/omi/conversions/utils.py b/src/omi/conversions/utils.py
@@ -0,0 +1,89 @@
+"""Utility functions for data conversion."""
+
+import re
+
+
+def find_temporal_resolution_value_and_unit(resolution: str) -> tuple[str, str]:
+    """
+    Find temporal resolution value and unit from a resolution string.
+
+    For temporal resolution, if the string starts with a number, this function will extract the number
+    as the value and any following alphabetical characters as the unit. If no leading numeric value is found,
+    the whole string is treated as a descriptive resolution with an empty unit.
+
+    Possible formats:
+      - "yearly"
+      - "hourly"
+      - "1 h"
+      - "5 years"
+      - "1h"
+
+    Parameters
+    ----------
+    resolution: str
+        Temporal resolution string.
+
+    Returns
+    -------
+    tuple[str, str]
+        Temporal resolution value and unit.
+    """
+    # Try matching a number (with optional decimals) and an optional unit, allowing for spaces in between.
+    match = re.match(r"^\s*(\d+(?:\.\d+)?)(?:\s*([a-zA-Z]+))?\s*$", resolution)
+    if match:
+        value = match.group(1)
+        unit = match.group(2) if match.group(2) is not None else ""
+        return value, unit
+
+    # If no numeric pattern is detected, return the entire trimmed string as the value.
+    return resolution.strip(), ""
+
+
+def find_spatial_resolution_value_and_unit(resolution: str) -> tuple[str, str]:
+    """
+    Find spatial resolution value and unit from a resolution string.
+
+    For spatial resolution, this function attempts to extract a numeric value with a 'm' (meters) unit,
+    as in "100 m" or even when embedded in a longer string like "vector, 10 m". If such a pattern is found,
+    the numeric part is returned as the value and the unit is set to "m". Otherwise, the entire string
+    is returned as a descriptive resolution (value) with an empty unit.
+
+    Possible formats:
+      - "vector, 10 m"
+      - "100 m"
+      - "Germany"
+      - "NUTS-0"
+      - "MVGD"
+      - "Regionale Planungsgemeinschaften und Berlin"
+      - "national"
+      - "country"
+
+    Parameters
+    ----------
+    resolution: str
+        Spatial resolution string.
+
+    Returns
+    -------
+    tuple[str, str]
+        Spatial resolution value and unit (unit is expected to be 'm' when a numeric resolution is provided).
+    """
+    # Search for a numeric value followed by optional whitespace and an 'm' unit (case-insensitive).
+    match = re.search(r"(\d+(?:\.\d+)?)\s*m\b", resolution, re.IGNORECASE)
+    if match:
+        value = match.group(1)
+        unit = "m"
+        return value, unit
+
+    # If no numeric pattern is detected, return the entire trimmed string as the value.
+    return resolution.strip(), ""
+
+
+license_cc_by_4 = {
+    "name": "CC-BY-4.0",
+    "title": "Creative Commons Attribution 4.0 International",
+    "path": "https://creativecommons.org/licenses/by/4.0/legalcode",
+    "instruction": "You are free to share and adapt, but you must attribute and cant add additional restrictions. See https://creativecommons.org/licenses/by/4.0/deed.en for further information.",  # noqa: E501
+    "attribution": "",
+    "copyrightStatement": "",
+}