Migrate message_data.tools.messagev

khaeru · khaeru · commit 828cd22cef41 · 2024-07-30T12:22:46.000+02:00
diff --git a/doc/api/tools-messagev.rst b/doc/api/tools-messagev.rst
@@ -0,0 +1,54 @@
+MESSAGE V input files
+*********************
+
+This document describes some of the file formats for the pre-:mod:`.ixmp` MESSAGE model, a.k.a. **MESSAGE V**, and code in :mod:`message_ix_models.tools.messagev` that reads these formats.
+
+.. note:: See also the earlier :doc:`import_from_msgV_sqlite` for similar code/descriptions.
+
+.. contents::
+   :local:
+
+``easemps3.free``: soft dynamic constraints
+-------------------------------------------
+
+Each constraint is specified by a single row with the following, space-separated entries:
+
+1. **Constraint type.** `mpa` (constraint on activity) or `mpc` (constraint on capacity).
+2. **Technology name.** Four-letter internal name/code of a technology, e.g. `uHap`.
+3. **Lower/upper bound.** Either `LO` (decline constraint) or `UP` (growth constraint).
+4. **Cost type.** One of:
+
+   - `lev`: levelized costs.
+   - `abs`: absolute costs.
+   - `var`: variable costs.
+
+5. **Growth rate for step 1.** Percentage points of growth/decline at which the constraint becomes active.
+
+6. **Additional cost for step 1.** Additional cost applied to activity/capacity growth or decline beyond the rate in #5. Depending on #4, specified:
+
+   - `lev`: in percentage points of the levelized cost of the technology.
+   - `abs`, `var`: in absolute monetary value.
+
+7. **Up to 4 additional pairs of #5 and #6.** Growth rates for successive constraints are cumulative.
+
+An example:
+
+.. code::
+
+   mpa uEAp UP lev 5 50 15 300000
+
+Here the soft constraint extends the growth constraint (UP) on activity (mpa) of the technology uEAp.
+The allowed rate of growth is increased by 5 %-points, and each additional unit of output that can be produced as a result costs an additional 50 % of the levelized costs, on top of the normal costs (i.e. the costs that result from building and using the additional capacity required for the additional production).
+
+The second step increases the maximum growth rate further, by 15 %-points, but the costs are prohibitive (300000).
+
+Soft constraints can be set for each technology individually. This can be done globally ("regions = all -glb") or for each region separately ("regions = cpa").
+
+
+API reference
+-------------
+
+.. currentmodule:: message_ix_models.tools.messagev
+
+.. automodule:: message_ix_models.tools.messagev
+   :members:
diff --git a/doc/index.rst b/doc/index.rst
@@ -69,6 +69,7 @@ Commonly used classes may be imported directly from :mod:`message_ix_models`.
    api/report/index
    api/tools
    api/tools-costs
+   api/tools-messagev
    api/data-sources
    api/util
    api/testing
diff --git a/message_ix_models/tools/messagev.py b/message_ix_models/tools/messagev.py
@@ -0,0 +1,194 @@
+"""Tools for extracting data from MESSAGE V."""
+
+import re
+from functools import lru_cache
+
+import numpy as np
+import pandas as pd
+
+
+class CHNFile:
+    """Reader for MESSAGE V ``.chn`` files.
+
+    The file is parsed once, on construction. Each non-empty "Producers" or
+    "Consumers" block is stored in :attr:`data` under the key (first entry of the
+    level line, first entry of the commodity line, block name); the value is a list
+    with one whitespace-split list of strings per data line.
+    """
+
+    index = {}
+
+    # FIXME reduce complexity from 15 to ≤14
+    def __init__(self, path):  # noqa: C901
+        """Parse the ``.chn`` file at `path`."""
+
+        def _depth(text):
+            # Width of leading whitespace in units of 2 characters; tabs count as 4
+            return (len(text.replace("\t", "    ")) - len(text.lstrip())) // 2
+
+        # Parser state: [(level name, level), commodity entries, block name]
+        stack = []
+        self.data = {}
+
+        # Use a context manager so the file is closed even if parsing fails
+        with open(path) as f:
+            for line in f:
+                if line.startswith("#"):  # Comment
+                    pass
+                elif len(stack) == 0:  # New level
+                    name, level = line.split()
+                    stack.append((name, level.rstrip(":")))
+                elif len(stack) == 1:  # New commodity
+                    stack.append(tuple(line.split()))
+                elif len(stack) == 2:
+                    if _depth(line) == 0 and line.strip() == "*":  # End of level
+                        stack = []
+                    elif _depth(line) == 1:  # New commodity
+                        stack[-1] = tuple(line.split())
+                    else:
+                        p_c = line.strip().rstrip(":")
+                        if p_c in ("Producers", "Consumers"):  # Start of P/C block
+                            stack.append(p_c)
+                            pc_data = []
+                        elif p_c == "*":  # Consecutive '*'
+                            pass
+                elif len(stack) == 3:
+                    if _depth(line) == 2 and line.strip() == "*":  # End of block
+                        # Store data; empty blocks are omitted
+                        if pc_data:
+                            key = (stack[0][0], stack[1][0], stack[2])
+                            self.data[key] = pc_data
+                        stack.pop(-1)
+                    else:  # Data line
+                        # TODO parse: tec, level, code, commodity, {ts,c}, [data]
+                        pc_data.append(line.split())
+                # NOTE(review): the stack never holds more than 3 entries in the
+                # branches above, so this branch appears unreachable — confirm
+                elif line == "*\n":
+                    stack.pop(-1)
+
+
+class DICFile:
+    """Reader for MESSAGE V ``.dic`` files.
+
+    Each line that is neither blank nor a ``#`` comment must contain exactly two
+    whitespace-separated entries, read as ``tec`` and ``code``. Both directions of
+    the mapping are stored, and ``dic[key]`` looks `key` up first as a code, then as
+    a ``tec``.
+    """
+
+    # NOTE(review): these dicts are class attributes that __init__ mutates in place,
+    # so entries are shared by every instance, including ``DICFile()`` created
+    # without a path — confirm this sharing is intended before making them
+    # per-instance.
+    tec_code = {}
+    code_tec = {}
+
+    def __init__(self, path=None):
+        """Read the file at `path`, if given; otherwise do nothing."""
+        if path is None:
+            return
+
+        # Use a context manager so the file is closed even if a line is malformed
+        with open(path) as f:
+            for line in f:
+                # Skip comments and blank lines; the latter would otherwise fail to
+                # unpack into 2 entries below
+                if line.startswith("#") or not line.strip():
+                    continue
+
+                tec, code = line.split()
+                self.tec_code[tec] = code
+                self.code_tec[code] = tec
+
+    def __getitem__(self, key):
+        """Return the ``tec`` for a code `key`, else the code for a ``tec`` `key`.
+
+        Raises
+        ------
+        KeyError
+            If `key` is neither a known code nor a known ``tec``.
+        """
+        try:
+            return self.code_tec[key]
+        except KeyError:
+            return self.tec_code[key]
+
+
+class INPFile:
+    """Reader for MESSAGE V ``.inp`` files.
+
+    On construction, the file is opened and indexed: sections are delimited by lines
+    containing only ``*``. The text before the first delimiter is indexed as
+    ``_info``; every later section is indexed under the first whitespace-separated
+    entry of its first line. The file stays open so that :meth:`get_section` can
+    read sections on demand; call :meth:`close` (or use the instance as a context
+    manager) to release it.
+    """
+
+    # Class-level default only; __init__ gives every instance its own dict
+    index = {}
+    file = None
+    years_re = re.compile(r"^timesteps:(( \d*)*)", re.MULTILINE)
+
+    def __init__(self, path):
+        """Open the file at `path` and index its sections."""
+        self.file = open(path)
+        # Per-instance mapping of section name → (start position, length). Writing
+        # into the class-level dict would mix the sections of different files.
+        self.index = {}
+
+        # Index the file
+        section = "_info"
+        pos = self.file.tell()
+        while True:
+            line = self.file.readline()
+            if line == "":
+                # End of file
+                break
+            elif line == "*\n":
+                # End of a section: record where it started and how long it is
+                self.index[section] = (pos, self.file.tell() - pos)
+                section = None
+                pos = self.file.tell()
+            elif section is None:
+                # First line after a delimiter names the new section
+                section = line.split()[0]
+
+    def close(self):
+        """Close the underlying file, if it is open."""
+        if self.file is not None:
+            self.file.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc_info):
+        self.close()
+
+    def get_section(self, name):
+        """Return the text of the section `name`, including its closing ``*`` line.
+
+        Raises
+        ------
+        KeyError
+            If no section `name` was indexed.
+        """
+        start, length = self.index[name]
+        self.file.seek(start)
+        return self.file.read(length)
+
+    @lru_cache(1)
+    def get_years(self):
+        """Return timesteps.
+
+        These are the integers on the ``timesteps:`` line of the ``_info`` section.
+
+        Raises
+        ------
+        ValueError
+            If the ``_info`` section has no ``timesteps:`` line.
+        """
+        sec = self.get_section("_info")
+        match = self.years_re.search(sec)
+        if match is None:
+            raise ValueError("no 'timesteps:' line in section '_info'")
+        return list(map(int, match.groups()[0].strip().split()))
+
+    # NOTE(review): the names below are tested with ``param in <str>``, i.e. as
+    # substrings, so e.g. "outp" also matches via "moutp". This is kept as-is because
+    # existing input files may depend on it — confirm before switching to exact
+    # matching.
+    params_with_source = "con1a con1c con2a inp minp moutp"
+    # The separating space keeps "vom" and "con1a" from fusing into "vomcon1a"
+    ts_params = "ctime fom inv plf pll vom " + params_with_source
+    # Scalar parameter name → converter applied to its single value
+    scalar_params = {
+        "annualize": int,
+        "display": str,
+        "fyear": int,
+        "lyear": int,
+        "hisc": float,
+        "minp": float,
+    }
+
+    def const_or_ts(self, line):
+        """Convert one parameter `line` to a :class:`pandas.DataFrame`.
+
+        Parameters
+        ----------
+        line : list of str
+            Whitespace-split entries: the parameter name; a source entry if the name
+            matches :attr:`params_with_source`; a kind, either ``ts`` (time series)
+            or ``c`` (constant); then the value(s). For ``minp`` the kind is not
+            read from the line but inferred: ``c`` if exactly one value remains,
+            otherwise ``ts``. `line` is consumed in place.
+
+        Returns
+        -------
+        pandas.DataFrame
+            With columns "year", "value", "param" and "source". ``ts`` values are
+            paired with :meth:`get_years` in order; a ``c`` value gives a single row
+            for the first year only.
+
+        Raises
+        ------
+        ValueError
+            If the kind is neither ``ts`` nor ``c``.
+        """
+        param = line.pop(0)
+        source = line.pop(0) if param in self.params_with_source else None
+
+        if param == "minp":
+            line = ["c" if len(line) == 1 else "ts"] + line
+
+        kind = line.pop(0)
+        if kind == "ts":
+            elem = list(zip(self.get_years(), line))
+        elif kind == "c":
+            assert len(line) == 1
+
+            # # This line implements a fill-forward:
+            # elem = [(year, line[0]) for year in self.get_years()]
+
+            # Single element
+            elem = [(self.get_years()[0], line[0])]
+        else:
+            raise ValueError(param, source, kind, line)
+
+        # 'free' is a special value for bounds/constraints
+        df = (
+            pd.DataFrame(elem, columns=["year", "value"])
+            .replace("free", np.nan)
+            .astype({"value": float})
+        )
+
+        # Add parameter name and source
+        df["param"] = param
+        df["source"] = source
+
+        return df
+
+    def parse_section(self, name):
+        """Parse the section `name`.
+
+        Returns
+        -------
+        dict
+            May contain: "extra", the entries following `name` on the section's
+            first line; "type", the second entry of the last ``bda``/``bdc`` line;
+            one item per scalar parameter found, converted per
+            :attr:`scalar_params`; and "params", the concatenated data frames from
+            :meth:`const_or_ts`, present only if at least one was produced. Lines
+            matching none of the branches are ignored.
+        """
+        result = {}
+        params = []
+
+        # Parse each line
+        for line in map(str.split, self.get_section(name).split("\n")):
+            if line in ([], ["*"]) or line[0].startswith("#"):
+                # End of section, comment, or blank line
+                continue
+
+            param = line[0]
+            if param == name:
+                # Start of the section
+                result["extra"] = line[1:]
+            elif param in "bda bdc":
+                result["type"] = line.pop(1)  # 'lo' or 'hi'
+                params.append(self.const_or_ts(line))
+            # elif param in 'mpa mpc':
+            #     # TODO implement this
+            #     continue
+            elif param in self.ts_params:
+                params.append(self.const_or_ts(line))
+            elif param in self.scalar_params:
+                assert len(line) == 2, line
+                result[param] = self.scalar_params[param](line[1])
+
+        # Concatenate accumulated params to a single DataFrame
+        if len(params):
+            result["params"] = pd.concat(params, sort=False)
+
+        return result