Merge pull request #253 from iiasa/fix/material-data-lfs

khaeru · web-flow · commit 464e579d2f92 · 2024-11-26T11:30:27.000+01:00
Convert aluminum_trade.csv from Git LFS pointer
diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst
@@ -23,7 +23,7 @@ Next release
   - Reconfigure use and implementation of technology variants/modules to be more agnostic (:pull:`221`).
   - Change cost decay to reach reduction percentage specified on the year 2100 (:pull:`227`).
   - Add `cooling` technology variant/module (:pull:`222`).
-- Improve and extend :doc:`/material/index` (:pull:`218`).
+- Improve and extend :doc:`/material/index` (:pull:`218`, :pull:`253`).
 
   - Release of MESSAGEix-Materials 1.1.0 (:doc:`/material/v1.1.0`).
 
diff --git a/message_ix_models/data/material/aluminum/aluminum_trade.csv b/message_ix_models/data/material/aluminum/aluminum_trade.csv
diff --git a/message_ix_models/model/material/data_aluminum.py b/message_ix_models/model/material/data_aluminum.py
@@ -20,7 +20,7 @@
 
 def read_data_aluminum(
     scenario: message_ix.Scenario,
-) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
+) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
     """Read and clean data from :file:`aluminum_techno_economic.xlsx`.
 
     Parameters
@@ -42,10 +42,7 @@ def read_data_aluminum(
 
     fname = "aluminum_techno_economic.xlsx"
 
-    if "R12_CHN" in s_info.N:
-        sheet_n = "data_R12"
-    else:
-        sheet_n = "data_R11"
+    sheet_n = "data_R12" if "R12_CHN" in s_info.N else "data_R11"
 
     # Read the file
     data_alu = pd.read_excel(
@@ -564,7 +561,9 @@ def gen_data_alu_trade(scenario: message_ix.Scenario) -> dict[str, pd.DataFrame]
     data_trade.loc[(data_trade["Region"] == "Europe"), "Value"] *= 0.7
     data_trade.loc[(data_trade["Region"] == "Europe"), "Region"] = "West Europe"
 
-    data_trade_eeu = data_trade[data_trade["Region"] == "West Europe"]
+    data_trade_eeu = data_trade.loc[data_trade["Region"] == "West Europe"].copy(
+        deep=True
+    )
     data_trade_eeu["Value"] *= 0.3 / 0.7
     data_trade_eeu["Region"] = "East Europe"
 
@@ -592,7 +591,7 @@ def gen_data_alu_trade(scenario: message_ix.Scenario) -> dict[str, pd.DataFrame]
     data_trade.loc[(data_trade["Region"] == "Other Asia"), "Value"] *= 0.5
     data_trade.loc[(data_trade["Region"] == "Other Asia"), "Region"] = "South Asia"
 
-    data_trade_pas = data_trade[data_trade["Region"] == "South Asia"]
+    data_trade_pas = data_trade[data_trade["Region"] == "South Asia"].copy(deep=True)
     data_trade_pas["Region"] = "Other Pacific Asia"
 
     data_trade = pd.concat([data_trade, data_trade_pas])
@@ -602,7 +601,7 @@ def gen_data_alu_trade(scenario: message_ix.Scenario) -> dict[str, pd.DataFrame]
     data_trade.loc[(data_trade["Region"] == "Other Producers"), "Value"] *= 0.5
     data_trade.loc[(data_trade["Region"] == "Other Producers"), "Region"] = "Africa"
 
-    data_trade_fsu = data_trade[data_trade["Region"] == "Africa"]
+    data_trade_fsu = data_trade[data_trade["Region"] == "Africa"].copy(deep=True)
     data_trade_fsu["Region"] = "Former Soviet Union"
 
     data_trade = pd.concat([data_trade, data_trade_fsu])
@@ -614,12 +613,8 @@ def gen_data_alu_trade(scenario: message_ix.Scenario) -> dict[str, pd.DataFrame]
 
     s_info = ScenarioInfo(scenario)
 
-    if "R12_CHN" in s_info.N:
-        region_tag = "R12_"
-        china_mapping = "R12_CHN"
-    else:
-        region_tag = "R11_"
-        china_mapping = "R11_CPA"
+    region_tag = "R12_" if "R12_CHN" in s_info.N else "R11_"
+    china_mapping = "R12_CHN" if "R12_CHN" in s_info.N else "R11_CPA"
 
     region_mapping = {
         "China": china_mapping,
@@ -647,7 +642,7 @@ def gen_data_alu_trade(scenario: message_ix.Scenario) -> dict[str, pd.DataFrame]
     # For imports this corresponds to: USE|Inputs|Imports
 
     data_import = data_trade[data_trade["Variable"] == "USE|Inputs|Imports"]
-    data_import_hist = data_import[data_import["year_act"] <= 2015]
+    data_import_hist = data_import[data_import["year_act"] <= 2015].copy(deep=True)
     data_import_hist["technology"] = "import_aluminum"
     data_import_hist["mode"] = "M1"
     data_import_hist["time"] = "year"
@@ -658,7 +653,7 @@ def gen_data_alu_trade(scenario: message_ix.Scenario) -> dict[str, pd.DataFrame]
     # For exports this corresponds to: MANUFACTURING|Outputs|Exports
 
     data_export = data_trade[data_trade["Variable"] == "MANUFACTURING|Outputs|Exports"]
-    data_export_hist = data_export[data_export["year_act"] <= 2015]
+    data_export_hist = data_export[data_export["year_act"] <= 2015].copy(deep=True)
     data_export_hist["technology"] = "export_aluminum"
     data_export_hist["mode"] = "M1"
     data_export_hist["time"] = "year"
@@ -714,7 +709,9 @@ def gen_data_alu_trade(scenario: message_ix.Scenario) -> dict[str, pd.DataFrame]
     merged_df["value"] = merged_df["value_export"] - merged_df["value_import"]
 
     # Select relevant columns for the final DataFrame
-    bound_act_net_export_chn = merged_df[["node_loc", "year_act", "value"]]
+    bound_act_net_export_chn = merged_df[["node_loc", "year_act", "value"]].copy(
+        deep=True
+    )
 
     bound_act_net_export_chn["technology"] = "export_aluminum"
     bound_act_net_export_chn["mode"] = "M1"
diff --git a/message_ix_models/model/material/data_ammonia_new.py b/message_ix_models/model/material/data_ammonia_new.py
@@ -1,3 +1,5 @@
+from typing import Any, cast
+
 import message_ix
 import numpy as np
 import pandas as pd
@@ -160,10 +162,7 @@ def __missing__(self, key):
         conv_cost_df = pd.DataFrame()
         df = par_dict[p]
         for tec in tec_list:
-            if p == "inv_cost":
-                year_col = "year_vtg"
-            else:
-                year_col = "year_act"
+            year_col = "year_vtg" if p == "inv_cost" else "year_act"
 
             df_tecs = df[df["technology"] == tec]
             df_tecs = df_tecs.merge(cost_conv, left_on=year_col, right_index=True)
@@ -213,11 +212,11 @@ def gen_data_rel(scenario, dry_run=False, add_ccs: bool = True):
         df = par_dict[par_name]
         # remove "default" node name to broadcast with all scenario regions later
         df["node_rel"] = df["node_rel"].apply(lambda x: None if x == "all" else x)
-        df = df.to_dict()
-        df = make_df(par_name, **df)
+        df_dict = cast(dict[str, Any], df.to_dict())
+        df = make_df(par_name, **df_dict)
         # split df into df with default values and df with regionalized values
         df_all_regs = df[df["node_rel"].isna()]
-        df_single_regs = df[~df["node_rel"].isna()]
+        df_single_regs = df.copy(deep=True).loc[~df["node_rel"].isna()]
 
         # broadcast regions to default parameter values
         df_all_regs = df_all_regs.pipe(broadcast, node_rel=nodes)
@@ -234,7 +233,9 @@ def same_node_if_nan(df):
             df_single_regs["node_loc"] = df_single_regs["node_loc"].apply(
                 lambda x: None if x == "all" else x
             )
-            df_new_reg_all_regs = df_single_regs[df_single_regs["node_loc"].isna()]
+            df_new_reg_all_regs = df_single_regs.copy(deep=True).loc[
+                df_single_regs["node_loc"].isna()
+            ]
             df_new_reg_all_regs = df_new_reg_all_regs.pipe(broadcast, node_loc=nodes)
             df_single_regs = pd.concat(
                 [
@@ -466,13 +467,11 @@ def read_demand() -> dict[str, pd.DataFrame]:
     # Process the regional historical activities
 
     fs_GLO = feedshare_GLO.copy()
-    fs_GLO.insert(1, "bio_pct", 0)
-    fs_GLO.insert(2, "elec_pct", 0)
+    fs_GLO.insert(1, "bio_pct", 0.0)
+    fs_GLO.insert(2, "elec_pct", 0.0)
     # 17/14 NH3:N ratio, to get NH3 activity based on N demand
     # => No NH3 loss assumed during production
 
-    # FIXME: Name: elec_pct, dtype: float64 ' has dtype incompatible with int64,
-    #  please explicitly cast to a compatible dtype first.
     fs_GLO.iloc[:, 1:6] = input_fuel[5] * fs_GLO.iloc[:, 1:6]
     fs_GLO.insert(6, "NH3_to_N", 1)
 
diff --git a/message_ix_models/model/material/data_power_sector.py b/message_ix_models/model/material/data_power_sector.py
@@ -184,7 +184,7 @@ def read_material_intensities(
         for t in data_lca["technology"].unique():
             for c in data_lca["commodity"].unique():
                 for p in data_lca["phase"].unique():
-                    temp = data_lca.loc[
+                    temp = data_lca.copy(deep=True).loc[
                         (
                             (data_lca["node"] == n)
                             & (data_lca["technology"] == t)
@@ -501,6 +501,4 @@ def gen_data_power_sector(
     maybe_init_pars(scenario)
 
     # Concatenate to one data frame per parameter
-    results = {par_name: pd.concat(dfs) for par_name, dfs in results.items()}
-
-    return results
+    return {par_name: pd.concat(dfs) for par_name, dfs in results.items()}
diff --git a/message_ix_models/model/material/data_util.py b/message_ix_models/model/material/data_util.py
@@ -204,7 +204,7 @@ def modify_demand_and_hist_activity(scen: message_ix.Scenario) -> None:
         columns=["REGION", "SECTOR", "FUEL", "RYEAR", "UNIT_OUT", "RESULT"]
     )
     for r in df_spec["REGION"].unique():
-        df_spec_temp = df_spec.loc[df_spec["REGION"] == r]
+        df_spec_temp = df_spec[df_spec["REGION"] == r].copy(deep=True)
         df_spec_total_temp = df_spec_total.loc[df_spec_total["REGION"] == r]
         df_spec_temp.loc[:, "i_spec"] = (
             df_spec_temp.loc[:, "RESULT"]
@@ -269,7 +269,7 @@ def modify_demand_and_hist_activity(scen: message_ix.Scenario) -> None:
     )
 
     for r in df_therm["REGION"].unique():
-        df_therm_temp = df_therm.loc[df_therm["REGION"] == r]
+        df_therm_temp = df_therm.loc[df_therm["REGION"] == r].copy(deep=True)
         df_therm_total_temp = df_therm_total.loc[df_therm_total["REGION"] == r]
         df_therm_temp.loc[:, "i_therm"] = (
             df_therm_temp.loc[:, "RESULT"]
diff --git a/message_ix_models/model/material/material_demand/material_demand_calc.py b/message_ix_models/model/material/material_demand/material_demand_calc.py
@@ -134,27 +134,27 @@ def read_timer_gdp(
 
 
 def project_demand(df: pd.DataFrame, phi: float, mu: float):
-    df_demand = df.groupby("region", group_keys=False).apply(
+    df_demand = df.groupby("region", group_keys=False)[df.columns].apply(
         lambda group: group.assign(
             demand_pcap_base=group["demand.tot.base"].iloc[0]
             * giga
             / group["pop.mil"].iloc[0]
             / mega
         )
     )
-    df_demand = df_demand.groupby("region", group_keys=False).apply(
+    df_demand = df_demand.groupby("region", group_keys=False)[df_demand.columns].apply(
         lambda group: group.assign(
             gap_base=group["demand_pcap_base"].iloc[0] - group["demand_pcap0"].iloc[0]
         )
     )
-    df_demand = df_demand.groupby("region", group_keys=False).apply(
+    df_demand = df_demand.groupby("region", group_keys=False)[df_demand.columns].apply(
         lambda group: group.assign(
             demand_pcap=group["demand_pcap0"]
             + group["gap_base"] * gompertz(phi, mu, y=group["year"])
         )
     )
     df_demand = (
-        df_demand.groupby("region", group_keys=False)
+        df_demand.groupby("region", group_keys=False)[df_demand.columns]
         .apply(
             lambda group: group.assign(
                 demand_tot=group["demand_pcap"] * group["pop.mil"] * mega / giga
diff --git a/message_ix_models/model/material/util.py b/message_ix_models/model/material/util.py
@@ -64,11 +64,8 @@ def prepare_xlsx_for_explorer(filepath: str) -> None:
     """
     df = pd.read_excel(filepath)
 
-    def add_R12(str):
-        if len(str) < 5:
-            return "R12_" + str
-        else:
-            return str
+    def add_R12(str: str) -> str:
+        return "R12_" + str if len(str) < 5 else str
 
     df = df[~df["Region"].isna()]
     df["Region"] = df["Region"].map(add_R12)
@@ -97,7 +94,7 @@ def combine_df_dictionaries(*args: dict[str, pd.DataFrame]) -> dict:
     return comb_dict
 
 
-def read_yaml_file(file_path: str or Path) -> dict or None:
+def read_yaml_file(file_path: Union[str, Path]) -> Union[dict, None]:
     """
     Tries to read yaml file into a dict
 
@@ -119,7 +116,8 @@ def read_yaml_file(file_path: str or Path) -> dict or None:
             return None
 
 
-def invert_dictionary(original_dict: dict[str, list]) -> dict:
+# NOTE guessing the type hint here, but this seems unused anyway
+def invert_dictionary(original_dict: dict[str, list[str]]) -> dict[str, list[str]]:
     """
     Create inverted dictionary from existing dictionary, where values turn
     into keys and vice versa
@@ -134,7 +132,7 @@ def invert_dictionary(original_dict: dict[str, list]) -> dict:
     dict
 
     """
-    inverted_dict = {}
+    inverted_dict: dict[str, list[str]] = {}
     for key, value in original_dict.items():
         for array_element in value:
             if array_element not in inverted_dict:
@@ -196,7 +194,7 @@ def remove_from_list_if_exists(element: Any, _list: list) -> None:
         _list.remove(element)
 
 
-def exponential(x: float or list[float], b: float, m: float) -> float:
+def exponential(x: Union[float, list[float]], b: float, m: float) -> float:
     """
     Mathematical function used in Excel's GROWTH function
 
@@ -300,7 +298,7 @@ def update_macro_calib_file(scenario: message_ix.Scenario, fname: str) -> None:
     df = scenario.var("COST_NODAL_NET", filters={"year": years_cost})
     df["node"] = pd.Categorical(df["node"], nodes)
     df = df[df["year"].isin(years_cost)].groupby(["node"]).apply(cost_fit)
-    ws = wb.get_sheet_by_name("cost_ref")
+    ws = wb["cost_ref"]
     # write derived values to sheet. Cell B7 (MEA region) is skipped.
     for i in range(2, 7):
         ws[f"B{i}"].value = df.values[i - 2]
@@ -316,28 +314,25 @@ def update_macro_calib_file(scenario: message_ix.Scenario, fname: str) -> None:
     df["node"] = pd.Categorical(df["node"], nodes)
     df["commodity"] = pd.Categorical(df["commodity"], comms)
     df = df.groupby(["node", "commodity"]).apply(price_fit)
-    ws = wb.get_sheet_by_name("price_ref")
+    ws = wb["price_ref"]
     for i in range(2, 62):
         ws[f"C{i}"].value = df.values[i - 2]
     wb.save(path)
 
 
 def get_ssp_from_context(context: Context) -> str:
-    """
-    Get selected SSP from context
+    """Get selected SSP from context
+
     Parameters
     ----------
     context: Context
+
     Returns
     -------
     str
         SSP label
     """
-    if "ssp" not in context:
-        ssp = "SSP2"
-    else:
-        ssp = context["ssp"]
-    return ssp
+    return "SSP2" if "ssp" not in context else context["ssp"]
 
 
 def maybe_remove_water_tec(scenario: message_ix.Scenario, results: dict) -> None:

Original file line number	Diff line number	Diff line change
`@@ -134,27 +134,27 @@ def read_timer_gdp(`
`134`	`134`
`135`	`135`
`136`	`136`	`def project_demand(df: pd.DataFrame, phi: float, mu: float):`
`137`		`- df_demand = df.groupby("region", group_keys=False).apply(`
	`137`	`+ df_demand = df.groupby("region", group_keys=False)[df.columns].apply(`
`138`	`138`	`lambda group: group.assign(`
`139`	`139`	`demand_pcap_base=group["demand.tot.base"].iloc[0]`
`140`	`140`	`* giga`
`141`	`141`	`/ group["pop.mil"].iloc[0]`
`142`	`142`	`/ mega`
`143`	`143`	`)`
`144`	`144`	`)`
`145`		`- df_demand = df_demand.groupby("region", group_keys=False).apply(`
	`145`	`+ df_demand = df_demand.groupby("region", group_keys=False)[df_demand.columns].apply(`
`146`	`146`	`lambda group: group.assign(`
`147`	`147`	`gap_base=group["demand_pcap_base"].iloc[0] - group["demand_pcap0"].iloc[0]`
`148`	`148`	`)`
`149`	`149`	`)`
`150`		`- df_demand = df_demand.groupby("region", group_keys=False).apply(`
	`150`	`+ df_demand = df_demand.groupby("region", group_keys=False)[df_demand.columns].apply(`
`151`	`151`	`lambda group: group.assign(`
`152`	`152`	`demand_pcap=group["demand_pcap0"]`
`153`	`153`	`+ group["gap_base"] * gompertz(phi, mu, y=group["year"])`
`154`	`154`	`)`
`155`	`155`	`)`
`156`	`156`	`df_demand = (`
`157`		`- df_demand.groupby("region", group_keys=False)`
	`157`	`+ df_demand.groupby("region", group_keys=False)[df_demand.columns]`
`158`	`158`	`.apply(`
`159`	`159`	`lambda group: group.assign(`
`160`	`160`	`demand_tot=group["demand_pcap"] * group["pop.mil"] * mega / giga`