From 96ff63382e0a3ed9c9ff5d322682a3a962c475b3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:26:55 +0000 Subject: [PATCH 1/4] Initial plan From a1c8721a84186bf873b5ee70b7aefaa428ac9513 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:37:30 +0000 Subject: [PATCH 2/4] Add dynamic Eurostat region mapping with fallback and tests Agent-Logs-Url: https://github.com/maxnutz/pypsa_validation_processing/sessions/2b02bd67-ebd8-44a4-83a0-79efdb83a7af Co-authored-by: maxnutz <81740567+maxnutz@users.noreply.github.com> --- pixi.toml | 2 +- pyproject.toml | 1 + pypsa_validation_processing/utils.py | 114 ++++++++++++++++++++++++--- tests/test_aggregation.py | 20 +++++ tests/test_network_processor.py | 56 +++++++++++++ tests/test_utils.py | 90 +++++++++++++++++---- 6 files changed, 258 insertions(+), 25 deletions(-) diff --git a/pixi.toml b/pixi.toml index 75b0e9d..999230a 100644 --- a/pixi.toml +++ b/pixi.toml @@ -23,5 +23,5 @@ requests = ">=2.20" pandas = ">=1.0.0" pyam-iamc = ">=2.0.0" nomenclature-iamc = ">=0.29" +eurostat = ">=1.1.1" pypsa-validation-processing = { path = ".", editable = true } - diff --git a/pyproject.toml b/pyproject.toml index ee08f65..bcc845b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "pypsa", "pyam-iamc", "nomenclature-iamc", + "eurostat", ] [project.urls] diff --git a/pypsa_validation_processing/utils.py b/pypsa_validation_processing/utils.py index f30b6b4..de422b2 100644 --- a/pypsa_validation_processing/utils.py +++ b/pypsa_validation_processing/utils.py @@ -1,6 +1,18 @@ """Static information and general utility functions for pypsa_validation_processing.""" -EU27_COUNTRY_CODES: dict[str, str] = { +from __future__ import annotations + +import re +import warnings +from functools import lru_cache + +try: + import eurostat +except ImportError: # pragma: no cover + eurostat = None + + +FALLBACK_COUNTRY_CODES: dict[str, str] = { "AL": "Albania", "AT": "Austria", "BA": "Bosnia and Herzegovina", @@ -16,8 +28,6 @@ "FI": "Finland", "FR": "France", "GB": "United Kingdom", - "GB0": "United Kingdom", - "GB1": "United Kingdom-Northern Ireland", "GR": "Greece", "HR": "Croatia", "HU": "Hungary", @@ -42,7 +52,10 @@ "EU": "European Union", } + COUNTRIES_SPECIAL_CASES: dict[str, str] = { + "GB0": "United Kingdom", + "GB1": "United Kingdom-Northern Ireland", "DE1": "Germany South-West", "DE2": "Germany South-East", "DE3": "Germany West", @@ -60,7 +73,7 @@ } -NUTS_2_REGIONS: dict[str, str] = { +FALLBACK_NUTS_2_REGIONS: dict[str, str] = { "AT11": "Burgenland", "AT12": "Lower Austria", "AT13": "Vienna", @@ -69,11 +82,10 @@ "AT31": "Upper Austria", "AT32": "Salzburg", "AT33": "Tyrol", - "AT333": "East Tyrol", "AT34": "Vorarlberg", } -NUTS_3_REGIONS: dict[str, str] = { +FALLBACK_NUTS_3_REGIONS: dict[str, str] = { "AT111": "Mittelburgenland", "AT112": "Nordburgenland", "AT113": "Südburgenland", @@ -112,10 +124,92 @@ "ATXXX": "Not regionalised/Unknown NUTS 3", "ATZZZ": "Extra-Regio NUTS 3", } -# NUTS 3 overwrites NUTS2, relevant because of language change of East Tyrol -> Osttirol -REGION_MAPPING = ( - EU27_COUNTRY_CODES | COUNTRIES_SPECIAL_CASES | NUTS_2_REGIONS | NUTS_3_REGIONS -) + + +def _warn_and_return_fallback( + name: str, fallback: dict[str, str], error: Exception | None = None +) -> dict[str, str]: + if error is not None: + warnings.warn( + f"Eurostat mapping fallback for {name}: {error}", + RuntimeWarning, + stacklevel=2, + ) + return dict(sorted(fallback.items())) + + +def _get_eurostat_geo_dic(dataset: str) -> dict[str, str]: + if eurostat is None: + raise RuntimeError("eurostat package is not installed") + geo_dic = eurostat.get_dic(dataset, par="geo", frmt="dict", lang="en") + if not isinstance(geo_dic, dict): + raise TypeError(f"Unexpected Eurostat payload for {dataset}: {type(geo_dic)!r}") + return geo_dic + + +def _filter_geo_codes( + geo_dic: dict[str, str], pattern: str, country_prefix: str | None = None +) -> dict[str, str]: + return dict( + sorted( + { + code: name + for code, name in geo_dic.items() + if isinstance(code, str) + and isinstance(name, str) + and re.match(pattern, code) + and (country_prefix is None or code.startswith(country_prefix)) + }.items() + ) + ) + + +@lru_cache(maxsize=1) +def get_country_mapping() -> dict[str, str]: + try: + mapping = _filter_geo_codes( + _get_eurostat_geo_dic("nama_10_gdp"), pattern=r"^[A-Z]{2}$" + ) + if not mapping: + return _warn_and_return_fallback("countries", FALLBACK_COUNTRY_CODES) + return mapping + except Exception as error: # pragma: no cover - exercised via tests/mocking + return _warn_and_return_fallback("countries", FALLBACK_COUNTRY_CODES, error) + + +@lru_cache(maxsize=4) +def get_nuts_mapping(level: int, country_prefix: str | None = None) -> dict[str, str]: + if level not in (2, 3): + raise ValueError("NUTS mapping level must be 2 or 3") + + dataset = "nama_10r_2gdp" if level == 2 else "nama_10r_3gdp" + pattern = r"^[A-Z]{2}[A-Z0-9]{2}$" if level == 2 else r"^[A-Z]{2}[A-Z0-9]{3}$" + fallback = FALLBACK_NUTS_2_REGIONS if level == 2 else FALLBACK_NUTS_3_REGIONS + + try: + mapping = _filter_geo_codes( + _get_eurostat_geo_dic(dataset), pattern=pattern, country_prefix=country_prefix + ) + if not mapping: + return _warn_and_return_fallback(f"nuts{level}", fallback) + return mapping + except Exception as error: # pragma: no cover - exercised via tests/mocking + return _warn_and_return_fallback(f"nuts{level}", fallback, error) + + +def create_region_mapping() -> dict[str, str]: + return ( + get_country_mapping() + | get_nuts_mapping(level=2) + | get_nuts_mapping(level=3) + | COUNTRIES_SPECIAL_CASES + ) + + +EU27_COUNTRY_CODES = get_country_mapping() +NUTS_2_REGIONS = get_nuts_mapping(level=2) +NUTS_3_REGIONS = get_nuts_mapping(level=3) +REGION_MAPPING = create_region_mapping() UNITS_MAPPING = { "MWh_el": "MWh", diff --git a/tests/test_aggregation.py b/tests/test_aggregation.py index 1bbe89a..d4f65af 100644 --- a/tests/test_aggregation.py +++ b/tests/test_aggregation.py @@ -317,6 +317,26 @@ def test_preserves_locations_when_all_countries_region(self, tmp_path: Path): iam_df = processor.structure_pyam_from_pandas(df) assert set(iam_df.region) == {"AT1", "DE1"} + def test_maps_region_codes_when_enabled_in_region_mode(self, tmp_path: Path): + processor = _make_processor( + tmp_path, + aggregation_level="region", + country="all", + map_country_codes_to_names=True, + ) + processor.common_dsd = None + idx = pd.MultiIndex.from_tuples( + [("Test|Var", "AT11", "MWh"), ("Test|Var", "DE11", "MWh")], + names=["variable", "location", "unit"], + ) + df = pd.DataFrame({2020: [100.0, 200.0]}, index=idx) + with patch( + "pypsa_validation_processing.class_definitions.REGION_MAPPING", + {"AT11": "Burgenland", "DE11": "Stuttgart"}, + ): + iam_df = processor.structure_pyam_from_pandas(df) + assert set(iam_df.region) == {"Burgenland", "Stuttgart"} + # --------------------------------------------------------------------------- # Tests for backward compatibility diff --git a/tests/test_network_processor.py b/tests/test_network_processor.py index bef4d0e..9c552fa 100644 --- a/tests/test_network_processor.py +++ b/tests/test_network_processor.py @@ -1106,6 +1106,62 @@ def test_structure_pyam_preserves_original_model_and_scenario_metadata( assert mock_iam.call_args.kwargs["model"] == " Model Name " assert mock_iam.call_args.kwargs["scenario"] == " Scenario\tName " + def test_structure_pyam_maps_location_when_enabled(self, tmp_path: Path): + processor = self._setup_processor(tmp_path) + processor.aggregation_level = "region" + processor.map_country_codes_to_names = True + processor.common_dsd = None + + df = pd.DataFrame( + {pd.Timestamp("2020-01-01"): [1000.0]}, + index=pd.MultiIndex.from_tuples( + [("Final Energy|Electricity", "AT11", "MWh_el")], + names=["variable", "location", "unit"], + ), + ) + + with patch( + "pypsa_validation_processing.class_definitions.REGION_MAPPING", + {"AT11": "Burgenland"}, + ): + with patch( + "pypsa_validation_processing.class_definitions.pyam.IamDataFrame" + ) as mock_iam: + mock_iam.return_value = MagicMock() + processor.structure_pyam_from_pandas(df) + + mapped_df = mock_iam.call_args.kwargs["data"] + assert mapped_df["location"].tolist() == ["Burgenland"] + + def test_structure_pyam_keeps_location_code_when_mapping_disabled( + self, tmp_path: Path + ): + processor = self._setup_processor(tmp_path) + processor.aggregation_level = "region" + processor.map_country_codes_to_names = False + processor.common_dsd = None + + df = pd.DataFrame( + {pd.Timestamp("2020-01-01"): [1000.0]}, + index=pd.MultiIndex.from_tuples( + [("Final Energy|Electricity", "AT11", "MWh_el")], + names=["variable", "location", "unit"], + ), + ) + + with patch( + "pypsa_validation_processing.class_definitions.REGION_MAPPING", + {"AT11": "Burgenland"}, + ): + with patch( + "pypsa_validation_processing.class_definitions.pyam.IamDataFrame" + ) as mock_iam: + mock_iam.return_value = MagicMock() + processor.structure_pyam_from_pandas(df) + + mapped_df = mock_iam.call_args.kwargs["data"] + assert mapped_df["location"].tolist() == ["AT11"] + # --------------------------------------------------------------------------- # Tests for file I/O and network loading diff --git a/tests/test_utils.py b/tests/test_utils.py index 1c12f96..3ed36ec 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,29 +1,91 @@ """Tests for pypsa_validation_processing.utils.""" +from __future__ import annotations + +from unittest.mock import patch + import pytest -from pypsa_validation_processing.utils import REGION_MAPPING +import pypsa_validation_processing.utils as utils + + +@pytest.fixture(autouse=True) +def _clear_mapping_caches(): + utils.get_country_mapping.cache_clear() + utils.get_nuts_mapping.cache_clear() + yield + utils.get_country_mapping.cache_clear() + utils.get_nuts_mapping.cache_clear() class TestRegionsCodes: - def test_is_dict(self): - assert isinstance(REGION_MAPPING, dict) + def test_region_mapping_is_dict_of_strings(self): + assert isinstance(utils.REGION_MAPPING, dict) + assert all(isinstance(key, str) for key in utils.REGION_MAPPING) + assert all(isinstance(value, str) for value in utils.REGION_MAPPING.values()) - def test_has_all_27_member_states(self): - # All 27 EU member state ISO codes must be present + def test_region_mapping_has_required_eu_member_state_codes(self): expected_codes = { "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "ES", "FI", "FR", "GR", "HR", "HU", "IE", "IT", "LT", "LU", "LV", "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK", } - assert expected_codes.issubset(REGION_MAPPING.keys()) + assert expected_codes.issubset(utils.REGION_MAPPING) + + def test_region_mapping_contains_nuts2_and_nuts3_codes(self): + nuts2 = [code for code in utils.REGION_MAPPING if len(code) == 4 and code[:2].isalpha()] + nuts3 = [code for code in utils.REGION_MAPPING if len(code) == 5 and code[:2].isalpha()] + assert nuts2 + assert nuts3 + + +class TestEurostatMappingLoaders: + def test_create_region_mapping_success_path_uses_eurostat_data(self): + def _mock_geo(dataset: str) -> dict[str, str]: + if dataset == "nama_10_gdp": + return {"AT": "Austria", "DE": "Germany", "DE1": "invalid"} + if dataset == "nama_10r_2gdp": + return {"AT11": "Burgenland", "ATX": "invalid"} + if dataset == "nama_10r_3gdp": + return {"AT111": "Mittelburgenland", "AT11": "invalid"} + raise AssertionError(dataset) + + with patch.object(utils, "_get_eurostat_geo_dic", side_effect=_mock_geo): + region_mapping = utils.create_region_mapping() + + assert region_mapping["AT"] == "Austria" + assert region_mapping["AT11"] == "Burgenland" + assert region_mapping["AT111"] == "Mittelburgenland" + assert "DE1" in region_mapping # from special cases + assert region_mapping["DE1"] == utils.COUNTRIES_SPECIAL_CASES["DE1"] + + def test_create_region_mapping_falls_back_when_eurostat_fails(self): + with patch.object(utils, "_get_eurostat_geo_dic", side_effect=RuntimeError("boom")): + region_mapping = utils.create_region_mapping() + + assert region_mapping["AT"] == "Austria" + assert region_mapping["AT11"] == "Burgenland" + assert region_mapping["AT111"] == "Mittelburgenland" + + def test_create_region_mapping_partial_data_uses_fallback_for_missing_dataset(self): + def _partial_geo(dataset: str) -> dict[str, str]: + if dataset == "nama_10_gdp": + return {"AT": "Austria", "DE": "Germany"} + if dataset == "nama_10r_2gdp": + return {"AT11": "Burgenland"} + if dataset == "nama_10r_3gdp": + return {} + raise AssertionError(dataset) + + with patch.object(utils, "_get_eurostat_geo_dic", side_effect=_partial_geo): + region_mapping = utils.create_region_mapping() + + assert region_mapping["AT11"] == "Burgenland" + assert region_mapping["AT111"] == utils.FALLBACK_NUTS_3_REGIONS["AT111"] - def test_sample_mappings(self): - assert REGION_MAPPING["AT"] == "Austria" - assert REGION_MAPPING["DE"] == "Germany" - assert REGION_MAPPING["FR"] == "France" + def test_special_cases_override_other_mapping_sources(self): + with patch.object(utils, "get_country_mapping", return_value={"DE1": "Country DE1"}): + with patch.object(utils, "get_nuts_mapping", return_value={"DE1": "NUTS DE1"}): + region_mapping = utils.create_region_mapping() - def test_values_are_strings(self): - for key, value in REGION_MAPPING.items(): - assert isinstance(key, str), f"Key {key!r} is not a string" - assert isinstance(value, str), f"Value {value!r} for key {key!r} is not a string" + assert region_mapping["DE1"] == utils.COUNTRIES_SPECIAL_CASES["DE1"] From 5c0a3b5bfe212e6fb7df2a75c2c28ba036730b1a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:45:39 +0000 Subject: [PATCH 3/4] Polish Eurostat mapping tests and fallback compatibility notes Agent-Logs-Url: https://github.com/maxnutz/pypsa_validation_processing/sessions/2b02bd67-ebd8-44a4-83a0-79efdb83a7af Co-authored-by: maxnutz <81740567+maxnutz@users.noreply.github.com> --- pypsa_validation_processing/utils.py | 6 +++++- tests/test_utils.py | 23 ++++++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/pypsa_validation_processing/utils.py b/pypsa_validation_processing/utils.py index de422b2..b21e17a 100644 --- a/pypsa_validation_processing/utils.py +++ b/pypsa_validation_processing/utils.py @@ -82,6 +82,8 @@ "AT31": "Upper Austria", "AT32": "Salzburg", "AT33": "Tyrol", + # Keep historical model key for compatibility; fallback NUTS3 keeps the same key. + "AT333": "East Tyrol", "AT34": "Vorarlberg", } @@ -140,7 +142,9 @@ def _warn_and_return_fallback( def _get_eurostat_geo_dic(dataset: str) -> dict[str, str]: if eurostat is None: - raise RuntimeError("eurostat package is not installed") + raise RuntimeError( + "eurostat package is not available (install with: pip install eurostat)" + ) geo_dic = eurostat.get_dic(dataset, par="geo", frmt="dict", lang="en") if not isinstance(geo_dic, dict): raise TypeError(f"Unexpected Eurostat payload for {dataset}: {type(geo_dic)!r}") diff --git a/tests/test_utils.py b/tests/test_utils.py index 3ed36ec..5fd4211 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -33,15 +33,13 @@ def test_region_mapping_has_required_eu_member_state_codes(self): assert expected_codes.issubset(utils.REGION_MAPPING) def test_region_mapping_contains_nuts2_and_nuts3_codes(self): - nuts2 = [code for code in utils.REGION_MAPPING if len(code) == 4 and code[:2].isalpha()] - nuts3 = [code for code in utils.REGION_MAPPING if len(code) == 5 and code[:2].isalpha()] - assert nuts2 - assert nuts3 + assert "AT11" in utils.REGION_MAPPING + assert "AT111" in utils.REGION_MAPPING class TestEurostatMappingLoaders: def test_create_region_mapping_success_path_uses_eurostat_data(self): - def _mock_geo(dataset: str) -> dict[str, str]: + def _mock_eurostat_geo_dic(dataset: str) -> dict[str, str]: if dataset == "nama_10_gdp": return {"AT": "Austria", "DE": "Germany", "DE1": "invalid"} if dataset == "nama_10r_2gdp": @@ -50,13 +48,17 @@ def _mock_geo(dataset: str) -> dict[str, str]: return {"AT111": "Mittelburgenland", "AT11": "invalid"} raise AssertionError(dataset) - with patch.object(utils, "_get_eurostat_geo_dic", side_effect=_mock_geo): + with patch.object( + utils, "_get_eurostat_geo_dic", side_effect=_mock_eurostat_geo_dic + ): region_mapping = utils.create_region_mapping() assert region_mapping["AT"] == "Austria" assert region_mapping["AT11"] == "Burgenland" assert region_mapping["AT111"] == "Mittelburgenland" + assert "ATX" not in region_mapping assert "DE1" in region_mapping # from special cases + assert region_mapping["DE1"] != "invalid" assert region_mapping["DE1"] == utils.COUNTRIES_SPECIAL_CASES["DE1"] def test_create_region_mapping_falls_back_when_eurostat_fails(self): @@ -84,8 +86,15 @@ def _partial_geo(dataset: str) -> dict[str, str]: assert region_mapping["AT111"] == utils.FALLBACK_NUTS_3_REGIONS["AT111"] def test_special_cases_override_other_mapping_sources(self): + def _mock_nuts_mapping( + level: int, country_prefix: str | None = None + ) -> dict[str, str]: + assert country_prefix is None + assert level in (2, 3) + return {"DE1": f"NUTS{level} DE1"} + with patch.object(utils, "get_country_mapping", return_value={"DE1": "Country DE1"}): - with patch.object(utils, "get_nuts_mapping", return_value={"DE1": "NUTS DE1"}): + with patch.object(utils, "get_nuts_mapping", side_effect=_mock_nuts_mapping): region_mapping = utils.create_region_mapping() assert region_mapping["DE1"] == utils.COUNTRIES_SPECIAL_CASES["DE1"] From a611020ef73d6526840b2f636fdf24cf7a3fe89e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:49:07 +0000 Subject: [PATCH 4/4] Refine Eurostat fallback error handling and test assertions Agent-Logs-Url: https://github.com/maxnutz/pypsa_validation_processing/sessions/2b02bd67-ebd8-44a4-83a0-79efdb83a7af Co-authored-by: maxnutz <81740567+maxnutz@users.noreply.github.com> --- pypsa_validation_processing/utils.py | 26 +++++++++++++++++++++----- tests/test_utils.py | 1 - 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/pypsa_validation_processing/utils.py b/pypsa_validation_processing/utils.py index b21e17a..50b0561 100644 --- a/pypsa_validation_processing/utils.py +++ b/pypsa_validation_processing/utils.py @@ -8,8 +8,24 @@ try: import eurostat -except ImportError: # pragma: no cover +except Exception as error: # pragma: no cover + EUROSTAT_IMPORT_ERROR = error eurostat = None +else: + EUROSTAT_IMPORT_ERROR = None + +try: + from requests import RequestException +except Exception: # pragma: no cover + RequestException = RuntimeError + +EUROSTAT_MAPPING_ERRORS = ( + RuntimeError, + TypeError, + ValueError, + KeyError, + RequestException, +) FALLBACK_COUNTRY_CODES: dict[str, str] = { @@ -143,8 +159,8 @@ def _warn_and_return_fallback( def _get_eurostat_geo_dic(dataset: str) -> dict[str, str]: if eurostat is None: raise RuntimeError( - "eurostat package is not available (install with: pip install eurostat)" - ) + "eurostat package is not available (install with: pixi add eurostat)" + ) from EUROSTAT_IMPORT_ERROR geo_dic = eurostat.get_dic(dataset, par="geo", frmt="dict", lang="en") if not isinstance(geo_dic, dict): raise TypeError(f"Unexpected Eurostat payload for {dataset}: {type(geo_dic)!r}") @@ -177,7 +193,7 @@ def get_country_mapping() -> dict[str, str]: if not mapping: return _warn_and_return_fallback("countries", FALLBACK_COUNTRY_CODES) return mapping - except Exception as error: # pragma: no cover - exercised via tests/mocking + except EUROSTAT_MAPPING_ERRORS as error: # pragma: no cover - tests cover this path return _warn_and_return_fallback("countries", FALLBACK_COUNTRY_CODES, error) @@ -197,7 +213,7 @@ def get_nuts_mapping(level: int, country_prefix: str | None = None) -> dict[str, if not mapping: return _warn_and_return_fallback(f"nuts{level}", fallback) return mapping - except Exception as error: # pragma: no cover - exercised via tests/mocking + except EUROSTAT_MAPPING_ERRORS as error: # pragma: no cover - tests cover this path return _warn_and_return_fallback(f"nuts{level}", fallback, error) diff --git a/tests/test_utils.py b/tests/test_utils.py index 5fd4211..8e5b722 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -58,7 +58,6 @@ def _mock_eurostat_geo_dic(dataset: str) -> dict[str, str]: assert region_mapping["AT111"] == "Mittelburgenland" assert "ATX" not in region_mapping assert "DE1" in region_mapping # from special cases - assert region_mapping["DE1"] != "invalid" assert region_mapping["DE1"] == utils.COUNTRIES_SPECIAL_CASES["DE1"] def test_create_region_mapping_falls_back_when_eurostat_fails(self):