From 665a14bb33250614029e30ab6021396313f490cf Mon Sep 17 00:00:00 2001 From: Jakob Duerrwaechter Date: Thu, 19 Dec 2024 12:14:59 +0100 Subject: [PATCH] remove scalar data reader, enable 0d-ndas instead --- sodym/data_reader.py | 84 ----------------------------------- sodym/df_to_nda.py | 5 ++- sodym/dimensions.py | 4 ++ sodym/mfa_definition.py | 1 - sodym/mfa_system.py | 22 +-------- sodym/named_dim_arrays.py | 22 ++++++--- tests/test_reader.py | 93 ++------------------------------------- 7 files changed, 28 insertions(+), 203 deletions(-) diff --git a/sodym/data_reader.py b/sodym/data_reader.py index 1e1c003..df98ee8 100644 --- a/sodym/data_reader.py +++ b/sodym/data_reader.py @@ -1,5 +1,4 @@ from abc import ABC, abstractmethod -import numpy as np import pandas as pd from typing import List, Dict import yaml @@ -22,10 +21,6 @@ def read_dimensions(self, dimension_definitions: List[DimensionDefinition]) -> D def read_dimension(self, dimension_definition: DimensionDefinition) -> Dimension: pass - def read_scalar_data(self, parameters: List[str]) -> dict: - """Optional addition method if additional scalar parameters are required.""" - pass - @abstractmethod def read_parameter_values(self, parameter_name: str, dims: DimensionSet) -> Parameter: pass @@ -174,97 +169,18 @@ def read_parameter_values(self, parameter_name: str, dims): return Parameter.from_df(dims=dims, name=parameter_name, df=data) -class ScalarDataReader(ABC): - def read_scalar_data(self, parameters: List[str], **read_scalar_kwargs) -> dict: - """Optional addition method if additional scalar parameters are required.""" - raise NotImplementedError("No scalar data reader specified.") - - -class EmptyScalarDataReader(ScalarDataReader): - - def read_scalar_data(self, parameters: List[str]): - if parameters: - raise ValueError("List of scalar parameters is not empty, but no real reader for scalar data is specified.") - return {} - - -class CheckedScalarDataReader(ScalarDataReader): - """Parent class for scalar data readers that ensures that the read data and requested parameters - match. - """ - - def __init__(self, scalar_file: str = None, **read_scalar_kwargs): - self.scalar_file = scalar_file - self.read_scalar_kwargs = read_scalar_kwargs - - def read_scalar_data(self, parameters: List[str]): - if not parameters: - return {} - if self.scalar_file is None: - raise ValueError("No scalar data file specified.") - data = self._get_dict() - if not set(parameters) == set(data.keys()): - raise ValueError( - f"Parameter names in yaml file do not match requested parameters. Unexpected parameters: {set(data.keys()) - set(parameters)}; Missing parameters: {set(parameters) - set(data.keys())}." - ) - return data - - def add_pd_reader_defaults(self): - if "header" not in self.read_scalar_kwargs: - self.read_scalar_kwargs["header"] = None - if "index_col" not in self.read_scalar_kwargs: - self.read_scalar_kwargs["index_col"] = 0 - - def _get_dict(self) -> dict: - raise NotImplementedError(f"Method not implemented. Choose a subclass of {self.__class__.__name__}.") - - -class YamlScalarDataReader(CheckedScalarDataReader): - - def _get_dict(self) -> dict: - with open(self.scalar_file, "r") as stream: - return yaml.safe_load(stream) - - -class CSVScalarDataReader(CheckedScalarDataReader): - - def _get_dict(self) -> dict: - self.add_pd_reader_defaults() - df = pd.read_csv(self.scalar_file, **self.read_scalar_kwargs) - dict = df.to_dict()[df.columns[0]] - return dict - - -class ExcelScalarDataReader(CheckedScalarDataReader): - - def __init__(self, scalar_file: str = None, scalar_sheet: str = None, **read_scalar_kwargs): - self.sheet_name = scalar_sheet - super().__init__(scalar_file, **read_scalar_kwargs) - - def _get_dict(self) -> dict: - self.add_pd_reader_defaults() - df= pd.read_excel(self.scalar_file, sheet_name=self.sheet_name, **self.read_scalar_kwargs) - dict = df.to_dict()[df.columns[0]] - return dict - - class CompoundDataReader(DataReader): def __init__( self, dimension_reader: DimensionReader, parameter_reader: ParameterReader, - scalar_data_reader: ScalarDataReader = EmptyScalarDataReader(), ): self.dimension_reader = dimension_reader self.parameter_reader = parameter_reader - self.scalar_data_reader = scalar_data_reader def read_dimension(self, dimension_definition: DimensionDefinition) -> Dimension: return self.dimension_reader.read_dimension(dimension_definition) def read_parameter_values(self, parameter_name: str, dims: DimensionSet) -> Parameter: return self.parameter_reader.read_parameter_values(parameter_name, dims) - - def read_scalar_data(self, parameters: List[str]) -> dict: - return self.scalar_data_reader.read_scalar_data(parameters) diff --git a/sodym/df_to_nda.py b/sodym/df_to_nda.py index 98b27d2..7903199 100644 --- a/sodym/df_to_nda.py +++ b/sodym/df_to_nda.py @@ -202,7 +202,10 @@ def _sort_df(self): def _check_data_complete(self): # Generate expected index tuples from NamedDimArray dimensions - expected_index_tuples = set(itertools.product(*(dim.items for dim in self.nda.dims))) + if self.nda.dims.ndim == 0: + expected_index_tuples = set() + else: + expected_index_tuples = set(itertools.product(*(dim.items for dim in self.nda.dims))) # Generate actual index tuples from DataFrame columns actual_index_tuples = set(self.df.drop(columns=self.format.value_column).itertuples(index=False, name=None)) diff --git a/sodym/dimensions.py b/sodym/dimensions.py index b2a35cf..655a76e 100644 --- a/sodym/dimensions.py +++ b/sodym/dimensions.py @@ -150,6 +150,10 @@ def shape(self, keys: tuple = None): keys = keys if keys else self.letters return tuple(self.size(key) for key in keys) + @property + def ndim(self): + return len(self.dim_list) + def get_subset(self, dims: tuple = None) -> "DimensionSet": """Selects :py:class:`Dimension` objects from the object attribute dim_list, according to the dims passed, which can be either letters or names. diff --git a/sodym/mfa_definition.py b/sodym/mfa_definition.py index 69cab4c..f8801bd 100644 --- a/sodym/mfa_definition.py +++ b/sodym/mfa_definition.py @@ -82,7 +82,6 @@ class MFADefinition(PydanticBaseModel): flows: List[FlowDefinition] stocks: List[StockDefinition] parameters: List[ParameterDefinition] - scalar_parameters: Optional[list] = [] @model_validator(mode="after") def check_dimension_letters(self): diff --git a/sodym/mfa_system.py b/sodym/mfa_system.py index f4e307c..db33ab1 100644 --- a/sodym/mfa_system.py +++ b/sodym/mfa_system.py @@ -11,7 +11,7 @@ from .process_helper import make_processes from .stock_helper import make_empty_stocks from .flow_helper import make_empty_flows -from .data_reader import DataReader, CompoundDataReader, CSVDimensionReader, CSVParameterReader, CSVScalarDataReader, ExcelDimensionReader, ExcelParameterReader, ExcelScalarDataReader +from .data_reader import DataReader, CompoundDataReader, CSVDimensionReader, CSVParameterReader, ExcelDimensionReader, ExcelParameterReader class MFASystem(PydanticBaseModel): @@ -45,7 +45,6 @@ class MFASystem(PydanticBaseModel): dims: DimensionSet parameters: Dict[str, Parameter] - scalar_parameters: Optional[dict] = {} processes: Dict[str, Process] flows: Dict[str, Flow] stocks: Optional[Dict[str, Stock]] = {} @@ -56,7 +55,6 @@ def from_data_reader(cls, definition: MFADefinition, data_reader: DataReader): Initialises stocks and flows with all zero values.""" dims = data_reader.read_dimensions(definition.dimensions) parameters = data_reader.read_parameters(definition.parameters, dims=dims) - scalar_parameters = data_reader.read_scalar_data(definition.scalar_parameters) processes = make_processes(definition.processes) flows = make_empty_flows(processes=processes, flow_definitions=definition.flows, dims=dims) stocks = make_empty_stocks( @@ -65,7 +63,6 @@ def from_data_reader(cls, definition: MFADefinition, data_reader: DataReader): return cls( dims=dims, parameters=parameters, - scalar_parameters=scalar_parameters, processes=processes, flows=flows, stocks=stocks, @@ -77,20 +74,17 @@ def from_csv( definition: MFADefinition, dimension_files: dict, parameter_files: dict, - scalar_file: str = None, ): """Define and set up the MFA system and load all required data from CSV files. Initialises stocks and flows with all zero values. See :py:class:`sodym.data_reader.CSVDimensionReader`, :py:class:`sodym.data_reader.CSVParameterReader`, and - :py:class:`sodym.data_reader.CSVScalarDataReader` for more information on the expected data format. :param definition: The MFA definition object :param dimension_files: A dictionary mapping dimension names to CSV files :param parameter_files: A dictionary mapping parameter names to CSV files - :param scalar_file: The path to the CSV file containing scalar data """ dimension_reader = CSVDimensionReader( @@ -99,13 +93,9 @@ def from_csv( parameter_reader = CSVParameterReader( parameter_files=parameter_files, ) - scalar_data_reader = CSVScalarDataReader( - scalar_file=scalar_file, - ) data_reader = CompoundDataReader( dimension_reader=dimension_reader, parameter_reader=parameter_reader, - scalar_data_reader=scalar_data_reader, ) return cls.from_data_reader(definition, data_reader) @@ -115,10 +105,8 @@ def from_excel( definition: MFADefinition, dimension_files: dict, parameter_files: dict, - scalar_file: str = None, dimension_sheets: dict = None, parameter_sheets: dict = None, - scalar_sheet: str = None, ): """Define and set up the MFA system and load all required data from Excel files. Initialises stocks and flows with all zero values. @@ -126,16 +114,13 @@ def from_excel( See :py:class:`sodym.data_reader.ExcelDimensionReader`, :py:class:`sodym.data_reader.ExcelParameterReader`, and - :py:class:`sodym.data_reader.ExcelScalarDataReader` for more information on the expected data format. :param definition: The MFA definition object :param dimension_files: A dictionary mapping dimension names to Excel files :param parameter_files: A dictionary mapping parameter names to Excel files - :param scalar_file: The path to the Excel file containing scalar data :param dimension_sheets: A dictionary mapping dimension names to sheet names in the Excel files :param parameter_sheets: A dictionary mapping parameter names to sheet names in the Excel files - :param scalar_sheet: The name of the sheet in the Excel file containing scalar data """ dimension_reader = ExcelDimensionReader( dimension_files=dimension_files, @@ -145,14 +130,9 @@ def from_excel( parameter_files=parameter_files, parameter_sheets=parameter_sheets, ) - scalar_data_reader = ExcelScalarDataReader( - scalar_file=scalar_file, - scalar_sheet=scalar_sheet - ) data_reader = CompoundDataReader( dimension_reader=dimension_reader, parameter_reader=parameter_reader, - scalar_data_reader=scalar_data_reader, ) return cls.from_data_reader(definition, data_reader) diff --git a/sodym/named_dim_arrays.py b/sodym/named_dim_arrays.py index 5d217d4..e42b938 100644 --- a/sodym/named_dim_arrays.py +++ b/sodym/named_dim_arrays.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd from pydantic import BaseModel as PydanticBaseModel, ConfigDict, model_validator -from typing import Optional +from typing import Optional, Union from .dimensions import DimensionSet, Dimension from .df_to_nda import DataFrameToNDADataConverter @@ -55,18 +55,27 @@ class NamedDimArray(PydanticBaseModel): model_config = ConfigDict(arbitrary_types_allowed=True, protected_namespaces=()) dims: DimensionSet - values: Optional[np.ndarray] = None + values: Optional[Union[np.ndarray, np.generic]] = None name: Optional[str] = "unnamed" @model_validator(mode="after") - def fill_values(self): + def validate_values(self): if self.values is None: self.values = np.zeros(self.dims.shape()) - elif self.values.shape != self.dims.shape(): + return self + + def check_value_format(self): + if not isinstance(self.values, (np.ndarray, np.generic)): + raise ValueError("Values must be a numpy array or numpy generic.") + if self.dims.ndim > 0 and not isinstance(self.values, np.ndarray): + raise ValueError("Values must be a numpy array, except for 0-dimensional arrays.") + elif self.dims.ndim == 0 and isinstance(self.values, np.generic): + self.values = np.array(self.values) + + if self.values.shape != self.dims.shape(): raise ValueError( "Values passed to {self.__cls__.__name__} must have the same shape as the DimensionSet." ) - return self @classmethod def from_dims_superset(cls, dims_superset: DimensionSet, dim_letters: tuple = None, **kwargs): @@ -95,9 +104,8 @@ def shape(self): return self.dims.shape() def set_values(self, values: np.ndarray): - assert isinstance(values, np.ndarray), "Values must be a numpy array." - assert values.shape == self.shape, "Values must have the same shape as the DimensionSet." self.values = values + self.check_value_format() def sum_values(self): return np.sum(self.values) diff --git a/tests/test_reader.py b/tests/test_reader.py index 99a501b..5559fbe 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from sodym.data_reader import CSVDimensionReader, ExcelDimensionReader, CSVParameterReader, ExcelParameterReader, CSVScalarDataReader, ExcelScalarDataReader, YamlScalarDataReader +from sodym.data_reader import CSVDimensionReader, ExcelDimensionReader, CSVParameterReader, ExcelParameterReader from sodym.mfa_definition import DimensionDefinition, ParameterDefinition, MFADefinition from sodym.mfa_system import MFASystem @@ -102,114 +102,29 @@ def test_wrong_parameter_reader(): return -def test_scalar_reader(): - - test_prms = { - "v1": ["a"], - "v2": ["a", "b", "c"], - } - prm_values = { - "v1": [0.5], - "v2": [0.5, 0.6, 0.7], - } - base_name = "tests/tests_data/scalars" - - for test_name, prm_names in test_prms.items(): - - readers = [ - YamlScalarDataReader(scalar_file = f"{base_name}_{test_name}.yml"), - CSVScalarDataReader(scalar_file = f"{base_name}_{test_name}.csv"), - ExcelScalarDataReader(scalar_file = f"{base_name}.xlsx", scalar_sheet = test_name) - ] - - for reader in readers: - data = reader.read_scalar_data(prm_names) - for i_prm, prm in enumerate(prm_names): - assert data[prm] == prm_values[test_name][i_prm] - - def test_build_mfa_system(): - definition_ws = MFADefinition( - dimensions=dimension_definitions, - processes=[], - stocks=[], - flows=[], - parameters=parameter_definitions, - scalar_parameters=["a", "b", "c"], - ) - definition_ns = MFADefinition( + definition = MFADefinition( dimensions=dimension_definitions, processes=[], stocks=[], flows=[], parameters=parameter_definitions, - scalar_parameters=None, ) class MinimalMFASystem(MFASystem): def compute(self): pass mfa = MinimalMFASystem.from_csv( - definition_ws, - csv_dimension_files, - csv_parameter_files, - scalar_file = "tests/tests_data/scalars_v2.csv", - ) - assert mfa.scalar_parameters["a"] == 0.5 - mfa = MinimalMFASystem.from_csv( - definition_ns, + definition, csv_dimension_files, csv_parameter_files, ) mfa = MinimalMFASystem.from_excel( - definition_ws, - excel_dimension_files, - excel_parameter_files, - dimension_sheets=dimension_sheet_names, - parameter_sheets=parameter_sheet_names, - scalar_file = "tests/tests_data/scalars.xlsx", - scalar_sheet="v2", - ) - assert mfa.scalar_parameters["b"] == 0.6 - mfa = MinimalMFASystem.from_excel( - definition_ns, + definition, excel_dimension_files, excel_parameter_files, dimension_sheets=dimension_sheet_names, parameter_sheets=parameter_sheet_names, ) - - with pytest.raises(ValueError): - # no scalars in definition, scalar file specified - mfa = MinimalMFASystem.from_csv( - definition_ns, - csv_dimension_files, - csv_parameter_files, - scalar_file = "tests/tests_data/scalars_v2.csv", - ) - # scalars in definition, no scalar file specified - mfa = MinimalMFASystem.from_csv( - definition_ws, - csv_dimension_files, - csv_parameter_files, - ) - # same for excel - mfa = MinimalMFASystem.from_excel( - definition_ns, - excel_dimension_files, - excel_parameter_files, - dimension_sheets=dimension_sheet_names, - parameter_sheets=parameter_sheet_names, - scalar_file = "tests/tests_data/scalars.xlsx", - scalar_sheet="v2", - ) - mfa = MinimalMFASystem.from_excel( - definition_ws, - excel_dimension_files, - excel_parameter_files, - dimension_sheets=dimension_sheet_names, - parameter_sheets=parameter_sheet_names, - ) -