Skip to content

Commit

Permalink
remove scalar data reader, enable 0d-ndas instead
Browse files Browse the repository at this point in the history
  • Loading branch information
JakobBD committed Dec 19, 2024
1 parent 2aa9e89 commit 665a14b
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 203 deletions.
84 changes: 0 additions & 84 deletions sodym/data_reader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from abc import ABC, abstractmethod
import numpy as np
import pandas as pd
from typing import List, Dict
import yaml
Expand All @@ -22,10 +21,6 @@ def read_dimensions(self, dimension_definitions: List[DimensionDefinition]) -> D
def read_dimension(self, dimension_definition: DimensionDefinition) -> Dimension:
pass

def read_scalar_data(self, parameters: List[str]) -> dict:
"""Optional addition method if additional scalar parameters are required."""
pass

@abstractmethod
def read_parameter_values(self, parameter_name: str, dims: DimensionSet) -> Parameter:
pass
Expand Down Expand Up @@ -174,97 +169,18 @@ def read_parameter_values(self, parameter_name: str, dims):
return Parameter.from_df(dims=dims, name=parameter_name, df=data)


class ScalarDataReader(ABC):
    """Base interface for readers that load scalar parameter values."""

    def read_scalar_data(self, parameters: List[str], **read_scalar_kwargs) -> dict:
        """Load the named scalar parameters; concrete readers must override this."""
        raise NotImplementedError("No scalar data reader specified.")


class EmptyScalarDataReader(ScalarDataReader):
    """Fallback reader used when no scalar data source is configured."""

    def read_scalar_data(self, parameters: List[str]):
        """Return an empty dict; fail loudly if scalar parameters were actually requested."""
        if not parameters:
            return {}
        raise ValueError("List of scalar parameters is not empty, but no real reader for scalar data is specified.")


class CheckedScalarDataReader(ScalarDataReader):
    """Parent class for scalar data readers that ensures that the read data and requested parameters
    match.
    """

    def __init__(self, scalar_file: str = None, **read_scalar_kwargs):
        # Path to the file containing scalar data; may stay None if no scalars are needed.
        self.scalar_file = scalar_file
        # Extra keyword arguments forwarded to the underlying file reader.
        self.read_scalar_kwargs = read_scalar_kwargs

    def read_scalar_data(self, parameters: List[str]):
        """Read scalar data and check it against the requested parameter names.

        Returns an empty dict when no parameters are requested. Raises ValueError
        if no file is configured, or if the keys found in the file do not exactly
        match `parameters`.
        """
        if not parameters:
            return {}
        if self.scalar_file is None:
            raise ValueError("No scalar data file specified.")
        data = self._get_dict()
        # Bug fix: idiomatic `!=` instead of `not ... == ...`; message no longer
        # claims a "yaml file" — this base class also serves CSV/Excel readers.
        if set(parameters) != set(data.keys()):
            raise ValueError(
                f"Parameter names in scalar data file do not match requested parameters. "
                f"Unexpected parameters: {set(data.keys()) - set(parameters)}; "
                f"Missing parameters: {set(parameters) - set(data.keys())}."
            )
        return data

    def add_pd_reader_defaults(self):
        """Apply default pandas reader options: no header row, first column as index."""
        self.read_scalar_kwargs.setdefault("header", None)
        self.read_scalar_kwargs.setdefault("index_col", 0)

    def _get_dict(self) -> dict:
        # Subclasses implement the actual file parsing.
        raise NotImplementedError(f"Method not implemented. Choose a subclass of {self.__class__.__name__}.")


class YamlScalarDataReader(CheckedScalarDataReader):
    """Reads scalar parameter values from a YAML file."""

    def _get_dict(self) -> dict:
        # safe_load returns the top-level mapping of the YAML document.
        with open(self.scalar_file, "r") as f:
            return yaml.safe_load(f)


class CSVScalarDataReader(CheckedScalarDataReader):
    """Reads scalar parameter values from a CSV file.

    With the pandas defaults applied (no header, first column as index), the
    file is interpreted as name/value rows.
    """

    def _get_dict(self) -> dict:
        self.add_pd_reader_defaults()
        df = pd.read_csv(self.scalar_file, **self.read_scalar_kwargs)
        # Bug fix: do not shadow the builtin `dict`; return the mapping of the
        # first data column (index -> value) directly.
        return df.to_dict()[df.columns[0]]


class ExcelScalarDataReader(CheckedScalarDataReader):
    """Reads scalar parameter values from one sheet of an Excel file."""

    def __init__(self, scalar_file: str = None, scalar_sheet: str = None, **read_scalar_kwargs):
        # Sheet holding the scalar data; passed through to pandas as sheet_name.
        # NOTE(review): pd.read_excel with sheet_name=None returns a dict of all
        # sheets, which _get_dict would not handle — confirm a sheet is always given.
        self.sheet_name = scalar_sheet
        super().__init__(scalar_file, **read_scalar_kwargs)

    def _get_dict(self) -> dict:
        self.add_pd_reader_defaults()
        df = pd.read_excel(self.scalar_file, sheet_name=self.sheet_name, **self.read_scalar_kwargs)
        # Bug fix: do not shadow the builtin `dict`; return the mapping of the
        # first data column (index -> value) directly.
        return df.to_dict()[df.columns[0]]


class CompoundDataReader(DataReader):
    """Composes dimension, parameter, and scalar readers behind the DataReader interface."""

    def __init__(
        self,
        dimension_reader: DimensionReader,
        parameter_reader: ParameterReader,
        scalar_data_reader: ScalarDataReader = None,
    ):
        self.dimension_reader = dimension_reader
        self.parameter_reader = parameter_reader
        # Bug fix: the previous default argument `EmptyScalarDataReader()` was
        # evaluated once at function-definition time and shared by every instance;
        # construct the fallback per instance instead.
        self.scalar_data_reader = (
            scalar_data_reader if scalar_data_reader is not None else EmptyScalarDataReader()
        )

    def read_dimension(self, dimension_definition: DimensionDefinition) -> Dimension:
        """Delegate to the configured dimension reader."""
        return self.dimension_reader.read_dimension(dimension_definition)

    def read_parameter_values(self, parameter_name: str, dims: DimensionSet) -> Parameter:
        """Delegate to the configured parameter reader."""
        return self.parameter_reader.read_parameter_values(parameter_name, dims)

    def read_scalar_data(self, parameters: List[str]) -> dict:
        """Delegate to the configured scalar data reader."""
        return self.scalar_data_reader.read_scalar_data(parameters)
5 changes: 4 additions & 1 deletion sodym/df_to_nda.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,10 @@ def _sort_df(self):

def _check_data_complete(self):
# Generate expected index tuples from NamedDimArray dimensions
expected_index_tuples = set(itertools.product(*(dim.items for dim in self.nda.dims)))
if self.nda.dims.ndim == 0:
expected_index_tuples = set()
else:
expected_index_tuples = set(itertools.product(*(dim.items for dim in self.nda.dims)))

# Generate actual index tuples from DataFrame columns
actual_index_tuples = set(self.df.drop(columns=self.format.value_column).itertuples(index=False, name=None))
Expand Down
4 changes: 4 additions & 0 deletions sodym/dimensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ def shape(self, keys: tuple = None):
keys = keys if keys else self.letters
return tuple(self.size(key) for key in keys)

@property
def ndim(self):
    """Number of dimensions in the set, i.e. the length of dim_list."""
    return len(self.dim_list)

def get_subset(self, dims: tuple = None) -> "DimensionSet":
"""Selects :py:class:`Dimension` objects from the object attribute dim_list,
according to the dims passed, which can be either letters or names.
Expand Down
1 change: 0 additions & 1 deletion sodym/mfa_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ class MFADefinition(PydanticBaseModel):
flows: List[FlowDefinition]
stocks: List[StockDefinition]
parameters: List[ParameterDefinition]
scalar_parameters: Optional[list] = []

@model_validator(mode="after")
def check_dimension_letters(self):
Expand Down
22 changes: 1 addition & 21 deletions sodym/mfa_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .process_helper import make_processes
from .stock_helper import make_empty_stocks
from .flow_helper import make_empty_flows
from .data_reader import DataReader, CompoundDataReader, CSVDimensionReader, CSVParameterReader, CSVScalarDataReader, ExcelDimensionReader, ExcelParameterReader, ExcelScalarDataReader
from .data_reader import DataReader, CompoundDataReader, CSVDimensionReader, CSVParameterReader, ExcelDimensionReader, ExcelParameterReader


class MFASystem(PydanticBaseModel):
Expand Down Expand Up @@ -45,7 +45,6 @@ class MFASystem(PydanticBaseModel):

dims: DimensionSet
parameters: Dict[str, Parameter]
scalar_parameters: Optional[dict] = {}
processes: Dict[str, Process]
flows: Dict[str, Flow]
stocks: Optional[Dict[str, Stock]] = {}
Expand All @@ -56,7 +55,6 @@ def from_data_reader(cls, definition: MFADefinition, data_reader: DataReader):
Initialises stocks and flows with all zero values."""
dims = data_reader.read_dimensions(definition.dimensions)
parameters = data_reader.read_parameters(definition.parameters, dims=dims)
scalar_parameters = data_reader.read_scalar_data(definition.scalar_parameters)
processes = make_processes(definition.processes)
flows = make_empty_flows(processes=processes, flow_definitions=definition.flows, dims=dims)
stocks = make_empty_stocks(
Expand All @@ -65,7 +63,6 @@ def from_data_reader(cls, definition: MFADefinition, data_reader: DataReader):
return cls(
dims=dims,
parameters=parameters,
scalar_parameters=scalar_parameters,
processes=processes,
flows=flows,
stocks=stocks,
Expand All @@ -77,20 +74,17 @@ def from_csv(
definition: MFADefinition,
dimension_files: dict,
parameter_files: dict,
scalar_file: str = None,
):
"""Define and set up the MFA system and load all required data from CSV files.
Initialises stocks and flows with all zero values.
See :py:class:`sodym.data_reader.CSVDimensionReader`,
:py:class:`sodym.data_reader.CSVParameterReader`, and
:py:class:`sodym.data_reader.CSVScalarDataReader` for more information on the expected data
format.
:param definition: The MFA definition object
:param dimension_files: A dictionary mapping dimension names to CSV files
:param parameter_files: A dictionary mapping parameter names to CSV files
:param scalar_file: The path to the CSV file containing scalar data
"""

dimension_reader = CSVDimensionReader(
Expand All @@ -99,13 +93,9 @@ def from_csv(
parameter_reader = CSVParameterReader(
parameter_files=parameter_files,
)
scalar_data_reader = CSVScalarDataReader(
scalar_file=scalar_file,
)
data_reader = CompoundDataReader(
dimension_reader=dimension_reader,
parameter_reader=parameter_reader,
scalar_data_reader=scalar_data_reader,
)
return cls.from_data_reader(definition, data_reader)

Expand All @@ -115,27 +105,22 @@ def from_excel(
definition: MFADefinition,
dimension_files: dict,
parameter_files: dict,
scalar_file: str = None,
dimension_sheets: dict = None,
parameter_sheets: dict = None,
scalar_sheet: str = None,
):
"""Define and set up the MFA system and load all required data from Excel files.
Initialises stocks and flows with all zero values.
Builds a CompoundDataReader from Excel readers, and calls the from_data_reader class method.
See :py:class:`sodym.data_reader.ExcelDimensionReader`,
:py:class:`sodym.data_reader.ExcelParameterReader`, and
:py:class:`sodym.data_reader.ExcelScalarDataReader` for more information on the expected
data format.
:param definition: The MFA definition object
:param dimension_files: A dictionary mapping dimension names to Excel files
:param parameter_files: A dictionary mapping parameter names to Excel files
:param scalar_file: The path to the Excel file containing scalar data
:param dimension_sheets: A dictionary mapping dimension names to sheet names in the Excel files
:param parameter_sheets: A dictionary mapping parameter names to sheet names in the Excel files
:param scalar_sheet: The name of the sheet in the Excel file containing scalar data
"""
dimension_reader = ExcelDimensionReader(
dimension_files=dimension_files,
Expand All @@ -145,14 +130,9 @@ def from_excel(
parameter_files=parameter_files,
parameter_sheets=parameter_sheets,
)
scalar_data_reader = ExcelScalarDataReader(
scalar_file=scalar_file,
scalar_sheet=scalar_sheet
)
data_reader = CompoundDataReader(
dimension_reader=dimension_reader,
parameter_reader=parameter_reader,
scalar_data_reader=scalar_data_reader,
)
return cls.from_data_reader(definition, data_reader)

Expand Down
22 changes: 15 additions & 7 deletions sodym/named_dim_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
from pydantic import BaseModel as PydanticBaseModel, ConfigDict, model_validator
from typing import Optional
from typing import Optional, Union

from .dimensions import DimensionSet, Dimension
from .df_to_nda import DataFrameToNDADataConverter
Expand Down Expand Up @@ -55,18 +55,27 @@ class NamedDimArray(PydanticBaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True, protected_namespaces=())

dims: DimensionSet
values: Optional[np.ndarray] = None
values: Optional[Union[np.ndarray, np.generic]] = None
name: Optional[str] = "unnamed"

@model_validator(mode="after")
def fill_values(self):
def validate_values(self):
if self.values is None:
self.values = np.zeros(self.dims.shape())
elif self.values.shape != self.dims.shape():
return self

def check_value_format(self):
    """Validate (and, for 0-d, normalize) self.values against self.dims.

    Raises ValueError if values is not a numpy type, is not an ndarray while
    dims has ndim > 0, or does not match the DimensionSet's shape. A numpy
    scalar (np.generic) is accepted for 0-dimensional dims and converted to a
    0-d ndarray in place.
    """
    if not isinstance(self.values, (np.ndarray, np.generic)):
        raise ValueError("Values must be a numpy array or numpy generic.")
    if self.dims.ndim > 0 and not isinstance(self.values, np.ndarray):
        raise ValueError("Values must be a numpy array, except for 0-dimensional arrays.")
    elif self.dims.ndim == 0 and isinstance(self.values, np.generic):
        # Normalize numpy scalars to 0-d arrays so downstream code sees an ndarray.
        self.values = np.array(self.values)

    if self.values.shape != self.dims.shape():
        # Bug fix: the original message lacked the f-prefix and referenced the
        # nonexistent attribute `__cls__`, so the braces were printed literally.
        raise ValueError(
            f"Values passed to {self.__class__.__name__} must have the same shape as the DimensionSet."
        )
    return self

@classmethod
def from_dims_superset(cls, dims_superset: DimensionSet, dim_letters: tuple = None, **kwargs):
Expand Down Expand Up @@ -95,9 +104,8 @@ def shape(self):
return self.dims.shape()

def set_values(self, values: np.ndarray):
    """Assign new values to the array after checking type and shape."""
    assert isinstance(values, np.ndarray), "Values must be a numpy array."
    assert values.shape == self.shape, "Values must have the same shape as the DimensionSet."
    self.values = values
    # NOTE(review): the asserts above look redundant with check_value_format and
    # are stripped under `python -O` — confirm whether both checks are intended.
    self.check_value_format()

def sum_values(self):
    """Return the sum over all entries of the array."""
    return self.values.sum()
Expand Down
Loading

0 comments on commit 665a14b

Please sign in to comment.