diff --git a/.gitignore b/.gitignore index 6cabf4c..1dda6bb 100644 --- a/.gitignore +++ b/.gitignore @@ -169,9 +169,9 @@ cython_debug/ # Ruff .ruff_cache -# static artifacts -artifacts - # pytest coverage pytest.xml -pytest-coverage.txt \ No newline at end of file +pytest-coverage.txt + +# artifacts +artifacts \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 363aa31..43caab4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -51,14 +51,23 @@ ] }, { - "name": "bds export-entities-to-json", + "name": "bds export-to-json", "type": "debugpy", "request": "launch", "cwd": "${workspaceFolder}", - "program": "${workspaceFolder}/.venv/bin/bam_data_store", + "program": "${workspaceFolder}/.venv/bin/bam_masterdata", "justMyCode": false, "args": ["export-entities-to-json"] }, + { + "name": "bds export-to-excel", + "type": "debugpy", + "request": "launch", + "cwd": "${workspaceFolder}", + "program": "${workspaceFolder}/.venv/bin/bam_masterdata", + "justMyCode": false, + "args": ["export-entities-to-excel"] + }, ] }, } diff --git a/bam_data_store/cli/cli.py b/bam_data_store/cli/cli.py deleted file mode 100644 index 47cc9c2..0000000 --- a/bam_data_store/cli/cli.py +++ /dev/null @@ -1,34 +0,0 @@ -import os - -import click - -from bam_data_store.cli.entities_to_json import entities_to_json -from bam_data_store.logger import logger -from bam_data_store.utils import delete_and_create_dir, listdir_py_modules - - -@click.group(help='Entry point to run `bam_data_store` CLI commands.') -def cli(): - pass - - -@cli.command(help='Export entities to JSON files to the `./artifacts/` folder.') -def export_entities_to_json(): - datamodel_dir = os.path.join('.', 'bam_data_store', 'datamodel') - export_dir = os.path.join('.', 'artifacts') - - # Delete and create the export directory - delete_and_create_dir(directory_path=export_dir, logger=logger) - - # Get the Python modules to process the datamodel - py_modules = listdir_py_modules(directory_path=datamodel_dir, logger=logger) - - # Process each module - for module_path in py_modules: - entities_to_json(module_path=module_path, export_dir=export_dir) - - click.echo(f'All entity artifacts have been generated and saved to {export_dir}') - - -if __name__ == '__main__': - cli() diff --git a/bam_data_store/cli/entities_to_json.py b/bam_data_store/cli/entities_to_json.py deleted file mode 100644 index 7bf3cd5..0000000 --- a/bam_data_store/cli/entities_to_json.py +++ /dev/null @@ -1,52 +0,0 @@ -import importlib.util -import inspect -import os - -import click - - -def import_module(module_path: str): - """ - Dynamically imports a module from the given file path. - - Args: - module_path (str): Path to the Python module file. - - Returns: - module: Imported module object. - """ - module_name = os.path.splitext(os.path.basename(module_path))[0] - spec = importlib.util.spec_from_file_location(module_name, module_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - return module - - -def entities_to_json(module_path: str, export_dir: str) -> None: - """ - Export entities to JSON files. The Python modules are imported using the function `import_module`, - and their contents are inspected (using `inspect`) to find the classes in the datamodel containing - `defs` and with a `to_json` method defined. - - Args: - module_path (str): Path to the Python module file. - export_dir (str): Path to the directory where the JSON files will be saved. - """ - module = import_module(module_path=module_path) - for name, obj in inspect.getmembers(module, inspect.isclass): - # Ensure the class has the `to_json` method - if not hasattr(obj, 'defs') or not callable(getattr(obj, 'to_json')): - continue - - try: - # Instantiate the class and call the method - json_data = obj().to_json(indent=2) - - # Write JSON data to file - output_file = os.path.join(export_dir, f'{obj.defs.code}.json') - with open(output_file, 'w', encoding='utf-8') as f: - f.write(json_data) - - click.echo(f'Saved JSON for class {name} to {output_file}') - except Exception as err: - click.echo(f'Failed to process class {name} in {module_path}: {err}') diff --git a/bam_data_store/utils/__init__.py b/bam_data_store/utils/__init__.py deleted file mode 100644 index f196789..0000000 --- a/bam_data_store/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .utils import delete_and_create_dir, listdir_py_modules diff --git a/bam_data_store/utils/utils.py b/bam_data_store/utils/utils.py deleted file mode 100644 index a25ed25..0000000 --- a/bam_data_store/utils/utils.py +++ /dev/null @@ -1,53 +0,0 @@ -import glob -import os -import shutil -from typing import TYPE_CHECKING, Optional - -if TYPE_CHECKING: - from structlog._config import BoundLoggerLazyProxy - - -def delete_and_create_dir(directory_path: Optional[str], logger: 'BoundLoggerLazyProxy') -> None: - """ - Deletes the directory at `directory_path` and creates a new one in the same path. - - Args: - directory_path (Optional[str]): The directory path to delete and create the folder. - logger (BoundLoggerLazyProxy): The logger to log messages. - """ - if not directory_path: - logger.warning( - 'The `directory_path` is empty. Please, provide a proper input to the function.' - ) - return None - - if os.path.exists(directory_path): - shutil.rmtree(directory_path) - os.makedirs(directory_path) - - -def listdir_py_modules(directory_path: Optional[str], logger: 'BoundLoggerLazyProxy') -> list[str]: - """ - Recursively goes through the `directory_path` and returns a list of all .py files that do not start with '_'. - - Args: - directory_path (Optional[str]): The directory path to search through. - logger (BoundLoggerLazyProxy): The logger to log messages. - - Returns: - list[str]: A list of all .py files that do not start with '_' - """ - if not directory_path: - logger.warning( - 'The `directory_path` is empty. Please, provide a proper input to the function.' - ) - return [] - - # Use glob to find all .py files recursively - files = glob.glob(os.path.join(directory_path, '**', '*.py'), recursive=True) - if not files: - logger.info('No Python files found in the directory.') - return [] - - # Filter out files that start with '_' - return [f for f in files if not os.path.basename(f).startswith('_')] diff --git a/bam_data_store/cli/__init__.py b/bam_masterdata/cli/__init__.py similarity index 100% rename from bam_data_store/cli/__init__.py rename to bam_masterdata/cli/__init__.py diff --git a/bam_masterdata/cli/cli.py b/bam_masterdata/cli/cli.py new file mode 100644 index 0000000..fbcc0f2 --- /dev/null +++ b/bam_masterdata/cli/cli.py @@ -0,0 +1,80 @@ +import os + +import click +from openpyxl import Workbook + +from bam_masterdata.cli.entities_to_excel import entities_to_excel +from bam_masterdata.cli.entities_to_json import entities_to_json +from bam_masterdata.logger import logger +from bam_masterdata.utils import ( + delete_and_create_dir, + import_module, + listdir_py_modules, +) + + +@click.group(help='Entry point to run `bam_masterdata` CLI commands.') +def cli(): + pass + + +@cli.command(help='Export entities to JSON files to the `./artifacts/` folder.') +def export_entities_to_json(): + # Get the directories from the Python modules and the export directory for the static artifacts + datamodel_dir = os.path.join('.', 'bam_masterdata', 'datamodel') + export_dir = os.path.join('.', 'artifacts') + + # Delete and create the export directory + delete_and_create_dir(directory_path=export_dir, logger=logger) + + # Get the Python modules to process the datamodel + py_modules = listdir_py_modules(directory_path=datamodel_dir, logger=logger) + + # Process each module using the `to_json` method of each entity + for module_path in py_modules: + entities_to_json(module_path=module_path, export_dir=export_dir, logger=logger) + + click.echo(f'All entity artifacts have been generated and saved to {export_dir}') + + +@cli.command( + help=""" + Export entities to an Excel file in the path `./artifacts/masterdata.xlsx`. + """, +) +def export_entities_to_excel(): + # Get the Python modules to process the datamodel + datamodel_dir = os.path.join('.', 'bam_masterdata', 'datamodel') + py_modules = listdir_py_modules(directory_path=datamodel_dir, logger=logger) + + # Load the definitions module classes + definitions_module = import_module( + module_path='./bam_masterdata/metadata/definitions.py' + ) + + # Process the modules and save the entities to the openBIS masterdata Excel file + masterdata_file = os.path.join('.', 'artifacts', 'masterdata.xlsx') + wb = Workbook() + for i, module_path in enumerate(py_modules): + if i == 0: + ws = wb.active + else: + ws = wb.create_sheet() + ws.title = ( + os.path.basename(module_path) + .capitalize() + .replace('.py', '') + .replace('_', ' ') + ) + entities_to_excel( + worksheet=ws, + module_path=module_path, + definitions_module=definitions_module, + ) + wb.save(masterdata_file) + + click.echo(f'All masterdata have been generated and saved to {masterdata_file}') + + +if __name__ == '__main__': + cli() diff --git a/bam_masterdata/cli/entities_to_excel.py b/bam_masterdata/cli/entities_to_excel.py new file mode 100644 index 0000000..a344052 --- /dev/null +++ b/bam_masterdata/cli/entities_to_excel.py @@ -0,0 +1,75 @@ +import inspect +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from openpyxl.worksheet.worksheet import Worksheet + +from bam_masterdata.utils import import_module + + +def entities_to_excel( + worksheet: 'Worksheet', + module_path: str, + definitions_module: Any, +) -> None: + """ + Export entities to the Excel file. The Python modules are imported using the function `import_module`, + and their contents are inspected (using `inspect`) to find the classes in the datamodel containing + `defs` and with a `to_json` method defined. Each row is then appended to the `worksheet`. + + Args: + worksheet (Worksheet): The worksheet to append the entities. + module_path (str): Path to the Python module file. + definitions_module (Any): The module containing the definitions of the entities. This is used + to match the header definitions of the entities. + """ + def_members = inspect.getmembers(definitions_module, inspect.isclass) + module = import_module(module_path=module_path) + for _, obj in inspect.getmembers(module, inspect.isclass): + # Ensure the class has the `to_json` method + if not hasattr(obj, 'defs') or not callable(getattr(obj, 'to_json')): + continue + + obj_instance = obj() + + # Entity title + obj_definitions = obj_instance.defs + worksheet.append([obj_definitions.excel_name]) + + # Entity header definitions and values + for def_name, def_cls in def_members: + if def_name == obj_definitions.name: + break + worksheet.append(obj_definitions.excel_headers) + header_values = [ + getattr(obj_definitions, f_set) for f_set in def_cls.model_fields.keys() + ] + worksheet.append(header_values) + + # Properties assignment for ObjectType + if obj_instance.entity_type == 'ObjectType': + if not obj_instance.properties: + continue + worksheet.append(obj_instance.properties[0].excel_headers) + for prop in obj_instance.properties: + row = [] + for f_set in prop.model_fields.keys(): + if f_set == 'data_type': + val = prop.data_type.value + else: + val = getattr(prop, f_set) + row.append(val) + worksheet.append(row) + # Terms assignment for VocabularyType + elif obj_instance.entity_type == 'VocabularyType': + if not obj_instance.terms: + continue + worksheet.append(obj_instance.terms[0].excel_headers) + for term in obj_instance.terms: + worksheet.append( + getattr(term, f_set) for f_set in term.model_fields.keys() + ) + + # ? do the PropertyTypeDef need to be exported to Excel? + + worksheet.append(['']) # empty row after entity definitions diff --git a/bam_masterdata/cli/entities_to_json.py b/bam_masterdata/cli/entities_to_json.py new file mode 100644 index 0000000..55b6402 --- /dev/null +++ b/bam_masterdata/cli/entities_to_json.py @@ -0,0 +1,64 @@ +import inspect +import json +import os +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from structlog._config import BoundLoggerLazyProxy + +import click + +from bam_masterdata.utils import delete_and_create_dir, import_module + + +def entities_to_json( + module_path: str, export_dir: str, logger: 'BoundLoggerLazyProxy' +) -> None: + """ + Export entities to JSON files. The Python modules are imported using the function `import_module`, + and their contents are inspected (using `inspect`) to find the classes in the datamodel containing + `defs` and with a `to_json` method defined. + + Args: + module_path (str): Path to the Python module file. + export_dir (str): Path to the directory where the JSON files will be saved. + logger (BoundLoggerLazyProxy): The logger to log messages. + """ + module = import_module(module_path=module_path) + # export to specific subfolders for each type of entity (each module) + module_export_dir = os.path.join( + export_dir, os.path.basename(module_path).replace('.py', '') + ) + delete_and_create_dir(directory_path=module_export_dir, logger=logger) + for name, obj in inspect.getmembers(module, inspect.isclass): + # Ensure the class has the `to_json` method + if not hasattr(obj, 'defs') or not callable(getattr(obj, 'to_json')): + continue + + try: + # Instantiate the class and call the method + json_data = obj().to_json(indent=2) + + # Write JSON data to file + output_file = os.path.join(module_export_dir, f'{obj.defs.code}.json') + with open(output_file, 'w', encoding='utf-8') as f: + f.write(json_data) + + click.echo(f'Saved JSON for class {name} to {output_file}') + except Exception as err: + click.echo(f'Failed to process class {name} in {module_path}: {err}') + + # Special case of `PropertyTypeDef` in `property_types.py` + if 'property_types.py' in module_path: + for name, obj in inspect.getmembers(module): + if name.startswith('_') or name == 'PropertyTypeDef': + continue + try: + json_data = json.dumps(obj.model_dump(), indent=2) + output_file = os.path.join(module_export_dir, f'{obj.code}.json') + with open(output_file, 'w', encoding='utf-8') as f: + f.write(json_data) + + click.echo(f'Saved JSON for class {name} to {output_file}') + except Exception as err: + click.echo(f'Failed to process class {name} in {module_path}: {err}') diff --git a/bam_masterdata/datamodel/collection_types.py b/bam_masterdata/datamodel/collection_types.py new file mode 100644 index 0000000..6f0b110 --- /dev/null +++ b/bam_masterdata/datamodel/collection_types.py @@ -0,0 +1,54 @@ +from bam_masterdata.metadata.definitions import ( + CollectionTypeDef, + PropertyTypeAssignment, +) +from bam_masterdata.metadata.entities import CollectionType + + +class DefaultExperiment(CollectionType): + defs = CollectionTypeDef( + version=1, + code='DEFAULT_EXPERIMENT', + description=""" + Default Experiment//Standard-Experiment + """, + ) + + name = PropertyTypeAssignment( + version=1, + code='$NAME', + data_type='VARCHAR', + property_label='Name', + description=""" + Name + """, + mandatory=True, + show_in_edit_views=True, + section='General information', + ) + + grant = PropertyTypeAssignment( + version=1, + code='DEFAULT_EXPERIMENT.GRANT', + data_type='VARCHAR', + property_label='Grant', + description=""" + Grant + """, + mandatory=False, + show_in_edit_views=True, + section='General information', + ) + + experimental_goals = PropertyTypeAssignment( + version=1, + code='DEFAULT_EXPERIMENT.EXPERIMENTAL_GOALS', + data_type='MULTILINE_VARCHAR', + property_label='Goals', + description=""" + Goals of the experiment + """, + mandatory=False, + show_in_edit_views=True, + section='Experimental details', + ) diff --git a/bam_masterdata/datamodel/dataset_types.py b/bam_masterdata/datamodel/dataset_types.py new file mode 100644 index 0000000..e69de29 diff --git a/bam_masterdata/datamodel/property_types.py b/bam_masterdata/datamodel/property_types.py new file mode 100644 index 0000000..a1949f7 --- /dev/null +++ b/bam_masterdata/datamodel/property_types.py @@ -0,0 +1,11 @@ +from bam_masterdata.metadata.definitions import PropertyTypeDef + +Name = PropertyTypeDef( + version=1, + code='$NAME', + description=""" + Name + """, + property_label='Name', + data_type='VARCHAR', +) diff --git a/bam_masterdata/metadata/definitions.py b/bam_masterdata/metadata/definitions.py index a092527..713652f 100644 --- a/bam_masterdata/metadata/definitions.py +++ b/bam_masterdata/metadata/definitions.py @@ -100,6 +100,31 @@ def validate_code(cls, value: str) -> str: def strip_description(cls, value: str) -> str: return value.strip() + @property + def name(self) -> str: + return self.__class__.__name__ + + @property + def excel_name(self) -> str: + """ + Returns the name of the entity in a format suitable for the openBIS Excel file. + """ + name_map = { + 'CollectionTypeDef': 'EXPERIMENT_TYPE', + 'DataSetTypeDef': 'DATASET_TYPE', + 'ObjectTypeDef': 'SAMPLE_TYPE', + 'PropertyTypeDef': 'PROPERTY_TYPE', + 'VocabularyTypeDef': 'VOCABULARY_TYPE', + } + return name_map.get(self.name) + + @property + def excel_headers(self) -> list[str]: + """ + Returns the headers for the entity in a format suitable for the openBIS Excel file. + """ + return [k.capitalize().replace('_', ' ') for k in self.model_fields.keys()] + class BaseObjectTypeDef(EntityDef): """ diff --git a/bam_masterdata/metadata/entities.py b/bam_masterdata/metadata/entities.py index cecc9a4..bd26fbd 100644 --- a/bam_masterdata/metadata/entities.py +++ b/bam_masterdata/metadata/entities.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator from bam_masterdata.metadata.definitions import ( + CollectionTypeDef, ObjectTypeDef, PropertyTypeAssignment, VocabularyTerm, @@ -54,7 +55,7 @@ def to_dict(self) -> dict: class ObjectType(BaseEntity): """ Base class used to define object types. All object types must inherit from this class. The - object types are defined in the module `bam_masterdata/object_types.py`. + object types are defined in the module `bam_data_store/object_types.py`. The `ObjectType` class contains a list of all `properties` defined for a `ObjectType`, for internally represent the model in other formats (e.g., JSON or Excel). @@ -89,11 +90,18 @@ def model_validator_after_init(cls, data: Any) -> Any: return data + @property + def entity_type(self) -> str: + """ + Returns the entity type of the class as a string to speed up checks. + """ + return 'ObjectType' + class VocabularyType(BaseEntity): """ Base class used to define vocabulary types. All vocabulary types must inherit from this class. The - vocabulary types are defined in the module `bam_masterdata/vocabulary_types.py`. + vocabulary types are defined in the module `bam_data_store/vocabulary_types.py`. The `VocabularyType` class contains a list of all `terms` defined for a `VocabularyType`, for internally represent the model in other formats (e.g., JSON or Excel). @@ -128,10 +136,13 @@ def model_validator_after_init(cls, data: Any) -> Any: return data - -class PropertyType(BaseEntity): - pass + @property + def entity_type(self) -> str: + """ + Returns the entity type of the class as a string to speed up checks. + """ + return 'VocabularyType' class CollectionType(ObjectType): - pass + model_config = ConfigDict(ignored_types=(CollectionTypeDef, PropertyTypeAssignment)) diff --git a/bam_masterdata/utils/__init__.py b/bam_masterdata/utils/__init__.py new file mode 100644 index 0000000..d7dec42 --- /dev/null +++ b/bam_masterdata/utils/__init__.py @@ -0,0 +1 @@ +from .utils import delete_and_create_dir, import_module, listdir_py_modules diff --git a/bam_masterdata/utils/utils.py b/bam_masterdata/utils/utils.py new file mode 100644 index 0000000..a28b983 --- /dev/null +++ b/bam_masterdata/utils/utils.py @@ -0,0 +1,80 @@ +import glob +import importlib.util +import os +import shutil +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from structlog._config import BoundLoggerLazyProxy + + +def delete_and_create_dir(directory_path: str, logger: 'BoundLoggerLazyProxy') -> None: + """ + Deletes the directory at `directory_path` and creates a new one in the same path. + + Args: + directory_path (str): The directory path to delete and create the folder. + logger (BoundLoggerLazyProxy): The logger to log messages. + """ + if not directory_path: + logger.warning( + 'The `directory_path` is empty. Please, provide a proper input to the function.' + ) + return None + + if os.path.exists(directory_path): + try: + shutil.rmtree(directory_path) # ! careful with this line + except PermissionError: + logger.error( + f'Permission denied to delete the directory at {directory_path}.' + ) + return None + os.makedirs(directory_path) + + +def listdir_py_modules( + directory_path: str, logger: 'BoundLoggerLazyProxy' +) -> list[str]: + """ + Recursively goes through the `directory_path` and returns a list of all .py files that do not start with '_'. + + Args: + directory_path (str): The directory path to search through. + logger (BoundLoggerLazyProxy): The logger to log messages. + + Returns: + list[str]: A list of all .py files that do not start with '_' + """ + if not directory_path: + logger.warning( + 'The `directory_path` is empty. Please, provide a proper input to the function.' + ) + return [] + + # Use glob to find all .py files recursively + files = glob.glob(os.path.join(directory_path, '**', '*.py'), recursive=True) + if not files: + logger.info('No Python files found in the directory.') + return [] + + # Filter out files that start with '_' + # ! sorted in order to avoid using with OS sorting differently + return sorted([f for f in files if not os.path.basename(f).startswith('_')]) + + +def import_module(module_path: str) -> Any: + """ + Dynamically imports a module from the given file path. + + Args: + module_path (str): Path to the Python module file. + + Returns: + module: Imported module object. + """ + module_name = os.path.splitext(os.path.basename(module_path))[0] + spec = importlib.util.spec_from_file_location(module_name, module_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py new file mode 100644 index 0000000..3b2ea52 --- /dev/null +++ b/tests/utils/test_utils.py @@ -0,0 +1,96 @@ +import inspect +import os +import shutil + +import pytest + +from bam_masterdata.logger import logger +from bam_masterdata.utils import ( + delete_and_create_dir, + import_module, + listdir_py_modules, +) + + +@pytest.mark.parametrize( + 'directory_path, dir_exists', + [ + # `directory_path` is empty + ('', False), + # `directory_path` does not exist and it is created + ('tests/data/tmp/', True), + ], +) +def test_delete_and_create_dir( + cleared_log_storage: list, directory_path: str, dir_exists: bool +): + """Tests the `delete_and_delete_dir` function.""" + delete_and_create_dir(directory_path=directory_path, logger=logger) + assert dir_exists == os.path.exists(directory_path) + if dir_exists: + shutil.rmtree(directory_path) # ! careful with this line + else: + assert len(cleared_log_storage) == 1 + assert cleared_log_storage[0]['level'] == 'warning' + assert 'directory_path' in cleared_log_storage[0]['event'] + + +@pytest.mark.parametrize( + 'directory_path, listdir, log_message, log_message_level', + [ + # `directory_path` is empty + ( + '', + [], + 'The `directory_path` is empty. Please, provide a proper input to the function.', + 'warning', + ), + # No Python files found in the directory + ('./tests/data', [], 'No Python files found in the directory.', 'info'), + # Python files found in the directory + ( + './tests/utils', + [ + './tests/utils/test_utils.py', + ], + None, + None, + ), + ], +) +def test_listdir_py_modules( + cleared_log_storage: list, + directory_path: str, + listdir: list[str], + log_message: str, + log_message_level: str, +): + """Tests the `listdir_py_modules` function.""" + result = listdir_py_modules(directory_path=directory_path, logger=logger) + if not listdir: + assert cleared_log_storage[0]['event'] == log_message + assert cleared_log_storage[0]['level'] == log_message_level + # when testing locally and with Github actions the order of the files is different --> `result` is sorted, so we also sort `listdir` + assert result == sorted(listdir) + + +@pytest.mark.skip( + reason='Very annoying to test this function, as any module we can use to be tested will change a lot in the future.' +) +def test_import_module(): + """Tests the `import_module` function.""" + # testing only the possitive results + module = import_module('./bam_data_store/utils/utils.py') + assert [f[0] for f in inspect.getmembers(module, inspect.ismodule)] == [ + 'glob', + 'importlib', + 'os', + 'shutil', + 'sys', + ] + assert [f[0] for f in inspect.getmembers(module, inspect.isclass)] == [] + assert [f[0] for f in inspect.getmembers(module, inspect.isfunction)] == [ + 'delete_and_create_dir', + 'import_module', + 'listdir_py_modules', + ]