From 435d03b0ddc0ad05bfbf78c91b2316965584e7ff Mon Sep 17 00:00:00 2001 From: Diego Prada Date: Sat, 21 Oct 2023 20:40:03 -0600 Subject: [PATCH] in process --- .../digestion/argument/compression.py | 12 + .../digestion/argument/compression_opts.py | 12 + .../digestion/argument/float_precision.py | 12 + .../digestion/argument/int_precision.py | 12 + molsysmt/form/file_msmh5/__init__.py | 2 + .../to_molsysmt_MSMH5FileHandler.py | 8 + .../form/file_msmh5/to_molsysmt_Topology.py | 9 +- .../molsysmt_MSMH5FileHandler/__init__.py | 31 ++ .../form/molsysmt_MSMH5FileHandler/add.py | 8 + .../append_structures.py | 8 + .../molsysmt_MSMH5FileHandler/attributes.py | 39 +++ .../form/molsysmt_MSMH5FileHandler/copy.py | 16 + .../form/molsysmt_MSMH5FileHandler/extract.py | 27 ++ .../form/molsysmt_MSMH5FileHandler/get.py | 242 ++++++++++++++ .../has_attribute.py | 11 + .../form/molsysmt_MSMH5FileHandler/is_form.py | 8 + .../molsysmt_MSMH5FileHandler/iterators.py | 25 ++ .../form/molsysmt_MSMH5FileHandler/merge.py | 8 + .../form/molsysmt_MSMH5FileHandler/set.py | 1 + .../to_molsysmt_MolSys.py | 7 + .../to_molsysmt_Structures.py | 6 + .../to_molsysmt_Topology.py | 107 ++++++ .../to_nglview_NGLWidget.py | 7 + .../form/molsysmt_Topology/to_file_msmh5.py | 252 ++++++++++---- molsysmt/native/msmh5_file_handler.py | 14 +- molsysmt/native/topology.py | 18 +- molsysmt/native/topology2.py | 309 ++++++++++++++++++ 27 files changed, 1128 insertions(+), 83 deletions(-) create mode 100644 molsysmt/_private/digestion/argument/compression.py create mode 100644 molsysmt/_private/digestion/argument/compression_opts.py create mode 100644 molsysmt/_private/digestion/argument/float_precision.py create mode 100644 molsysmt/_private/digestion/argument/int_precision.py create mode 100644 molsysmt/form/file_msmh5/to_molsysmt_MSMH5FileHandler.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/__init__.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/add.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/append_structures.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/attributes.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/copy.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/extract.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/get.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/has_attribute.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/is_form.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/iterators.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/merge.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/set.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_MolSys.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Topology.py create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/to_nglview_NGLWidget.py create mode 100644 molsysmt/native/topology2.py diff --git a/molsysmt/_private/digestion/argument/compression.py b/molsysmt/_private/digestion/argument/compression.py new file mode 100644 index 000000000..1736bdc10 --- /dev/null +++ b/molsysmt/_private/digestion/argument/compression.py @@ -0,0 +1,12 @@ +from molsysmt._private.exceptions import ArgumentError + +def digest_compression(compression, caller=None): + + if isinstance(compression, str): + + if caller.endswith('to_file_msmh5'): + if compression in ['gzip', 'lzf', 'szip']: + return compression + + raise ArgumentError('compression', value=compression, caller=caller, message=None) + diff --git a/molsysmt/_private/digestion/argument/compression_opts.py b/molsysmt/_private/digestion/argument/compression_opts.py new file mode 100644 index 000000000..a3497d4da --- /dev/null +++ b/molsysmt/_private/digestion/argument/compression_opts.py @@ -0,0 +1,12 @@ +from molsysmt._private.exceptions import ArgumentError + +def digest_compression_opts(compression_opts, caller=None): + + if isinstance(compression_opts, int): + + if caller.endswith('to_file_msmh5'): + if 0<=compression_opts<=9: + return compression_opts + + raise ArgumentError('compression_opts', value=compression_opts, caller=caller, message=None) + diff --git a/molsysmt/_private/digestion/argument/float_precision.py b/molsysmt/_private/digestion/argument/float_precision.py new file mode 100644 index 000000000..311644d08 --- /dev/null +++ b/molsysmt/_private/digestion/argument/float_precision.py @@ -0,0 +1,12 @@ +from molsysmt._private.exceptions import ArgumentError + +def digest_float_precision(float_precision, caller=None): + + if isinstance(float_precision, str): + + if caller.endswith('to_file_msmh5'): + if float_precision in ['single', 'double']: + return float_precision + + raise ArgumentError('float_precision', value=float_precision, caller=caller, message=None) + diff --git a/molsysmt/_private/digestion/argument/int_precision.py b/molsysmt/_private/digestion/argument/int_precision.py new file mode 100644 index 000000000..d0235ee54 --- /dev/null +++ b/molsysmt/_private/digestion/argument/int_precision.py @@ -0,0 +1,12 @@ +from molsysmt._private.exceptions import ArgumentError + +def digest_int_precision(int_precision, caller=None): + + if isinstance(int_precision, str): + + if caller.endswith('to_file_msmh5'): + if int_precision in ['single', 'double']: + return int_precision + + raise ArgumentError('int_precision', value=int_precision, caller=caller, message=None) + diff --git a/molsysmt/form/file_msmh5/__init__.py b/molsysmt/form/file_msmh5/__init__.py index 75ef6e729..80ab6eeb7 100644 --- a/molsysmt/form/file_msmh5/__init__.py +++ b/molsysmt/form/file_msmh5/__init__.py @@ -16,6 +16,7 @@ from .set import * from .iterators import StructuresIterator, TopologyIterator +from .to_molsysmt_MSMH5FileHandler import to_molsysmt_MSMH5FileHandler from .to_molsysmt_MolSys import to_molsysmt_MolSys from .to_molsysmt_Topology import to_molsysmt_Topology from .to_molsysmt_Structures import to_molsysmt_Structures @@ -23,6 +24,7 @@ _convert_to={ 'file:msmh5': extract, + 'molsysmt.MSMH5FileHandler': to_molsysmt_MSMH5FileHandler, 'molsysmt.MolSys': to_molsysmt_MolSys, 'molsysmt.Topology': to_molsysmt_Topology, 'molsysmt.Structures': to_molsysmt_Structures, diff --git a/molsysmt/form/file_msmh5/to_molsysmt_MSMH5FileHandler.py b/molsysmt/form/file_msmh5/to_molsysmt_MSMH5FileHandler.py new file mode 100644 index 000000000..f041b9fbd --- /dev/null +++ b/molsysmt/form/file_msmh5/to_molsysmt_MSMH5FileHandler.py @@ -0,0 +1,8 @@ +from molsysmt._private.digestion import digest + +@digest(form='file:msmh5') +def to_molsysmt_MSMH5FileHandler(item, atom_indices='all'): + + from molsysmt.native import MSMH5FileHandler + + return MSMH5FileHandler(item, io_mode='r') diff --git a/molsysmt/form/file_msmh5/to_molsysmt_Topology.py b/molsysmt/form/file_msmh5/to_molsysmt_Topology.py index 57c694d06..b6c0b05a6 100644 --- a/molsysmt/form/file_msmh5/to_molsysmt_Topology.py +++ b/molsysmt/form/file_msmh5/to_molsysmt_Topology.py @@ -3,4 +3,11 @@ @digest(form='file:msmh5') def to_molsysmt_Topology(item, atom_indices='all'): - raise NotImplementedError + from . import to_molsysmt_MSMH5FileHandler + from ..molsysmt_MSMH5FileHandler import to_molsysmt_Topology as molsysmt_MSMH5FileHandler_to_molsysmt_Topology + + handler = to_molsysmt_MSMH5FileHandler(item) + tmp_item = molsysmt_MSMH5FileHandler_to_molsysmt_Topology(handler, atom_indices=atom_indices) + handler.close() + + return tmp_item diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/__init__.py b/molsysmt/form/molsysmt_MSMH5FileHandler/__init__.py new file mode 100644 index 000000000..9ac963115 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/__init__.py @@ -0,0 +1,31 @@ +form_name = 'molsysmt.MSMH5FileHandler' +form_type = 'class' +form_info = ["", ""] + +from .is_form import is_form + +from .attributes import attributes +from .has_attribute import has_attribute + +from .extract import extract +from .copy import copy +from .add import add +from .merge import merge +from .append_structures import append_structures +from .get import * +from .set import * +from .iterators import StructuresIterator, TopologyIterator + +from .to_molsysmt_MolSys import to_molsysmt_MolSys +from .to_molsysmt_Topology import to_molsysmt_Topology +from .to_molsysmt_Structures import to_molsysmt_Structures +from .to_nglview_NGLWidget import to_nglview_NGLWidget + +_convert_to={ + 'molsysmt.MSMH5FileHandler': extract, + 'molsysmt.MolSys': to_molsysmt_MolSys, + 'molsysmt.Topology': to_molsysmt_Topology, + 'molsysmt.Structures': to_molsysmt_Structures, + 'nglview.NGLWidget': to_nglview_NGLWidget, + } + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/add.py b/molsysmt/form/molsysmt_MSMH5FileHandler/add.py new file mode 100644 index 000000000..84bb598a0 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/add.py @@ -0,0 +1,8 @@ +from molsysmt._private.exceptions import NotImplementedMethodError +from molsysmt._private.digestion import digest + +@digest(form='molsysmt.MSMH5FileHandler', to_form='molsysmt.MSMH5FileHandler') +def add(to_item, item, atom_indices='all', structure_indices='all'): + + raise NotImplementedMethodError() + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/append_structures.py b/molsysmt/form/molsysmt_MSMH5FileHandler/append_structures.py new file mode 100644 index 000000000..228cd14b3 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/append_structures.py @@ -0,0 +1,8 @@ +from molsysmt._private.exceptions import NotImplementedMethodError +from molsysmt._private.digestion import digest + +@digest(form='molsysmt.MSMH5FileHandler') +def append_structures(item, structure_id=None, time=None, coordinates=None, box=None): + + raise NotImplementedMethodError() + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/attributes.py b/molsysmt/form/molsysmt_MSMH5FileHandler/attributes.py new file mode 100644 index 000000000..6e20367b8 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/attributes.py @@ -0,0 +1,39 @@ +from molsysmt.attribute.attributes import attributes as _all_attributes + +attributes = {ii:False for ii in _all_attributes} + +attributes['atom_index'] = True +attributes['atom_id'] = True +attributes['atom_name'] = True +attributes['atom_type'] = True +attributes['bond_index'] = True +attributes['bond_id'] = True +attributes['bond_type'] = True +attributes['bond_order'] = True +attributes['group_index'] = True +attributes['group_id'] = True +attributes['group_name'] = True +attributes['group_type'] = True +attributes['component_index'] = True +attributes['component_id'] = True +attributes['component_name'] = True +attributes['component_type'] = True +attributes['molecule_index'] = True +attributes['molecule_id'] = True +attributes['molecule_name'] = True +attributes['molecule_type'] = True +attributes['chain_index'] = True +attributes['chain_id'] = True +attributes['chain_name'] = True +attributes['chain_type'] = True +attributes['entity_index'] = True +attributes['entity_id'] = True +attributes['entity_name'] = True +attributes['entity_type'] = True +attributes['coordinates'] = True +attributes['velocities'] = True +attributes['box'] = True +attributes['time'] = True +attributes['structure_id'] = True + +del(_all_attributes) diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/copy.py b/molsysmt/form/molsysmt_MSMH5FileHandler/copy.py new file mode 100644 index 000000000..48ed23ad9 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/copy.py @@ -0,0 +1,16 @@ +from molsysmt._private.exceptions import NotImplementedMethodError +from molsysmt._private.digestion import digest +from molsysmt._private.variables import is_all + +@digest(form='molsysmt.MSMH5FileHandler') +def copy(item, output_filename=None): + + if output_filename is None: + output_filename = item + + from shutil import copy as copy_file + copy_file(item, output_filename) + tmp_item = output_filename + + return tmp_item + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/extract.py b/molsysmt/form/molsysmt_MSMH5FileHandler/extract.py new file mode 100644 index 000000000..bf83b41e1 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/extract.py @@ -0,0 +1,27 @@ +from molsysmt._private.exceptions import NotImplementedMethodError +from molsysmt._private.digestion import digest +from molsysmt._private.variables import is_all + +@digest(form='molsysmt.MSMH5FileHandler') +def extract(item, atom_indices='all', structure_indices='all', output_filename=None, copy_if_all=True): + + if output_filename is None: + output_filename = item + + if is_all(atom_indices) and is_all(structure_indices): + + if copy_if_all or (output_filename!=item): + + from shutil import copy as copy_file + copy_file(item, output_filename) + tmp_item = output_filename + + else: + + tmp_item = item + else: + + raise NotImplementedMethodError() + + return tmp_item + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/get.py b/molsysmt/form/molsysmt_MSMH5FileHandler/get.py new file mode 100644 index 000000000..73921f4a5 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/get.py @@ -0,0 +1,242 @@ +####################################################################################### +########### THE FOLLOWING LINES NEED TO BE CUSTOMIZED FOR EVERY CLASS ################ +####################################################################################### + +from molsysmt._private.execfile import execfile +from molsysmt._private.exceptions import NotImplementedMethodError, NotWithThisFormError +from molsysmt._private.digestion import digest + +form='molsysmt.MSMH5FileHandler' + + +## From atom + +@digest(form=form) +def get_atom_id_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_atom_name_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_atom_type_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_group_index_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_component_index_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_chain_index_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_molecule_index_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_entity_index_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_inner_bonded_atoms_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_n_inner_bonds_from_atom(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_coordinates_from_atom(item, indices='all', structure_indices='all'): + + raise NotImplementedError + +## From group + +@digest(form=form) +def get_group_id_from_group(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_group_name_from_group(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_group_type_from_group(item, indices='all'): + + raise NotImplementedError + +## From component + +@digest(form=form) +def get_component_id_from_component(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_component_name_from_component(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_component_type_from_component(item, indices='all'): + + raise NotImplementedError + +## From molecule + +@digest(form=form) +def get_molecule_id_from_molecule(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_molecule_name_from_molecule(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_molecule_type_from_molecule(item, indices='all'): + + raise NotImplementedError + + +## From chain + +@digest(form=form) +def get_chain_id_from_chain(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_chain_name_from_chain(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_chain_type_from_chain(item, indices='all'): + + raise NotImplementedError + + +## From entity + +@digest(form=form) +def get_entity_id_from_entity(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_entity_name_from_entity(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_entity_type_from_entity(item, indices='all'): + + raise NotImplementedError + + +## From system + +@digest(form=form) +def get_n_atoms_from_system(item): + + raise NotImplementedError + +@digest(form=form) +def get_n_groups_from_system(item): + + raise NotImplementedError + +@digest(form=form) +def get_n_components_from_system(item): + + raise NotImplementedError + +@digest(form=form) +def get_n_chains_from_system(item): + + raise NotImplementedError + +@digest(form=form) +def get_n_molecules_from_system(item): + + raise NotImplementedError + +@digest(form=form) +def get_n_entities_from_system(item): + + raise NotImplementedError + +@digest(form=form) +def get_n_bonds_from_system(item): + + raise NotImplementedError + +@digest(form=form) +def get_n_structures_from_system(item, structure_indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_box_from_system(item, structure_indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_time_from_system(item, structure_indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_structure_id_from_system(item, structure_indices='all'): + + raise NotImplementedError + + +## From bond + +@digest(form=form) +def get_bond_order_from_bond(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_bond_type_from_bond(item, indices='all'): + + raise NotImplementedError + +@digest(form=form) +def get_bonded_atoms_from_bond(item, indices='all'): + + raise NotImplementedError + + +####################################################################################### +######### DO NOT TOUCH THE FOLLOWING LINES, JUST INCLUDE THEM AS THEY ARE ############# +####################################################################################### + +from os import path +this_folder = path.dirname(path.abspath(__file__)) +common_get = path.join(this_folder, '../../_private/common_get.py') +execfile(common_get, globals(), locals()) +del(path, this_folder, common_get) + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/has_attribute.py b/molsysmt/form/molsysmt_MSMH5FileHandler/has_attribute.py new file mode 100644 index 000000000..64675b499 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/has_attribute.py @@ -0,0 +1,11 @@ +from molsysmt._private.digestion import digest + +@digest(form='molsysmt.MSMH5FileHandler') +def has_attribute(molecular_system, attribute): + + from . import attributes + + output = attributes[attribute] + + return output + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/is_form.py b/molsysmt/form/molsysmt_MSMH5FileHandler/is_form.py new file mode 100644 index 000000000..b6ce7de28 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/is_form.py @@ -0,0 +1,8 @@ + +def is_form(item): + + item_fullname = item.__class__.__module__+'.'+item.__class__.__name__ + output = (item_fullname == 'molsysmt.native.msmh5_file_handler.MSMH5FileHandler') + + return output + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/iterators.py b/molsysmt/form/molsysmt_MSMH5FileHandler/iterators.py new file mode 100644 index 000000000..2c24555c2 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/iterators.py @@ -0,0 +1,25 @@ +from molsysmt._private.exceptions import NotImplementedIteratorError + +class StructuresIterator(): + + def __init__(self, molecular_system, atom_indices='all', start=0, interval=1, stop=None, chunk=1, structure_indices=None): + pass + + def __iter__(self): + return self + + def __next__(self): + raise NotImplementedIteratorError + +class TopologyIterator(): + + def __init__(self, molecular_system): + pass + + def __iter__(self): + return self + + def __next__(self): + raise NotImplementedIteratorError + + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/merge.py b/molsysmt/form/molsysmt_MSMH5FileHandler/merge.py new file mode 100644 index 000000000..ee1f45d4b --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/merge.py @@ -0,0 +1,8 @@ +from molsysmt._private.exceptions import NotImplementedMethodError +from molsysmt._private.digestion import digest + +@digest(form='molsysmt.MSMH5FileHandler') +def merge(items, atom_indices='all', structure_indices='all'): + + raise NotImplementedMethodError() + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/set.py b/molsysmt/form/molsysmt_MSMH5FileHandler/set.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/set.py @@ -0,0 +1 @@ + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_MolSys.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_MolSys.py new file mode 100644 index 000000000..522aa372a --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_MolSys.py @@ -0,0 +1,7 @@ +from molsysmt._private.digestion import digest + +@digest(form='molsysmt.MSMH5FileHandler') +def to_molsysmt_MolSys(item, atom_indices='all', structure_indices='all'): + + raise NotImplementedError + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py new file mode 100644 index 000000000..4685544d3 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py @@ -0,0 +1,6 @@ +from molsysmt._private.digestion import digest + +@digest(form='molsysmt.MSMH5FileHandler') +def to_molsysmt_Structures(item, atom_indices='all', structure_indices='all'): + + raise NotImplementedError diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Topology.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Topology.py new file mode 100644 index 000000000..35701cb08 --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Topology.py @@ -0,0 +1,107 @@ +from molsysmt._private.digestion import digest +import pandas as pd +import numpy as np + +@digest(form='molsysmt.MSMH5FileHandler') +def to_molsysmt_Topology(item, atom_indices='all'): + + from molsysmt.native import Topology + + topology_ds = item.file['topology'] + + n_atoms = topology_ds['atoms'].attrs['n_atoms'] + n_bonds = topology_ds['bonds'].attrs['n_bonds'] + + tmp_item = Topology() + + # Atoms + + tmp_item.atoms_dataframe['atom_index']=np.arange(n_atoms) + tmp_item.atoms_dataframe['atom_id']=topology_ds['atoms']['id'][:] + tmp_item.atoms_dataframe['atom_type']=topology_ds['atoms']['type'].asstr()[:] + tmp_item.atoms_dataframe['atom_name']=topology_ds['atoms']['name'].asstr()[:] + tmp_item.atoms_dataframe['group_index']=topology_ds['atoms']['group_index'][:] + + # Groups + + groups_df = pd.DataFrame({ + 'group_id':topology_ds['groups']['id'][:], + 'group_name':topology_ds['groups']['name'].asstr()[:], + 'group_type':topology_ds['groups']['type'].asstr()[:], + 'component_index':topology_ds['groups']['component_index'][:], + }) + + aux_df = groups_df.iloc[tmp_item.atoms_dataframe['group_index'].to_numpy()] + aux_df.reset_index(inplace=True, drop=True) + + tmp_item.atoms_dataframe['group_id']=aux_df['group_id'] + tmp_item.atoms_dataframe['group_name']=aux_df['group_name'] + tmp_item.atoms_dataframe['group_type']=aux_df['group_type'] + tmp_item.atoms_dataframe['component_index']=aux_df['component_index'] + + del(groups_df, aux_df) + + # Components + + components_df = pd.DataFrame({ + 'component_id':topology_ds['components']['id'][:], + 'component_name':topology_ds['components']['name'].asstr()[:], + 'component_type':topology_ds['components']['type'].asstr()[:], + 'molecule_index':topology_ds['components']['molecule_index'][:], + }) + + aux_df = components_df.iloc[tmp_item.atoms_dataframe['component_index'].to_numpy()] + aux_df.reset_index(inplace=True, drop=True) + + tmp_item.atoms_dataframe['component_id']=aux_df['component_id'] + tmp_item.atoms_dataframe['component_name']=aux_df['component_name'] + tmp_item.atoms_dataframe['component_type']=aux_df['component_type'] + tmp_item.atoms_dataframe['molecule_index']=aux_df['molecule_index'] + + del(components_df, aux_df) + + # Molecules + + molecules_df = pd.DataFrame({ + 'molecule_id':topology_ds['molecules']['id'][:], + 'molecule_name':topology_ds['molecules']['name'].asstr()[:], + 'molecule_type':topology_ds['molecules']['type'].asstr()[:], + 'entity_index':topology_ds['molecules']['entity_index'][:], + }) + + aux_df = molecules_df.iloc[tmp_item.atoms_dataframe['molecule_index'].to_numpy()] + aux_df.reset_index(inplace=True, drop=True) + + tmp_item.atoms_dataframe['molecule_id']=aux_df['molecule_id'] + tmp_item.atoms_dataframe['molecule_name']=aux_df['molecule_name'] + tmp_item.atoms_dataframe['molecule_type']=aux_df['molecule_type'] + tmp_item.atoms_dataframe['entity_index']=aux_df['entity_index'] + + del(molecules_df, aux_df) + + # Entities + + entities_df = pd.DataFrame({ + 'entity_id':topology_ds['entities']['id'][:], + 'entity_name':topology_ds['entities']['name'].asstr()[:], + 'entity_type':topology_ds['entities']['type'].asstr()[:], + }) + + aux_df = entities_df.iloc[tmp_item.atoms_dataframe['entity_index'].to_numpy()] + aux_df.reset_index(inplace=True, drop=True) + + tmp_item.atoms_dataframe['entity_id']=aux_df['entity_id'] + tmp_item.atoms_dataframe['entity_name']=aux_df['entity_name'] + tmp_item.atoms_dataframe['entity_type']=aux_df['entity_type'] + + del(entities_df, aux_df) + + # Bonds + + tmp_item.bonds_dataframe['atom1_index']=topology_ds['bonds']['atom1_index'][:] + tmp_item.bonds_dataframe['atom2_index']=topology_ds['bonds']['atom2_index'][:] + tmp_item.bonds_dataframe['type']=topology_ds['bonds']['type'].asstr()[:] + tmp_item.bonds_dataframe['order']=topology_ds['bonds']['order'].asstr()[:] + + return tmp_item + diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_nglview_NGLWidget.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_nglview_NGLWidget.py new file mode 100644 index 000000000..8a877cf0e --- /dev/null +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_nglview_NGLWidget.py @@ -0,0 +1,7 @@ +from molsysmt._private.digestion import digest + +@digest(form='molsysmt.MSMH5FileHandler') +def to_nglview_NGLWidget(item, atom_indices='all', structure_indices='all'): + + raise NotImplementedError + diff --git a/molsysmt/form/molsysmt_Topology/to_file_msmh5.py b/molsysmt/form/molsysmt_Topology/to_file_msmh5.py index 4670a4051..fbea93aa3 100644 --- a/molsysmt/form/molsysmt_Topology/to_file_msmh5.py +++ b/molsysmt/form/molsysmt_Topology/to_file_msmh5.py @@ -47,116 +47,226 @@ def _add_topology_to_msmh5(item, file, atom_indices='all'): if not file_is_msmh5: raise ValueError - n_atoms = item.atoms_dataframe.shape[0] + # Atoms - n_atoms = item.atoms_dataframe.shape[0] + atoms_df = item.atoms_dataframe - atom_index_array = item.atoms_dataframe["atom_index"].to_numpy() - atom_name_array = item.atoms_dataframe["atom_name"].to_numpy() - atom_id_array = item.atoms_dataframe["atom_id"].to_numpy() - atom_type_array = item.atoms_dataframe["atom_type"].to_numpy() + n_atoms = atoms_df.shape[0] - group_index_array = item.atoms_dataframe["group_index"].to_numpy() - group_name_array = item.atoms_dataframe["group_name"].to_numpy() - group_id_array = item.atoms_dataframe["group_id"].to_numpy() - group_type_array = item.atoms_dataframe["group_type"].to_numpy() + atoms = file['topology']['atoms'] - component_index_array = item.atoms_dataframe["component_index"].to_numpy() - component_name_array = item.atoms_dataframe["component_name"].to_numpy() - component_id_array = item.atoms_dataframe["component_id"].to_numpy() - component_type_array = item.atoms_dataframe["component_type"].to_numpy() + atoms.attrs['n_atoms'] = n_atoms - group_index_array = item.atoms_dataframe["group_index"].to_numpy() - group_name_array = item.atoms_dataframe["group_name"].to_numpy() - group_id_array = item.atoms_dataframe["group_id"].to_numpy() - group_type_array = item.atoms_dataframe["group_type"].to_numpy() + atoms['id'].resize((n_atoms,)) + atoms['name'].resize((n_atoms,)) + atoms['type'].resize((n_atoms,)) + atoms['id'][:] = atoms_df['atom_id'].to_numpy(dtype=int) + atoms['name'][:] = atoms_df['atom_name'].to_numpy(dtype=str) + atoms['type'][:] = atoms_df['atom_type'].to_numpy(dtype=str) + # Groups - chain_index_array = item.atoms_dataframe["chain_index"].to_numpy() - chain_name_array = item.atoms_dataframe["chain_name"].to_numpy() - chain_id_array = item.atoms_dataframe["chain_id"].to_numpy() - chain_type_array = item.atoms_dataframe["chain_type"].to_numpy() + groups_df = atoms_df[['group_index', 'group_id', 'group_name', 'group_type', 'component_index']].drop_duplicates() - bonds_atom1 = item.bonds_dataframe["atom1_index"].to_numpy() - bonds_atom2 = item.bonds_dataframe["atom2_index"].to_numpy() + n_groups = groups_df.shape[0] + if n_groups==1: + if groups_df['group_index'].iloc[0] == None: + n_groups = 0 - for ii in range(n_atoms): + groups = file['topology']['groups'] + groups.attrs['n_groups'] = n_groups + if n_groups > 0: + if all(groups_df['group_id'].unique()==[None]): + groups_df['group_id']=groups_df['group_index'] + if all(groups_df['group_name'].unique()==[None]): + groups_df['group_name']='UNK' - aux_indices = item.atoms_dataframe['group_index'].unique() - where_not_None = np.where(aux_indices!=None) - aux_indices = aux_indices[where_not_None] - n_groups = aux_indices.shape[0] + if all(groups_df['group_type'].unique()==[None]): + groups_df['group_type']='UNK' - aux_indices = item.atoms_dataframe['component_index'].unique() - where_not_None = np.where(aux_indices!=None) - aux_indices = aux_indices[where_not_None] - n_components = aux_indices.shape[0] + atoms['group_index'].resize((n_atoms,)) + groups['id'].resize((n_groups,)) + groups['name'].resize((n_groups,)) + groups['type'].resize((n_groups,)) - aux_indices = item.atoms_dataframe['molecule_index'].unique() - where_not_None = np.where(aux_indices!=None) - aux_indices = aux_indices[where_not_None] - n_molecules = aux_indices.shape[0] + atoms['group_index'][:] = atoms_df['group_index'].to_numpy(dtype=int) + groups['id'][:] = groups_df['group_id'].to_numpy(dtype=int) + groups['name'][:] = groups_df['group_name'].to_numpy(dtype=str) + groups['type'][:] = groups_df['group_type'].to_numpy(dtype=str) - aux_indices = item.atoms_dataframe['entity_index'].unique() - where_not_None = np.where(aux_indices!=None) - aux_indices = aux_indices[where_not_None] - n_entities = aux_indices.shape[0] + # Components - aux_indices = item.atoms_dataframe['chain_index'].unique() - where_not_None = np.where(aux_indices!=None) - aux_indices = aux_indices[where_not_None] - n_chains = aux_indices.shape[0] + components_df = atoms_df[['component_index', 'component_id', 'component_name', 'component_type', 'molecule_index']].drop_duplicates() - # Atoms + n_components = components_df.shape[0] - atoms = file['topology']['atoms'] + if n_components==1: + if components_df['component_index'].iloc[0] == None: + n_components = 0 - atoms.attrs['n_atoms'] = n_atoms + components = file['topology']['components'] + components.attrs['n_components'] = n_components - atoms['id'].resize((n_atoms,)) - atoms['name'].resize((n_atoms,)) - atoms['type'].resize((n_atoms,)) + if n_components > 0: - atoms['id'][:] = item.atoms_dataframe['atom_id'].to_numpy() - atoms['name'][:] = item.atoms_dataframe['atom_name'].to_numpy() - atoms['type'][:] = item.atoms_dataframe['atom_type'].to_numpy() + if all(components_df['component_id'].unique()==[None]): + components_df['component_id']=components_df['component_index'] - # Groups + if all(components_df['component_name'].unique()==[None]): + components_df['component_name']='UNK' - if n_groups > 0: + if all(components_df['component_type'].unique()==[None]): + components_df['component_type']='UNK' - groups = file['topology']['groups'] + groups['component_index'].resize((n_groups,)) + components['id'].resize((n_components,)) + components['name'].resize((n_components,)) + components['type'].resize((n_components,)) - groups.attrs['n_groups'] = n_groups + groups['component_index'][:] = groups_df['component_index'].to_numpy(dtype=int) + components['id'][:] = components_df['component_id'].to_numpy(dtype=int) + components['name'][:] = components_df['component_name'].to_numpy(dtype=str) + components['type'][:] = components_df['component_type'].to_numpy(dtype=str) - atoms['group_index'].resize((n_atoms,)) - groups['id'].resize((n_groups,)) - groups['name'].resize((n_groups,)) - groups['type'].resize((n_groups,)) + # Molecules - atoms['group_index'][:] = item.atoms_dataframe['group_index'].to_numpy() - groups['id'][:] = item.atoms_dataframe['group_id'].to_numpy() - groups['name'][:] = item.atoms_dataframe['group_name'].to_numpy() - groups['type'][:] = item.atoms_dataframe['group_type'].to_numpy() + molecules_df = atoms_df[['molecule_index', 'molecule_id', 'molecule_name', 'molecule_type', 'entity_index']].drop_duplicates() - # Components + n_molecules = molecules_df.shape[0] - if n_components > 0: + if n_molecules==1: + if molecules_df['molecule_index'].iloc[0] == None: + n_molecules = 0 + + molecules = file['topology']['molecules'] + molecules.attrs['n_molecules'] = n_molecules + + if n_molecules > 0: + + if all(molecules_df['molecule_id'].unique()==[None]): + molecules_df['molecule_id']=molecules_df['molecule_index'] + + if all(molecules_df['molecule_name'].unique()==[None]): + molecules_df['molecule_name']='UNK' + + if all(molecules_df['molecule_type'].unique()==[None]): + molecules_df['molecule_type']='UNK' + + components['molecule_index'].resize((n_components,)) + molecules['id'].resize((n_molecules,)) + molecules['name'].resize((n_molecules,)) + molecules['type'].resize((n_molecules,)) + + components['molecule_index'][:] = components_df['molecule_index'].to_numpy(dtype=int) + molecules['id'][:] = molecules_df['molecule_id'].to_numpy(dtype=int) + molecules['name'][:] = molecules_df['molecule_name'].to_numpy(dtype=str) + molecules['type'][:] = molecules_df['molecule_type'].to_numpy(dtype=str) + + # Entities + + entities_df = atoms_df[['entity_index', 'entity_id', 'entity_name', 'entity_type']].drop_duplicates() + + n_entities = entities_df.shape[0] + + if n_entities==1: + if entities_df['entity_index'].iloc[0] == None: + n_entities = 0 + + entities = file['topology']['entities'] + entities.attrs['n_entities'] = n_entities + + if n_entities > 0: + + if all(entities_df['entity_id'].unique()==[None]): + entities_df['entity_id']=entities_df['entity_index'] + + if all(entities_df['entity_name'].unique()==[None]): + entities_df['entity_name']='UNK' + + if all(entities_df['entity_type'].unique()==[None]): + entities_df['entity_type']='UNK' + + molecules['entity_index'].resize((n_molecules,)) + entities['id'].resize((n_entities,)) + entities['name'].resize((n_entities,)) + entities['type'].resize((n_entities,)) + + molecules['entity_index'][:] = molecules_df['entity_index'].to_numpy(dtype=int) + entities['id'][:] = entities_df['entity_id'].to_numpy(dtype=int) + entities['name'][:] = entities_df['entity_name'].to_numpy(dtype=str) + entities['type'][:] = entities_df['entity_type'].to_numpy(dtype=str) - groups['component_index'].resize((n_groups,)) - groups['component_index'][:] = item.atoms_dataframe['component_index'].to_numpy() # Chains + chains_df = atoms_df[['chain_index', 'chain_id', 'chain_name', 'chain_type']].drop_duplicates() + + n_chains = chains_df.shape[0] + + if n_chains==1: + if chains_df['chain_index'].iloc[0] == None: + n_chains = 0 + + chains = file['topology']['chains'] + chains.attrs['n_chains'] = n_chains + if n_chains > 0: + if all(chains_df['chain_id'].unique()==[None]): + chains_df['chain_id']=chains_df['chain_index'] + + if chains_df['chain_id'].dtype == 'O': + chains_df['chain_id']=chains_df['chain_index'] + + if all(chains_df['chain_name'].unique()==[None]): + chains_df['chain_name']='UNK' + + if all(chains_df['chain_type'].unique()==[None]): + chains_df['chain_type']='UNK' + atoms['chain_index'].resize((n_atoms,)) - atoms['chain_index'][:] = item.atoms_dataframe['chain_index'].to_numpy() + chains['id'].resize((n_chains,)) + chains['name'].resize((n_chains,)) + chains['type'].resize((n_chains,)) + + atoms['chain_index'][:] = atoms_df['chain_index'].to_numpy(dtype=int) + chains['id'][:] = chains_df['chain_id'].to_numpy(dtype=int) + chains['name'][:] = chains_df['chain_name'].to_numpy(dtype=str) + chains['type'][:] = chains_df['chain_type'].to_numpy(dtype=str) + + del(groups_df, components_df, molecules_df, entities_df, chains_df) + + # Bonds + + bonds_df = item.bonds_dataframe + + n_bonds = bonds_df.shape[0] + + bonds = file['topology']['bonds'] + bonds.attrs['n_bonds'] = n_bonds + + if n_bonds>0: + + if all(bonds_df['order'].unique()==[None]): + bonds_df['order']='UNK' + + if all(bonds_df['type'].unique()==[None]): + bonds_df['type']='UNK' + + bonds['atom1_index'].resize((n_bonds,)) + bonds['atom2_index'].resize((n_bonds,)) + bonds['order'].resize((n_bonds,)) + bonds['type'].resize((n_bonds,)) + + bonds['atom1_index'][:] = bonds_df['atom1_index'].to_numpy(dtype=int) + bonds['atom2_index'][:] = bonds_df['atom2_index'].to_numpy(dtype=int) + bonds['order'][:] = bonds_df['order'].to_numpy(dtype=str) + bonds['type'][:] = bonds_df['type'].to_numpy(dtype=str) if needs_to_be_closed: diff --git a/molsysmt/native/msmh5_file_handler.py b/molsysmt/native/msmh5_file_handler.py index 24cfeee39..a2b12d266 100644 --- a/molsysmt/native/msmh5_file_handler.py +++ b/molsysmt/native/msmh5_file_handler.py @@ -145,13 +145,13 @@ def _new_msmfile(filename, creator='MolSysMT', compression="gzip", compression_o structures.attrs['time_unit']='ps' structures.attrs['energy_unit']='kJ/mol' - bonds.create_dataset('id', (0,), dtype=int_type, maxshape=(None,), **global_dataset_options) - bonds.create_dataset('box', (0,6), dtype=float_type, maxshape=(None,6), **global_dataset_options) - bonds.create_dataset('coordinates', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options) - bonds.create_dataset('velocities', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options) - bonds.create_dataset('kinetic_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options) - bonds.create_dataset('potential_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options) - bonds.create_dataset('temperature', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options) + structures.create_dataset('id', (0,), dtype=int_type, maxshape=(None,), **global_dataset_options) + structures.create_dataset('box', (0,6), dtype=float_type, maxshape=(None,6), **global_dataset_options) + structures.create_dataset('coordinates', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options) + structures.create_dataset('velocities', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options) + structures.create_dataset('kinetic_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options) + structures.create_dataset('potential_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options) + structures.create_dataset('temperature', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options) return file diff --git a/molsysmt/native/topology.py b/molsysmt/native/topology.py index c8e45511f..f57fcd74e 100644 --- a/molsysmt/native/topology.py +++ b/molsysmt/native/topology.py @@ -33,13 +33,23 @@ def _nan_to_None(self): class Bonds_DataFrame(pd.DataFrame): - def __init__(self): + def __init__(self, n_bonds=0): columns = ['atom1_index', 'atom2_index', 'order', 'type'] - super().__init__(columns=columns) + if n_bonds: + self["atom_index"] = np.arange(0, n_atoms, dtype=int) + self._nan_to_None() + + def all_nan_to_None(self): + + list_columns_where_nan = ['atom1_index','atom2_index','order','type'] + + for column in self: + self[column].where(self[column].notnull(), None, inplace=True) + def _nan_to_None(self): list_columns_where_nan = ['order','type'] @@ -57,10 +67,10 @@ def _sort_bonds(self): class Topology(): - def __init__(self, n_atoms=0): + def __init__(self, n_atoms=0, n_bonds=0): self.atoms_dataframe=Atoms_DataFrame(n_atoms=n_atoms) - self.bonds_dataframe=Bonds_DataFrame() + self.bonds_dataframe=Bonds_DataFrame(n_bonds=n_bonds) def extract(self, atom_indices='all', structure_indices='all'): diff --git a/molsysmt/native/topology2.py b/molsysmt/native/topology2.py new file mode 100644 index 000000000..afa634349 --- /dev/null +++ b/molsysmt/native/topology2.py @@ -0,0 +1,309 @@ +import pandas as pd +import numpy as np + + +class Atoms_DataFrame(pd.DataFrame): + + def __init__(self): + + columns = ['atom_index', 'atom_name', 'atom_id', 'atom_type', + 'group_index', 'group_name', 'group_id', 'group_type', + 'component_index', 'component_name', 'component_id', 'component_type', + 'chain_index', 'chain_name', 'chain_id', 'chain_type', + 'molecule_index', 'molecule_name', 'molecule_id', 'molecule_type', + 'entity_index', 'entity_name', 'entity_id', 'entity_type', + 'occupancy', 'alternate_location', 'b_factor', 'formal_charge', 'partial_charge'] + + super().__init__(columns=columns) + + def _nan_to_UNK(self): + + for column in self: + self[column].where(self[column].notnull(), 'UNK', inplace=True) + + +class Groups_DataFrame(pd.DataFrame): + + def __init__(self): + + columns = ['group_name', 'group_id', 'group_type', 'component_index'] + + super().__init__(columns=columns) + + def _nan_to_UNK(self): + + for column in self: + self[column].where(self[column].notnull(), 'UNK', inplace=True) + + +class Components_DataFrame(pd.DataFrame): + + def __init__(self): + + columns = ['component_name', 'component_id', 'component_type', 'molecule_index'] + + super().__init__(columns=columns) + + def _nan_to_UNK(self): + + for column in self: + self[column].where(self[column].notnull(), 'UNK', inplace=True) + + +class Molecules_DataFrame(pd.DataFrame): + + def __init__(self): + + columns = ['molecule_name', 'molecule_id', 'molecule_type', 'entity_index'] + + super().__init__(columns=columns) + + def _nan_to_UNK(self): + + for column in self: + self[column].where(self[column].notnull(), 'UNK', inplace=True) + + +class Entities_DataFrame(pd.DataFrame): + + def __init__(self): + + columns = ['entity_name', 'entity_id', 'entity_type'] + + super().__init__(columns=columns) + + def _nan_to_UNK(self): + + for column in self: + self[column].where(self[column].notnull(), 'UNK', inplace=True) + + +class Chains_DataFrame(pd.DataFrame): + + def __init__(self): + + columns = ['chain_name', 'chain_id', 'chain_type'] + + super().__init__(columns=columns) + + def _nan_to_UNK(self): + + for column in self: + self[column].where(self[column].notnull(), 'UNK', inplace=True) + + +class Bonds_DataFrame(pd.DataFrame): + + def __init__(self): + + columns = ['atom1_index', 'atom2_index', 'order', 'type'] + + super().__init__(columns=columns) + + def _nan_to_UNK(self): + + for column in self: + self[column].where(self[column].notnull(), 'UNK', inplace=True) + + def _sort_bonds(self): + + self_mask = self['atom1_index'] > self['atom2_index'] + self.update(self.loc[self_mask].rename({'atom1_index': 'atom2_index', + 'atom2_index': 'atom1_index'}, axis=1)) + self.sort_values(by=['atom1_index', 'atom2_index'], inplace=True) + self.reset_index(drop=True, inplace=True) + + +class Topology2(): + + def __init__(self, n_atoms=0, n_bonds=0): + + self.atoms = Atoms_DataFrame() + self.groups = Groups_DataFrame() + self.components = Components_DataFrame() + self.molecules = Molecules_DataFrame() + self.entities = Entities_DataFrame() + self.chains = Chains_DataFrame() + self.bonds = Bonds_DataFrame() + + def extract(self, atom_indices='all'): + + if type(atom_indices)==str: + + if atom_indices in ['all', 'All', 'ALL']: + return self.copy() + + else: + + raise NotImplementedError + + return tmp_item + + + def add(self, item, selection='all'): + + raise NotImplementedError + + + def copy(self): + + tmp_item = Topology2() + + tmp_item.atoms = Atoms_DataFrame() + tmp_item.groups = Groups_DataFrame() + tmp_item.components = Components_DataFrame() + tmp_item.molecules = Molecules_DataFrame() + tmp_item.entities = Entities_DataFrame() + tmp_item.chains = Chains_DataFrame() + tmp_item.bonds = Bonds_DataFrame() + + for column in self.atoms.columns: + tmp_item.atoms[column]=self.atoms[column].to_numpy() + + for column in self.groups.columns: + tmp_item.groups[column]=self.groups[column].to_numpy() + + for column in self.groups.columns: + tmp_item.groups[column]=self.groups[column].to_numpy() + + + + for column in self.bonds_dataframe.columns: + tmp_item.bonds_dataframe[column]=self.bonds_dataframe[column].to_numpy() + + return tmp_item + + + def _build_components(self): + + from molsysmt.element.component import get_component_type_from_group_names + from molsysmt.element.component import get_component_index_from_bonded_atoms + + n_atoms = self.atoms_dataframe.shape[0] + n_bonds = self.bonds_dataframe.shape[0] + + group_index_from_atom = self.atoms_dataframe['group_index'].to_numpy() + group_name_from_atom = self.atoms_dataframe['group_name'].to_numpy() + atom_index_from_bond = self.bonds_dataframe[['atom1_index','atom2_index']].to_numpy(dtype=int, copy=True) + + if n_bonds==0: + + index_array = np.full(n_atoms, None, dtype=object) + id_array = np.full(n_atoms, None, dtype=object) + name_array = np.full(n_atoms, None, dtype=object) + type_array = np.full(n_atoms, None, dtype=object) + + else: + + index_array = get_component_index_from_bonded_atoms(atom_index_from_bond, n_atoms) + component_indices = np.unique(index_array) + n_components = component_indices.shape[0] + + type_array = np.full(n_atoms, None, dtype=object) + + for ii in component_indices: + + mask = (index_array==ii) + group_indices=np.unique(group_index_from_atom[mask]) + group_names=[] + for group_index in group_indices: + first_occurrence = np.where(group_index_from_atom==group_index)[0][0] + group_names.append(group_name_from_atom[first_occurrence]) + + type_array[mask]=get_component_type_from_group_names(group_names) + + self.atoms_dataframe["component_index"] = index_array + self.atoms_dataframe["component_id"] = index_array + self.atoms_dataframe["component_name"] = index_array + self.atoms_dataframe["component_type"] = type_array + + del(group_index_from_atom, group_name_from_atom, atom_index_from_bond, index_array, type_array) + + def _build_molecules(self): + + component_index_from_atom = self.atoms_dataframe['component_index'].to_numpy() + component_type_from_atom = self.atoms_dataframe['component_type'].to_numpy() + + n_atoms=component_index_from_atom.shape[0] + index_array = component_index_from_atom.copy() + id_array = np.full(n_atoms, None, dtype=object) + name_array = np.full(n_atoms, None, dtype=object) + type_array = component_type_from_atom.copy() + + self.atoms_dataframe["molecule_index"] = index_array + self.atoms_dataframe["molecule_id"] = id_array + self.atoms_dataframe["molecule_name"] = name_array + self.atoms_dataframe["molecule_type"] = type_array + + del(component_index_from_atom, component_type_from_atom, index_array, id_array, name_array, + type_array) + + def _build_entities(self): + + n_atoms=self.atoms_dataframe.shape[0] + + entity_index = np.empty(n_atoms, dtype=int) + entity_name = np.empty(n_atoms, dtype=object) + entity_id = np.empty(n_atoms, dtype=object) + entity_type = np.empty(n_atoms, dtype=object) + + entities= {} + n_entities = 0 + + current_molecule_name = '@' + + ii=0 + for molecule_name, molecule_type in zip(self.atoms_dataframe['molecule_name'], + self.atoms_dataframe['molecule_type']): + + if molecule_name!=current_molecule_name: + + current_molecule_name=molecule_name + current_molecule_type=molecule_type + + if current_molecule_name in entities: + + current_entity_index = entities[current_molecule_name]['entity_index'] + current_entity_id = entities[current_molecule_name]['entity_id'] + current_entity_name = entities[current_molecule_name]['entity_name'] + current_entity_type = entities[current_molecule_name]['entity_type'] + + else: + + current_entity_index = n_entities + current_entity_id = n_entities + current_entity_name = current_molecule_name + current_entity_type = current_molecule_type + + entities[current_entity_name]={} + entities[current_entity_name]['entity_index'] = current_entity_index + entities[current_entity_name]['entity_id'] = current_entity_id + entities[current_entity_name]['entity_name'] = current_entity_name + entities[current_entity_name]['entity_type'] = current_entity_type + + n_entities +=1 + + entity_index[ii]=current_entity_index + entity_id[ii]=current_entity_id + entity_name[ii]=current_entity_name + entity_type[ii]=current_entity_type + + ii+=1 + + self.atoms_dataframe["entity_index"] = entity_index + self.atoms_dataframe["entity_name"] = entity_name + self.atoms_dataframe["entity_type"] = entity_type + self.atoms_dataframe["entity_id"] = entity_id + + del(entity_name, entity_type, entity_id) + del(entities) + + def _join_molecules(self, indices=None): + + pass + + def _nan_to_None(self): + + self.atoms_dataframe._nan_to_None() + self.bonds_dataframe._nan_to_None() +