From 435d03b0ddc0ad05bfbf78c91b2316965584e7ff Mon Sep 17 00:00:00 2001
From: Diego Prada <prada.gracia@gmail.com>
Date: Sat, 21 Oct 2023 20:40:03 -0600
Subject: [PATCH] in process

---
 .../digestion/argument/compression.py         |  12 +
 .../digestion/argument/compression_opts.py    |  12 +
 .../digestion/argument/float_precision.py     |  12 +
 .../digestion/argument/int_precision.py       |  12 +
 molsysmt/form/file_msmh5/__init__.py          |   2 +
 .../to_molsysmt_MSMH5FileHandler.py           |   8 +
 .../form/file_msmh5/to_molsysmt_Topology.py   |   9 +-
 .../molsysmt_MSMH5FileHandler/__init__.py     |  31 ++
 .../form/molsysmt_MSMH5FileHandler/add.py     |   8 +
 .../append_structures.py                      |   8 +
 .../molsysmt_MSMH5FileHandler/attributes.py   |  39 +++
 .../form/molsysmt_MSMH5FileHandler/copy.py    |  16 +
 .../form/molsysmt_MSMH5FileHandler/extract.py |  27 ++
 .../form/molsysmt_MSMH5FileHandler/get.py     | 242 ++++++++++++++
 .../has_attribute.py                          |  11 +
 .../form/molsysmt_MSMH5FileHandler/is_form.py |   8 +
 .../molsysmt_MSMH5FileHandler/iterators.py    |  25 ++
 .../form/molsysmt_MSMH5FileHandler/merge.py   |   8 +
 .../form/molsysmt_MSMH5FileHandler/set.py     |   1 +
 .../to_molsysmt_MolSys.py                     |   7 +
 .../to_molsysmt_Structures.py                 |   6 +
 .../to_molsysmt_Topology.py                   | 107 ++++++
 .../to_nglview_NGLWidget.py                   |   7 +
 .../form/molsysmt_Topology/to_file_msmh5.py   | 252 ++++++++++----
 molsysmt/native/msmh5_file_handler.py         |  14 +-
 molsysmt/native/topology.py                   |  18 +-
 molsysmt/native/topology2.py                  | 309 ++++++++++++++++++
 27 files changed, 1128 insertions(+), 83 deletions(-)
 create mode 100644 molsysmt/_private/digestion/argument/compression.py
 create mode 100644 molsysmt/_private/digestion/argument/compression_opts.py
 create mode 100644 molsysmt/_private/digestion/argument/float_precision.py
 create mode 100644 molsysmt/_private/digestion/argument/int_precision.py
 create mode 100644 molsysmt/form/file_msmh5/to_molsysmt_MSMH5FileHandler.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/__init__.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/add.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/append_structures.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/attributes.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/copy.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/extract.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/get.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/has_attribute.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/is_form.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/iterators.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/merge.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/set.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_MolSys.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Topology.py
 create mode 100644 molsysmt/form/molsysmt_MSMH5FileHandler/to_nglview_NGLWidget.py
 create mode 100644 molsysmt/native/topology2.py

diff --git a/molsysmt/_private/digestion/argument/compression.py b/molsysmt/_private/digestion/argument/compression.py
new file mode 100644
index 000000000..1736bdc10
--- /dev/null
+++ b/molsysmt/_private/digestion/argument/compression.py
@@ -0,0 +1,12 @@
+from molsysmt._private.exceptions import ArgumentError
+
+def digest_compression(compression, caller=None):
+    """Return `compression` unchanged if it is valid for `caller`; raise ArgumentError otherwise."""
+    if isinstance(compression, str):
+        # `caller` defaults to None: guard it so the function ends in ArgumentError, not AttributeError.
+        if caller is not None and caller.endswith('to_file_msmh5'):
+            if compression in ['gzip', 'lzf', 'szip']:
+                return compression
+
+    raise ArgumentError('compression', value=compression, caller=caller, message=None)
+
diff --git a/molsysmt/_private/digestion/argument/compression_opts.py b/molsysmt/_private/digestion/argument/compression_opts.py
new file mode 100644
index 000000000..a3497d4da
--- /dev/null
+++ b/molsysmt/_private/digestion/argument/compression_opts.py
@@ -0,0 +1,12 @@
+from molsysmt._private.exceptions import ArgumentError
+
+def digest_compression_opts(compression_opts, caller=None):
+    """Return `compression_opts` unchanged if it is an int in 0..9 for a valid `caller`; raise ArgumentError otherwise."""
+    if isinstance(compression_opts, int):
+        # `caller` defaults to None: guard it so the function ends in ArgumentError, not AttributeError.
+        if caller is not None and caller.endswith('to_file_msmh5'):
+            if 0<=compression_opts<=9:
+                return compression_opts
+
+    raise ArgumentError('compression_opts', value=compression_opts, caller=caller, message=None)
+
diff --git a/molsysmt/_private/digestion/argument/float_precision.py b/molsysmt/_private/digestion/argument/float_precision.py
new file mode 100644
index 000000000..311644d08
--- /dev/null
+++ b/molsysmt/_private/digestion/argument/float_precision.py
@@ -0,0 +1,12 @@
+from molsysmt._private.exceptions import ArgumentError
+
+def digest_float_precision(float_precision, caller=None):
+    """Return `float_precision` unchanged if it is 'single' or 'double' for a valid `caller`; raise ArgumentError otherwise."""
+    if isinstance(float_precision, str):
+        # `caller` defaults to None: guard it so the function ends in ArgumentError, not AttributeError.
+        if caller is not None and caller.endswith('to_file_msmh5'):
+            if float_precision in ['single', 'double']:
+                return float_precision
+
+    raise ArgumentError('float_precision', value=float_precision, caller=caller, message=None)
+
diff --git a/molsysmt/_private/digestion/argument/int_precision.py b/molsysmt/_private/digestion/argument/int_precision.py
new file mode 100644
index 000000000..d0235ee54
--- /dev/null
+++ b/molsysmt/_private/digestion/argument/int_precision.py
@@ -0,0 +1,12 @@
+from molsysmt._private.exceptions import ArgumentError
+
+def digest_int_precision(int_precision, caller=None):
+    """Return `int_precision` unchanged if it is 'single' or 'double' for a valid `caller`; raise ArgumentError otherwise."""
+    if isinstance(int_precision, str):
+        # `caller` defaults to None: guard it so the function ends in ArgumentError, not AttributeError.
+        if caller is not None and caller.endswith('to_file_msmh5'):
+            if int_precision in ['single', 'double']:
+                return int_precision
+
+    raise ArgumentError('int_precision', value=int_precision, caller=caller, message=None)
+
diff --git a/molsysmt/form/file_msmh5/__init__.py b/molsysmt/form/file_msmh5/__init__.py
index 75ef6e729..80ab6eeb7 100644
--- a/molsysmt/form/file_msmh5/__init__.py
+++ b/molsysmt/form/file_msmh5/__init__.py
@@ -16,6 +16,7 @@
 from .set import *
 from .iterators import StructuresIterator, TopologyIterator
 
+from .to_molsysmt_MSMH5FileHandler import to_molsysmt_MSMH5FileHandler
 from .to_molsysmt_MolSys import to_molsysmt_MolSys
 from .to_molsysmt_Topology import to_molsysmt_Topology
 from .to_molsysmt_Structures import to_molsysmt_Structures
@@ -23,6 +24,7 @@
 
 _convert_to={
         'file:msmh5': extract,
+        'molsysmt.MSMH5FileHandler': to_molsysmt_MSMH5FileHandler,
         'molsysmt.MolSys': to_molsysmt_MolSys,
         'molsysmt.Topology': to_molsysmt_Topology,
         'molsysmt.Structures': to_molsysmt_Structures,
diff --git a/molsysmt/form/file_msmh5/to_molsysmt_MSMH5FileHandler.py b/molsysmt/form/file_msmh5/to_molsysmt_MSMH5FileHandler.py
new file mode 100644
index 000000000..f041b9fbd
--- /dev/null
+++ b/molsysmt/form/file_msmh5/to_molsysmt_MSMH5FileHandler.py
@@ -0,0 +1,8 @@
+from molsysmt._private.digestion import digest
+
+@digest(form='file:msmh5')
+def to_molsysmt_MSMH5FileHandler(item, atom_indices='all'):
+    """Wrap `item` in an MSMH5FileHandler created with io_mode='r' and return it; `atom_indices` is not used here."""
+    from molsysmt.native import MSMH5FileHandler
+
+    return MSMH5FileHandler(item, io_mode='r')
diff --git a/molsysmt/form/file_msmh5/to_molsysmt_Topology.py b/molsysmt/form/file_msmh5/to_molsysmt_Topology.py
index 57c694d06..b6c0b05a6 100644
--- a/molsysmt/form/file_msmh5/to_molsysmt_Topology.py
+++ b/molsysmt/form/file_msmh5/to_molsysmt_Topology.py
@@ -3,4 +3,11 @@
 @digest(form='file:msmh5')
 def to_molsysmt_Topology(item, atom_indices='all'):
 
-    raise NotImplementedError
+    from . import to_molsysmt_MSMH5FileHandler
+    from ..molsysmt_MSMH5FileHandler import to_molsysmt_Topology as molsysmt_MSMH5FileHandler_to_molsysmt_Topology
+
+    handler = to_molsysmt_MSMH5FileHandler(item)
+    try:  # the handler is closed even when the conversion raises
+        return molsysmt_MSMH5FileHandler_to_molsysmt_Topology(handler, atom_indices=atom_indices)
+    finally:
+        handler.close()
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/__init__.py b/molsysmt/form/molsysmt_MSMH5FileHandler/__init__.py
new file mode 100644
index 000000000..9ac963115
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/__init__.py
@@ -0,0 +1,31 @@
+form_name = 'molsysmt.MSMH5FileHandler'
+form_type = 'class'
+form_info = ["", ""]
+
+from .is_form import is_form
+
+from .attributes import attributes
+from .has_attribute import has_attribute
+
+from .extract import extract
+from .copy import copy
+from .add import add
+from .merge import merge
+from .append_structures import append_structures
+from .get import *
+from .set import *
+from .iterators import StructuresIterator, TopologyIterator
+
+from .to_molsysmt_MolSys import to_molsysmt_MolSys
+from .to_molsysmt_Topology import to_molsysmt_Topology
+from .to_molsysmt_Structures import to_molsysmt_Structures
+from .to_nglview_NGLWidget import to_nglview_NGLWidget
+
+_convert_to={
+        'molsysmt.MSMH5FileHandler': extract,
+        'molsysmt.MolSys': to_molsysmt_MolSys,
+        'molsysmt.Topology': to_molsysmt_Topology,
+        'molsysmt.Structures': to_molsysmt_Structures,
+        'nglview.NGLWidget': to_nglview_NGLWidget,
+        }
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/add.py b/molsysmt/form/molsysmt_MSMH5FileHandler/add.py
new file mode 100644
index 000000000..84bb598a0
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/add.py
@@ -0,0 +1,8 @@
+from molsysmt._private.exceptions import NotImplementedMethodError
+from molsysmt._private.digestion import digest
+
+@digest(form='molsysmt.MSMH5FileHandler', to_form='molsysmt.MSMH5FileHandler')
+def add(to_item, item, atom_indices='all', structure_indices='all'):
+    """Placeholder for this form: no arguments are used and NotImplementedMethodError is always raised."""
+    raise NotImplementedMethodError()
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/append_structures.py b/molsysmt/form/molsysmt_MSMH5FileHandler/append_structures.py
new file mode 100644
index 000000000..228cd14b3
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/append_structures.py
@@ -0,0 +1,8 @@
+from molsysmt._private.exceptions import NotImplementedMethodError
+from molsysmt._private.digestion import digest
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def append_structures(item, structure_id=None, time=None, coordinates=None, box=None):
+    """Placeholder for this form: no arguments are used and NotImplementedMethodError is always raised."""
+    raise NotImplementedMethodError()
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/attributes.py b/molsysmt/form/molsysmt_MSMH5FileHandler/attributes.py
new file mode 100644
index 000000000..6e20367b8
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/attributes.py
@@ -0,0 +1,39 @@
+from molsysmt.attribute.attributes import attributes as _all_attributes
+
+attributes = {ii:False for ii in _all_attributes}
+
+attributes['atom_index'] = True
+attributes['atom_id'] = True
+attributes['atom_name'] = True
+attributes['atom_type'] = True
+attributes['bond_index'] = True
+attributes['bond_id'] = True
+attributes['bond_type'] = True
+attributes['bond_order'] = True
+attributes['group_index'] = True
+attributes['group_id'] = True
+attributes['group_name'] = True
+attributes['group_type'] = True
+attributes['component_index'] = True
+attributes['component_id'] = True
+attributes['component_name'] = True
+attributes['component_type'] = True
+attributes['molecule_index'] = True
+attributes['molecule_id'] = True
+attributes['molecule_name'] = True
+attributes['molecule_type'] = True
+attributes['chain_index'] = True
+attributes['chain_id'] = True
+attributes['chain_name'] = True
+attributes['chain_type'] = True
+attributes['entity_index'] = True
+attributes['entity_id'] = True
+attributes['entity_name'] = True
+attributes['entity_type'] = True
+attributes['coordinates'] = True
+attributes['velocities'] = True
+attributes['box'] = True
+attributes['time'] = True
+attributes['structure_id'] = True
+
+del(_all_attributes)
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/copy.py b/molsysmt/form/molsysmt_MSMH5FileHandler/copy.py
new file mode 100644
index 000000000..48ed23ad9
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/copy.py
@@ -0,0 +1,16 @@
+from molsysmt._private.exceptions import NotImplementedMethodError
+from molsysmt._private.digestion import digest
+from molsysmt._private.variables import is_all
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def copy(item, output_filename=None):
+    """Copy `item` to `output_filename` (which defaults to `item` itself) with shutil.copy; return `output_filename`."""
+    if output_filename is None:
+        output_filename = item
+    # NOTE(review): `item` is used as a path here, but this form's is_form accepts handler objects -- TODO confirm.
+    from shutil import copy as copy_file
+    copy_file(item, output_filename)
+    tmp_item = output_filename
+
+    return tmp_item
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/extract.py b/molsysmt/form/molsysmt_MSMH5FileHandler/extract.py
new file mode 100644
index 000000000..bf83b41e1
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/extract.py
@@ -0,0 +1,27 @@
+from molsysmt._private.exceptions import NotImplementedMethodError
+from molsysmt._private.digestion import digest
+from molsysmt._private.variables import is_all
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def extract(item, atom_indices='all', structure_indices='all', output_filename=None, copy_if_all=True):
+    """Return `item` or a shutil.copy of it when everything is selected; any subset raises NotImplementedMethodError."""
+    if output_filename is None:
+        output_filename = item
+    # NOTE(review): `item` is used as a path below, but this form's is_form accepts handler objects -- TODO confirm.
+    if is_all(atom_indices) and is_all(structure_indices):
+        # Copy when asked to (copy_if_all) or when a distinct output_filename was given; otherwise hand back item.
+        if copy_if_all or (output_filename!=item):
+
+            from shutil import copy as copy_file
+            copy_file(item, output_filename)
+            tmp_item = output_filename
+
+        else:
+
+            tmp_item = item
+    else:
+        # Selecting a subset of atoms or structures is not implemented for this form.
+        raise NotImplementedMethodError()
+
+    return tmp_item
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/get.py b/molsysmt/form/molsysmt_MSMH5FileHandler/get.py
new file mode 100644
index 000000000..73921f4a5
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/get.py
@@ -0,0 +1,242 @@
+#######################################################################################
+########### THE FOLLOWING LINES NEED TO BE CUSTOMIZED FOR EVERY CLASS  ################
+#######################################################################################
+
+from molsysmt._private.execfile import execfile
+from molsysmt._private.exceptions import NotImplementedMethodError, NotWithThisFormError
+from molsysmt._private.digestion import digest
+
+form='molsysmt.MSMH5FileHandler'
+
+
+## From atom
+
+@digest(form=form)
+def get_atom_id_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_atom_name_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_atom_type_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_group_index_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_component_index_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_chain_index_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_molecule_index_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_entity_index_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_inner_bonded_atoms_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_n_inner_bonds_from_atom(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_coordinates_from_atom(item, indices='all', structure_indices='all'):
+
+    raise NotImplementedError
+
+## From group
+
+@digest(form=form)
+def get_group_id_from_group(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_group_name_from_group(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_group_type_from_group(item, indices='all'):
+
+    raise NotImplementedError
+
+## From component
+
+@digest(form=form)
+def get_component_id_from_component(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_component_name_from_component(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_component_type_from_component(item, indices='all'):
+
+    raise NotImplementedError
+
+## From molecule
+
+@digest(form=form)
+def get_molecule_id_from_molecule(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_molecule_name_from_molecule(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_molecule_type_from_molecule(item, indices='all'):
+
+    raise NotImplementedError
+
+
+## From chain
+
+@digest(form=form)
+def get_chain_id_from_chain(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_chain_name_from_chain(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_chain_type_from_chain(item, indices='all'):
+
+    raise NotImplementedError
+
+
+## From entity
+
+@digest(form=form)
+def get_entity_id_from_entity(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_entity_name_from_entity(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_entity_type_from_entity(item, indices='all'):
+
+    raise NotImplementedError
+
+
+## From system
+
+@digest(form=form)
+def get_n_atoms_from_system(item):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_n_groups_from_system(item):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_n_components_from_system(item):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_n_chains_from_system(item):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_n_molecules_from_system(item):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_n_entities_from_system(item):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_n_bonds_from_system(item):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_n_structures_from_system(item, structure_indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_box_from_system(item, structure_indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_time_from_system(item, structure_indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_structure_id_from_system(item, structure_indices='all'):
+
+    raise NotImplementedError
+
+
+## From bond
+
+@digest(form=form)
+def get_bond_order_from_bond(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_bond_type_from_bond(item, indices='all'):
+
+    raise NotImplementedError
+
+@digest(form=form)
+def get_bonded_atoms_from_bond(item, indices='all'):
+
+    raise NotImplementedError
+
+
+#######################################################################################
+######### DO NOT TOUCH THE FOLLOWING LINES, JUST INCLUDE THEM AS THEY ARE #############
+#######################################################################################
+
+from os import path
+this_folder = path.dirname(path.abspath(__file__))
+common_get = path.join(this_folder, '../../_private/common_get.py')
+execfile(common_get, globals(), locals())
+del(path, this_folder, common_get)
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/has_attribute.py b/molsysmt/form/molsysmt_MSMH5FileHandler/has_attribute.py
new file mode 100644
index 000000000..64675b499
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/has_attribute.py
@@ -0,0 +1,11 @@
+from molsysmt._private.digestion import digest
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def has_attribute(molecular_system, attribute):
+    """Look `attribute` up in this form's attributes table and return the stored flag."""
+    from . import attributes as supported
+
+    # `molecular_system` is not inspected: the answer depends only on the form-level table.
+    is_supported = supported[attribute]
+    return is_supported
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/is_form.py b/molsysmt/form/molsysmt_MSMH5FileHandler/is_form.py
new file mode 100644
index 000000000..b6ce7de28
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/is_form.py
@@ -0,0 +1,8 @@
+
+def is_form(item):
+    """Tell whether the fully qualified class name of `item` is that of the native MSMH5FileHandler."""
+    klass = item.__class__
+    qualified_name = klass.__module__+'.'+klass.__name__
+
+    return qualified_name == 'molsysmt.native.msmh5_file_handler.MSMH5FileHandler'
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/iterators.py b/molsysmt/form/molsysmt_MSMH5FileHandler/iterators.py
new file mode 100644
index 000000000..2c24555c2
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/iterators.py
@@ -0,0 +1,25 @@
+from molsysmt._private.exceptions import NotImplementedIteratorError
+
+class StructuresIterator():
+    """Placeholder structures iterator for this form: constructor arguments are ignored and __next__ always raises."""
+    def __init__(self, molecular_system, atom_indices='all', start=0, interval=1, stop=None, chunk=1, structure_indices=None):
+        pass
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        raise NotImplementedIteratorError
+
+class TopologyIterator():
+    """Placeholder topology iterator for this form: the constructor argument is ignored and __next__ always raises."""
+    def __init__(self, molecular_system):
+        pass
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        raise NotImplementedIteratorError
+
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/merge.py b/molsysmt/form/molsysmt_MSMH5FileHandler/merge.py
new file mode 100644
index 000000000..ee1f45d4b
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/merge.py
@@ -0,0 +1,8 @@
+from molsysmt._private.exceptions import NotImplementedMethodError
+from molsysmt._private.digestion import digest
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def merge(items, atom_indices='all', structure_indices='all'):
+    """Placeholder for this form: no arguments are used and NotImplementedMethodError is always raised."""
+    raise NotImplementedMethodError()
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/set.py b/molsysmt/form/molsysmt_MSMH5FileHandler/set.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/set.py
@@ -0,0 +1 @@
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_MolSys.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_MolSys.py
new file mode 100644
index 000000000..522aa372a
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_MolSys.py
@@ -0,0 +1,7 @@
+from molsysmt._private.digestion import digest
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def to_molsysmt_MolSys(item, atom_indices='all', structure_indices='all'):
+    """Placeholder conversion to molsysmt.MolSys: no arguments are used and NotImplementedError is always raised."""
+    raise NotImplementedError
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py
new file mode 100644
index 000000000..4685544d3
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py
@@ -0,0 +1,6 @@
+from molsysmt._private.digestion import digest
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def to_molsysmt_Structures(item, atom_indices='all', structure_indices='all'):
+    """Placeholder conversion to molsysmt.Structures: no arguments are used and NotImplementedError is always raised."""
+    raise NotImplementedError
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Topology.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Topology.py
new file mode 100644
index 000000000..35701cb08
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Topology.py
@@ -0,0 +1,107 @@
+from molsysmt._private.digestion import digest
+import pandas as pd
+import numpy as np
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def to_molsysmt_Topology(item, atom_indices='all'):
+    """Build and return a native Topology from the data stored under `item.file['topology']`."""
+    from molsysmt.native import Topology
+
+    topology_ds = item.file['topology']
+
+    n_atoms = topology_ds['atoms'].attrs['n_atoms']
+    n_bonds = topology_ds['bonds'].attrs['n_bonds']
+    # NOTE(review): n_bonds is never used below and atom_indices is not applied to the result -- TODO confirm intent.
+    tmp_item = Topology()
+
+    # Atoms: per-atom columns are read as stored; atom_index is simply 0..n_atoms-1
+
+    tmp_item.atoms_dataframe['atom_index']=np.arange(n_atoms)
+    tmp_item.atoms_dataframe['atom_id']=topology_ds['atoms']['id'][:]
+    tmp_item.atoms_dataframe['atom_type']=topology_ds['atoms']['type'].asstr()[:]
+    tmp_item.atoms_dataframe['atom_name']=topology_ds['atoms']['name'].asstr()[:]
+    tmp_item.atoms_dataframe['group_index']=topology_ds['atoms']['group_index'][:]
+
+    # Groups: one row per group, then expanded to one row per atom through each atom's group_index
+
+    groups_df = pd.DataFrame({
+        'group_id':topology_ds['groups']['id'][:],
+        'group_name':topology_ds['groups']['name'].asstr()[:],
+        'group_type':topology_ds['groups']['type'].asstr()[:],
+        'component_index':topology_ds['groups']['component_index'][:],
+        })
+    # Row k of aux_df is the group of atom k; the index is reset, presumably so the column assignments below align.
+    aux_df = groups_df.iloc[tmp_item.atoms_dataframe['group_index'].to_numpy()]
+    aux_df.reset_index(inplace=True, drop=True)
+
+    tmp_item.atoms_dataframe['group_id']=aux_df['group_id']
+    tmp_item.atoms_dataframe['group_name']=aux_df['group_name']
+    tmp_item.atoms_dataframe['group_type']=aux_df['group_type']
+    tmp_item.atoms_dataframe['component_index']=aux_df['component_index']
+
+    del(groups_df, aux_df)
+
+    # Components: same expansion, driven by the per-atom component_index filled in just above
+
+    components_df = pd.DataFrame({
+        'component_id':topology_ds['components']['id'][:],
+        'component_name':topology_ds['components']['name'].asstr()[:],
+        'component_type':topology_ds['components']['type'].asstr()[:],
+        'molecule_index':topology_ds['components']['molecule_index'][:],
+        })
+
+    aux_df = components_df.iloc[tmp_item.atoms_dataframe['component_index'].to_numpy()]
+    aux_df.reset_index(inplace=True, drop=True)
+
+    tmp_item.atoms_dataframe['component_id']=aux_df['component_id']
+    tmp_item.atoms_dataframe['component_name']=aux_df['component_name']
+    tmp_item.atoms_dataframe['component_type']=aux_df['component_type']
+    tmp_item.atoms_dataframe['molecule_index']=aux_df['molecule_index']
+
+    del(components_df, aux_df)
+
+    # Molecules: same expansion, driven by the per-atom molecule_index filled in just above
+
+    molecules_df = pd.DataFrame({
+        'molecule_id':topology_ds['molecules']['id'][:],
+        'molecule_name':topology_ds['molecules']['name'].asstr()[:],
+        'molecule_type':topology_ds['molecules']['type'].asstr()[:],
+        'entity_index':topology_ds['molecules']['entity_index'][:],
+        })
+
+    aux_df = molecules_df.iloc[tmp_item.atoms_dataframe['molecule_index'].to_numpy()]
+    aux_df.reset_index(inplace=True, drop=True)
+
+    tmp_item.atoms_dataframe['molecule_id']=aux_df['molecule_id']
+    tmp_item.atoms_dataframe['molecule_name']=aux_df['molecule_name']
+    tmp_item.atoms_dataframe['molecule_type']=aux_df['molecule_type']
+    tmp_item.atoms_dataframe['entity_index']=aux_df['entity_index']
+
+    del(molecules_df, aux_df)
+
+    # Entities: same expansion, driven by the per-atom entity_index filled in just above
+
+    entities_df = pd.DataFrame({
+        'entity_id':topology_ds['entities']['id'][:],
+        'entity_name':topology_ds['entities']['name'].asstr()[:],
+        'entity_type':topology_ds['entities']['type'].asstr()[:],
+        })
+
+    aux_df = entities_df.iloc[tmp_item.atoms_dataframe['entity_index'].to_numpy()]
+    aux_df.reset_index(inplace=True, drop=True)
+
+    tmp_item.atoms_dataframe['entity_id']=aux_df['entity_id']
+    tmp_item.atoms_dataframe['entity_name']=aux_df['entity_name']
+    tmp_item.atoms_dataframe['entity_type']=aux_df['entity_type']
+
+    del(entities_df, aux_df)
+
+    # Bonds: copied column by column; both 'type' and 'order' are read as strings via asstr()
+
+    tmp_item.bonds_dataframe['atom1_index']=topology_ds['bonds']['atom1_index'][:]
+    tmp_item.bonds_dataframe['atom2_index']=topology_ds['bonds']['atom2_index'][:]
+    tmp_item.bonds_dataframe['type']=topology_ds['bonds']['type'].asstr()[:]
+    tmp_item.bonds_dataframe['order']=topology_ds['bonds']['order'].asstr()[:]
+
+    return tmp_item
+
diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_nglview_NGLWidget.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_nglview_NGLWidget.py
new file mode 100644
index 000000000..8a877cf0e
--- /dev/null
+++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_nglview_NGLWidget.py
@@ -0,0 +1,7 @@
+from molsysmt._private.digestion import digest
+
+@digest(form='molsysmt.MSMH5FileHandler')
+def to_nglview_NGLWidget(item, atom_indices='all', structure_indices='all'):
+    """Placeholder conversion to nglview.NGLWidget: no arguments are used and NotImplementedError is always raised."""
+    raise NotImplementedError
+
diff --git a/molsysmt/form/molsysmt_Topology/to_file_msmh5.py b/molsysmt/form/molsysmt_Topology/to_file_msmh5.py
index 4670a4051..fbea93aa3 100644
--- a/molsysmt/form/molsysmt_Topology/to_file_msmh5.py
+++ b/molsysmt/form/molsysmt_Topology/to_file_msmh5.py
@@ -47,116 +47,226 @@ def _add_topology_to_msmh5(item, file, atom_indices='all'):
     if not file_is_msmh5:
         raise ValueError
 
-    n_atoms = item.atoms_dataframe.shape[0]
+    # Atoms
 
-    n_atoms = item.atoms_dataframe.shape[0]
+    atoms_df = item.atoms_dataframe
 
-    atom_index_array = item.atoms_dataframe["atom_index"].to_numpy()
-    atom_name_array = item.atoms_dataframe["atom_name"].to_numpy()
-    atom_id_array = item.atoms_dataframe["atom_id"].to_numpy()
-    atom_type_array = item.atoms_dataframe["atom_type"].to_numpy()
+    n_atoms = atoms_df.shape[0]
 
-    group_index_array = item.atoms_dataframe["group_index"].to_numpy()
-    group_name_array = item.atoms_dataframe["group_name"].to_numpy()
-    group_id_array = item.atoms_dataframe["group_id"].to_numpy()
-    group_type_array = item.atoms_dataframe["group_type"].to_numpy()
+    atoms = file['topology']['atoms']
 
-    component_index_array = item.atoms_dataframe["component_index"].to_numpy()
-    component_name_array = item.atoms_dataframe["component_name"].to_numpy()
-    component_id_array = item.atoms_dataframe["component_id"].to_numpy()
-    component_type_array = item.atoms_dataframe["component_type"].to_numpy()
+    atoms.attrs['n_atoms'] = n_atoms
 
-    group_index_array = item.atoms_dataframe["group_index"].to_numpy()
-    group_name_array = item.atoms_dataframe["group_name"].to_numpy()
-    group_id_array = item.atoms_dataframe["group_id"].to_numpy()
-    group_type_array = item.atoms_dataframe["group_type"].to_numpy()
+    atoms['id'].resize((n_atoms,))
+    atoms['name'].resize((n_atoms,))
+    atoms['type'].resize((n_atoms,))
 
+    atoms['id'][:] = atoms_df['atom_id'].to_numpy(dtype=int)
+    atoms['name'][:] = atoms_df['atom_name'].to_numpy(dtype=str)
+    atoms['type'][:] = atoms_df['atom_type'].to_numpy(dtype=str)
 
+    # Groups
 
-    chain_index_array = item.atoms_dataframe["chain_index"].to_numpy()
-    chain_name_array = item.atoms_dataframe["chain_name"].to_numpy()
-    chain_id_array = item.atoms_dataframe["chain_id"].to_numpy()
-    chain_type_array = item.atoms_dataframe["chain_type"].to_numpy()
+    groups_df = atoms_df[['group_index', 'group_id', 'group_name', 'group_type', 'component_index']].drop_duplicates()
 
-    bonds_atom1 = item.bonds_dataframe["atom1_index"].to_numpy()
-    bonds_atom2 = item.bonds_dataframe["atom2_index"].to_numpy()
+    n_groups = groups_df.shape[0]
 
+    if n_groups==1:
+        if groups_df['group_index'].iloc[0] == None:
+            n_groups = 0
 
-    for ii in range(n_atoms):
+    groups = file['topology']['groups']
+    groups.attrs['n_groups'] = n_groups
 
+    if n_groups > 0:
 
+        if all(groups_df['group_id'].unique()==[None]):
+            groups_df['group_id']=groups_df['group_index']
 
+        if all(groups_df['group_name'].unique()==[None]):
+            groups_df['group_name']='UNK'
 
-    aux_indices = item.atoms_dataframe['group_index'].unique()
-    where_not_None = np.where(aux_indices!=None)
-    aux_indices = aux_indices[where_not_None]
-    n_groups = aux_indices.shape[0]
+        if all(groups_df['group_type'].unique()==[None]):
+            groups_df['group_type']='UNK'
 
-    aux_indices = item.atoms_dataframe['component_index'].unique()
-    where_not_None = np.where(aux_indices!=None)
-    aux_indices = aux_indices[where_not_None]
-    n_components = aux_indices.shape[0]
+        atoms['group_index'].resize((n_atoms,))
+        groups['id'].resize((n_groups,))
+        groups['name'].resize((n_groups,))
+        groups['type'].resize((n_groups,))
 
-    aux_indices = item.atoms_dataframe['molecule_index'].unique()
-    where_not_None = np.where(aux_indices!=None)
-    aux_indices = aux_indices[where_not_None]
-    n_molecules = aux_indices.shape[0]
+        atoms['group_index'][:] = atoms_df['group_index'].to_numpy(dtype=int)
+        groups['id'][:] = groups_df['group_id'].to_numpy(dtype=int)
+        groups['name'][:] = groups_df['group_name'].to_numpy(dtype=str)
+        groups['type'][:] = groups_df['group_type'].to_numpy(dtype=str)
 
-    aux_indices = item.atoms_dataframe['entity_index'].unique()
-    where_not_None = np.where(aux_indices!=None)
-    aux_indices = aux_indices[where_not_None]
-    n_entities = aux_indices.shape[0]
+    # Components
 
-    aux_indices = item.atoms_dataframe['chain_index'].unique()
-    where_not_None = np.where(aux_indices!=None)
-    aux_indices = aux_indices[where_not_None]
-    n_chains = aux_indices.shape[0]
+    components_df = atoms_df[['component_index', 'component_id', 'component_name', 'component_type', 'molecule_index']].drop_duplicates()
 
-    # Atoms
+    n_components = components_df.shape[0]
 
-    atoms = file['topology']['atoms']
+    if n_components==1:
+        if components_df['component_index'].iloc[0] == None:
+            n_components = 0
 
-    atoms.attrs['n_atoms'] = n_atoms
+    components = file['topology']['components']
+    components.attrs['n_components'] = n_components
 
-    atoms['id'].resize((n_atoms,))
-    atoms['name'].resize((n_atoms,))
-    atoms['type'].resize((n_atoms,))
+    if n_components > 0:
 
-    atoms['id'][:] = item.atoms_dataframe['atom_id'].to_numpy()
-    atoms['name'][:] = item.atoms_dataframe['atom_name'].to_numpy()
-    atoms['type'][:] = item.atoms_dataframe['atom_type'].to_numpy()
+        if all(components_df['component_id'].unique()==[None]):
+            components_df['component_id']=components_df['component_index']
 
-    # Groups
+        if all(components_df['component_name'].unique()==[None]):
+            components_df['component_name']='UNK'
 
-    if n_groups > 0:
+        if all(components_df['component_type'].unique()==[None]):
+            components_df['component_type']='UNK'
 
-        groups = file['topology']['groups']
+        groups['component_index'].resize((n_groups,))
+        components['id'].resize((n_components,))
+        components['name'].resize((n_components,))
+        components['type'].resize((n_components,))
 
-        groups.attrs['n_groups'] = n_groups
+        groups['component_index'][:] = groups_df['component_index'].to_numpy(dtype=int)
+        components['id'][:] = components_df['component_id'].to_numpy(dtype=int)
+        components['name'][:] = components_df['component_name'].to_numpy(dtype=str)
+        components['type'][:] = components_df['component_type'].to_numpy(dtype=str)
 
-        atoms['group_index'].resize((n_atoms,))
-        groups['id'].resize((n_groups,))
-        groups['name'].resize((n_groups,))
-        groups['type'].resize((n_groups,))
+    # Molecules
 
-        atoms['group_index'][:] = item.atoms_dataframe['group_index'].to_numpy()
-        groups['id'][:] = item.atoms_dataframe['group_id'].to_numpy()
-        groups['name'][:] = item.atoms_dataframe['group_name'].to_numpy()
-        groups['type'][:] = item.atoms_dataframe['group_type'].to_numpy()
+    molecules_df = atoms_df[['molecule_index', 'molecule_id', 'molecule_name', 'molecule_type', 'entity_index']].drop_duplicates()
 
-    # Components
+    n_molecules = molecules_df.shape[0]
 
-    if n_components > 0:
+    if n_molecules==1:
+        if molecules_df['molecule_index'].iloc[0] == None:
+            n_molecules = 0
+
+    molecules = file['topology']['molecules']
+    molecules.attrs['n_molecules'] = n_molecules
+
+    if n_molecules > 0:
+
+        if all(molecules_df['molecule_id'].unique()==[None]):
+            molecules_df['molecule_id']=molecules_df['molecule_index']
+
+        if all(molecules_df['molecule_name'].unique()==[None]):
+            molecules_df['molecule_name']='UNK'
+
+        if all(molecules_df['molecule_type'].unique()==[None]):
+            molecules_df['molecule_type']='UNK'
+
+        components['molecule_index'].resize((n_components,))
+        molecules['id'].resize((n_molecules,))
+        molecules['name'].resize((n_molecules,))
+        molecules['type'].resize((n_molecules,))
+
+        components['molecule_index'][:] = components_df['molecule_index'].to_numpy(dtype=int)
+        molecules['id'][:] = molecules_df['molecule_id'].to_numpy(dtype=int)
+        molecules['name'][:] = molecules_df['molecule_name'].to_numpy(dtype=str)
+        molecules['type'][:] = molecules_df['molecule_type'].to_numpy(dtype=str)
+
+    # Entities
+
+    entities_df = atoms_df[['entity_index', 'entity_id', 'entity_name', 'entity_type']].drop_duplicates()
+
+    n_entities = entities_df.shape[0]
+
+    if n_entities==1:
+        if entities_df['entity_index'].iloc[0] == None:
+            n_entities = 0
+
+    entities = file['topology']['entities']
+    entities.attrs['n_entities'] = n_entities
+
+    if n_entities > 0:
+
+        if all(entities_df['entity_id'].unique()==[None]):
+            entities_df['entity_id']=entities_df['entity_index']
+
+        if all(entities_df['entity_name'].unique()==[None]):
+            entities_df['entity_name']='UNK'
+
+        if all(entities_df['entity_type'].unique()==[None]):
+            entities_df['entity_type']='UNK'
+
+        molecules['entity_index'].resize((n_molecules,))
+        entities['id'].resize((n_entities,))
+        entities['name'].resize((n_entities,))
+        entities['type'].resize((n_entities,))
+
+        molecules['entity_index'][:] = molecules_df['entity_index'].to_numpy(dtype=int)
+        entities['id'][:] = entities_df['entity_id'].to_numpy(dtype=int)
+        entities['name'][:] = entities_df['entity_name'].to_numpy(dtype=str)
+        entities['type'][:] = entities_df['entity_type'].to_numpy(dtype=str)
 
-        groups['component_index'].resize((n_groups,))
-        groups['component_index'][:] = item.atoms_dataframe['component_index'].to_numpy()
 
     # Chains
 
+    chains_df = atoms_df[['chain_index', 'chain_id', 'chain_name', 'chain_type']].drop_duplicates()
+
+    n_chains = chains_df.shape[0]
+
+    if n_chains==1:
+        if chains_df['chain_index'].iloc[0] == None:
+            n_chains = 0
+
+    chains = file['topology']['chains']
+    chains.attrs['n_chains'] = n_chains
+
     if n_chains > 0:
 
+        if all(chains_df['chain_id'].unique()==[None]):
+            chains_df['chain_id']=chains_df['chain_index']
+
+        if chains_df['chain_id'].dtype == 'O':
+            chains_df['chain_id']=chains_df['chain_index']
+
+        if all(chains_df['chain_name'].unique()==[None]):
+            chains_df['chain_name']='UNK'
+
+        if all(chains_df['chain_type'].unique()==[None]):
+            chains_df['chain_type']='UNK'
+
         atoms['chain_index'].resize((n_atoms,))
-        atoms['chain_index'][:] = item.atoms_dataframe['chain_index'].to_numpy()
+        chains['id'].resize((n_chains,))
+        chains['name'].resize((n_chains,))
+        chains['type'].resize((n_chains,))
+
+        atoms['chain_index'][:] = atoms_df['chain_index'].to_numpy(dtype=int)
+        chains['id'][:] = chains_df['chain_id'].to_numpy(dtype=int)
+        chains['name'][:] = chains_df['chain_name'].to_numpy(dtype=str)
+        chains['type'][:] = chains_df['chain_type'].to_numpy(dtype=str)
+
+    del(groups_df, components_df, molecules_df, entities_df, chains_df)
+
+    # Bonds
+
+    bonds_df = item.bonds_dataframe
+
+    n_bonds = bonds_df.shape[0]
+
+    bonds = file['topology']['bonds']
+    bonds.attrs['n_bonds'] = n_bonds
+
+    if n_bonds>0:
+
+        if all(bonds_df['order'].unique()==[None]):
+            bonds_df['order']='UNK'
+
+        if all(bonds_df['type'].unique()==[None]):
+            bonds_df['type']='UNK'
+
+        bonds['atom1_index'].resize((n_bonds,))
+        bonds['atom2_index'].resize((n_bonds,))
+        bonds['order'].resize((n_bonds,))
+        bonds['type'].resize((n_bonds,))
+
+        bonds['atom1_index'][:] = bonds_df['atom1_index'].to_numpy(dtype=int)
+        bonds['atom2_index'][:] = bonds_df['atom2_index'].to_numpy(dtype=int)
+        bonds['order'][:] = bonds_df['order'].to_numpy(dtype=str)
+        bonds['type'][:] = bonds_df['type'].to_numpy(dtype=str)
 
 
     if needs_to_be_closed:
diff --git a/molsysmt/native/msmh5_file_handler.py b/molsysmt/native/msmh5_file_handler.py
index 24cfeee39..a2b12d266 100644
--- a/molsysmt/native/msmh5_file_handler.py
+++ b/molsysmt/native/msmh5_file_handler.py
@@ -145,13 +145,13 @@ def _new_msmfile(filename, creator='MolSysMT', compression="gzip", compression_o
     structures.attrs['time_unit']='ps'
     structures.attrs['energy_unit']='kJ/mol'
 
-    bonds.create_dataset('id', (0,), dtype=int_type, maxshape=(None,), **global_dataset_options)
-    bonds.create_dataset('box', (0,6), dtype=float_type, maxshape=(None,6), **global_dataset_options)
-    bonds.create_dataset('coordinates', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options)
-    bonds.create_dataset('velocities', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options)
-    bonds.create_dataset('kinetic_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options)
-    bonds.create_dataset('potential_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options)
-    bonds.create_dataset('temperature', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options)
+    structures.create_dataset('id', (0,), dtype=int_type, maxshape=(None,), **global_dataset_options)
+    structures.create_dataset('box', (0,6), dtype=float_type, maxshape=(None,6), **global_dataset_options)
+    structures.create_dataset('coordinates', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options)
+    structures.create_dataset('velocities', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options)
+    structures.create_dataset('kinetic_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options)
+    structures.create_dataset('potential_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options)
+    structures.create_dataset('temperature', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options)
 
     return file
 
diff --git a/molsysmt/native/topology.py b/molsysmt/native/topology.py
index c8e45511f..f57fcd74e 100644
--- a/molsysmt/native/topology.py
+++ b/molsysmt/native/topology.py
@@ -33,13 +33,23 @@ def _nan_to_None(self):
 
 class Bonds_DataFrame(pd.DataFrame):
 
-    def __init__(self):
+    def __init__(self, n_bonds=0):
 
         columns = ['atom1_index', 'atom2_index', 'order', 'type']
 
-
         super().__init__(columns=columns)
 
+        if n_bonds:  # preallocate n_bonds rows holding None in every bond column
+            for column in columns:  # (was: a bogus 'atom_index' column sized by the undefined n_atoms)
+                self[column] = np.full(n_bonds, None, dtype=object)
+
+    def all_nan_to_None(self):
+        """Replace the missing values of every column using ``where(notnull, None)``."""
+        # Assign the result back: an in-place ``where`` on the temporary ``self[column]``
+        # Series is chained assignment and is not guaranteed to modify this frame.
+        for column in self:
+            self[column] = self[column].where(self[column].notnull(), None)
+
     def _nan_to_None(self):
 
         list_columns_where_nan = ['order','type']
@@ -57,10 +67,10 @@ def _sort_bonds(self):
 
 class Topology():
 
-    def __init__(self, n_atoms=0):
+    def __init__(self, n_atoms=0, n_bonds=0):
 
         self.atoms_dataframe=Atoms_DataFrame(n_atoms=n_atoms)
-        self.bonds_dataframe=Bonds_DataFrame()
+        self.bonds_dataframe=Bonds_DataFrame(n_bonds=n_bonds)  # forward the requested number of bonds
 
     def extract(self, atom_indices='all', structure_indices='all'):
 
diff --git a/molsysmt/native/topology2.py b/molsysmt/native/topology2.py
new file mode 100644
index 000000000..afa634349
--- /dev/null
+++ b/molsysmt/native/topology2.py
@@ -0,0 +1,309 @@
+import pandas as pd
+import numpy as np
+
+
+class Atoms_DataFrame(pd.DataFrame):
+    """Per-atom table, created empty, with atom columns plus index/name/id/type columns for each upper level."""
+
+    def __init__(self):
+        columns = ['atom_index', 'atom_name', 'atom_id', 'atom_type',
+                   'group_index', 'group_name', 'group_id', 'group_type',
+                   'component_index', 'component_name', 'component_id', 'component_type',
+                   'chain_index', 'chain_name', 'chain_id', 'chain_type',
+                   'molecule_index', 'molecule_name', 'molecule_id', 'molecule_type',
+                   'entity_index', 'entity_name', 'entity_id', 'entity_type',
+                   'occupancy', 'alternate_location', 'b_factor', 'formal_charge', 'partial_charge']
+
+        super().__init__(columns=columns)
+
+    def _nan_to_UNK(self):
+        """Replace the missing values of every column with the string 'UNK'."""
+        for column in self:
+            self[column] = self[column].where(self[column].notnull(), 'UNK')  # assigned back: in-place where() on self[column] is chained assignment
+
+
+class Groups_DataFrame(pd.DataFrame):
+    """Table, created empty, with the group name/id/type columns and a 'component_index' column."""
+
+    def __init__(self):
+        columns = ['group_name', 'group_id', 'group_type', 'component_index']
+
+        super().__init__(columns=columns)
+
+    def _nan_to_UNK(self):
+        """Replace the missing values of every column with the string 'UNK'."""
+        for column in self:
+            self[column] = self[column].where(self[column].notnull(), 'UNK')  # assigned back: in-place where() on self[column] is chained assignment
+
+
+class Components_DataFrame(pd.DataFrame):
+    """Table, created empty, with the component name/id/type columns and a 'molecule_index' column."""
+
+    def __init__(self):
+        columns = ['component_name', 'component_id', 'component_type', 'molecule_index']
+
+        super().__init__(columns=columns)
+
+    def _nan_to_UNK(self):
+        """Replace the missing values of every column with the string 'UNK'."""
+        for column in self:
+            self[column] = self[column].where(self[column].notnull(), 'UNK')  # assigned back: in-place where() on self[column] is chained assignment
+
+
+class Molecules_DataFrame(pd.DataFrame):
+    """Table, created empty, with the molecule name/id/type columns and an 'entity_index' column."""
+
+    def __init__(self):
+        columns = ['molecule_name', 'molecule_id', 'molecule_type', 'entity_index']
+
+        super().__init__(columns=columns)
+
+    def _nan_to_UNK(self):
+        """Replace the missing values of every column with the string 'UNK'."""
+        for column in self:
+            self[column] = self[column].where(self[column].notnull(), 'UNK')  # assigned back: in-place where() on self[column] is chained assignment
+
+
+class Entities_DataFrame(pd.DataFrame):
+    """Table, created empty, with the entity name/id/type columns."""
+
+    def __init__(self):
+        columns = ['entity_name', 'entity_id', 'entity_type']
+
+        super().__init__(columns=columns)
+
+    def _nan_to_UNK(self):
+        """Replace the missing values of every column with the string 'UNK'."""
+        for column in self:
+            self[column] = self[column].where(self[column].notnull(), 'UNK')  # assigned back: in-place where() on self[column] is chained assignment
+
+
+class Chains_DataFrame(pd.DataFrame):
+    """Table, created empty, with the chain name/id/type columns."""
+
+    def __init__(self):
+        columns = ['chain_name', 'chain_id', 'chain_type']
+
+        super().__init__(columns=columns)
+
+    def _nan_to_UNK(self):
+        """Replace the missing values of every column with the string 'UNK'."""
+        for column in self:
+            self[column] = self[column].where(self[column].notnull(), 'UNK')  # assigned back: in-place where() on self[column] is chained assignment
+
+
+class Bonds_DataFrame(pd.DataFrame):
+    """Table, created empty, with the columns 'atom1_index', 'atom2_index', 'order' and 'type'."""
+
+    def __init__(self):
+        columns = ['atom1_index', 'atom2_index', 'order', 'type']
+
+        super().__init__(columns=columns)
+
+    def _nan_to_UNK(self):
+        """Replace the missing values of every column with the string 'UNK'."""
+        for column in self:
+            self[column] = self[column].where(self[column].notnull(), 'UNK')  # assigned back: in-place where() on self[column] is chained assignment
+
+    def _sort_bonds(self):
+        """Swap the atoms of rows where atom1_index > atom2_index, then sort rows by (atom1_index, atom2_index)."""
+        self_mask = self['atom1_index'] > self['atom2_index']
+        self.update(self.loc[self_mask].rename({'atom1_index': 'atom2_index',
+                                      'atom2_index': 'atom1_index'}, axis=1))
+        self.sort_values(by=['atom1_index', 'atom2_index'], inplace=True)
+        self.reset_index(drop=True, inplace=True)
+
+
+class Topology2():
+    """Topology stored as one table per level: atoms, groups, components,
+    molecules, entities, chains and bonds.
+
+    Work in progress: the ``_build_*`` helpers still read and write the
+    per-atom columns of ``self.atoms`` rather than the per-level tables.
+    """
+
+    def __init__(self, n_atoms=0, n_bonds=0):
+        """Create the seven empty tables.
+
+        ``n_atoms`` and ``n_bonds`` are accepted but not used yet.
+        """
+
+        self.atoms = Atoms_DataFrame()
+        self.groups = Groups_DataFrame()
+        self.components = Components_DataFrame()
+        self.molecules = Molecules_DataFrame()
+        self.entities = Entities_DataFrame()
+        self.chains = Chains_DataFrame()
+        self.bonds = Bonds_DataFrame()
+
+    def extract(self, atom_indices='all'):
+        """Return a new Topology2 with the selected atoms.
+
+        Only 'all' ('All', 'ALL') is supported and returns ``self.copy()``;
+        any other selection raises NotImplementedError.
+        """
+
+        if isinstance(atom_indices, str) and atom_indices in ('all', 'All', 'ALL'):
+            return self.copy()
+
+        # Strings other than 'all' used to reach an undefined name (NameError).
+        raise NotImplementedError
+
+
+    def add(self, item, selection='all'):
+        """Add the selected elements of ``item`` (not implemented yet)."""
+
+        raise NotImplementedError
+
+
+    def copy(self):
+        """Return a new Topology2 holding a copy of every column of every table."""
+
+        tmp_item = Topology2()
+
+        # The constructor already creates the empty tables. All seven are copied:
+        # the loop replaces per-table code that repeated 'groups', skipped four
+        # tables and used a 'bonds_dataframe' attribute this class does not have.
+        table_names = ('atoms', 'groups', 'components', 'molecules',
+                       'entities', 'chains', 'bonds')
+
+        for table_name in table_names:
+
+            source = getattr(self, table_name)
+            target = getattr(tmp_item, table_name)
+
+            for column in source.columns:
+                # to_numpy() hands over the bare values, without the source index.
+                target[column] = source[column].to_numpy()
+
+        return tmp_item
+
+
+    def _build_components(self):
+        """Fill the component_* columns of ``self.atoms`` from the bonds table.
+
+        With no bonds, every component column is set to None. Otherwise the
+        component index of each atom comes from
+        ``get_component_index_from_bonded_atoms`` and the component type from
+        ``get_component_type_from_group_names``, called with the names of the
+        groups found in each component.
+        """
+
+        from molsysmt.element.component import get_component_type_from_group_names
+        from molsysmt.element.component import get_component_index_from_bonded_atoms
+
+        # This class keeps its tables in self.atoms / self.bonds; it has no
+        # 'atoms_dataframe' / 'bonds_dataframe' attributes.
+        n_atoms = self.atoms.shape[0]
+        n_bonds = self.bonds.shape[0]
+
+        group_index_from_atom = self.atoms['group_index'].to_numpy()
+        group_name_from_atom = self.atoms['group_name'].to_numpy()
+        atom_index_from_bond = self.bonds[['atom1_index','atom2_index']].to_numpy(dtype=int, copy=True)
+
+        type_array = np.full(n_atoms, None, dtype=object)
+
+        if n_bonds==0:
+
+            index_array = np.full(n_atoms, None, dtype=object)
+
+        else:
+
+            index_array = get_component_index_from_bonded_atoms(atom_index_from_bond, n_atoms)
+
+            # The type of a component is derived from the names of its groups.
+            for ii in np.unique(index_array):
+
+                mask = (index_array==ii)
+                group_indices=np.unique(group_index_from_atom[mask])
+                group_names=[]
+                for group_index in group_indices:
+                    first_occurrence = np.where(group_index_from_atom==group_index)[0][0]
+                    group_names.append(group_name_from_atom[first_occurrence])
+
+                type_array[mask]=get_component_type_from_group_names(group_names)
+
+        # As before, component_id and component_name receive the component index.
+        self.atoms["component_index"] = index_array
+        self.atoms["component_id"] = index_array
+        self.atoms["component_name"] = index_array
+        self.atoms["component_type"] = type_array
+
+    def _build_molecules(self):
+        """Fill the molecule_* columns of ``self.atoms`` from the component columns.
+
+        molecule_index and molecule_type are copies of component_index and
+        component_type; molecule_id and molecule_name are set to None.
+        """
+
+        component_index_from_atom = self.atoms['component_index'].to_numpy()
+        component_type_from_atom = self.atoms['component_type'].to_numpy()
+
+        n_atoms=component_index_from_atom.shape[0]
+        index_array = component_index_from_atom.copy()
+        id_array = np.full(n_atoms, None, dtype=object)
+        name_array = np.full(n_atoms, None, dtype=object)
+        type_array = component_type_from_atom.copy()
+
+        self.atoms["molecule_index"] = index_array
+        self.atoms["molecule_id"] = id_array
+        self.atoms["molecule_name"] = name_array
+        self.atoms["molecule_type"] = type_array
+
+    def _build_entities(self):
+        """Fill the entity_* columns of ``self.atoms`` from the molecule columns.
+
+        Atoms are visited in order. Each distinct molecule_name becomes one
+        entity, numbered by first appearance; entity_id equals that number,
+        entity_name is the molecule name and entity_type is the molecule_type
+        seen when the name first appeared.
+        """
+
+        n_atoms=self.atoms.shape[0]
+
+        entity_index = np.empty(n_atoms, dtype=int)
+        entity_name = np.empty(n_atoms, dtype=object)
+        entity_id = np.empty(n_atoms, dtype=object)
+        entity_type = np.empty(n_atoms, dtype=object)
+
+        # molecule_name -> (entity_index, entity_type) of the entities found so far
+        entities = {}
+
+        molecule_columns = zip(self.atoms['molecule_name'], self.atoms['molecule_type'])
+
+        for ii, (molecule_name, molecule_type) in enumerate(molecule_columns):
+
+            # A per-atom dict lookup replaces the old '@' previous-name sentinel.
+            if molecule_name not in entities:
+                entities[molecule_name] = (len(entities), molecule_type)
+
+            current_entity_index, current_entity_type = entities[molecule_name]
+
+            entity_index[ii]=current_entity_index
+            entity_id[ii]=current_entity_index
+            entity_name[ii]=molecule_name
+            entity_type[ii]=current_entity_type
+
+        self.atoms["entity_index"] = entity_index
+        self.atoms["entity_name"] = entity_name
+        self.atoms["entity_type"] = entity_type
+        self.atoms["entity_id"] = entity_id
+
+    def _join_molecules(self, indices=None):
+        """Placeholder; does nothing yet."""
+
+        pass
+
+    def _nan_to_None(self):
+        """Replace missing values (NaN) with None in the atoms and bonds tables.
+
+        The table classes of this module define no ``_nan_to_None`` method, so
+        the replacement is done here, column by column.
+        """
+
+        # Assign the result back instead of calling where(..., inplace=True) on
+        # the temporary ``table[column]`` Series (chained assignment).
+        for table in (self.atoms, self.bonds):
+            for column in table:
+                table[column] = table[column].where(table[column].notnull(), None)
+