diff --git a/molsysmt/form/file_msmh5/is_form.py b/molsysmt/form/file_msmh5/is_form.py index 981fd67b9..b5d3b19ae 100644 --- a/molsysmt/form/file_msmh5/is_form.py +++ b/molsysmt/form/file_msmh5/is_form.py @@ -1,5 +1,6 @@ from pathlib import PosixPath import h5py +import os def is_form(item): @@ -11,9 +12,16 @@ def is_form(item): if isinstance(item, str): if item.endswith('.msmh5'): - with h5py.File(item, "r") as file: - if 'type' in file.attrs: - output = (file.attrs['type']=='msmh5') + if os.path.isfile(item): + + with h5py.File(item, "r") as file: + if 'type' in file.attrs: + output = (file.attrs['type']=='msmh5') + + else: + + output = True return output + diff --git a/molsysmt/form/file_msmh5/to_molsysmt_Structures.py b/molsysmt/form/file_msmh5/to_molsysmt_Structures.py index 9acff18bf..96b9452ea 100644 --- a/molsysmt/form/file_msmh5/to_molsysmt_Structures.py +++ b/molsysmt/form/file_msmh5/to_molsysmt_Structures.py @@ -3,4 +3,12 @@ @digest(form='file:msmh5') def to_molsysmt_Structures(item, atom_indices='all', structure_indices='all'): - raise NotImplementedError + from . import to_molsysmt_MSMH5FileHandler + from ..molsysmt_MSMH5FileHandler import to_molsysmt_Structures as molsysmt_MSMH5FileHandler_to_molsysmt_Structures + + handler = to_molsysmt_MSMH5FileHandler(item) + tmp_item = molsysmt_MSMH5FileHandler_to_molsysmt_Structures(handler, atom_indices=atom_indices, + structure_indices=structure_indices) + handler.close() + + return tmp_item diff --git a/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py index 4685544d3..b039cc4e4 100644 --- a/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py +++ b/molsysmt/form/molsysmt_MSMH5FileHandler/to_molsysmt_Structures.py @@ -1,6 +1,110 @@ from molsysmt._private.digestion import digest +from molsysmt import pyunitwizard as puw +import numpy as np @digest(form='molsysmt.MSMH5FileHandler') def to_molsysmt_Structures(item, atom_indices='all', structure_indices='all'): - raise NotImplementedError + from molsysmt.native import Structures + + structures_ds = item.file['structures'] + + n_atoms = structures_ds.attrs['n_atoms'] + n_structures = structures_ds.attrs['n_structures'] + + length_unit = puw.unit(structures_ds.attrs['length_unit']) + time_unit = puw.unit(structures_ds.attrs['time_unit']) + energy_unit = puw.unit(structures_ds.attrs['energy_unit']) + temperature_unit = puw.unit(structures_ds.attrs['temperature_unit']) + + standard_length_unit = puw.get_standard_units(length_unit) + standard_time_unit = puw.get_standard_units(time_unit) + standard_energy_unit = puw.get_standard_units(energy_unit) + standard_temperature_unit = puw.get_standard_units(temperature_unit) + + tmp_item = Structures() + + tmp_item.n_atoms = n_atoms + tmp_item.n_structures = n_structures + + # Coordinates + + if structures_ds['coordinates'].shape[0]>0: + + tmp_item.coordinates = puw.quantity(np.zeros([n_structures, n_atoms, 3], dtype='float64'), + standard_length_unit) + tmp_item.coordinates[:,:,:] = puw.quantity(structures_ds['coordinates'][:,:,:], + length_unit) + + else: + + tmp_item.coordinates = None + + # Velocities + + if structures_ds['velocities'].shape[0]>0: + + tmp_item.velocities = puw.quantity(np.zeros([n_structures, n_atoms, 3], dtype='float64'), + standard_length_unit/standard_time_unit) + tmp_item.velocities[:,:,:] = puw.quantity(structures_ds['velocities'][:,:,:], + length_unit/time_unit) + + else: + + tmp_item.velocities = None + + # Box + + if structures_ds['box'].shape[0]>0: + + tmp_item.box = puw.quantity(np.zeros([n_structures, 3, 3], dtype='float64'), + standard_length_unit) + tmp_item.box[:,:,:] = puw.quantity(structures_ds['box'][:,:,:], + length_unit) + + else: + + tmp_item.box = None + + # Kinetic Energy + + if structures_ds['kinetic_energy'].shape[0]>0: + + tmp_item.kinetic_energy = puw.quantity(np.zeros([n_structures], dtype='float64'), + standard_energy_unit) + tmp_item.kinetic_energy[:] = puw.quantity(structures_ds['kinetic_energy'][:], + energy_unit) + + else: + + tmp_item.kinetic_energy = None + + # Potential Energy + + if structures_ds['potential_energy'].shape[0]>0: + + tmp_item.potential_energy = puw.quantity(np.zeros([n_structures], dtype='float64'), + standard_energy_unit) + tmp_item.potential_energy[:] = puw.quantity(structures_ds['potential_energy'][:], + energy_unit) + + else: + + tmp_item.potential_energy = None + + # Temperature + + if structures_ds['temperature'].shape[0]>0: + + tmp_item.temperature = puw.quantity(np.zeros([n_structures], dtype='float64'), + standard_temperature_unit) + tmp_item.temperature[:] = puw.quantity(structures_ds['temperature'][:], + temperature_unit) + + else: + + tmp_item.temperature = None + + + return tmp_item + diff --git a/molsysmt/form/molsysmt_Structures/to_file_msmh5.py b/molsysmt/form/molsysmt_Structures/to_file_msmh5.py index 32c75df9b..1a7a026a4 100644 --- a/molsysmt/form/molsysmt_Structures/to_file_msmh5.py +++ b/molsysmt/form/molsysmt_Structures/to_file_msmh5.py @@ -1,4 +1,6 @@ from molsysmt._private.digestion import digest +from molsysmt import pyunitwizard as puw +import numpy as np @digest(form='molsysmt.Structures') def to_file_msmh5(item, atom_indices='all', structure_indices='all', output_filename=None, @@ -48,235 +50,84 @@ def _add_structures_to_msmh5(item, file, atom_indices='all', structure_indices=' if not file_is_msmh5: raise ValueError - item.attrs['n_atoms'] = item.n_atoms - item.attrs['n_structures'] = item.n_structures - - length_unit = - length_unit = - - item.attrs['length_unit']= - - # Atoms - - atoms_df = item.atoms_dataframe - - n_atoms = atoms_df.shape[0] - - atoms = file['topology']['atoms'] - - atoms.attrs['n_atoms'] = n_atoms - - atoms['id'].resize((n_atoms,)) - atoms['name'].resize((n_atoms,)) - atoms['type'].resize((n_atoms,)) - - atoms['id'][:] = atoms_df['atom_id'].to_numpy(dtype=int) - atoms['name'][:] = atoms_df['atom_name'].to_numpy(dtype=str) - atoms['type'][:] = atoms_df['atom_type'].to_numpy(dtype=str) - - # Groups - - groups_df = atoms_df[['group_index', 'group_id', 'group_name', 'group_type', 'component_index']].drop_duplicates() - - n_groups = groups_df.shape[0] - - if n_groups==1: - if groups_df['group_index'].iloc[0] == None: - n_groups = 0 - - groups = file['topology']['groups'] - groups.attrs['n_groups'] = n_groups - - if n_groups > 0: - - if all(groups_df['group_id'].unique()==[None]): - groups_df['group_id']=groups_df['group_index'] - - if all(groups_df['group_name'].unique()==[None]): - groups_df['group_name']='UNK' - - if all(groups_df['group_type'].unique()==[None]): - groups_df['group_type']='UNK' - - atoms['group_index'].resize((n_atoms,)) - groups['id'].resize((n_groups,)) - groups['name'].resize((n_groups,)) - groups['type'].resize((n_groups,)) - - atoms['group_index'][:] = atoms_df['group_index'].to_numpy(dtype=int) - groups['id'][:] = groups_df['group_id'].to_numpy(dtype=int) - groups['name'][:] = groups_df['group_name'].to_numpy(dtype=str) - groups['type'][:] = groups_df['group_type'].to_numpy(dtype=str) - - # Components - - components_df = atoms_df[['component_index', 'component_id', 'component_name', 'component_type', 'molecule_index']].drop_duplicates() - - n_components = components_df.shape[0] - - if n_components==1: - if components_df['component_index'].iloc[0] == None: - n_components = 0 - - components = file['topology']['components'] - components.attrs['n_components'] = n_components - - if n_components > 0: - - if all(components_df['component_id'].unique()==[None]): - components_df['component_id']=components_df['component_index'] - - if all(components_df['component_name'].unique()==[None]): - components_df['component_name']='UNK' - - if all(components_df['component_type'].unique()==[None]): - components_df['component_type']='UNK' - - groups['component_index'].resize((n_groups,)) - components['id'].resize((n_components,)) - components['name'].resize((n_components,)) - components['type'].resize((n_components,)) - - groups['component_index'][:] = groups_df['component_index'].to_numpy(dtype=int) - components['id'][:] = components_df['component_id'].to_numpy(dtype=int) - components['name'][:] = components_df['component_name'].to_numpy(dtype=str) - components['type'][:] = components_df['component_type'].to_numpy(dtype=str) - - # Molecules - - molecules_df = atoms_df[['molecule_index', 'molecule_id', 'molecule_name', 'molecule_type', 'entity_index']].drop_duplicates() - - n_molecules = molecules_df.shape[0] - - if n_molecules==1: - if molecules_df['molecule_index'].iloc[0] == None: - n_molecules = 0 - - molecules = file['topology']['molecules'] - molecules.attrs['n_molecules'] = n_molecules - - if n_molecules > 0: - - if all(molecules_df['molecule_id'].unique()==[None]): - molecules_df['molecule_id']=molecules_df['molecule_index'] - - if all(molecules_df['molecule_name'].unique()==[None]): - molecules_df['molecule_name']='UNK' - - if all(molecules_df['molecule_type'].unique()==[None]): - molecules_df['molecule_type']='UNK' - - components['molecule_index'].resize((n_components,)) - molecules['id'].resize((n_molecules,)) - molecules['name'].resize((n_molecules,)) - molecules['type'].resize((n_molecules,)) - - components['molecule_index'][:] = components_df['molecule_index'].to_numpy(dtype=int) - molecules['id'][:] = molecules_df['molecule_id'].to_numpy(dtype=int) - molecules['name'][:] = molecules_df['molecule_name'].to_numpy(dtype=str) - molecules['type'][:] = molecules_df['molecule_type'].to_numpy(dtype=str) - - # Entities - - entities_df = atoms_df[['entity_index', 'entity_id', 'entity_name', 'entity_type']].drop_duplicates() - - n_entities = entities_df.shape[0] - - if n_entities==1: - if entities_df['entity_index'].iloc[0] == None: - n_entities = 0 - - entities = file['topology']['entities'] - entities.attrs['n_entities'] = n_entities - - if n_entities > 0: - - if all(entities_df['entity_id'].unique()==[None]): - entities_df['entity_id']=entities_df['entity_index'] - - if all(entities_df['entity_name'].unique()==[None]): - entities_df['entity_name']='UNK' - - if all(entities_df['entity_type'].unique()==[None]): - entities_df['entity_type']='UNK' - - molecules['entity_index'].resize((n_molecules,)) - entities['id'].resize((n_entities,)) - entities['name'].resize((n_entities,)) - entities['type'].resize((n_entities,)) - - molecules['entity_index'][:] = molecules_df['entity_index'].to_numpy(dtype=int) - entities['id'][:] = entities_df['entity_id'].to_numpy(dtype=int) - entities['name'][:] = entities_df['entity_name'].to_numpy(dtype=str) - entities['type'][:] = entities_df['entity_type'].to_numpy(dtype=str) - - - # Chains - - chains_df = atoms_df[['chain_index', 'chain_id', 'chain_name', 'chain_type']].drop_duplicates() - - n_chains = chains_df.shape[0] - - if n_chains==1: - if chains_df['chain_index'].iloc[0] == None: - n_chains = 0 - - chains = file['topology']['chains'] - chains.attrs['n_chains'] = n_chains - - if n_chains > 0: - - if all(chains_df['chain_id'].unique()==[None]): - chains_df['chain_id']=chains_df['chain_index'] - - if chains_df['chain_id'].dtype == 'O': - chains_df['chain_id']=chains_df['chain_index'] - - if all(chains_df['chain_name'].unique()==[None]): - chains_df['chain_name']='UNK' - - if all(chains_df['chain_type'].unique()==[None]): - chains_df['chain_type']='UNK' - - atoms['chain_index'].resize((n_atoms,)) - chains['id'].resize((n_chains,)) - chains['name'].resize((n_chains,)) - chains['type'].resize((n_chains,)) - - atoms['chain_index'][:] = atoms_df['chain_index'].to_numpy(dtype=int) - chains['id'][:] = chains_df['chain_id'].to_numpy(dtype=int) - chains['name'][:] = chains_df['chain_name'].to_numpy(dtype=str) - chains['type'][:] = chains_df['chain_type'].to_numpy(dtype=str) - - del(groups_df, components_df, molecules_df, entities_df, chains_df) - - # Bonds - - bonds_df = item.bonds_dataframe - - n_bonds = bonds_df.shape[0] - - bonds = file['topology']['bonds'] - bonds.attrs['n_bonds'] = n_bonds - - if n_bonds>0: - - if all(bonds_df['order'].unique()==[None]): - bonds_df['order']='UNK' - - if all(bonds_df['type'].unique()==[None]): - bonds_df['type']='UNK' - - bonds['atom1_index'].resize((n_bonds,)) - bonds['atom2_index'].resize((n_bonds,)) - bonds['order'].resize((n_bonds,)) - bonds['type'].resize((n_bonds,)) - - bonds['atom1_index'][:] = bonds_df['atom1_index'].to_numpy(dtype=int) - bonds['atom2_index'][:] = bonds_df['atom2_index'].to_numpy(dtype=int) - bonds['order'][:] = bonds_df['order'].to_numpy(dtype=str) - bonds['type'][:] = bonds_df['type'].to_numpy(dtype=str) - + int_precision = file.attrs['int_precision'] + float_precision = file.attrs['float_precision'] + + if int_precision=='single': + int_type=np.int32 + elif int_precision=='double': + int_type=np.int64 + + if float_precision=='single': + float_type=np.float32 + elif float_precision=='double': + float_type=np.float64 + + structures_ds = file['structures'] + + n_atoms = item.n_atoms + n_structures = item.n_structures + + structures_ds.attrs['n_atoms'] = item.n_atoms + structures_ds.attrs['n_structures'] = item.n_structures + + length_unit = puw.get_standard_units(dimensionality={'[L]':1}) + time_unit = puw.get_standard_units(dimensionality={'[T]':1}) + energy_unit = puw.get_standard_units(dimensionality={'[L]':2, '[M]':1, '[T]':-2, + '[mol]': -1}) + temperature_unit = puw.get_standard_units(dimensionality={'[K]':1}) + + structures_ds.attrs['length_unit']=str(length_unit) + structures_ds.attrs['time_unit']=str(time_unit) + structures_ds.attrs['energy_unit']=str(energy_unit) + + if item.coordinates is not None: + structures_ds['coordinates'].resize((n_structures,n_atoms,3)) + if puw.check(item.coordinates, unit=length_unit): + aux = puw.get_value(item.coordinates).astype(float_precision) + else: + aux = puw.get_value(item.coordinates, to_unit=length_unit).astype(float_precision) + structures_ds['coordinates'][:,:,:] = aux + + if item.velocities is not None: + structures_ds['velocities'].resize((n_structures,n_atoms,3)) + if puw.check(item.velocities, unit=length_unit/length_time): + aux = puw.get_value(item.velocities).astype(float_precision) + else: + aux = puw.get_value(item.velocities, to_unit=length_unit/length_time).astype(float_precision) + structures_ds['velocities'][:,:,:] = aux + + if item.box is not None: + structures_ds['box'].resize((n_structures,3,3)) + if puw.check(item.box, unit=length_unit): + aux = puw.get_value(item.box).astype(float_precision) + else: + aux = puw.get_value(item.box, to_unit=length_unit).astype(float_precision) + structures_ds['box'][:,:,:] = aux + + if item.kinetic_energy is not None: + structures_ds['kinetic_energy'].resize((n_structures)) + if puw.check(item.kinetic_energy, unit=energy_unit): + aux = puw.get_value(item.kinetic_energy).astype(float_precision) + else: + aux = puw.get_value(item.kinetic_energy, to_unit=length_unit).astype(float_precision) + structures_ds['kinetic_energy'][:] = aux + + if item.potential_energy is not None: + structures_ds['potential_energy'].resize((n_structures)) + if puw.check(item.potential_energy, unit=energy_unit): + aux = puw.get_value(item.potential_energy).astype(float_precision) + else: + aux = puw.get_value(item.potential_energy, to_unit=length_unit).astype(float_precision) + structures_ds['potential_energy'][:] = aux + + if item.temperature is not None: + structures_ds['temperature'].resize((n_structures)) + if puw.check(item.temperature, unit=temperature_unit): + aux = puw.get_value(item.temperature).astype(float_precision) + else: + aux = puw.get_value(item.temperature, to_unit=length_unit).astype(float_precision) + structures_ds['temperature'][:] = aux if needs_to_be_closed: file.close() diff --git a/molsysmt/form/molsysmt_Topology/__init__.py b/molsysmt/form/molsysmt_Topology/__init__.py index 4f9a70e1c..e407b6184 100644 --- a/molsysmt/form/molsysmt_Topology/__init__.py +++ b/molsysmt/form/molsysmt_Topology/__init__.py @@ -16,6 +16,8 @@ from .set import * from .iterators import TopologyIterator +from .write_topology_to_msmh5 import write_topology_to_msmh5 + from .to_string_aminoacids3 import to_string_aminoacids3 from .to_string_aminoacids1 import to_string_aminoacids1 from .to_string_pdb_text import to_string_pdb_text diff --git a/molsysmt/form/molsysmt_Topology/to_file_msmh5.py b/molsysmt/form/molsysmt_Topology/to_file_msmh5.py index fbea93aa3..d6bd3870e 100644 --- a/molsysmt/form/molsysmt_Topology/to_file_msmh5.py +++ b/molsysmt/form/molsysmt_Topology/to_file_msmh5.py @@ -5,272 +5,15 @@ def to_file_msmh5(item, atom_indices='all', coordinates=None, output_filename=No compression='gzip', compression_opts=4, int_precision='single', float_precision='single'): from molsysmt.native import MSMH5FileHandler + from .write_topology_to_msmh5 import write_topology_to_msmh5 handler = MSMH5FileHandler(output_filename, io_mode='w', compression=compression, compression_opts=compression_opts, int_precision=int_precision, float_precision=float_precision, closed=False) - _add_topology_to_msmh5(item, handler, atom_indices=atom_indices) + write_topology_to_msmh5(item, handler, atom_indices=atom_indices) handler.close() return output_filename -def _add_topology_to_msmh5(item, file, atom_indices='all'): - - from h5py._hl.files import File as h5py_File - from molsysmt.native import MSMH5FileHandler - - file_is_msmh5 = False - needs_to_be_closed = False - - if isinstance(file, h5py_File): - - if 'type' in file.attrs: - file_is_msmh5 = (file.attrs['type']=='msmh5') - - elif isinstance(file, MSMH5FileHandler): - - file = file.file - file_is_msmh5 = True - - else: - - from molsysmt.form.file_msmh5.is_form import is_form as is_file_msmh5_form - - file_is_msmh5 = is_file_msmh5_form(file) - - if file_is_msmh5: - file = h5py.File(file, "w") - needs_to_be_closed = True - - if not file_is_msmh5: - raise ValueError - - # Atoms - - atoms_df = item.atoms_dataframe - - n_atoms = atoms_df.shape[0] - - atoms = file['topology']['atoms'] - - atoms.attrs['n_atoms'] = n_atoms - - atoms['id'].resize((n_atoms,)) - atoms['name'].resize((n_atoms,)) - atoms['type'].resize((n_atoms,)) - - atoms['id'][:] = atoms_df['atom_id'].to_numpy(dtype=int) - atoms['name'][:] = atoms_df['atom_name'].to_numpy(dtype=str) - atoms['type'][:] = atoms_df['atom_type'].to_numpy(dtype=str) - - # Groups - - groups_df = atoms_df[['group_index', 'group_id', 'group_name', 'group_type', 'component_index']].drop_duplicates() - - n_groups = groups_df.shape[0] - - if n_groups==1: - if groups_df['group_index'].iloc[0] == None: - n_groups = 0 - - groups = file['topology']['groups'] - groups.attrs['n_groups'] = n_groups - - if n_groups > 0: - - if all(groups_df['group_id'].unique()==[None]): - groups_df['group_id']=groups_df['group_index'] - - if all(groups_df['group_name'].unique()==[None]): - groups_df['group_name']='UNK' - - if all(groups_df['group_type'].unique()==[None]): - groups_df['group_type']='UNK' - - atoms['group_index'].resize((n_atoms,)) - groups['id'].resize((n_groups,)) - groups['name'].resize((n_groups,)) - groups['type'].resize((n_groups,)) - - atoms['group_index'][:] = atoms_df['group_index'].to_numpy(dtype=int) - groups['id'][:] = groups_df['group_id'].to_numpy(dtype=int) - groups['name'][:] = groups_df['group_name'].to_numpy(dtype=str) - groups['type'][:] = groups_df['group_type'].to_numpy(dtype=str) - - # Components - - components_df = atoms_df[['component_index', 'component_id', 'component_name', 'component_type', 'molecule_index']].drop_duplicates() - - n_components = components_df.shape[0] - - if n_components==1: - if components_df['component_index'].iloc[0] == None: - n_components = 0 - - components = file['topology']['components'] - components.attrs['n_components'] = n_components - - if n_components > 0: - - if all(components_df['component_id'].unique()==[None]): - components_df['component_id']=components_df['component_index'] - - if all(components_df['component_name'].unique()==[None]): - components_df['component_name']='UNK' - - if all(components_df['component_type'].unique()==[None]): - components_df['component_type']='UNK' - - groups['component_index'].resize((n_groups,)) - components['id'].resize((n_components,)) - components['name'].resize((n_components,)) - components['type'].resize((n_components,)) - - groups['component_index'][:] = groups_df['component_index'].to_numpy(dtype=int) - components['id'][:] = components_df['component_id'].to_numpy(dtype=int) - components['name'][:] = components_df['component_name'].to_numpy(dtype=str) - components['type'][:] = components_df['component_type'].to_numpy(dtype=str) - - # Molecules - - molecules_df = atoms_df[['molecule_index', 'molecule_id', 'molecule_name', 'molecule_type', 'entity_index']].drop_duplicates() - - n_molecules = molecules_df.shape[0] - - if n_molecules==1: - if molecules_df['molecule_index'].iloc[0] == None: - n_molecules = 0 - - molecules = file['topology']['molecules'] - molecules.attrs['n_molecules'] = n_molecules - - if n_molecules > 0: - - if all(molecules_df['molecule_id'].unique()==[None]): - molecules_df['molecule_id']=molecules_df['molecule_index'] - - if all(molecules_df['molecule_name'].unique()==[None]): - molecules_df['molecule_name']='UNK' - - if all(molecules_df['molecule_type'].unique()==[None]): - molecules_df['molecule_type']='UNK' - - components['molecule_index'].resize((n_components,)) - molecules['id'].resize((n_molecules,)) - molecules['name'].resize((n_molecules,)) - molecules['type'].resize((n_molecules,)) - - components['molecule_index'][:] = components_df['molecule_index'].to_numpy(dtype=int) - molecules['id'][:] = molecules_df['molecule_id'].to_numpy(dtype=int) - molecules['name'][:] = molecules_df['molecule_name'].to_numpy(dtype=str) - molecules['type'][:] = molecules_df['molecule_type'].to_numpy(dtype=str) - - # Entities - - entities_df = atoms_df[['entity_index', 'entity_id', 'entity_name', 'entity_type']].drop_duplicates() - - n_entities = entities_df.shape[0] - - if n_entities==1: - if entities_df['entity_index'].iloc[0] == None: - n_entities = 0 - - entities = file['topology']['entities'] - entities.attrs['n_entities'] = n_entities - - if n_entities > 0: - - if all(entities_df['entity_id'].unique()==[None]): - entities_df['entity_id']=entities_df['entity_index'] - - if all(entities_df['entity_name'].unique()==[None]): - entities_df['entity_name']='UNK' - - if all(entities_df['entity_type'].unique()==[None]): - entities_df['entity_type']='UNK' - - molecules['entity_index'].resize((n_molecules,)) - entities['id'].resize((n_entities,)) - entities['name'].resize((n_entities,)) - entities['type'].resize((n_entities,)) - - molecules['entity_index'][:] = molecules_df['entity_index'].to_numpy(dtype=int) - entities['id'][:] = entities_df['entity_id'].to_numpy(dtype=int) - entities['name'][:] = entities_df['entity_name'].to_numpy(dtype=str) - entities['type'][:] = entities_df['entity_type'].to_numpy(dtype=str) - - - # Chains - - chains_df = atoms_df[['chain_index', 'chain_id', 'chain_name', 'chain_type']].drop_duplicates() - - n_chains = chains_df.shape[0] - - if n_chains==1: - if chains_df['chain_index'].iloc[0] == None: - n_chains = 0 - - chains = file['topology']['chains'] - chains.attrs['n_chains'] = n_chains - - if n_chains > 0: - - if all(chains_df['chain_id'].unique()==[None]): - chains_df['chain_id']=chains_df['chain_index'] - - if chains_df['chain_id'].dtype == 'O': - chains_df['chain_id']=chains_df['chain_index'] - - if all(chains_df['chain_name'].unique()==[None]): - chains_df['chain_name']='UNK' - - if all(chains_df['chain_type'].unique()==[None]): - chains_df['chain_type']='UNK' - - atoms['chain_index'].resize((n_atoms,)) - chains['id'].resize((n_chains,)) - chains['name'].resize((n_chains,)) - chains['type'].resize((n_chains,)) - - atoms['chain_index'][:] = atoms_df['chain_index'].to_numpy(dtype=int) - chains['id'][:] = chains_df['chain_id'].to_numpy(dtype=int) - chains['name'][:] = chains_df['chain_name'].to_numpy(dtype=str) - chains['type'][:] = chains_df['chain_type'].to_numpy(dtype=str) - - del(groups_df, components_df, molecules_df, entities_df, chains_df) - - # Bonds - - bonds_df = item.bonds_dataframe - - n_bonds = bonds_df.shape[0] - - bonds = file['topology']['bonds'] - bonds.attrs['n_bonds'] = n_bonds - - if n_bonds>0: - - if all(bonds_df['order'].unique()==[None]): - bonds_df['order']='UNK' - - if all(bonds_df['type'].unique()==[None]): - bonds_df['type']='UNK' - - bonds['atom1_index'].resize((n_bonds,)) - bonds['atom2_index'].resize((n_bonds,)) - bonds['order'].resize((n_bonds,)) - bonds['type'].resize((n_bonds,)) - - bonds['atom1_index'][:] = bonds_df['atom1_index'].to_numpy(dtype=int) - bonds['atom2_index'][:] = bonds_df['atom2_index'].to_numpy(dtype=int) - bonds['order'][:] = bonds_df['order'].to_numpy(dtype=str) - bonds['type'][:] = bonds_df['type'].to_numpy(dtype=str) - - - if needs_to_be_closed: - file.close() - - pass - diff --git a/molsysmt/form/molsysmt_Topology/write_topology_in_msmh5.py b/molsysmt/form/molsysmt_Topology/write_topology_in_msmh5.py new file mode 100644 index 000000000..c0499699c --- /dev/null +++ b/molsysmt/form/molsysmt_Topology/write_topology_in_msmh5.py @@ -0,0 +1,261 @@ +from molsysmt._private.digestion import digest + +@digest(form='molsysmt.Topology') +def write_topology_to_msmh5(item, file, atom_indices='all'): + + from h5py._hl.files import File as h5py_File + from molsysmt.native import MSMH5FileHandler + + file_is_msmh5 = False + needs_to_be_closed = False + + if isinstance(file, h5py_File): + + if 'type' in file.attrs: + file_is_msmh5 = (file.attrs['type']=='msmh5') + + elif isinstance(file, MSMH5FileHandler): + + file = file.file + file_is_msmh5 = True + + else: + + from molsysmt.form.file_msmh5.is_form import is_form as is_file_msmh5_form + + file_is_msmh5 = is_file_msmh5_form(file) + + if file_is_msmh5: + file = h5py.File(file, "w") + needs_to_be_closed = True + + if not file_is_msmh5: + raise ValueError + + # Atoms + + atoms_df = item.atoms_dataframe + + n_atoms = atoms_df.shape[0] + + atoms = file['topology']['atoms'] + + atoms.attrs['n_atoms'] = n_atoms + + atoms['id'].resize((n_atoms,)) + atoms['name'].resize((n_atoms,)) + atoms['type'].resize((n_atoms,)) + + atoms['id'][:] = atoms_df['atom_id'].to_numpy(dtype=int) + atoms['name'][:] = atoms_df['atom_name'].to_numpy(dtype=str) + atoms['type'][:] = atoms_df['atom_type'].to_numpy(dtype=str) + + # Groups + + groups_df = atoms_df[['group_index', 'group_id', 'group_name', 'group_type', 'component_index']].drop_duplicates() + + n_groups = groups_df.shape[0] + + if n_groups==1: + if groups_df['group_index'].iloc[0] == None: + n_groups = 0 + + groups = file['topology']['groups'] + groups.attrs['n_groups'] = n_groups + + if n_groups > 0: + + if all(groups_df['group_id'].unique()==[None]): + groups_df['group_id']=groups_df['group_index'] + + if all(groups_df['group_name'].unique()==[None]): + groups_df['group_name']='UNK' + + if all(groups_df['group_type'].unique()==[None]): + groups_df['group_type']='UNK' + + atoms['group_index'].resize((n_atoms,)) + groups['id'].resize((n_groups,)) + groups['name'].resize((n_groups,)) + groups['type'].resize((n_groups,)) + + atoms['group_index'][:] = atoms_df['group_index'].to_numpy(dtype=int) + groups['id'][:] = groups_df['group_id'].to_numpy(dtype=int) + groups['name'][:] = groups_df['group_name'].to_numpy(dtype=str) + groups['type'][:] = groups_df['group_type'].to_numpy(dtype=str) + + # Components + + components_df = atoms_df[['component_index', 'component_id', 'component_name', 'component_type', 'molecule_index']].drop_duplicates() + + n_components = components_df.shape[0] + + if n_components==1: + if components_df['component_index'].iloc[0] == None: + n_components = 0 + + components = file['topology']['components'] + components.attrs['n_components'] = n_components + + if n_components > 0: + + if all(components_df['component_id'].unique()==[None]): + components_df['component_id']=components_df['component_index'] + + if all(components_df['component_name'].unique()==[None]): + components_df['component_name']='UNK' + + if all(components_df['component_type'].unique()==[None]): + components_df['component_type']='UNK' + + groups['component_index'].resize((n_groups,)) + components['id'].resize((n_components,)) + components['name'].resize((n_components,)) + components['type'].resize((n_components,)) + + groups['component_index'][:] = groups_df['component_index'].to_numpy(dtype=int) + components['id'][:] = components_df['component_id'].to_numpy(dtype=int) + components['name'][:] = components_df['component_name'].to_numpy(dtype=str) + components['type'][:] = components_df['component_type'].to_numpy(dtype=str) + + # Molecules + + molecules_df = atoms_df[['molecule_index', 'molecule_id', 'molecule_name', 'molecule_type', 'entity_index']].drop_duplicates() + + n_molecules = molecules_df.shape[0] + + if n_molecules==1: + if molecules_df['molecule_index'].iloc[0] == None: + n_molecules = 0 + + molecules = file['topology']['molecules'] + molecules.attrs['n_molecules'] = n_molecules + + if n_molecules > 0: + + if all(molecules_df['molecule_id'].unique()==[None]): + molecules_df['molecule_id']=molecules_df['molecule_index'] + + if all(molecules_df['molecule_name'].unique()==[None]): + molecules_df['molecule_name']='UNK' + + if all(molecules_df['molecule_type'].unique()==[None]): + molecules_df['molecule_type']='UNK' + + components['molecule_index'].resize((n_components,)) + molecules['id'].resize((n_molecules,)) + molecules['name'].resize((n_molecules,)) + molecules['type'].resize((n_molecules,)) + + components['molecule_index'][:] = components_df['molecule_index'].to_numpy(dtype=int) + molecules['id'][:] = molecules_df['molecule_id'].to_numpy(dtype=int) + molecules['name'][:] = molecules_df['molecule_name'].to_numpy(dtype=str) + molecules['type'][:] = molecules_df['molecule_type'].to_numpy(dtype=str) + + # Entities + + entities_df = atoms_df[['entity_index', 'entity_id', 'entity_name', 'entity_type']].drop_duplicates() + + n_entities = entities_df.shape[0] + + if n_entities==1: + if entities_df['entity_index'].iloc[0] == None: + n_entities = 0 + + entities = file['topology']['entities'] + entities.attrs['n_entities'] = n_entities + + if n_entities > 0: + + if all(entities_df['entity_id'].unique()==[None]): + entities_df['entity_id']=entities_df['entity_index'] + + if all(entities_df['entity_name'].unique()==[None]): + entities_df['entity_name']='UNK' + + if all(entities_df['entity_type'].unique()==[None]): + entities_df['entity_type']='UNK' + + molecules['entity_index'].resize((n_molecules,)) + entities['id'].resize((n_entities,)) + entities['name'].resize((n_entities,)) + entities['type'].resize((n_entities,)) + + molecules['entity_index'][:] = molecules_df['entity_index'].to_numpy(dtype=int) + entities['id'][:] = entities_df['entity_id'].to_numpy(dtype=int) + entities['name'][:] = entities_df['entity_name'].to_numpy(dtype=str) + entities['type'][:] = entities_df['entity_type'].to_numpy(dtype=str) + + + # Chains + + chains_df = atoms_df[['chain_index', 'chain_id', 'chain_name', 'chain_type']].drop_duplicates() + + n_chains = chains_df.shape[0] + + if n_chains==1: + if chains_df['chain_index'].iloc[0] == None: + n_chains = 0 + + chains = file['topology']['chains'] + chains.attrs['n_chains'] = n_chains + + if n_chains > 0: + + if all(chains_df['chain_id'].unique()==[None]): + chains_df['chain_id']=chains_df['chain_index'] + + if chains_df['chain_id'].dtype == 'O': + chains_df['chain_id']=chains_df['chain_index'] + + if all(chains_df['chain_name'].unique()==[None]): + chains_df['chain_name']='UNK' + + if all(chains_df['chain_type'].unique()==[None]): + chains_df['chain_type']='UNK' + + atoms['chain_index'].resize((n_atoms,)) + chains['id'].resize((n_chains,)) + chains['name'].resize((n_chains,)) + chains['type'].resize((n_chains,)) + + atoms['chain_index'][:] = atoms_df['chain_index'].to_numpy(dtype=int) + chains['id'][:] = chains_df['chain_id'].to_numpy(dtype=int) + chains['name'][:] = chains_df['chain_name'].to_numpy(dtype=str) + chains['type'][:] = chains_df['chain_type'].to_numpy(dtype=str) + + del(groups_df, components_df, molecules_df, entities_df, chains_df) + + # Bonds + + bonds_df = item.bonds_dataframe + + n_bonds = bonds_df.shape[0] + + bonds = file['topology']['bonds'] + bonds.attrs['n_bonds'] = n_bonds + + if n_bonds>0: + + if all(bonds_df['order'].unique()==[None]): + bonds_df['order']='UNK' + + if all(bonds_df['type'].unique()==[None]): + bonds_df['type']='UNK' + + bonds['atom1_index'].resize((n_bonds,)) + bonds['atom2_index'].resize((n_bonds,)) + bonds['order'].resize((n_bonds,)) + bonds['type'].resize((n_bonds,)) + + bonds['atom1_index'][:] = bonds_df['atom1_index'].to_numpy(dtype=int) + bonds['atom2_index'][:] = bonds_df['atom2_index'].to_numpy(dtype=int) + bonds['order'][:] = bonds_df['order'].to_numpy(dtype=str) + bonds['type'][:] = bonds_df['type'].to_numpy(dtype=str) + + + if needs_to_be_closed: + file.close() + + pass + diff --git a/molsysmt/native/__init__.py b/molsysmt/native/__init__.py index 2d2537d36..1a6602321 100644 --- a/molsysmt/native/__init__.py +++ b/molsysmt/native/__init__.py @@ -1,7 +1,9 @@ from .molsys import MolSys from .topology import Topology -from .topology2 import Topology2 from .structures import Structures +from .molsys2 import MolSys2 +from .topology2 import Topology2 +from .structures2 import Structures2 from .molecular_mechanics import MolecularMechanics from .trajectory_file import TrajectoryFile #from .card import Card diff --git a/molsysmt/native/molsys2.py b/molsysmt/native/molsys2.py new file mode 100644 index 000000000..6079d4994 --- /dev/null +++ b/molsysmt/native/molsys2.py @@ -0,0 +1,49 @@ +from molsysmt._private.variables import is_all + +class MolSys2: + + def __init__(self): + + from .topology import Topology2 + from .structures import Structures2 + from .molecular_mechanics import MolecularMechanics + + self.topology = Topology2() + self.structures = Structures2() + self.molecular_mechanics = MolecularMechanics() + + def extract(self, atom_indices='all', structure_indices='all'): + + if is_all(atom_indices) and is_all(structure_indices): + + return self.copy() + + else: + + tmp_item = MolSys2() + tmp_item.topology = self.topology.extract(atom_indices=atom_indices, structure_indices=structure_indices) + tmp_item.structures = self.structures.extract(atom_indices=atom_indices, structure_indices=structure_indices) + tmp_item.molecular_mechanics = self.molecular_mechanics.copy() + + return tmp_item + + def add(self, item, selection='all', structure_indices='all', syntax='MolSysMT'): + + from molsysmt import convert, get_form, select + atom_indices=select(item, selection=selection, syntax=syntax) + self.topology.add(item.topology, selection=atom_indices) + self.structures.add(item.structures, selection=atom_indices, structure_indices=structure_indices) + + def load_frames(self, selection='all', structure_indices='all', syntax='MolSysMT'): + + atom_indices = self.select(selection=selection, syntax=syntax) + return self.structures.load_frames(atom_indices=atom_indices, structure_indices=structure_indices) + + def copy(self): + + tmp_item = MolSys2() + tmp_item.topology = self.topology.copy() + tmp_item.structures = self.structures.copy() + tmp_item.molecular_mechanics = self.molecular_mechanics.copy() + return tmp_item + diff --git a/molsysmt/native/msmh5_file_handler.py b/molsysmt/native/msmh5_file_handler.py index a2b12d266..8c735a02f 100644 --- a/molsysmt/native/msmh5_file_handler.py +++ b/molsysmt/native/msmh5_file_handler.py @@ -1,12 +1,14 @@ import h5py import numpy as np +from molsysmt import pyunitwizard as puw msmh5_version = "1.0" class MSMH5FileHandler(): def __init__(self, filename, io_mode='r', creator='MolSysMT', compression="gzip", compression_opts=4, - int_precision='single', float_precision='single', closed=False): + int_precision='single', float_precision='single', time_unit=None, energy_unit=None, + temperature_unit=None, charge_unit=None, mass_unit=None, closed=False): self.file = None @@ -14,7 +16,8 @@ def __init__(self, filename, io_mode='r', creator='MolSysMT', compression="gzip" self.file = _new_msmfile(filename, creator=creator, compression=compression, compression_opts=compression_opts, int_precision=int_precision, - float_precision=float_precision) + float_precision=float_precision, length_unit=None, time_unit=None, + energy_unit=None, temperature_unit=None, charge_unit=None, mass_unit=None): elif io_mode=='r': @@ -27,17 +30,34 @@ def __init__(self, filename, io_mode='r', creator='MolSysMT', compression="gzip" if closed: self.file.close() + def write_topology(topology, selection='all', syntax='MolSysMT'): + + from molsysmt.basic import get_form, select, convert + from molsysmt._private.variables import is_all + + if is_all(selection): + atom_indices = 'all' + else: + atom_indices = select(topology, selection=selection, syntax=syntax) + + try: + topology_form = get_form(topology) + _dict_modules[topology_form].write_topology_in_msmh5(topology, self.file, + atom_indices=atom_indices) + except: + aux_topology = convert(topology, to_form='molsysmt.Topology', selection=atom_indices) + _dict_modules['molsysmt.Topology'].write_topology_in_msmh5(aux_topology, self.file, + atom_indices=atom_indices) + del(aux_topology) + def close(self): self.file.close() def _new_msmfile(filename, creator='MolSysMT', compression="gzip", compression_opts=4, - int_precision='single', float_precision='single'): - - if int_precision=='single': - int_type=np.int32 - elif int_precision=='double': - int_type=np.int64 + int_precision='single', float_precision='single', length_unit=None, + time_unit=None, energy_unit=None, temperature_unit=None, charge_unit=None, + mass_unit=None): if float_precision=='single': float_type=np.float32 @@ -49,12 +69,51 @@ def _new_msmfile(filename, creator='MolSysMT', compression="gzip", compression_o file.attrs['version'] = msmh5_version file.attrs['type'] = "msmh5" file.attrs['creator'] = creator + file.attrs['int_precision'] = int_precision + file.attrs['float_precision'] = int_precision + file.attrs['written_structures'] = 0 + + if length_unit is None: + structures.attrs['length_unit']=puw.get_standard_units(dimensionality={'[L]':1}, form='string') + else: + structures.attrs['length_unit']=puw.get_unit(length_unit, to_form='string') + + if time_unit is None: + structures.attrs['time_unit']=puw.get_standard_units(dimensionality={'[T]':1}, form='string') + else: + structures.attrs['time_unit']=puw.get_unit(time_unit, to_form='string') + + if energy_unit is None: + structures.attrs['energy_unit']=puw.get_standard_units(dimensionality={'[L]':2, '[M]':1, + '[T]':-2, '[mol]':-1}, form='string') + else: + structures.attrs['energy_unit']=puw.get_unit(energy_unit, to_form='string') + + if temperature_unit is None: + structures.attrs['temperature_unit']=puw.get_standard_units(dimensionality={'[K]':1}, form='string') + else: + structures.attrs['temperature_unit']=puw.get_unit(temperature_unit, to_form='string') + + if temperature_unit is None: + structures.attrs['charge_unit']=puw.get_standard_units(dimensionality={'[A]':1, '[T]':1}, form='string') + else: + structures.attrs['charge_unit']=puw.get_unit(temperature_unit, to_form='string') + + if temperature_unit is None: + structures.attrs['mass_unit']=puw.get_standard_units(dimensionality={'[M]':1}, form='string') + else: + structures.attrs['mass_unit']=puw.get_unit(mass_unit, to_form='string') global_dataset_options = { 'compression':compression, 'compression_opts':compression_opts, } + if int_precision=='single': + int_type=np.int32 + elif int_precision=='double': + int_type=np.int64 + # Topology topology = file.create_group("topology") @@ -139,14 +198,10 @@ def _new_msmfile(filename, creator='MolSysMT', compression="gzip", compression_o structures.attrs['time_step']=0.0 structures.attrs['constant_id_step']=False structures.attrs['id_step']=0 - structures.attrs['constant_volume']=False - structures.attrs['constant_temperature']=False - structures.attrs['length_unit']='nm' - structures.attrs['time_unit']='ps' - structures.attrs['energy_unit']='kJ/mol' + structures.attrs['constant_box']=False structures.create_dataset('id', (0,), dtype=int_type, maxshape=(None,), **global_dataset_options) - structures.create_dataset('box', (0,6), dtype=float_type, maxshape=(None,6), **global_dataset_options) + structures.create_dataset('box', (0,3,3), dtype=float_type, maxshape=(None,3,3), **global_dataset_options) structures.create_dataset('coordinates', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options) structures.create_dataset('velocities', (0,0,3), dtype=float_type, maxshape=(None,None,3), **global_dataset_options) structures.create_dataset('kinetic_energy', (0,), dtype=float_type, maxshape=(None,), **global_dataset_options) diff --git a/molsysmt/native/structures2.py b/molsysmt/native/structures2.py new file mode 100644 index 000000000..3d45b3e68 --- /dev/null +++ b/molsysmt/native/structures2.py @@ -0,0 +1,251 @@ +from copy import deepcopy +import numpy as np +from molsysmt import pyunitwizard as puw +from molsysmt._private.variables import is_all +from molsysmt.basic import get +from molsysmt._private.exceptions import IteratorError +from molsysmt._private.digestion import digest + +class Structures2: + """ Class to store the trajectory data of a molecular system + + + Attributes + ---------- + box : pint.Quantity of shape (n_structures, 3, 3) + The box of the molecular system in nanometers. + + coordinates : pint.Quantity of shape (n_structures, n_atoms, 3) + The coordinates of the trajectory for each frame. + + velocities : pint.Quantity of shape (n_structures, n_atoms, 3) + The velocities of the trajectory for each frame. + + n_atoms : int + Number of atoms in the trajectory. + + n_structures : int + Number of structures or frames in the trajectory. + + time : pint.Quantity of shape (n_structures, ) + The times of the trajectory in picoseconds. + + structure_id : + + + """ + + @digest() + def __init__(self, constant_time_step=False, time_step=None, constant_id_step=False, + id_step=None, constant_box=False, + structure_id=None, time=None, coordinates=None, velocities=None, box=None, + occupancy=None, b_factor=None, alternate_location=None, bioassembly=None, + temperature=None, potential_energy=None, kinetic_energy=None): + + self.n_atoms = 0 + self.n_structures = 0 + + self.constant_time_step = constant_time_step + self.time_step = time_step + + self.constant_id_step = constant_id_step + self.id_step = id_step + + self.constant_box = constant_box + + self.structure_id = structure_id + self.time = time + self.coordinates = coordinates + self.velocities = velocities + self.box = box + self.occupancy = occupancy + self.b_factor = b_factor + self.alternate_location = alternate_location + self.bioassembly = bioassembly + self.temperature = temperature + self.potential_energy = potential_energy + self.kinetic_energy = kinetic_energy + + if coordinates is not None: + self.n_structures = coordinates.shape[0] + self.n_atoms = coordinates.shape[1] + elif velocities is not None: + self.n_structures = velocities.shape[0] + self.n_atoms = velocities.shape[1] + elif box is not None: + self.n_structures = box.shape[0] + else: + self.n_structures = 0 + self.n_atoms = 0 + + @staticmethod + def _concatenate_arrays(array_1, array_2): + """ Concatenates two arrays provided that they are not null.""" + if array_2 is not None: + if array_1 is None: + raise ValueError( + f"The trajectory has no array to append the new frame.") + else: + return np.concatenate([array_1, array_2]) + + @digest() + def append_structures(self, structure_id=None, time=None, coordinates=None, velocities=None, + box=None, temperature=None, potential_energy=None, kinetic_energy=None): + """ Append structures or frames to this object. + + box : pint.Quantity of shape (n_structures, 3, 3) + The box of the structures + + coordinates : pint.Quantity of shape (n_structures, n_atoms, 3) + The coordinates of the trajectory for each frame of it in nanometers. + + time : pint.Quantity of shape (n_structures, ) + The times of the trajectory in picoseconds + + """ + + n_structures = 0 + n_atoms = 0 + + if structure_id is not None and not isinstance(structure_id, (list, np.ndarray)): + structure_id = np.array([structure_id]) + + if time is not None: + time = puw.standardize(time) + if coordinates is not None: + coordinates = puw.standardize(coordinates) + n_structures = coordinates.shape[0] + n_atoms = coordinates.shape[1] + if velocities is not None: + velocities = puw.standardize(velocities) + if box is not None: + box = puw.standardize(box) + if temperature is not None: + temperature = puw.standardize(temperature) + if potential_energy is not None: + potential_energy = puw.standardize(potential_energy) + if kinetic_energy is not None: + kinetic_energy = puw.standardize(kinetic_energy) + + if self.n_structures == 0: + + self.coordinates = coordinates + self.velocities = velocities + self.structure_id = structure_id + self.time = time + self.box = box + self.temperature = temperature + self.potential_energy = potential_energy + self.kinetic_energy = kinetic_energy + self.n_structures = n_structures + self.n_atoms = n_atoms + + else: + + if n_atoms != self.n_atoms: + raise ValueError( + "The coordinates to be appended in the system " + "need to have the same number of atoms.") + + self.structure_id = self._concatenate_arrays(self.structure_id, structure_id) + self.time = self._concatenate_arrays(self.time, time) + self.box = self._concatenate_arrays(self.box, box) + self.temperature = self._concatenate_arrays(self.temperature, temperature) + self.potential_energy = self._concatenate_arrays(self.potential_energy, + potential_energy) + self.kinetic_energy = self._concatenate_arrays(self.kinetic_energy, + kinetic_energy) + + self.coordinates = self._concatenate_arrays(self.coordinates, coordinates) + self.velocities = self._concatenate_arrays(self.velocities, velocities) + self.n_structures += n_structures + + @digest() + def extract(self, atom_indices='all', structure_indices='all'): + """ Returns a new Structures object with the specified atoms and/or + structures. + + Parameters + ---------- + atom_indices : str or arraylike of int, default='all' + The indices of the extracted atoms. + + structure_indices : str or arraylike of int, default='all' + The indices of the extracted structures or frames. + + Returns + ------- + Structures + The new structures object with the extracted atoms and frames. + """ + if is_all(atom_indices) and is_all(structure_indices): + return self.copy() + + else: + + extract_structures = not is_all(structure_indices) + + if self.structure_id is not None and extract_structures: + structure_id = self.structure_id[structure_indices] + else: + structure_id = deepcopy(self.structure_id) + + if self.time is not None and extract_structures: + time = self.time[structure_indices] + else: + time = deepcopy(self.time) + + if self.box is not None and extract_structures: + box = self.box[structure_indices] + else: + box = deepcopy(self.box) + + if self.temperature is not None and extract_structures: + temperature = self.temperature[structure_indices] + else: + temperature = deepcopy(self.temperature) + + if self.potential_energy is not None and extract_structures: + potential_energy = self.potential_energy[structure_indices] + else: + potential_energy = deepcopy(self.potential_energy) + + if self.kinetic_energy is not None and extract_structures: + kinetic_energy = self.kinetic_energy[structure_indices] + else: + kinetic_energy = deepcopy(self.kinetic_energy) + + if self.coordinates is not None: + if not is_all(atom_indices): + coordinates = self.coordinates[:, atom_indices, :] + else: + coordinates = deepcopy(self.coordinates) + if not is_all(structure_indices): + coordinates = coordinates[structure_indices, :, :] + else: + coordinates = None + + if self.velocities is not None: + if not is_all(atom_indices): + velocities = self.velocities[:, atom_indices, :] + else: + velocities = deepcopy(self.velocities) + if not is_all(structure_indices): + velocities = velocities[structure_indices, :, :] + else: + velocities = None + + return Structures(structure_id=structure_id, + time=time, + coordinates=coordinates, + velocities=velocities, + box=box, + temperature=temperature, + potential_energy=potential_energy, + kinetic_energy=kinetic_energy + ) + + def copy(self): + """ Returns a copy of the structures.""" + return deepcopy(self) + diff --git a/molsysmt/thirds/openmm/reporters/__init__.py b/molsysmt/thirds/openmm/reporters/__init__.py index b3077a78b..bf2315596 100644 --- a/molsysmt/thirds/openmm/reporters/__init__.py +++ b/molsysmt/thirds/openmm/reporters/__init__.py @@ -1,2 +1,3 @@ from .molsysmt_TrajectoryDict import MolSysMTTrajectoryDictReporter from .tqdm import TQDMReporter +from .msmh5 import MSMH5Reporter diff --git a/molsysmt/thirds/openmm/reporters/molsysmt_TrajectoryDict.py b/molsysmt/thirds/openmm/reporters/molsysmt_TrajectoryDict.py index 455a9fcd6..6bc827bca 100644 --- a/molsysmt/thirds/openmm/reporters/molsysmt_TrajectoryDict.py +++ b/molsysmt/thirds/openmm/reporters/molsysmt_TrajectoryDict.py @@ -4,24 +4,26 @@ class MolSysMTTrajectoryDictReporter(object): def __init__(self, reportInterval, time=True, coordinates=True, velocities=False, - potentialEnergy=False, kineticEnergy=False, totalEnergy=False, temperature=False, + potentialEnergy=False, kineticEnergy=False, temperature=False, box=False): - self._initialized = False - self._reportInterval = reportInterval - self._dict = {} + self._needsPositions = coordinates + self._needsVelocities = velocities + self._needsForces = False + self._needEnergy = (potentialEnergy or kineticEnergy or totalEnergy or temperature) + self._time = time + self._box = box self._coordinates = coordinates self._velocities = velocities self._potentialEnergy = potentialEnergy self._kineticEnergy = kineticEnergy - self._totalEnergy = totalEnergy self._temperature = temperature - self._box = box - self._needsPositions = coordinates - self._needsVelocities = velocities - self._needsForces = False - self._needEnergy = (potentialEnergy or kineticEnergy or totalEnergy or temperature) + + + self._initialized = False + self._reportInterval = reportInterval + self._dict = {} if self._time: self._dict['time']=[]*unit.picoseconds diff --git a/molsysmt/thirds/openmm/reporters/msmh5.py b/molsysmt/thirds/openmm/reporters/msmh5.py new file mode 100644 index 000000000..a1cdd7053 --- /dev/null +++ b/molsysmt/thirds/openmm/reporters/msmh5.py @@ -0,0 +1,126 @@ +from molsysmt._private.variables import is_all +from molsysmt.basic import select +import numpy as np + +class MSMH5Reporter(object): + + def __init__(self, file, reportInterval, stepSize, steps=None, + context=None, topology=None, selection='all', + time=True, box=True, coordinates=True, velocities=False, + potentialEnergy=False, kineticEnergy=False, temperature=False, + includeInitialContext=True, constantReportInterval=True, + constantStepSize=True, constantBox=True, + compression='gzip', compression_opts=4, + int_precision='single', float_precision='single', + syntax='MolSysMT'): + + self._initialized = False + + self._needsPositions = coordinates + self._needsVelocities = velocities + self._needsForces = False + self._needEnergy = (potentialEnergy or kineticEnergy or totalEnergy or temperature) + + self._time = time + self._box = box + self._coordinates = coordinates + self._velocities = velocities + self._potentialEnergy = potentialEnergy + self._kineticEnergy = kineticEnergy + self._temperature = temperature + + self._reportInterval = reportInterval + self._stepSize = stepSize + self._steps = steps + + self._atom_indices = selection + + if not is_all(self._atom_indices): + if topology is None: + raise ValueError('A topology object is needed.') + self._atom_indices = select(topology, selection=selection, syntax=syntax) + + self._file_handler = MSMH5FileHandler(file, io_mode='w', creator='OpenMM', + compression=compression, compression_opts=compression_opts, + int_precision=int_precision, float_precision=float_precision, + length_unit='nm', time_unit='ps', energy_unit='kJ/mol', + temperature_unit='kelvin', charge_unit='e', mass_unit='dalton') + + if topology is not None: + self._file_handler.write_topology(topology, selection=self._atom_indices) + + + def _initialize(self, simulation): + + import openmm as mm + system = simulation.system + frclist = system.getForces() + if self._temperature: + dof = 0 + for i in range(system.getNumParticles()): + if system.getParticleMass(i) > 0*unit.dalton: + dof += 3 + dof -= system.getNumConstraints() + if any(isinstance(frc, mm.CMMotionRemover) for frc in frclist): + dof -= 3 + self._dof = dof + + # Tengo que detectar si hay barostato + + self._initialized = True + + def describeNextReport(self, simulation): + + steps_left = simulation.currentStep % self._reportInterval + steps = self._reportInterval - steps_left + return (steps, self._needsPositions, self._needsVelocities, self._needsForces, self._needEnergy) + + def report(self, simulation, state): + + if not self._initialized: + self._initialize(simulation) + + if self._time: + time = state.getTime() + self._dict['time'].append(time) + + if self._coordinates: + coordinates = state.getPositions(asNumpy=True) + self._dict['coordinates'].append(coordinates) + + if self._velocities: + velocities = state.getVelocities(asNumpy=True) + self._dict['velocities'].append(velocities) + + if self._potentialEnergy: + potential_energy = state.getPotentialEnergy() + self._dict['potential_energy'].append(potential_energy) + + if self._kineticEnergy: + kinetic_energy = state.getKineticEnergy() + self._dict['kinetic_energy'].append(kinetic_energy) + + if self._totalEnergy: + if not self._kineticEnergy: + kinetic_energy = state.getKineticEnergy() + if not self._potentialEnergy: + potential_energy = state.getPotentialEnergy() + self._dict['total_energy'].append(kinetic_energy+potential_energy) + + if self._temperature: + if not self._kineticEnergy: + kinetic_energy = state.getKineticEnergy() + temperature = 2 * kinetic_energy / (self._dof * unit.MOLAR_GAS_CONSTANT_R) + self._dict['temperature'].append(temperature) + + if self._box: + box = state.getPeriodicBoxVectors(asNumpy=True) + self._dict['box'].append(box) + + def finalize(self): + + for key in self._dict.keys(): + self._dict[key]._value = np.stack(self._dict[key]._value) + + return self._dict + diff --git a/sandbox/checa.py b/sandbox/checa.py deleted file mode 100644 index 6946ed04b..000000000 --- a/sandbox/checa.py +++ /dev/null @@ -1,13 +0,0 @@ -import molsysmt as msm - -molsys = msm.convert('1tcd_solv_min.pdb') -molsys = msm.remove(molsys, structure_indices='all') -traj = msm.get('traj_eq.h5', structure_indices=range(10), time=True, coordinates=True, - box=True, temperature=True, potential_energy=True, kinetic_energy=True, - output_type='dictionary') - -msm.append_structures(molsys, traj) - -msm.info(molsys) - -structures = msm.convert(molsys, 'molsysmt.Structures')