diff --git a/molsysmt/native/topology.py b/molsysmt/native/topology.py index 490e77cee..dbf314596 100644 --- a/molsysmt/native/topology.py +++ b/molsysmt/native/topology.py @@ -20,7 +20,7 @@ def __init__(self, n_atoms=0): def _fix_null_values(self): for column in self: - self[column].fillna(pd.NA, inplace=True) + self[column]=self[column].fillna(pd.NA) class Groups_DataFrame(pd.DataFrame): @@ -39,7 +39,7 @@ def __init__(self, n_groups=0): def _fix_null_values(self): for column in self: - self[column].fillna(pd.NA, inplace=True) + self[column]=self[column].fillna(pd.NA) class Components_DataFrame(pd.DataFrame): @@ -58,7 +58,7 @@ def __init__(self, n_components=0): def _fix_null_values(self): for column in self: - self[column].fillna(pd.NA, inplace=True) + self[column]=self[column].fillna(pd.NA) class Molecules_DataFrame(pd.DataFrame): @@ -77,7 +77,7 @@ def __init__(self, n_molecules=0): def _fix_null_values(self): for column in self: - self[column].fillna(pd.NA, inplace=True) + self[column]=self[column].fillna(pd.NA) class Entities_DataFrame(pd.DataFrame): @@ -95,7 +95,7 @@ def __init__(self, n_entities=0): def _fix_null_values(self): for column in self: - self[column].fillna(pd.NA, inplace=True) + self[column]=self[column].fillna(pd.NA) class Chains_DataFrame(pd.DataFrame): @@ -113,7 +113,7 @@ def __init__(self, n_chains=0): def _fix_null_values(self): for column in self: - self[column].fillna(pd.NA, inplace=True) + self[column]=self[column].fillna(pd.NA) class Bonds_DataFrame(pd.DataFrame): @@ -133,12 +133,12 @@ def __init__(self, n_bonds=0): def _fix_null_values(self): for column in self: - self[column].fillna(pd.NA, inplace=True) + self[column]=self[column].fillna(pd.NA) def _sort_bonds(self): - self_mask = self['atom1_index'] > self['atom2_index'] - self.update(self.loc[self_mask].rename({'atom1_index': 'atom2_index', 'atom2_index': 'atom1_index'}, axis=1)) + mask = self['atom1_index'] > self['atom2_index'] + self.loc[mask, ['atom1_index', 'atom2_index']] = self.loc[mask, ['atom2_index', 'atom1_index']].values self.sort_values(by=['atom1_index', 'atom2_index'], inplace=True) self.reset_index(drop=True, inplace=True) @@ -344,15 +344,89 @@ def rebuild_components(self, redefine_indices=True, redefine_ids=True, redefine_ self.components["component_id"] = np.arange(self.components.shape[0], dtype=int) - #if redefine_names: + if redefine_types: + + from molsysmt.element.component.get_component_type import _get_component_type_from_group_names_and_types + + aux_df = self.groups.groupby('component_index').agg(group_name=('group_name', list), + group_type=('group_type', list)) + for row in aux_df.itertuples(index=True): + component_type = _get_component_type_from_group_names_and_types(row.group_name, row.group_type) + self.components.iloc[row.Index,2] = component_type + + if redefine_names: + + from molsysmt.element.group.small_molecule import small_molecule_names + + aux_df = self.groups.groupby('component_index').agg(group_name=('group_name', list), + group_type=('group_type', list)) + + component_types = self.components['component_type'].to_numpy() + + counter = {'peptide':0, 'protein':0, 'small molecule':0, 'unknown':0} + + peptides = {} + proteins = {} + small_molecules = {} + + for component_type, row in zip(component_types, aux_df.itertuples(index=True)): + + if component_type == 'peptide': + + string_peptide = ','.join(row.group_name) + + if string_peptide in peptides: + component_name = peptides[string_peptide] + else: + component_name = component_type+' '+str(counter[component_type]) + peptides[string_peptide] = component_name + counter[component_type] += 1 + + elif component_type == 'protein': + + string_protein = ','.join(row.group_name) + + if string_protein in proteins: + component_name = proteins[string_protein] + else: + component_name = component_type+' '+str(counter[component_type]) + proteins[string_protein] = component_name + counter[component_type] += 1 + + elif component_type == 'small molecule': + + group_name = row.group_name[0] + + if group_name in small_molecules: + component_name = small_molecules[group_name] + else: + if group_name in small_molecule_names: + component_name = small_molecule_names[group_name] + else: + component_name = group_name + small_molecules[component_name] = component_name + + elif component_type in ['ion', 'lipid']: + + component_name = row.group_name[0] + + elif component_type in ['water']: + + component_name = 'water' + + else: + + component_name = 'unknown '+str(counter['unknown']) + counter['unknown']+=1 + + self.components.iloc[row.Index,1] = component_name + + + # component_name = get_component_name(self, element='component', redefine_names=True, skip_digestion=True) # self.components["component_name"] = np.array(component_name, dtype=object) # del component_name - #if redefine_types: - # component_type = get_component_type(self, element='component', redefine_types=True, skip_digestion=True) - # self.components["component_type"] = np.array(component_type, dtype=object) - # del component_type def rebuild_molecules(self, redefine_indices=True, redefine_ids=True, redefine_names=True, redefine_types=True): @@ -464,7 +538,7 @@ def rebuild_entities(self, redefine_indices=True, redefine_ids=True, redefine_na else: entity_index = aux_dict[molecule_name] else: - if 'unknown' in aux_dict: + if 'unknown' not in aux_dict: aux_dict['unknown'] = count entity_index = count count += 1 diff --git a/sandbox/Test_2nzt.ipynb b/sandbox/Test_2nzt.ipynb index 28b8aaeec..21751aa61 100644 --- a/sandbox/Test_2nzt.ipynb +++ b/sandbox/Test_2nzt.ipynb @@ -28,7 +28,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f2af978b2fe34ddb80cb876fca7ea1e5", + "model_id": "cf58aac2129443ffb36647ad5696de50", "version_major": 2, "version_minor": 0 }, @@ -45,77 +45,27 @@ { "cell_type": "code", "execution_count": 3, + "id": "3bf5eb4c-2b5b-4c2a-9659-157db9eeb032", + "metadata": {}, + "outputs": [], + "source": [ + "file = '/home/diego/Ixtlilton/hot_projects/hexii/AF-P52789-F1-model_v4.pdb'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "78decf3e-a27b-4949-ba36-1d022c6aee24", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:23: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " self[column].fillna(pd.NA, inplace=True)\n", - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:42: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " self[column].fillna(pd.NA, inplace=True)\n", - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:61: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " self[column].fillna(pd.NA, inplace=True)\n", - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:80: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " self[column].fillna(pd.NA, inplace=True)\n", - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:98: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " self[column].fillna(pd.NA, inplace=True)\n", - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:116: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " self[column].fillna(pd.NA, inplace=True)\n", - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:136: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", - "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", - "\n", - "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", - "\n", - "\n", - " self[column].fillna(pd.NA, inplace=True)\n", - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:141: FutureWarning: Downcasting behavior in Series and DataFrame methods 'where', 'mask', and 'clip' is deprecated. In a future version this will not infer object dtypes or cast all-round floats to integers. Instead call result.infer_objects(copy=False) for object inference, or cast round floats explicitly. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " self.update(self.loc[self_mask].rename({'atom1_index': 'atom2_index', 'atom2_index': 'atom1_index'}, axis=1))\n", - "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:141: FutureWarning: Downcasting behavior in Series and DataFrame methods 'where', 'mask', and 'clip' is deprecated. In a future version this will not infer object dtypes or cast all-round floats to integers. Instead call result.infer_objects(copy=False) for object inference, or cast round floats explicitly. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", - " self.update(self.loc[self_mask].rename({'atom1_index': 'atom2_index', 'atom2_index': 'atom1_index'}, axis=1))\n" - ] - } - ], + "outputs": [], "source": [ - "molsys = msm.convert('2LAO.pdb', 'molsysmt.Topology')" + "molsys = msm.convert(file, 'molsysmt.Topology')" ] }, { "cell_type": "code", - "execution_count": 8, - "id": "b0f0657d-e5e5-4d4f-8105-c034940879b1", + "execution_count": 7, + "id": "fca1bd2f-bea7-4e7f-b825-4c44c5a3672f", "metadata": {}, "outputs": [ { @@ -139,125 +89,40 @@ " \n", " \n", " \n", - " molecule_id\n", - " molecule_name\n", - " molecule_type\n", - " entity_index\n", + " entity_id\n", + " entity_name\n", + " entity_type\n", " \n", " \n", " \n", " \n", " 0\n", " 0\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " 1\n", - " 1\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " 2\n", - " 2\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " 3\n", - " 3\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " 4\n", - " 4\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 85\n", - " 85\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " 86\n", - " 86\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " 87\n", - " 87\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " 88\n", - " 88\n", - " nan\n", - " nan\n", - " <NA>\n", - " \n", - " \n", - " 89\n", - " 89\n", - " nan\n", - " nan\n", - " <NA>\n", + " protein 0\n", + " protein\n", " \n", " \n", "\n", - "

90 rows × 4 columns

\n", "" ], "text/plain": [ - " molecule_id molecule_name molecule_type entity_index\n", - "0 0 nan nan \n", - "1 1 nan nan \n", - "2 2 nan nan \n", - "3 3 nan nan \n", - "4 4 nan nan \n", - ".. ... ... ... ...\n", - "85 85 nan nan \n", - "86 86 nan nan \n", - "87 87 nan nan \n", - "88 88 nan nan \n", - "89 89 nan nan \n", - "\n", - "[90 rows x 4 columns]" + " entity_id entity_name entity_type\n", + "0 0 protein 0 protein" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "molsys.molecules" + "molsys.entities" ] }, { "cell_type": "code", "execution_count": null, - "id": "cfe2dc72-353f-46bf-9db4-085f66cbca5e", + "id": "64cc40c9-1df5-4738-afe3-2bf16fa5d575", "metadata": {}, "outputs": [], "source": []