diff --git a/molsysmt/native/topology.py b/molsysmt/native/topology.py
index 490e77cee..dbf314596 100644
--- a/molsysmt/native/topology.py
+++ b/molsysmt/native/topology.py
@@ -20,7 +20,7 @@ def __init__(self, n_atoms=0):
def _fix_null_values(self):
for column in self:
- self[column].fillna(pd.NA, inplace=True)
+ self[column]=self[column].fillna(pd.NA)
class Groups_DataFrame(pd.DataFrame):
@@ -39,7 +39,7 @@ def __init__(self, n_groups=0):
def _fix_null_values(self):
for column in self:
- self[column].fillna(pd.NA, inplace=True)
+ self[column]=self[column].fillna(pd.NA)
class Components_DataFrame(pd.DataFrame):
@@ -58,7 +58,7 @@ def __init__(self, n_components=0):
def _fix_null_values(self):
for column in self:
- self[column].fillna(pd.NA, inplace=True)
+ self[column]=self[column].fillna(pd.NA)
class Molecules_DataFrame(pd.DataFrame):
@@ -77,7 +77,7 @@ def __init__(self, n_molecules=0):
def _fix_null_values(self):
for column in self:
- self[column].fillna(pd.NA, inplace=True)
+ self[column]=self[column].fillna(pd.NA)
class Entities_DataFrame(pd.DataFrame):
@@ -95,7 +95,7 @@ def __init__(self, n_entities=0):
def _fix_null_values(self):
for column in self:
- self[column].fillna(pd.NA, inplace=True)
+ self[column]=self[column].fillna(pd.NA)
class Chains_DataFrame(pd.DataFrame):
@@ -113,7 +113,7 @@ def __init__(self, n_chains=0):
def _fix_null_values(self):
for column in self:
- self[column].fillna(pd.NA, inplace=True)
+ self[column]=self[column].fillna(pd.NA)
class Bonds_DataFrame(pd.DataFrame):
@@ -133,12 +133,12 @@ def __init__(self, n_bonds=0):
def _fix_null_values(self):
for column in self:
- self[column].fillna(pd.NA, inplace=True)
+ self[column]=self[column].fillna(pd.NA)
def _sort_bonds(self):
- self_mask = self['atom1_index'] > self['atom2_index']
- self.update(self.loc[self_mask].rename({'atom1_index': 'atom2_index', 'atom2_index': 'atom1_index'}, axis=1))
+ mask = self['atom1_index'] > self['atom2_index']
+ self.loc[mask, ['atom1_index', 'atom2_index']] = self.loc[mask, ['atom2_index', 'atom1_index']].values
self.sort_values(by=['atom1_index', 'atom2_index'], inplace=True)
self.reset_index(drop=True, inplace=True)
@@ -344,15 +344,89 @@ def rebuild_components(self, redefine_indices=True, redefine_ids=True, redefine_
self.components["component_id"] = np.arange(self.components.shape[0], dtype=int)
- #if redefine_names:
+ if redefine_types:
+
+ from molsysmt.element.component.get_component_type import _get_component_type_from_group_names_and_types
+
+ aux_df = self.groups.groupby('component_index').agg(group_name=('group_name', list),
+ group_type=('group_type', list))
+ for row in aux_df.itertuples(index=True):
+ component_type = _get_component_type_from_group_names_and_types(row.group_name, row.group_type)
+ self.components.iloc[row.Index,2] = component_type
+
+ if redefine_names:
+
+ from molsysmt.element.group.small_molecule import small_molecule_names
+
+ aux_df = self.groups.groupby('component_index').agg(group_name=('group_name', list),
+ group_type=('group_type', list))
+
+ component_types = self.components['component_type'].to_numpy()
+
+ counter = {'peptide':0, 'protein':0, 'small molecule':0, 'unknown':0}
+
+ peptides = {}
+ proteins = {}
+ small_molecules = {}
+
+ for component_type, row in zip(component_types, aux_df.itertuples(index=True)):
+
+ if component_type == 'peptide':
+
+ string_peptide = ','.join(row.group_name)
+
+ if string_peptide in peptides:
+ component_name = peptides[string_peptide]
+ else:
+ component_name = component_type+' '+str(counter[component_type])
+ peptides[string_peptide] = component_name
+ counter[component_type] += 1
+
+ elif component_type == 'protein':
+
+ string_protein = ','.join(row.group_name)
+
+ if string_protein in proteins:
+ component_name = proteins[string_protein]
+ else:
+ component_name = component_type+' '+str(counter[component_type])
+ proteins[string_protein] = component_name
+ counter[component_type] += 1
+
+ elif component_type == 'small molecule':
+
+ group_name = row.group_name[0]
+
+ if group_name in small_molecules:
+ component_name = small_molecules[group_name]
+ else:
+ if group_name in small_molecule_names:
+ component_name = small_molecule_names[group_name]
+ else:
+ component_name = group_name
+ small_molecules[group_name] = component_name  # cache under the group name, the key looked up above
+
+ elif component_type in ['ion', 'lipid']:
+
+ component_name = row.group_name[0]
+
+ elif component_type in ['water']:
+
+ component_name = 'water'
+
+ else:
+
+ component_name = 'unknown '+str(counter['unknown'])
+ counter['unknown']+=1
+
+ self.components.iloc[row.Index,1] = component_name
+
+
+
# component_name = get_component_name(self, element='component', redefine_names=True, skip_digestion=True)
# self.components["component_name"] = np.array(component_name, dtype=object)
# del component_name
- #if redefine_types:
- # component_type = get_component_type(self, element='component', redefine_types=True, skip_digestion=True)
- # self.components["component_type"] = np.array(component_type, dtype=object)
- # del component_type
def rebuild_molecules(self, redefine_indices=True, redefine_ids=True, redefine_names=True, redefine_types=True):
@@ -464,7 +538,7 @@ def rebuild_entities(self, redefine_indices=True, redefine_ids=True, redefine_na
else:
entity_index = aux_dict[molecule_name]
else:
- if 'unknown' in aux_dict:
+ if 'unknown' not in aux_dict:
aux_dict['unknown'] = count
entity_index = count
count += 1
diff --git a/sandbox/Test_2nzt.ipynb b/sandbox/Test_2nzt.ipynb
index 28b8aaeec..21751aa61 100644
--- a/sandbox/Test_2nzt.ipynb
+++ b/sandbox/Test_2nzt.ipynb
@@ -28,7 +28,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "f2af978b2fe34ddb80cb876fca7ea1e5",
+ "model_id": "cf58aac2129443ffb36647ad5696de50",
"version_major": 2,
"version_minor": 0
},
@@ -45,77 +45,27 @@
{
"cell_type": "code",
"execution_count": 3,
+ "id": "3bf5eb4c-2b5b-4c2a-9659-157db9eeb032",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "file = '/home/diego/Ixtlilton/hot_projects/hexii/AF-P52789-F1-model_v4.pdb'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
"id": "78decf3e-a27b-4949-ba36-1d022c6aee24",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:23: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
- "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
- "\n",
- "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
- "\n",
- "\n",
- " self[column].fillna(pd.NA, inplace=True)\n",
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:42: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
- "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
- "\n",
- "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
- "\n",
- "\n",
- " self[column].fillna(pd.NA, inplace=True)\n",
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:61: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
- "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
- "\n",
- "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
- "\n",
- "\n",
- " self[column].fillna(pd.NA, inplace=True)\n",
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:80: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
- "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
- "\n",
- "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
- "\n",
- "\n",
- " self[column].fillna(pd.NA, inplace=True)\n",
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:98: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
- "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
- "\n",
- "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
- "\n",
- "\n",
- " self[column].fillna(pd.NA, inplace=True)\n",
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:116: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
- "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
- "\n",
- "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
- "\n",
- "\n",
- " self[column].fillna(pd.NA, inplace=True)\n",
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:136: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
- "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
- "\n",
- "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
- "\n",
- "\n",
- " self[column].fillna(pd.NA, inplace=True)\n",
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:141: FutureWarning: Downcasting behavior in Series and DataFrame methods 'where', 'mask', and 'clip' is deprecated. In a future version this will not infer object dtypes or cast all-round floats to integers. Instead call result.infer_objects(copy=False) for object inference, or cast round floats explicitly. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
- " self.update(self.loc[self_mask].rename({'atom1_index': 'atom2_index', 'atom2_index': 'atom1_index'}, axis=1))\n",
- "/home/diego/repos@uibcdf/MolSysMT/molsysmt/native/topology.py:141: FutureWarning: Downcasting behavior in Series and DataFrame methods 'where', 'mask', and 'clip' is deprecated. In a future version this will not infer object dtypes or cast all-round floats to integers. Instead call result.infer_objects(copy=False) for object inference, or cast round floats explicitly. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
- " self.update(self.loc[self_mask].rename({'atom1_index': 'atom2_index', 'atom2_index': 'atom1_index'}, axis=1))\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "molsys = msm.convert('2LAO.pdb', 'molsysmt.Topology')"
+ "molsys = msm.convert(file, 'molsysmt.Topology')"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "id": "b0f0657d-e5e5-4d4f-8105-c034940879b1",
+ "execution_count": 7,
+ "id": "fca1bd2f-bea7-4e7f-b825-4c44c5a3672f",
"metadata": {},
"outputs": [
{
@@ -139,125 +89,40 @@
" \n",
" \n",
" \n",
" \n",
" \n",
- " molecule_id \n",
- " molecule_name \n",
- " molecule_type \n",
- " entity_index \n",
+ " entity_id \n",
+ " entity_name \n",
+ " entity_type \n",
"
90 rows × 4 columns
\n", "" ], "text/plain": [ - " molecule_id molecule_name molecule_type entity_index\n", - "0 0 nan nan