Skip to content

Commit

Permalink
Including saccharides
Browse files Browse the repository at this point in the history
  • Loading branch information
dprada committed Dec 26, 2024
1 parent d8d8ab0 commit abe38a1
Show file tree
Hide file tree
Showing 63 changed files with 561 additions and 1,078 deletions.
2 changes: 1 addition & 1 deletion molsysmt/_private/exceptions/argument_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class ArgumentError(Exception):
.. admonition:: See Also
:class: attention
:ref:`Developer Guide \> Exceptions \> BadCallError <developer:exceptions:BadCallError>`
:ref:`Developer Guide > Exceptions > BadCallError <developer:exceptions:BadCallError>`
"""

Expand Down
8 changes: 7 additions & 1 deletion molsysmt/build/get_missing_bonds.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all'
from molsysmt.element.group.amino_acid import get_bonded_atom_pairs as _bonds_in_amino_acid
from molsysmt.element.group.terminal_capping import get_bonded_atom_pairs as _bonds_in_terminal_capping
from molsysmt.element.group.small_molecule import get_bonded_atom_pairs as _bonds_in_small_molecule
from molsysmt.element.group.saccharide import get_bonded_atom_pairs as _bonds_in_saccharide
from molsysmt.element.group.terminal_capping import is_n_terminal_capping, is_c_terminal_capping

old_bonds = get(molecular_system, selection=selection, inner_bonded_atom_pairs=True)
Expand Down Expand Up @@ -121,7 +122,12 @@ def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all'

elif group_type=='saccharide':

raise NotImplementedError('Group type "saccharide" not implemented')
aux_bonds = _bonds_in_saccharide(group_name, atom_names, atom_indices, sorted=False)
if aux_bonds is None:
aux_bonds = _bonds_in_unknown_group(molecular_system, atom_indices, atom_names,
structure_index=structure_index, threshold=threshold,
sorted=False)
bonds += aux_bonds

elif group_type=='oligosaccharide':

Expand Down
Binary file added molsysmt/data/databases/saccharides/0.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/1.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/2.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/3.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/4.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/5.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/6.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/7.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/8.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/9.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/A.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/B.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/C.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/D.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/E.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/F.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/G.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/H.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/I.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/J.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/K.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/L.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/M.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/N.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/O.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/P.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/Q.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/R.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/S.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/T.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/U.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/V.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/W.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/X.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/Y.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/saccharides/Z.pkl.gz
Binary file not shown.
Binary file not shown.
67 changes: 67 additions & 0 deletions molsysmt/data/databases/saccharides/make_saccharides_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# wget https://files.wwpdb.org/pub/pdb/data/monomers/components.cif

from molsysmt.native import CIFFileHandler
import pickle

# Build the saccharide group database files from the wwPDB chemical
# component dictionary (components.cif, see the wget line above).
handler = CIFFileHandler('../components.cif')
cif_entries = handler.parse()

# Index the component ids by the value of their '_chem_comp' 'type' field.
types_dict = {}
for comp_id in cif_entries:
    comp_type = cif_entries[comp_id]['_chem_comp']['type']
    types_dict.setdefault(comp_type, []).append(comp_id)

# Keeping only the saccharide types (every spelling listed here must be
# present in components.cif, otherwise the lookup below raises KeyError).
types = ['saccharide', 'D-saccharide', '"L-saccharide, beta linking"', '"L-saccharide, alpha linking"',
         'D-SACCHARIDE', 'L-saccharide', '"D-saccharide, beta linking"', '"D-saccharide, alpha linking"',
         'L-SACCHARIDE', 'SACCHARIDE']

data = []
for comp_type in types:
    data.extend(types_dict[comp_type])

output = {}

for value in data:

    entry = cif_entries[value]

    comp_name = entry['_chem_comp']['name']
    atom_ids = entry['_chem_comp_atom']['atom_id']
    alt_atom_ids = entry['_chem_comp_atom']['alt_atom_id']

    # Two equivalent topologies are stored per component: one written with
    # the 'atom_id' names and one with the 'alt_atom_id' names.
    bonds = []
    alt_bonds = []

    if '_chem_comp_bond' in entry:
        bond_table = entry['_chem_comp_bond']
        # A single bond is not parsed as a list; wrap it so that the loop
        # below always iterates over lists.
        if not isinstance(bond_table['atom_id_1'], list):
            bond_table['atom_id_1'] = [bond_table['atom_id_1']]
            bond_table['atom_id_2'] = [bond_table['atom_id_2']]
        for atom1, atom2 in zip(bond_table['atom_id_1'], bond_table['atom_id_2']):
            bonds.append([atom1, atom2])
            # Same bond, written with the alternative names of both atoms.
            position1 = atom_ids.index(atom1)
            position2 = atom_ids.index(atom2)
            alt_bonds.append([alt_atom_ids[position1], alt_atom_ids[position2]])

    output[value] = {
        'name': comp_name,
        'topology': [
            {'atoms': atom_ids, 'bonds': bonds},
            {'atoms': alt_atom_ids, 'bonds': alt_bonds},
        ],
    }

# One database file per first character of the component id.
split_output = {}
for name, value in output.items():
    split_output.setdefault(name[0], {})[name] = value

import gzip

for file_name, aux_output in split_output.items():
    with gzip.open(file_name+'.pkl.gz', 'wb', compresslevel=9) as fff:
        pickle.dump(aux_output, fff)

# Sorted list with the ids of every component stored in the database.
with gzip.open('group_names.pkl.gz', 'wb', compresslevel=9) as fff:
    pickle.dump(sorted(output), fff)

Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
else:
types_dict[jj] = [ii]

# Adding only non-polymer and other
types = ['NON-POLYMER', 'non-polymer', 'other']
data = []
for ii in types:
data += types_dict[ii]
data.remove('UNL')

# quito iones
# removing ions
ions = []
for value in data:
key = cif_entries[value]
Expand All @@ -31,6 +32,13 @@
for ion in ions:
data.remove(ion)

# removing saccharides
types = ['saccharide', 'D-saccharide', '"L-saccharide, beta linking"', '"L-saccharide, alpha linking"',
'D-SACCHARIDE', 'L-saccharide', '"D-saccharide, beta linking"', '"D-saccharide, alpha linking"',
'L-SACCHARIDE', 'SACCHARIDE', '']



output = {}

for value in data:
Expand Down
9 changes: 8 additions & 1 deletion molsysmt/element/atom/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,9 @@
'O2D' : 'O',
'O31' : 'O',
'O32' : 'O',
'O1P' : 'O',
'O2P' : 'O',
'O3P' : 'O',
'OT1' : 'O',
'OT2' : 'O',
'OC1' : 'O',
Expand Down Expand Up @@ -512,6 +515,10 @@
'CL' : 'CL',
'Cl' : 'CL',
'CLA' : 'CL',
'FE' : 'FE'
'FE' : 'FE',

## Unknown
'UNK' : 'X',

}

2 changes: 1 addition & 1 deletion molsysmt/element/component/get_component_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _get_component_type_from_group_names_and_types(group_names, group_types):
last_group_type = group_types[-1]
first_group_name = group_names[0]

if first_group_type in ['water', 'ion', 'small molecule', 'lipid']:
if first_group_type in ['water', 'ion', 'small molecule', 'lipid', 'saccharide']:
tmp_type = first_group_type
elif (first_group_type == 'amino acid') or (first_group_type == 'terminal capping'):
if first_group_type == 'terminal capping':
Expand Down
3 changes: 2 additions & 1 deletion molsysmt/element/group/get_bonded_atom_pairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def get_bonded_atom_pairs(group_name, atom_names, atom_indices=None, sorted=True
from .small_molecule import get_bonded_atom_pairs as get_bonded_atom_pairs_from_small_molecule
from .terminal_capping import get_bonded_atom_pairs as get_bonded_atom_pairs_from_terminal_capping
from .water import get_bonded_atom_pairs as get_bonded_atom_pairs_from_water
from .saccharide import get_bonded_atom_pairs as get_bonded_atom_pairs_from_saccharide

group_type = get_group_type_from_group_name(group_name)

Expand Down Expand Up @@ -38,7 +39,7 @@ def get_bonded_atom_pairs(group_name, atom_names, atom_indices=None, sorted=True

case 'saccharide':

pass
bonds = get_bonded_atom_pairs_from_saccharide(group_name, atom_names, atom_indices=atom_indices, sorted=sorted)

case 'small molecule':

Expand Down
4 changes: 3 additions & 1 deletion molsysmt/element/group/saccharide/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from .saccharide_names import saccharide_names
from .is_saccharide import is_saccharide
from .group_names import group_names
from .get_group_db import get_group_db
from .get_bonded_atom_pairs import get_bonded_atom_pairs
45 changes: 45 additions & 0 deletions molsysmt/element/group/saccharide/get_bonded_atom_pairs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import numpy as np

_sorted=sorted

def get_bonded_atom_pairs(group_name, atom_names, atom_indices=None, sorted=True):
    """Return the bonded atom pairs of a saccharide group.

    The bonds are taken from the first topology stored in the saccharide
    database for `group_name` whose atom names include every name in
    `atom_names`. Only bonds with both atoms present in `atom_names` are
    reported.

    Parameters
    ----------
    group_name : str
        Name of the saccharide group. It must be in `group_names`.
    atom_names : sequence of str
        Names of the atoms of the group.
    atom_indices : sequence of int, optional
        Index reported for each entry of `atom_names`. When None, the
        positions ``0 .. len(atom_names)-1`` are used.
    sorted : bool, default True
        Whether the list of pairs is sorted before being returned.

    Returns
    -------
    list of [int, int]
        Bonded atom pairs, each one with the lowest index first.

    Raises
    ------
    ValueError
        If `group_name` is not a known saccharide group name, or if no
        topology stored for the group contains all the names in `atom_names`.
    """

    from . import group_names, get_group_db

    if group_name not in group_names:
        raise ValueError(f'"{group_name}" is not a known saccharide group name.')

    if atom_indices is None:
        atom_indices = np.arange(len(atom_names), dtype=int).tolist()

    db = get_group_db(group_name)

    # Pick the first stored topology that covers every given atom name.
    topology = None
    for candidate in db['topology']:
        if np.all(np.isin(atom_names, candidate['atoms'])):
            topology = candidate
            break

    if topology is None:
        # NOTE(review): get_missing_bonds checks the value returned by this
        # function against None before falling back to a distance based
        # search, but None is never returned here -- confirm which of the two
        # behaviors is the intended one.
        raise ValueError(
            f'No topology of the saccharide "{group_name}" contains all the atom names: {atom_names}'
        )

    # Atom name -> reported index, built once. The first occurrence of a
    # repeated name wins, which is what list.index() would return.
    index_of = {}
    for name, index in zip(atom_names, atom_indices):
        index_of.setdefault(name, index)

    bonds = []
    for name_1, name_2 in topology['bonds']:
        if name_1 in index_of and name_2 in index_of:
            index_1 = index_of[name_1]
            index_2 = index_of[name_2]
            if index_1 < index_2:
                bonds.append([index_1, index_2])
            else:
                bonds.append([index_2, index_1])

    # The builtin is shadowed by the `sorted` argument; `_sorted` is the
    # module level alias of the builtin.
    if sorted:
        return _sorted(bonds)
    else:
        return bonds
22 changes: 22 additions & 0 deletions molsysmt/element/group/saccharide/get_group_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pickle
import sys
import gzip
import numpy as np
from molsysmt.element.group.saccharide import group_names

from importlib.resources import files
def path(package, file):
    """Return the location of the resource `file` inside the importable `package`."""
    package_root = files(package)
    return package_root.joinpath(file)


def get_group_db(group_name):
    """Return the database record of a saccharide group.

    The records are stored in gzip-compressed pickle files of the package
    `molsysmt.data.databases.saccharides`, one file per first character of
    the group name.

    Parameters
    ----------
    group_name : str
        Name of the saccharide group. It must be in `group_names`.

    Returns
    -------
    object
        The record stored under `group_name` in the database file.
        Presumably a dict with the keys 'name' and 'topology', as written by
        make_saccharides_db.py -- TODO confirm.

    Raises
    ------
    ValueError
        If `group_name` is not in `group_names`.
    """

    if group_name not in group_names:
        raise ValueError(f'"{group_name}" is not a known saccharide group name.')

    # The database is split in one file per first character of the name.
    db_file = path('molsysmt.data.databases.saccharides', group_name[0]+'.pkl.gz')

    # The pickle is read from the package's own data files, not from
    # external input.
    with gzip.open(db_file, 'rb') as fff:
        dbs = pickle.load(fff)

    return dbs[group_name]
15 changes: 15 additions & 0 deletions molsysmt/element/group/saccharide/group_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pickle
import sys
import gzip

from importlib.resources import files
def path(package, file):
    """Return the location of the resource `file` inside the importable `package`."""
    return files(package).joinpath(file)

# The list of saccharide group names is loaded once, when this module is
# imported, from the data files of the package.
try:
    with gzip.open(path('molsysmt.data.databases.saccharides','group_names.pkl.gz'), 'rb') as fff:
        group_names = pickle.load(fff)
except Exception:
    # Best effort: a missing or unreadable file must not break the import.
    # `Exception` is caught instead of using a bare `except` so that
    # KeyboardInterrupt and SystemExit are not swallowed.
    # NOTE(review): with `group_names = None`, membership tests such as
    # `name in group_names` raise TypeError -- confirm this is acceptable.
    group_names = None
    print('The file molsysmt.data.databases.saccharides.group_names.pkl.gz was not loaded.')

7 changes: 5 additions & 2 deletions molsysmt/element/group/saccharide/is_saccharide.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from .saccharide_names import saccharide_names
from .group_names import group_names

def is_saccharide(name):
"""
To be written soon...
"""

return (name in saccharide_names)
return (name in group_names)

3 changes: 0 additions & 3 deletions molsysmt/element/group/saccharide/saccharide_names.py

This file was deleted.

3 changes: 3 additions & 0 deletions molsysmt/element/molecule/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
'dna',
'rna',
'lipid',
'saccharide',
'oligosaccharide'
]

Expand All @@ -37,6 +38,7 @@
'dna': 'dnas',
'rna': 'rnas',
'lipid': 'lipids',
'saccharide': 'saccharides',
'oligosaccharide': 'oligosaccharides',
}

Expand All @@ -49,6 +51,7 @@
'dnas': 'dna',
'rnas': 'rna',
'lipids': 'lipid',
'saccharides': 'saccharide',
'oligosaccharides': 'oligosaccharide',
}

Expand Down
Loading

0 comments on commit abe38a1

Please sign in to comment.