Skip to content

Commit

Permalink
In process
Browse files Browse the repository at this point in the history
  • Loading branch information
dprada committed Feb 19, 2024
1 parent e855dde commit e3b7660
Show file tree
Hide file tree
Showing 122 changed files with 234 additions and 5,437 deletions.
Binary file added molsysmt/data/databases/amino_acids/0.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/1.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/2.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/3.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/4.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/5.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/6.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/7.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/8.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/9.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/A.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/B.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/C.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/D.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/E.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/F.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/G.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/H.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/I.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/J.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/K.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/L.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/M.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/N.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/O.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/P.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/Q.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/R.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/S.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/T.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/U.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/V.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/W.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/X.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/Y.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/amino_acids/Z.pkl.gz
Binary file not shown.
Binary file not shown.
File renamed without changes.
Binary file removed molsysmt/data/databases/components.pkl
Binary file not shown.
Binary file added molsysmt/data/databases/ions/1.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/2.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/3.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/4.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/6.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/A.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/B.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/C.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/D.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/E.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/F.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/G.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/H.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/I.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/K.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/L.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/M.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/N.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/O.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/P.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/R.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/S.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/T.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/U.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/V.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/W.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/Y.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/Z.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/ions/group_names.pkl.gz
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,17 @@

aux_dict['name']=key['_chem_comp']['name']
aux_dict['three_letter_code']=key['_chem_comp']['three_letter_code']
aux_dict['formal_charge']=float(key['_chem_comp']['pdbx_formal_charge'])
aux_dict['atom_name']=key['_chem_comp_atom']['atom_id']
aux_dict['alt_atom_name']=key['_chem_comp_atom']['alt_atom_id']
aux_dict['atom_type']=key['_chem_comp_atom']['type_symbol']
aux_dict['charge']=[float(ii) for ii in key['_chem_comp_atom']['charge']]
try:
charge = []
for ii in key['_chem_comp_atom']['charge']:
charge.append(float(ii))
aux_dict['charge']=charge
except:
aux_dict['charge']=[]
if '_chem_comp_bond' in key:
aux_dict['bonds']=[[ii,jj] for ii,jj in zip(key['_chem_comp_bond']['atom_id_1'],
key['_chem_comp_bond']['atom_id_2'])]
Expand All @@ -56,7 +63,18 @@
#with open('small_molecules_db.pkl', 'wb') as fff:
# pickle.dump(output, fff)

split_output = {}
for name,value in output.items():
if name[0] not in split_output:
split_output[name[0]]={}
split_output[name[0]][name]=value

import gzip
with gzip.open('amino_acids_db.pkl.gz', 'wb', compresslevel=9) as fff:
pickle.dump(output, fff)

for file_name, aux_output in split_output.items():
with gzip.open('amino_acids/'+file_name+'.pkl.gz', 'wb', compresslevel=9) as fff:
pickle.dump(aux_output, fff)

with gzip.open('amino_acids/group_names.pkl.gz', 'wb', compresslevel=9) as fff:
pickle.dump(list(output.keys()), fff)

Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,34 @@
data += types_dict[ii]
data.remove('UNL')

# quito iones
ions = []
for value in data:
key = cif_entries[value]
if key['_chem_comp']['name'].endswith(' ION"') or key['_chem_comp']['name'].endswith(' ion"'):
ions.append(value)

output = {}

for value in data:
for value in ions:

key = cif_entries[value]

aux_dict={}

aux_dict['name']=key['_chem_comp']['name']
aux_dict['three_letter_code']=key['_chem_comp']['three_letter_code']
aux_dict['formal_charge']=float(key['_chem_comp']['pdbx_formal_charge'])
aux_dict['atom_name']=key['_chem_comp_atom']['atom_id']
aux_dict['alt_atom_name']=key['_chem_comp_atom']['alt_atom_id']
aux_dict['atom_type']=key['_chem_comp_atom']['type_symbol']
aux_dict['charge']=[float(ii) for ii in key['_chem_comp_atom']['charge']]
try:
charge = []
for ii in key['_chem_comp_atom']['charge']:
charge.append(float(ii))
aux_dict['charge']=charge
except:
aux_dict['charge']=[]
if '_chem_comp_bond' in key:
aux_dict['bonds']=[[ii,jj] for ii,jj in zip(key['_chem_comp_bond']['atom_id_1'],
key['_chem_comp_bond']['atom_id_2'])]
Expand All @@ -46,7 +60,19 @@
#with open('small_molecules_db.pkl', 'wb') as fff:
# pickle.dump(output, fff)

split_output = {}
for name,value in output.items():
if name[0] not in split_output:
split_output[name[0]]={}
split_output[name[0]][name]=value

import gzip
with gzip.open('small_molecules_db.pkl.gz', 'wb', compresslevel=9) as fff:
pickle.dump(output, fff)

for file_name, aux_output in split_output.items():
with gzip.open('ions/'+file_name+'.pkl.gz', 'wb', compresslevel=9) as fff:
pickle.dump(aux_output, fff)

with gzip.open('ions/group_names.pkl.gz', 'wb', compresslevel=9) as fff:
pickle.dump(list(output.keys()), fff)


81 changes: 81 additions & 0 deletions molsysmt/data/databases/make_small_molecules_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# wget https://files.wwpdb.org/pub/pdb/data/monomers/components.cif
#
# Builds the small-molecule database from the components.cif file fetched with
# the command above. Entries of type NON-POLYMER / non-polymer / other are
# kept, except 'UNL' and every component whose name ends in ' ION"' / ' ion"'.
# The result is written as one gzip-compressed pickle per leading character of
# the component id, plus a 'group_names' pickle listing every id.

import gzip
import pickle
from pathlib import Path

from molsysmt.native import CIFFileHandler

# Directory (relative to the working directory) that receives the pickles.
output_dir = Path('small_molecules')


def _atom_charges(atom_block):
    """Return the per-atom charges as floats, or [] when they cannot be parsed.

    An empty list is the deliberate best-effort fallback for components whose
    'charge' column is missing or holds non-numeric placeholders.
    """
    try:
        return [float(ii) for ii in atom_block['charge']]
    except (KeyError, TypeError, ValueError):
        return []


handler = CIFFileHandler('components.cif')
cif_entries = handler.parse()

# Group the component ids by their '_chem_comp' type.
types_dict = {}
for ii in cif_entries:
    jj = cif_entries[ii]['_chem_comp']['type']
    types_dict.setdefault(jj, []).append(ii)

types = ['NON-POLYMER', 'non-polymer', 'other']
data = []
for ii in types:
    data += types_dict[ii]
data.remove('UNL')

# Remove ions: they are identified by the suffix of the component name
# (the closing double quote is part of the name string being tested).
ions = [
    value for value in data
    if cif_entries[value]['_chem_comp']['name'].endswith((' ION"', ' ion"'))
]
# Filter through a set instead of calling list.remove() once per ion.
ion_ids = set(ions)
data = [value for value in data if value not in ion_ids]

output = {}

for value in data:

    key = cif_entries[value]
    chem_comp = key['_chem_comp']
    atoms = key['_chem_comp_atom']

    aux_dict = {}

    aux_dict['name'] = chem_comp['name']
    aux_dict['three_letter_code'] = chem_comp['three_letter_code']
    aux_dict['formal_charge'] = float(chem_comp['pdbx_formal_charge'])
    aux_dict['atom_name'] = atoms['atom_id']
    aux_dict['alt_atom_name'] = atoms['alt_atom_id']
    aux_dict['atom_type'] = atoms['type_symbol']
    aux_dict['charge'] = _atom_charges(atoms)
    if '_chem_comp_bond' in key:
        bonds = key['_chem_comp_bond']
        aux_dict['bonds'] = [[ii, jj] for ii, jj in zip(bonds['atom_id_1'],
                                                        bonds['atom_id_2'])]
    else:
        aux_dict['bonds'] = []

    output[value] = aux_dict

# Split the database by the first character of the component id so that each
# pickle stays small.
split_output = {}
for name, value in output.items():
    split_output.setdefault(name[0], {})[name] = value

# gzip.open() does not create missing directories.
output_dir.mkdir(parents=True, exist_ok=True)

for file_name, aux_output in split_output.items():
    with gzip.open(output_dir / (file_name + '.pkl.gz'), 'wb', compresslevel=9) as fff:
        pickle.dump(aux_output, fff)

with gzip.open(output_dir / 'group_names.pkl.gz', 'wb', compresslevel=9) as fff:
    pickle.dump(list(output), fff)


Binary file added molsysmt/data/databases/small_molecules/0.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/1.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/2.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/3.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/4.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/5.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/6.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/7.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/8.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/9.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/A.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/B.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/C.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/D.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/E.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/F.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/G.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/H.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/I.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/J.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/K.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/L.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/M.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/N.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/O.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/P.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/Q.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/R.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/S.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/T.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/U.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/V.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/W.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/X.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/Y.pkl.gz
Binary file not shown.
Binary file added molsysmt/data/databases/small_molecules/Z.pkl.gz
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion molsysmt/element/group/amino_acid/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .names import names
from .get_amino_acid_type_from_name import get_amino_acid_type_from_name
from .get_1_letter_code_from_name import get_1_letter_code_from_name
from .is_amino_acid import is_amino_acid
from .group_names import group_names

19 changes: 19 additions & 0 deletions molsysmt/element/group/amino_acid/group_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import gzip
import pickle
import sys

# Pick a way to locate packaged data files. The version test is open-ended so
# that interpreters newer than 3.10 also get a `path` function; an exact
# minor-version match would leave `path` undefined on 3.11+.
if sys.version_info >= (3, 10):
    from importlib.resources import files

    def path(package, file):
        """Return the location of `file` inside the importable `package`."""
        return files(package).joinpath(file)
else:
    from pathlib import PurePath

    parent = PurePath(__file__).parent
    # This module lives in molsysmt/element/group/amino_acid/, so the root of
    # the molsysmt package is three directories above `parent`.
    _package_root = parent.parents[2]

    def path(package, file):
        """Return, as a string, the path of `file` inside `package`.

        The dotted package name is mapped onto directories below the package
        root, e.g. 'molsysmt.data.databases.amino_acids' becomes
        <root>/data/databases/amino_acids.
        """
        sub_dirs = package.split('.')[1:]
        return str(_package_root.joinpath(*sub_dirs, file))


# List of amino-acid group names, loaded once at import time.
# NOTE: pickle is only acceptable here because the file ships inside the
# package; never point this loader at untrusted data.
with gzip.open(path('molsysmt.data.databases.amino_acids', 'group_names.pkl.gz'), 'rb') as fff:
    group_names = pickle.load(fff)

4 changes: 2 additions & 2 deletions molsysmt/element/group/amino_acid/is_amino_acid.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from .get_amino_acid_type_from_name import name_to_type
from .group_names import group_names

def is_amino_acid(name):
"""
To be written soon...
"""
return (name in name_to_type)
return (name in group_names)

5 changes: 0 additions & 5 deletions molsysmt/element/group/amino_acid/names.py

This file was deleted.

2 changes: 1 addition & 1 deletion molsysmt/element/group/ion/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .ion_names import ion_names
from .is_ion import is_ion
from .group_names import group_names
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pickle
import sys
import gzip

if sys.version_info[1]==10:
from importlib.resources import files
Expand All @@ -13,6 +14,6 @@ def path(package, file):
return parent.joinpath('../data/'+data_dir+'/'+file).__str__()


with open(path('molsysmt.data.databases','components.pkl'), 'rb') as fff:
small_molecule_names = pickle.load(fff)
with gzip.open(path('molsysmt.data.databases.ions','group_names.pkl.gz'), 'rb') as fff:
group_names = pickle.load(fff)

4 changes: 2 additions & 2 deletions molsysmt/element/group/ion/is_ion.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .ion_names import ion_names
from .group_names import group_names

def is_ion(name):
"""
To be written soon...
"""

return (name in ion_names)
return (name in group_names)

3 changes: 2 additions & 1 deletion molsysmt/element/group/small_molecule/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .small_molecule_names import small_molecule_names
from .is_small_molecule import is_small_molecule
from .group_names import group_names

Binary file not shown.
19 changes: 19 additions & 0 deletions molsysmt/element/group/small_molecule/group_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import gzip
import pickle
import sys

# Pick a way to locate packaged data files. The version test is open-ended so
# that interpreters newer than 3.10 also get a `path` function; an exact
# minor-version match would leave `path` undefined on 3.11+.
if sys.version_info >= (3, 10):
    from importlib.resources import files

    def path(package, file):
        """Return the location of `file` inside the importable `package`."""
        return files(package).joinpath(file)
else:
    from pathlib import PurePath

    parent = PurePath(__file__).parent
    # This module lives in molsysmt/element/group/small_molecule/, so the root
    # of the molsysmt package is three directories above `parent`.
    _package_root = parent.parents[2]

    def path(package, file):
        """Return, as a string, the path of `file` inside `package`.

        The dotted package name is mapped onto directories below the package
        root, e.g. 'molsysmt.data.databases.small_molecules' becomes
        <root>/data/databases/small_molecules.
        """
        sub_dirs = package.split('.')[1:]
        return str(_package_root.joinpath(*sub_dirs, file))


# List of small-molecule group names, loaded once at import time.
# NOTE: pickle is only acceptable here because the file ships inside the
# package; never point this loader at untrusted data.
with gzip.open(path('molsysmt.data.databases.small_molecules', 'group_names.pkl.gz'), 'rb') as fff:
    group_names = pickle.load(fff)

4 changes: 2 additions & 2 deletions molsysmt/element/group/small_molecule/is_small_molecule.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .small_molecule_names import small_molecule_names
from .group_names import group_names

def is_small_molecule(name):
"""
To be written soon...
"""

return (name in small_molecule_names)
return (name in group_names)

Binary file not shown.
Loading

0 comments on commit e3b7660

Please sign in to comment.