diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c889e18..ebe2dbc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### 1.7.5 - 27/10/2024 + +* Improves the tensor->PDB writer (`graphein.protein.tensor.io.to_pdb`) by automatically unravelling residue-level b-factor predictions/annotations ([#352](https://github.com/a-r-j/graphein/pull/352)). + ### 1.7.4 - 26/10/2023 * Adds support for PyG 2.4+ ([#350](https://www.github.com/a-r-j/graphein/pull/339)) diff --git a/docs/source/conf.py b/docs/source/conf.py index 72e29a35..2c59d589 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -34,7 +34,7 @@ copyright = f"{datetime.datetime.now().year}, {author}" # The full version, including alpha/beta/rc tags -release = "1.7.4" +release = "1.7.5" # -- General configuration --------------------------------------------------- diff --git a/graphein/__init__.py b/graphein/__init__.py index a760bf35..8ac51f3e 100644 --- a/graphein/__init__.py +++ b/graphein/__init__.py @@ -12,7 +12,7 @@ from .testing import * __author__ = "Arian Jamasb " -__version__ = "1.7.4" +__version__ = "1.7.5" logger.configure( diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index d7021a32..567e8c5f 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -392,7 +392,9 @@ def to_dataframe( :param insertions: List of insertion codes, defaults to ``None`` (``""``). :type insertions: Optional[List[Union[str, float]]], optional :param b_factors: List or tensor of b factors (length: num residues), - defaults to ``None`` (``""``). + defaults to ``None`` (``0.0``). If ``b_factors`` is of length/shape + number of residues (as opposed to number of atoms) it is automatically + unravelled to the correct length. :type b_factors: Optional[List[Union[str, float]]], optional :param occupancy: List or tensor of occupancy values (length: num residues), defaults to ``None`` (``1.0``). 
@@ -434,12 +436,25 @@ def to_dataframe( element_symbols = [ELEMENT_SYMBOL_MAP[a] for a in atom_type] chains = ["A"] * len(res_nums) if chains is None else chains[res_nums - 1] + if b_factors is not None: + num_b_factors = ( + len(b_factors) + if isinstance(b_factors, list) + else b_factors.shape[0] + ) + b_factors = ( + b_factors[res_nums - 1] + if num_b_factors == x.shape[0] + else b_factors + ) + if isinstance(b_factors, torch.Tensor): + b_factors = b_factors.tolist() + else: + b_factors = [0.0] * len(res_nums) if segment_id is None: segment_id = [""] * len(res_nums) if insertions is None: insertions = [""] * len(res_nums) - if b_factors is None: - b_factors = [0.0] * len(res_nums) if occupancy is None: occupancy = [1.0] * len(res_nums) if charge is None: @@ -480,7 +495,6 @@ def to_dataframe( "line_idx": atom_number, } df = pd.DataFrame().from_dict(out) - if biopandas: ppdb = PandasPdb() ppdb.df["ATOM"] = df @@ -501,7 +515,7 @@ def to_pdb(x: AtomTensor, out_path: str, gz: bool = False, **kwargs): :type x: AtomTensor :param out_path: Path to output pdb file. :type out_path: str - :param gz: Whether to gzip out the ouput, defaults to ``False``. + :param gz: Whether to gzip out the output, defaults to ``False``. :type gz: bool, optional :param kwargs: Keyword args for :func:`graphein.protein.tensor.to_dataframe` """ diff --git a/setup.py b/setup.py index 520cb3d3..8ef0238f 100644 --- a/setup.py +++ b/setup.py @@ -134,7 +134,7 @@ def run(self): setup( name="graphein", - version="1.7.4", + version="1.7.5", description="Protein & Interactomic Graph Construction for Machine Learning", long_description=long_description, long_description_content_type="text/markdown",