Skip to content

Commit

Permalink
Merge pull request #8 from gentangle/opt_traj
Browse files Browse the repository at this point in the history
[FEATURE] compute CM directly in trajectory
  • Loading branch information
loscati committed Mar 13, 2024
2 parents 9db3c77 + a3fd346 commit 877925d
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 104 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
Changelog
=========

* Switching from `logging` to `loguru` for logging
* Faster frame selection when using `trajectory`
* `trajectory` now accepts more `mask` options to select which frames are used to compute GE: a list of frames, a numpy boolean array marking the selected frames, or a dictionary with `start`, `stop` and `step` keys
* `trajectory` is now consistent with `singlechain`: both automatically compute the contact map for the given topology

v0.8.7 (2024-03-13)
------------------------------------------------------------

Expand Down
9 changes: 8 additions & 1 deletion pyge/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
__version__ = "0.8.7"
__version__ = "0.8.7"

import sys

from loguru import logger

# Replace loguru's default handler (id 0) with an INFO-level stderr sink.
# The default handler may already be gone (e.g. the application called
# ``logger.remove()`` before importing this package); ``logger.remove(0)``
# raises ValueError in that case, which must not break the import.
try:
    logger.remove(0)
except ValueError:
    # Nothing to remove: the default handler was already dropped elsewhere.
    pass
logger.add(sys.stderr, level="INFO")
10 changes: 4 additions & 6 deletions pyge/contacts/pdb_parser.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
"""Parser to load residue list from PDB."""

import logging
import sys
from typing import List

import Bio.PDB as pdb
from loguru import logger

from pyge.contacts.protein_letters import protein_letters_3to1

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)


def get_residues(
file, model_id, chain_id, to_include=None, to_ignore=None, debug=False
Expand Down Expand Up @@ -58,6 +55,7 @@ def get_residues(
res_list : List[Bio.PDB.Residue.Residue]
List of residue object
"""
file = str(file)
if to_include is None:
to_include = []
if to_ignore is None:
Expand Down Expand Up @@ -108,7 +106,7 @@ def get_residues(

elif residue.get_resname() in to_include:
if debug:
logging.debug(
logger.info(
(
f"Residue {residue.get_resname()} with resID {residue.id[1]} "
"has been INCLUDED because specified "
Expand All @@ -119,7 +117,7 @@ def get_residues(

elif residue.get_resname() in to_ignore:
if debug:
logging.debug(
logger.info(
(
f"Residue {residue.get_resname()} with resID {residue.id[1]} "
"has been EXCLUDED because specified "
Expand Down
119 changes: 66 additions & 53 deletions pyge/singlechain.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,17 @@
The user is responsible for this matter.
"""

import logging
import re
import sys
from dataclasses import dataclass
from typing import List

import MDAnalysis as mda
from loguru import logger

from pyge import gent
from pyge.contacts.contactmap import compute_contactmap
from pyge.gent import GE, GETermini

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)


@dataclass
class GEChain:
Expand All @@ -33,6 +30,63 @@ class GEChain:
global_weighted: GE


def _check_cm_options(cm_options):
"""
Check the consistency of the input options for chain modeling.
Parameters
----------
cm_options : dict
A dictionary containing the options for chain modeling.
Raises
---------
ValueError: If the 'chain_id' or 'threshold' keywords are missing.
"""

# Setup variables
if "model_id" not in cm_options:
cm_options["model_id"] = 1
logger.warning("You did not provide the model_id, using the default value (1)")
if "chain_id" not in cm_options:
raise ValueError("You must provide the keyword 'chain_id'")
if "threshold" not in cm_options:
raise ValueError("You must provide the keyword 'threshold'")
if "to_include" not in cm_options:
cm_options["to_include"] = None
if "to_ignore" not in cm_options:
cm_options["to_ignore"] = None
if "pdb_for_cm" not in cm_options:
cm_options["pdb_for_cm"] = None


def _check_altloc(selection_options, cm_options):
"""Check for altloc consistency and return the selection string.
See the docstring of the ge_from_pdb function for more details.
"""
# default altloc if not modified by the user
altloc = None
if selection_options is not None:
# Retrive the altloc option from the section_options or
# from cm_options
if "altloc" in selection_options:
search = re.search("altloc", selection_options)
altloc = selection_options[search.span()[1] : search.span()[1] + 2].strip(
" "
)
assert len(altloc) == 1
elif "altloc" in cm_options:
altloc = cm_options["altloc"]
selection = selection_options
else:
selection = ""
if "altloc" in cm_options:
altloc = cm_options["altloc"]

return selection, altloc


def _ca_selection_from_topology(
topology_file, selection_options=None, trajectory_file=None
):
Expand Down Expand Up @@ -69,7 +123,7 @@ def _ca_selection_from_topology(
universe = mda.Universe(str(topology_file))

ca_selection = universe.select_atoms("name CA" + selection_options)
logging.debug(f"Number of CA atoms selected: {len(ca_selection)}")
logger.info(f"Number of CA atoms selected: {len(ca_selection)}")

return universe, ca_selection

Expand Down Expand Up @@ -154,60 +208,19 @@ def ge_from_pdb(pdb_file, ge_options, cm_options, selection_options=None):
global_weighted : GE
Same structure as above for the weighted GE of the whole chain
"""
# Setup variables
if "model_id" in cm_options:
model_id = cm_options["model_id"]
else:
model_id = 1
logging.warning(
"WARNING: you did not provide the model_id, using the default value (1)"
)
if "chain_id" in cm_options:
chain_id = cm_options["chain_id"]
else:
raise ValueError("You must provide the keyword 'chain_id'")
if "threshold" in cm_options:
threshold = cm_options["threshold"]
else:
raise ValueError("You must provide the keyword 'threshold'")
if "to_include" in cm_options:
to_include = cm_options["to_include"]
else:
to_include = None
if "to_ignore" in cm_options:
to_ignore = cm_options["to_ignore"]
else:
to_ignore = None

# default altloc if not modified by the user
altloc = None
if selection_options is not None:
# Retrive the altloc option from the section_options or
# from cm_options
if "altloc" in selection_options:
search = re.search("altloc", selection_options)
altloc = selection_options[search.span()[1] : search.span()[1] + 2].strip(
" "
)
assert len(altloc) == 1
elif "altloc" in cm_options:
altloc = cm_options["altloc"]
selection = selection_options
else:
selection = ""
if "altloc" in cm_options:
altloc = cm_options["altloc"]
_check_cm_options(cm_options)
selection, altloc = _check_altloc(selection_options, cm_options)

pdb_file = str(pdb_file)
_, ca_selection = _ca_selection_from_topology(pdb_file, selection)
cm = compute_contactmap(
pdb_file,
model_id,
chain_id,
threshold,
cm_options["model_id"],
cm_options["chain_id"],
cm_options["threshold"],
altloc=altloc,
to_include=to_include,
to_ignore=to_ignore,
to_include=cm_options["to_include"],
to_ignore=cm_options["to_ignore"],
)

if "thr_min_len" in ge_options:
Expand Down
Loading

0 comments on commit 877925d

Please sign in to comment.