8 Commits

10 changed files with 161 additions and 45 deletions

View File

@@ -16,7 +16,7 @@ from . import reader
 from . import system
 from . import utils
 from . import extra
-from .logging import logger
+from .logging_util import logger

 def open(

View File

@@ -5,7 +5,7 @@ from typing import Optional, Callable, Iterable
 import numpy as np

 from .checksum import checksum
-from .logging import logger
+from .logging_util import logger

 autosave_directory: Optional[str] = None
 load_autosave_data = False

View File

@@ -1,9 +1,14 @@
 import functools
 import hashlib
-from .logging import logger
+from .logging_util import logger
 from types import ModuleType, FunctionType
 import inspect
 from typing import Iterable
+import ast
+import io
+import tokenize
+import re
+import textwrap

 import numpy as np
@@ -28,19 +33,46 @@ def version(version_nr: int, calls: Iterable = ()):
     return decorator


-def strip_comments(s: str):
-    """Strips comment lines and docstring from Python source string."""
-    o = ""
-    in_docstring = False
-    for l in s.split("\n"):
-        if l.strip().startswith(("#", '"', "'")) or in_docstring:
-            in_docstring = l.strip().startswith(('"""', "'''")) + in_docstring == 1
-            continue
-        o += l + "\n"
-    return o
+def strip_comments(source: str) -> str:
+    """Removes docstrings, comments, and irrelevant whitespace from Python source code."""
+
+    # Step 1: Remove docstrings using AST
+    def remove_docstrings(node):
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)):
+            if (doc := ast.get_docstring(node, clean=False)):
+                first_stmt = node.body[0]
+                if isinstance(first_stmt, ast.Expr) and isinstance(first_stmt.value, ast.Constant):
+                    node.body.pop(0)  # Remove the docstring entirely
+        for child in ast.iter_child_nodes(node):
+            remove_docstrings(child)
+
+    tree = ast.parse(textwrap.dedent(source))
+    remove_docstrings(tree)
+    code_without_docstrings = ast.unparse(tree)
+
+    # Step 2: Remove comments using tokenize
+    tokens = tokenize.generate_tokens(io.StringIO(code_without_docstrings).readline)
+    result = []
+    last_lineno = -1
+    last_col = 0
+    for toknum, tokval, (srow, scol), (erow, ecol), line in tokens:
+        if toknum == tokenize.COMMENT:
+            continue
+        if srow > last_lineno:
+            last_col = 0
+        if scol > last_col:
+            result.append(" " * (scol - last_col))
+        result.append(tokval)
+        last_lineno, last_col = erow, ecol
+    code_no_comments = ''.join(result)
+
+    # Step 3: Remove empty lines (whitespace-only or truly blank)
+    return "\n".join([line for line in code_no_comments.splitlines() if line.strip() != ""])


-def checksum(*args, csum=None):
+def checksum(*args, csum=None, _seen=None):
     """
     Calculate a checksum of any object, by sha1 hash.
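The rewritten strip_comments pushes the source through an AST round-trip before stripping comments, so formatting is normalized as well. A quick sketch of what it yields, assuming the function is importable from mdevaluate's checksum module (the sample source is invented for illustration):

from mdevaluate.checksum import strip_comments

sample = '''
def f(x):
    """A docstring that must not influence the checksum."""
    # neither should this comment
    return x + 1  # nor this one
'''

print(strip_comments(sample))
# def f(x):
#     return x + 1

Because ast.unparse normalizes formatting, two stylistic variants of the same code now hash identically, which the old line-based stripper could not guarantee.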
@@ -60,7 +92,15 @@ def checksum(*args, csum=None):
         csum = hashlib.sha1()
         csum.update(str(SALT).encode())

+    if _seen is None:
+        _seen = set()
+
     for arg in args:
+        obj_id = id(arg)
+        if obj_id in _seen:
+            continue
+        _seen.add(obj_id)
         if hasattr(arg, "__checksum__"):
             logger.debug("Checksum via __checksum__: %s", str(arg))
             csum.update(str(arg.__checksum__()).encode())
@@ -77,15 +117,15 @@ def checksum(*args, csum=None):
             for key in sorted(merged):  # deterministic ordering
                 v = merged[key]
                 if v is not arg:
-                    checksum(v, csum=csum)
+                    checksum(v, csum=csum, _seen=_seen)
         elif isinstance(arg, functools.partial):
             logger.debug("Checksum via partial for %s", str(arg))
-            checksum(arg.func, csum=csum)
+            checksum(arg.func, csum=csum, _seen=_seen)
             for x in arg.args:
-                checksum(x, csum=csum)
+                checksum(x, csum=csum, _seen=_seen)
             for k in sorted(arg.keywords.keys()):
                 csum.update(k.encode())
-                checksum(arg.keywords[k], csum=csum)
+                checksum(arg.keywords[k], csum=csum, _seen=_seen)
         elif isinstance(arg, np.ndarray):
             csum.update(arg.tobytes())
         else:
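The new _seen set is a recursion guard keyed on id(arg): any object already visited in the current call is skipped instead of being hashed again. A minimal sketch of the failure mode it addresses, assuming (as the merged loop above suggests) that checksumming a function recurses into the objects it references, so mutually recursive functions previously chased each other without bound; the example names are invented:

from mdevaluate.checksum import checksum

def even(n):
    return True if n == 0 else odd(n - 1)

def odd(n):
    return False if n == 0 else even(n - 1)

checksum(even)  # terminates: the second visit to even is skipped via _seen

One side effect worth noting: since the guard keys on id(), a repeated occurrence of the very same object within one call is skipped as well, so digests of argument lists containing duplicates change with this release.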

View File

@@ -1,6 +1,6 @@
 from functools import partial, wraps
 from copy import copy
-from .logging import logger
+from .logging_util import logger
 from typing import Optional, Callable, List, Tuple

 import numpy as np

View File

@@ -18,7 +18,7 @@ def log_indices(first: int, last: int, num: int = 100) -> np.ndarray:
     return np.unique(np.int_(ls) - 1 + first)


-@autosave_data(2)
+@autosave_data(nargs=2, kwargs_keys=('selector', 'segments', 'skip', 'window', 'average', 'points',), version=1.0)
 def shifted_correlation(
     function: Callable,
     frames: Coordinates,
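The bare positional @autosave_data(2) becomes a fully spelled-out call. Reading the names, nargs presumably counts the positional arguments and kwargs_keys lists the keyword arguments that identify a cached result; that reading is an assumption, not the documented API. A caller-side sketch under that assumption (my_correlation is invented):

# Assumption: nargs = positional args hashed into the cache key,
# kwargs_keys = keyword args that participate in it.
@autosave_data(nargs=2, kwargs_keys=("selector", "segments"), version=1.0)
def my_correlation(function, frames, selector=None, segments=10):
    ...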
@@ -199,7 +199,7 @@ def msd(
         raise ValueError('Parameter axis has to be either "all", "x", "y", or "z"!')


-def isf(
+def isf_raw(
     start_frame: CoordinateFrame,
     end_frame: CoordinateFrame,
     q: float = 22.7,
@@ -216,29 +216,59 @@ def isf(
         displacements = displacements_without_drift(start_frame, end_frame, trajectory)

     if axis == "all":
         distance = (displacements**2).sum(axis=1) ** 0.5
-        return np.sinc(distance * q / np.pi).mean()
+        return np.sinc(distance * q / np.pi)
     elif axis == "xy" or axis == "yx":
         distance = (displacements[:, [0, 1]]**2).sum(axis=1) ** 0.5
-        return np.real(jn(0, distance * q)).mean()
+        return np.real(jn(0, distance * q))
     elif axis == "xz" or axis == "zx":
         distance = (displacements[:, [0, 2]]**2).sum(axis=1) ** 0.5
-        return np.real(jn(0, distance * q)).mean()
+        return np.real(jn(0, distance * q))
     elif axis == "yz" or axis == "zy":
         distance = (displacements[:, [1, 2]]**2).sum(axis=1) ** 0.5
-        return np.real(jn(0, distance * q)).mean()
+        return np.real(jn(0, distance * q))
     elif axis == "x":
         distance = np.abs(displacements[:, 0])
-        return np.mean(np.cos(np.abs(q * distance)))
+        return np.cos(np.abs(q * distance))
     elif axis == "y":
         distance = np.abs(displacements[:, 1])
-        return np.mean(np.cos(np.abs(q * distance)))
+        return np.cos(np.abs(q * distance))
     elif axis == "z":
         distance = np.abs(displacements[:, 2])
-        return np.mean(np.cos(np.abs(q * distance)))
+        return np.cos(np.abs(q * distance))
     else:
         raise ValueError('Parameter axis has to be either "all", "x", "y", or "z"!')
+
+
+def isf(
+    start_frame: CoordinateFrame,
+    end_frame: CoordinateFrame,
+    q: float = 22.7,
+    trajectory: Coordinates = None,
+    axis: str = "all",
+) -> float:
+    """
+    Incoherent intermediate scattering function averaged over all particles.
+    See isf_raw for details.
+    """
+    return isf_raw(start_frame, end_frame, q=q, trajectory=trajectory, axis=axis).mean()
+
+
+def isf_mean_var(
+    start_frame: CoordinateFrame,
+    end_frame: CoordinateFrame,
+    q: float = 22.7,
+    trajectory: Coordinates = None,
+    axis: str = "all",
+) -> tuple[float, float]:
+    """
+    Incoherent intermediate scattering function averaged over all particles,
+    together with its variance across particles.
+    See isf_raw for details.
+    """
+    values = isf_raw(start_frame, end_frame, q=q, trajectory=trajectory, axis=axis)
+    return values.mean(), values.var()


 def rotational_autocorrelation(
     start_frame: CoordinateFrame, end_frame: CoordinateFrame, order: int = 2
 ) -> float:
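The per-particle values are now reusable: isf_raw returns the raw array, isf keeps the old scalar behavior, and isf_mean_var adds the spread. A usage sketch, where start, end, and traj stand in for a CoordinateFrame pair and a Coordinates object:

per_particle = isf_raw(start, end, q=22.7, trajectory=traj)   # ndarray, one value per particle
F = isf(start, end, q=22.7, trajectory=traj)                  # scalar mean, as before
mean, var = isf_mean_var(start, end, q=22.7, trajectory=traj)
# mean == per_particle.mean(), var == per_particle.var()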
@@ -430,10 +460,11 @@ def non_gaussian_parameter(
     end_frame: CoordinateFrame,
     trajectory: Coordinates = None,
     axis: str = "all",
+    full_output: bool = False,
 ) -> float:
-    """
+    r"""
     Calculate the non-Gaussian parameter.

-    ..math:
+    .. math::
         \alpha_2 (t) =
         \frac{3}{5}\frac{\langle r_i^4(t)\rangle}{\langle r_i^2(t)\rangle^2} - 1
     """
@@ -442,27 +473,41 @@ def non_gaussian_parameter(
     else:
         vectors = displacements_without_drift(start_frame, end_frame, trajectory)

     if axis == "all":
-        r = (vectors**2).sum(axis=1)
+        r2 = (vectors**2).sum(axis=1)
         dimensions = 3
     elif axis == "xy" or axis == "yx":
-        r = (vectors[:, [0, 1]]**2).sum(axis=1)
+        r2 = (vectors[:, [0, 1]]**2).sum(axis=1)
         dimensions = 2
     elif axis == "xz" or axis == "zx":
-        r = (vectors[:, [0, 2]]**2).sum(axis=1)
+        r2 = (vectors[:, [0, 2]]**2).sum(axis=1)
         dimensions = 2
     elif axis == "yz" or axis == "zy":
-        r = (vectors[:, [1, 2]]**2).sum(axis=1)
+        r2 = (vectors[:, [1, 2]]**2).sum(axis=1)
         dimensions = 2
     elif axis == "x":
-        r = vectors[:, 0] ** 2
+        r2 = vectors[:, 0] ** 2
         dimensions = 1
     elif axis == "y":
-        r = vectors[:, 1] ** 2
+        r2 = vectors[:, 1] ** 2
         dimensions = 1
     elif axis == "z":
-        r = vectors[:, 2] ** 2
+        r2 = vectors[:, 2] ** 2
         dimensions = 1
     else:
         raise ValueError('Parameter axis has to be either "all", "x", "y", or "z"!')

-    return (np.mean(r**2) / ((1 + 2 / dimensions) * (np.mean(r) ** 2))) - 1
+    m2 = np.mean(r2)
+    m4 = np.mean(r2**2)
+    if m2 == 0.0:
+        if full_output:
+            return 0.0, 0.0, 0.0
+        else:
+            return 0.0
+    alpha_2 = (m4 / ((1 + 2 / dimensions) * m2**2)) - 1
+    if full_output:
+        return alpha_2, m2, m4
+    else:
+        return alpha_2
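full_output exposes the moments the function already computes, and the m2 == 0.0 guard removes the former division by zero at t = 0. A short sketch (start and end are placeholder frames):

alpha2 = non_gaussian_parameter(start, end)                           # unchanged default
alpha2, m2, m4 = non_gaussian_parameter(start, end, full_output=True)
# m2 = <r^2>, m4 = <r^4>, and alpha_2 = m4 / ((1 + 2 / d) * m2**2) - 1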

View File

@@ -182,10 +182,10 @@ def tetrahedral_order(
     )

     # Connection vectors
-    neighbors_1 -= atoms
-    neighbors_2 -= atoms
-    neighbors_3 -= atoms
-    neighbors_4 -= atoms
+    neighbors_1 = pbc_diff(neighbors_1, atoms, box=atoms.box)
+    neighbors_2 = pbc_diff(neighbors_2, atoms, box=atoms.box)
+    neighbors_3 = pbc_diff(neighbors_3, atoms, box=atoms.box)
+    neighbors_4 = pbc_diff(neighbors_4, atoms, box=atoms.box)

     # Normed Connection vectors
     neighbors_1 /= np.linalg.norm(neighbors_1, axis=-1).reshape(-1, 1)
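Plain subtraction is wrong whenever an atom and one of its neighbors sit on opposite sides of a periodic boundary; pbc_diff returns the minimum-image difference instead. A self-contained sketch of the idea for an orthorhombic box (mdevaluate's pbc_diff is the real, more general implementation):

import numpy as np

def pbc_diff_orthorhombic(a, b, box):
    """Minimum-image difference a - b for a box given by its edge lengths."""
    d = np.asarray(a) - np.asarray(b)
    return d - box * np.round(d / box)

box = np.array([10.0, 10.0, 10.0])
print(pbc_diff_orthorhombic([9.8, 0.0, 0.0], [0.1, 0.0, 0.0], box))
# [-0.3  0.  0.]  -- the naive difference would be 9.7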

View File

@@ -7,7 +7,7 @@ from numpy.typing import ArrayLike, NDArray
 from itertools import product

-from .logging import logger
+from .logging_util import logger

 if TYPE_CHECKING:
     from mdevaluate.coordinates import CoordinateFrame

View File

@@ -19,13 +19,13 @@ import MDAnalysis
 from scipy import sparse

 from .checksum import checksum
-from .logging import logger
+from .logging_util import logger
 from . import atoms
 from .coordinates import Coordinates

 CSR_ATTRS = ("data", "indices", "indptr")
 NOJUMP_MAGIC = 2016
-Group_RE = re.compile("\[ ([-+\w]+) \]")
+Group_RE = re.compile(r"\[ ([-+\w]+) \]")


 class NojumpError(Exception):
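Context for the r-prefix: "\[" is an invalid escape sequence in a normal string literal, which Python flags with a DeprecationWarning (a SyntaxWarning since 3.12); the raw string compiles to the byte-identical pattern:

import re

Group_RE = re.compile(r"\[ ([-+\w]+) \]")
print(Group_RE.match("[ SOL ]").group(1))  # SOL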

View File

@@ -14,7 +14,7 @@ from scipy.ndimage import uniform_filter1d
 from scipy.interpolate import interp1d
 from scipy.optimize import curve_fit

-from .logging import logger
+from .logging_util import logger
 from .functions import kww, kww_1e
@@ -357,6 +357,37 @@ def quick1etau(t: ArrayLike, C: ArrayLike, n: int = 7) -> float:
     return tau_est


+def quicknongaussfit(t, C, width=2):
+    """
+    Estimates the time and height of the peak of the non-Gaussian parameter.
+    C is the correlation function C(t).
+    """
+    def ffunc(t, y0, A_main, log_tau_main, sig_main):
+        # Gaussian peak on a constant offset, fitted in log10(t)
+        main_peak = A_main * np.exp(-(t - log_tau_main)**2 / (2 * sig_main**2))
+        return y0 + main_peak
+
+    # First rough estimate: the sampled time of the maximum.
+    # This is returned if the fit fails!
+    tau_est = t[np.argmax(C)]
+    nG_max = np.amax(C)
+    try:
+        with np.errstate(invalid='ignore'):
+            corr = C[t > 0]
+            time = np.log10(t[t > 0])
+            tau = time[np.argmax(corr)]
+            mask = (time > tau - width / 2) & (time < tau + width / 2)
+            time = time[mask]
+            corr = corr[mask]
+            nG_min = C[t > 0].min()
+            guess = [nG_min, nG_max - nG_min, tau, 0.6]
+            popt = curve_fit(ffunc, time, corr, p0=guess, maxfev=10000)[0]
+            tau_est = 10**popt[-2]
+            nG_max = popt[0] + popt[1]
+    except Exception:
+        pass
+    if np.isnan(tau_est):
+        tau_est = np.inf
+    return tau_est, nG_max


 def susceptibility(
     time: NDArray, correlation: NDArray, **kwargs
 ) -> tuple[NDArray, NDArray]:
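Usage sketch for the new helper: quicknongaussfit fits a Gaussian in log10(t) within width/2 decades around the maximum and returns the peak position and height (t and alpha2 are placeholder arrays, e.g. collected from shifted_correlation with non_gaussian_parameter):

tau_peak, alpha2_peak = quicknongaussfit(t, alpha2, width=2)
# falls back to t[np.argmax(alpha2)] / np.amax(alpha2) if the fit fails,
# and to np.inf if even that estimate is NaN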