19 Commits

Author  SHA1  Message  Date
robrobo  6b7641f152  stop reading many frames to get times for shifted_correlation  2025-09-05 14:47:53 +02:00
robrobo  cd7097ad46  remove print  2025-08-14 16:43:56 +02:00
robrobo  a0ca2d8657  Merge branch 'fix/nojump_and_npt_caching' into feature/compatibility_robin  2025-08-14 16:35:12 +02:00
robrobo  0f47475f22  quick version of center_of_masses for equal weights  2025-08-14 15:20:16 +02:00
robrobo  f6ff7606ad  decorator for autosave_data now consumes an autosave_dir_overwrite argument; this can enable autosave by itself and takes precedence over the enable(dir) value. It is provided the same way as the description.  2025-08-09 18:43:50 +02:00
robrobo  96c624efee  Merge branch 'fix/nojump_and_npt_caching' into feature/compatibility_robin  2025-08-09 16:11:42 +02:00
robrobo  accb43d7e6  Merge branch 'refactor_logging' into feature/compatibility_robin  2025-08-09 13:58:15 +02:00
robrobo  07b14a6cd6  Merge branch 'main' into feature/compatibility_robin  2025-08-09 13:57:29 +02:00
robrobo  e124506d10  fixed typo in logging output  2025-08-09 13:54:23 +02:00
robrobo  8169e76964  Merge branch 'main' into refactor_logging  2025-08-09 13:52:58 +02:00
robrobo  9f6af2af11  corrected spelling  2025-07-17 20:21:43 +02:00
robrobo  0ffce2f17a  added full_output option to the non-Gaussian parameter so that correct statistics can be done afterwards  2025-07-17 18:45:11 +02:00
robrobo  0eff84910b  simplified the quick non-Gaussian fit function so that it is more robust  2025-07-17 18:44:20 +02:00
robrobo  dae2d6ed95  created wrappers around the isf to allow access to more raw data and the variance for an accurate chi4 calculation  2025-07-17 12:50:49 +02:00
robrobo  ec4094cd92  added quick estimation for peak tau and height of the non-Gaussian parameter  2025-07-17 12:34:43 +02:00
robrobo  00043637e9  added a _seen set to avoid infinite recursion due to function arguments; also applied pbc_diff to neighbors in tetrahedral order  2025-07-11 20:54:27 +02:00
robrobo  7585e598dc  dedent before ast.parse for non-top-level functions  2025-06-17 01:02:15 +02:00
robrobo  6d8b86c1ef  extended checksum.strip_comments to work with prefixed docstrings and other small features  2025-06-16 22:15:22 +02:00
robrobo  a2a0ae8d7b  renamed logging to logging_util to avoid a circular import with Python's logging in some cases; added two raw strings to docstrings and fixed Sphinx syntax in one  2025-06-16 21:00:42 +02:00
11 changed files with 191 additions and 119 deletions

View File

@@ -26,13 +26,9 @@ time, msd = md.correlation.shifted_correlation(
## Installation
=== DEPRECATED: 2025-08-19 ===
The package requires the Python package [pygmx](https://github.com/mdevaluate/pygmx),
which handles reading of Gromacs file formats.
Installation of pygmx is described in its own repository.
=== DEPRECATED: 2025-08-19 ===
The mdevaluate package itself is plain Python code and can therefore be imported directly from its directory,
or installed into the local Python environment via setuptools by running

View File

@@ -1,71 +0,0 @@
#!/bin/bash
CONDA_VERSION=2024.10
PYTHON_VERSION=3.12
if [ -z "$1" ]; then
echo "No argument supplied, version to create expected"
exit 1
fi
if [ ! -w "/nfsopt/mdevaluate"]; then
echo "Please remount /nfsopt writable"
exit 2
fi
MD_VERSION=$1
# purge any previously loaded modules
module purge
echo "Create mdevaluate Python environemnt using conda"
echo "Using conda version: $CONDA_VERSION"
echo "Using Python version: $PYTHON_VERSION"
module load anaconda3/$CONDA_VERSION
conda create -y --prefix /nfsopt/mdevaluate/mdevaluate-${MD_VERSION} \
python=$PYTHON_VERSION
module purge
echo "Create modulefile for mdevaluate/$MD_VERSION"
cat > /nfsopt/modulefiles/mdevaluate/$MD_VERSION <<EOF
#%Module1.0#####################################################################
##
## dot modulefile
##
## modulefiles/dot. Generated from dot.in by configure.
##
module-whatis "Enables the mdevaluate Python environment."
set version ${MD_VERSION}
set module_path /nfsopt/mdevaluate/mdevaluate-\$version/bin
prepend-path PATH \$module_path
EOF
echo "Loading mdevaluate environment and install packages"
module load mdevaluate/${MD_VERSION}
pip install jupyter \
spyder \
mdanalysis \
pathos \
pandas \
dask \
sqlalchemy \
psycopg2-binary \
trimesh \
pyvista \
seaborn \
black \
"black[jupyter]" \
tables \
pyedr \
pytest
pip install git+https://gitea.pkm.physik.tu-darmstadt.de/IPKM/mdevaluate.git
pip install git+https://gitea.pkm.physik.tu-darmstadt.de/IPKM/python-store.git
pip install git+https://gitea.pkm.physik.tu-darmstadt.de/IPKM/python-tudplot.git

View File

@@ -16,7 +16,7 @@ from . import reader
from . import system
from . import utils
from . import extra
from .logging import logger
from .logging_util import logger
def open(

View File

@@ -5,7 +5,7 @@ from typing import Optional, Callable, Iterable
import numpy as np
from .checksum import checksum
from .logging import logger
from .logging_util import logger
autosave_directory: Optional[str] = None
load_autosave_data = False
@@ -166,8 +166,10 @@ def autosave_data(
        @functools.wraps(function)
        def autosave(*args, **kwargs):
            description = kwargs.pop("description", "")
            autosave_dir_overwrite = kwargs.pop("autosave_dir_overwrite", None)
            autosave_dir = autosave_dir_overwrite if autosave_dir_overwrite is not None else autosave_directory
            autoload = kwargs.pop("autoload", True) and load_autosave_data
            if autosave_directory is not None:
            if autosave_dir is not None:
                relevant_args = list(args[:nargs])
                if kwargs_keys is not None:
                    for key in [*posargs_keys, *kwargs_keys]:
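
Worth illustrating: the new autosave_dir_overwrite keyword is consumed by the wrapper before the wrapped function ever runs. A minimal self-contained sketch of the pattern, with the actual load/save logic stubbed out (only the keyword handling mirrors the diff above):

```python
import functools

autosave_directory = None  # module-level default, normally set via enable(dir)

def autosave_data(nargs, kwargs_keys=None, version=None):
    def decorator(function):
        @functools.wraps(function)
        def autosave(*args, **kwargs):
            # The per-call override takes precedence over the module-level
            # directory and can enable autosaving even if enable(dir) was
            # never called.
            overwrite = kwargs.pop("autosave_dir_overwrite", None)
            autosave_dir = overwrite if overwrite is not None else autosave_directory
            if autosave_dir is not None:
                print(f"caching under {autosave_dir}")  # stub for load/save logic
            return function(*args, **kwargs)
        return autosave
    return decorator

@autosave_data(nargs=2)
def analysis(a, b):
    return a + b

analysis(1, 2, autosave_dir_overwrite="/tmp/cache")  # caches this call only
```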

View File

@@ -1,9 +1,14 @@
import functools
import hashlib
from .logging import logger
from .logging_util import logger
from types import ModuleType, FunctionType
import inspect
from typing import Iterable
import ast
import io
import tokenize
import re
import textwrap
import numpy as np
@@ -28,19 +33,46 @@ def version(version_nr: int, calls: Iterable = ()):
return decorator
def strip_comments(s: str):
    """Strips comment lines and docstring from Python source string."""
    o = ""
    in_docstring = False
    for l in s.split("\n"):
        if l.strip().startswith(("#", '"', "'")) or in_docstring:
            in_docstring = l.strip().startswith(('"""', "'''")) + in_docstring == 1
            continue
        o += l + "\n"
    return o
def strip_comments(source: str) -> str:
    """Removes docstrings, comments, and irrelevant whitespace from Python source code."""
    # Step 1: Remove docstrings using AST
    def remove_docstrings(node):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)):
            if ast.get_docstring(node, clean=False):
                first_stmt = node.body[0]
                if isinstance(first_stmt, ast.Expr) and isinstance(first_stmt.value, ast.Constant):
                    node.body.pop(0)  # Remove the docstring entirely
        for child in ast.iter_child_nodes(node):
            remove_docstrings(child)

    tree = ast.parse(textwrap.dedent(source))
    remove_docstrings(tree)
    code_without_docstrings = ast.unparse(tree)
    # Step 2: Remove comments using tokenize
    tokens = tokenize.generate_tokens(io.StringIO(code_without_docstrings).readline)
    result = []
    last_lineno = -1
    last_col = 0
    for toknum, tokval, (srow, scol), (erow, ecol), line in tokens:
        if toknum == tokenize.COMMENT:
            continue
        if srow > last_lineno:
            last_col = 0
        if scol > last_col:
            result.append(" " * (scol - last_col))
        result.append(tokval)
        last_lineno, last_col = erow, ecol
    code_no_comments = "".join(result)
    # Step 3: Remove empty lines (whitespace-only or truly blank)
    return "\n".join(line for line in code_no_comments.splitlines() if line.strip() != "")
def checksum(*args, csum=None):
def checksum(*args, csum=None, _seen=None):
"""
Calculate a checksum of any object, by sha1 hash.
@@ -60,7 +92,15 @@ def checksum(*args, csum=None):
    csum = hashlib.sha1()
    csum.update(str(SALT).encode())
    if _seen is None:
        _seen = set()
    for arg in args:
        obj_id = id(arg)
        if obj_id in _seen:
            continue
        _seen.add(obj_id)
        if hasattr(arg, "__checksum__"):
            logger.debug("Checksum via __checksum__: %s", str(arg))
            csum.update(str(arg.__checksum__()).encode())
@@ -77,15 +117,15 @@ def checksum(*args, csum=None):
            for key in sorted(merged):  # deterministic ordering
                v = merged[key]
                if v is not arg:
                    checksum(v, csum=csum)
                    checksum(v, csum=csum, _seen=_seen)
        elif isinstance(arg, functools.partial):
            logger.debug("Checksum via partial for %s", str(arg))
            checksum(arg.func, csum=csum)
            checksum(arg.func, csum=csum, _seen=_seen)
            for x in arg.args:
                checksum(x, csum=csum)
                checksum(x, csum=csum, _seen=_seen)
            for k in sorted(arg.keywords.keys()):
                csum.update(k.encode())
                checksum(arg.keywords[k], csum=csum)
                checksum(arg.keywords[k], csum=csum, _seen=_seen)
        elif isinstance(arg, np.ndarray):
            csum.update(arg.tobytes())
        else:
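
The effect of the new _seen set is easiest to see on a self-referential object, which previously recursed without bound. A minimal sketch of the guard pattern (the real checksum handles many more types than this):

```python
import hashlib

def checksum(*args, csum=None, _seen=None):
    # Track ids of objects already hashed to break reference cycles.
    if csum is None:
        csum = hashlib.sha1()
    if _seen is None:
        _seen = set()
    for arg in args:
        if id(arg) in _seen:
            continue
        _seen.add(id(arg))
        if isinstance(arg, (list, tuple)):
            checksum(*arg, csum=csum, _seen=_seen)
        else:
            csum.update(repr(arg).encode())
    return int.from_bytes(csum.digest()[:8], "big")

a = [1, 2]
a.append(a)  # cycle: the list contains itself
print(checksum(a))  # terminates thanks to the _seen guard
```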

View File

@@ -1,6 +1,6 @@
from functools import partial, wraps
from copy import copy
from .logging import logger
from .logging_util import logger
from typing import Optional, Callable, List, Tuple
import numpy as np
@@ -436,6 +436,34 @@ def center_of_masses(
return np.array(positions)
@map_coordinates
def center_of_atoms(
    frame: CoordinateFrame, atom_indices=None, shear: bool = False
) -> NDArray:
    """Per-residue center of the selected atoms, i.e. center_of_masses with equal weights."""
    if atom_indices is None:
        atom_indices = list(range(len(frame)))
    res_ids = frame.residue_ids[atom_indices]
    if shear:
        coords = frame[atom_indices]
        box = frame.box
        sort_ind = res_ids.argsort(kind="stable")
        i = np.concatenate([[0], np.where(np.diff(res_ids[sort_ind]) > 0)[0] + 1])
        coms = coords[sort_ind[i]][res_ids - min(res_ids)]
        cor = pbc_diff(coords, coms, box)
        coords = coms + cor
    else:
        coords = frame.whole[atom_indices]
    mask = np.bincount(res_ids)[1:] != 0
    positions = np.array(
        [
            np.bincount(res_ids, weights=c)[1:] / np.bincount(res_ids)[1:]
            for c in coords.T
        ]
    ).T[mask]
    return np.array(positions)
@map_coordinates
def pore_coordinates(
frame: CoordinateFrame, origin: ArrayLike, sym_axis: str = "z"

View File

@@ -18,7 +18,7 @@ def log_indices(first: int, last: int, num: int = 100) -> np.ndarray:
return np.unique(np.int_(ls) - 1 + first)
@autosave_data(2)
@autosave_data(nargs=2, kwargs_keys=('selector', 'segments', 'skip', 'window', 'average', 'points',), version=1.0)
def shifted_correlation(
    function: Callable,
    frames: Coordinates,
@@ -147,7 +147,8 @@ def shifted_correlation(
    num_frames = int(len(frames) * window)
    ls = np.logspace(0, np.log10(num_frames + 1), num=points)
    idx = np.unique(np.int_(ls) - 1)
    t = np.array([frames[i].time for i in idx]) - frames[0].time
    dt = round(frames[1].time - frames[0].time, 6)  # round to avoid bad floats
    t = idx * dt
    result = np.array(
        [
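
The replaced time axis is what the top commit means by "stop reading many frames": reconstructing t from a single frame spacing avoids opening every sampled frame just to read its timestamp. A standalone sketch of the index/time construction, assuming an equally spaced trajectory (the numbers are illustrative):

```python
import numpy as np

points = 100
num_frames = 10_000
ls = np.logspace(0, np.log10(num_frames + 1), num=points)
idx = np.unique(np.int_(ls) - 1)  # logarithmically spaced frame indices
dt = round(0.002, 6)              # frames[1].time - frames[0].time in the real code
t = idx * dt                      # no per-frame time lookups needed
```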
@@ -199,7 +200,7 @@ def msd(
        raise ValueError('Parameter axis has to be either "all", "x", "y", or "z"!')
def isf(
def isf_raw(
    start_frame: CoordinateFrame,
    end_frame: CoordinateFrame,
    q: float = 22.7,
@@ -216,29 +217,59 @@ def isf(
    displacements = displacements_without_drift(start_frame, end_frame, trajectory)
    if axis == "all":
        distance = (displacements**2).sum(axis=1) ** 0.5
        return np.sinc(distance * q / np.pi).mean()
        return np.sinc(distance * q / np.pi)
    elif axis == "xy" or axis == "yx":
        distance = (displacements[:, [0, 1]] ** 2).sum(axis=1) ** 0.5
        return np.real(jn(0, distance * q)).mean()
        return np.real(jn(0, distance * q))
    elif axis == "xz" or axis == "zx":
        distance = (displacements[:, [0, 2]] ** 2).sum(axis=1) ** 0.5
        return np.real(jn(0, distance * q)).mean()
        return np.real(jn(0, distance * q))
    elif axis == "yz" or axis == "zy":
        distance = (displacements[:, [1, 2]] ** 2).sum(axis=1) ** 0.5
        return np.real(jn(0, distance * q)).mean()
        return np.real(jn(0, distance * q))
    elif axis == "x":
        distance = np.abs(displacements[:, 0])
        return np.mean(np.cos(np.abs(q * distance)))
        return np.cos(np.abs(q * distance))
    elif axis == "y":
        distance = np.abs(displacements[:, 1])
        return np.mean(np.cos(np.abs(q * distance)))
        return np.cos(np.abs(q * distance))
    elif axis == "z":
        distance = np.abs(displacements[:, 2])
        return np.mean(np.cos(np.abs(q * distance)))
        return np.cos(np.abs(q * distance))
    else:
        raise ValueError('Parameter axis has to be either "all", "xy", "xz", "yz", "x", "y", or "z"!')
def isf(
    start_frame: CoordinateFrame,
    end_frame: CoordinateFrame,
    q: float = 22.7,
    trajectory: Coordinates = None,
    axis: str = "all",
) -> float:
    """
    Incoherent intermediate scattering function averaged over all particles.
    See isf_raw for details.
    """
    return isf_raw(start_frame, end_frame, q=q, trajectory=trajectory, axis=axis).mean()
def isf_mean_var(
    start_frame: CoordinateFrame,
    end_frame: CoordinateFrame,
    q: float = 22.7,
    trajectory: Coordinates = None,
    axis: str = "all",
) -> tuple[float, float]:
    """
    Incoherent intermediate scattering function averaged over all particles,
    together with its variance over particles.
    See isf_raw for details.
    """
    values = isf_raw(start_frame, end_frame, q=q, trajectory=trajectory, axis=axis)
    return values.mean(), values.var()
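
The per-particle values that isf_raw now exposes are what make the dynamic susceptibility accessible; the commit log calls this the chi4 calculation. A hedged sketch of how the variance could feed into a chi_4-style estimate — the N * Var normalization convention here is an assumption for illustration, not part of this diff:

```python
import numpy as np

values = np.random.rand(1024)      # stand-in for isf_raw(start, end, q=22.7)
mean, var = values.mean(), values.var()
n_particles = values.size
chi4_estimate = n_particles * var  # common N * Var[F_s] convention
```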
def rotational_autocorrelation(
    start_frame: CoordinateFrame, end_frame: CoordinateFrame, order: int = 2
) -> float:
@@ -430,10 +461,11 @@ def non_gaussian_parameter(
    end_frame: CoordinateFrame,
    trajectory: Coordinates = None,
    axis: str = "all",
    full_output: bool = False,
) -> float:
    """
    r"""
    Calculate the non-Gaussian parameter.
    ..math:
    .. math::
        \alpha_2 (t) =
        \frac{3}{5}\frac{\langle r_i^4(t)\rangle}{\langle r_i^2(t)\rangle^2} - 1
    """
@@ -442,27 +474,41 @@ def non_gaussian_parameter(
    else:
        vectors = displacements_without_drift(start_frame, end_frame, trajectory)
    if axis == "all":
        r = (vectors**2).sum(axis=1)
        r2 = (vectors**2).sum(axis=1)
        dimensions = 3
    elif axis == "xy" or axis == "yx":
        r = (vectors[:, [0, 1]] ** 2).sum(axis=1)
        r2 = (vectors[:, [0, 1]] ** 2).sum(axis=1)
        dimensions = 2
    elif axis == "xz" or axis == "zx":
        r = (vectors[:, [0, 2]] ** 2).sum(axis=1)
        r2 = (vectors[:, [0, 2]] ** 2).sum(axis=1)
        dimensions = 2
    elif axis == "yz" or axis == "zy":
        r = (vectors[:, [1, 2]] ** 2).sum(axis=1)
        r2 = (vectors[:, [1, 2]] ** 2).sum(axis=1)
        dimensions = 2
    elif axis == "x":
        r = vectors[:, 0] ** 2
        r2 = vectors[:, 0] ** 2
        dimensions = 1
    elif axis == "y":
        r = vectors[:, 1] ** 2
        r2 = vectors[:, 1] ** 2
        dimensions = 1
    elif axis == "z":
        r = vectors[:, 2] ** 2
        r2 = vectors[:, 2] ** 2
        dimensions = 1
    else:
        raise ValueError('Parameter axis has to be either "all", "xy", "xz", "yz", "x", "y", or "z"!')
    return (np.mean(r**2) / ((1 + 2 / dimensions) * (np.mean(r) ** 2))) - 1
    m2 = np.mean(r2)
    m4 = np.mean(r2**2)
    if m2 == 0.0:
        if full_output:
            return 0.0, 0.0, 0.0
        else:
            return 0.0
    alpha_2 = (m4 / ((1 + 2 / dimensions) * m2**2)) - 1
    if full_output:
        return alpha_2, m2, m4
    else:
        return alpha_2
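
The point of full_output is that alpha_2 does not average linearly: to combine several time windows, one should pool the moments m2 and m4 first and only then form the ratio. A small sketch under that assumption, with made-up per-window moments:

```python
import numpy as np

# Per-window moments as returned by non_gaussian_parameter(..., full_output=True)
m2s = np.array([0.82, 1.10, 0.95])
m4s = np.array([1.95, 3.40, 2.60])

dimensions = 3
m2, m4 = m2s.mean(), m4s.mean()  # pool the moments, not the alpha_2 values
alpha_2 = m4 / ((1 + 2 / dimensions) * m2**2) - 1
```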

View File

@@ -7,7 +7,7 @@ from numpy.typing import ArrayLike, NDArray
from itertools import product
from .logging import logger
from .logging_util import logger
if TYPE_CHECKING:
from mdevaluate.coordinates import CoordinateFrame

View File

@@ -19,13 +19,13 @@ import MDAnalysis
from scipy import sparse
from .checksum import checksum
from .logging import logger
from .logging_util import logger
from . import atoms
from .coordinates import Coordinates
CSR_ATTRS = ("data", "indices", "indptr")
NOJUMP_MAGIC = 2016
Group_RE = re.compile("\[ ([-+\w]+) \]")
Group_RE = re.compile(r"\[ ([-+\w]+) \]")
class NojumpError(Exception):
@@ -275,7 +275,7 @@ def load_nojump_matrices(reader: BaseReader):
"Loaded Nojump matrices: {}".format(nojump_load_filename(reader))
)
else:
logger.info("Invlaid Nojump Data: {}".format(nojump_load_filename(reader)))
logger.info("Invalid Nojump Data: {}".format(nojump_load_filename(reader)))
except KeyError:
logger.info("Removing zip-File: %s", zipname)
os.remove(nojump_load_filename(reader))

View File

@@ -14,7 +14,7 @@ from scipy.ndimage import uniform_filter1d
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
from .logging import logger
from .logging_util import logger
from .functions import kww, kww_1e
@@ -357,6 +357,37 @@ def quick1etau(t: ArrayLike, C: ArrayLike, n: int = 7) -> float:
return tau_est
def quicknongaussfit(t, C, width=2):
    """
    Estimates the time and height of the peak in the non-Gaussian parameter.
    C is the correlation function C(t).
    """
    def ffunc(t, y0, A_main, log_tau_main, sig_main):
        main_peak = A_main * np.exp(-((t - log_tau_main) ** 2) / (2 * sig_main**2))
        return y0 + main_peak

    # First rough estimate: the time closest to the maximum.
    # This is returned if the fit fails!
    tau_est = t[np.argmax(C)]
    nG_max = np.amax(C)
    try:
        with np.errstate(invalid="ignore"):
            corr = C[t > 0]
            time = np.log10(t[t > 0])
            tau = time[np.argmax(corr)]
            mask = (time > tau - width / 2) & (time < tau + width / 2)
            time = time[mask]
            corr = corr[mask]
            nG_min = C[t > 0].min()
            guess = [nG_min, nG_max - nG_min, tau, 0.6]
            popt = curve_fit(ffunc, time, corr, p0=guess, maxfev=10000)[0]
            tau_est = 10 ** popt[-2]
            nG_max = popt[0] + popt[1]
    except Exception:
        pass
    if np.isnan(tau_est):
        tau_est = np.inf
    return tau_est, nG_max
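
A quick synthetic check of quicknongaussfit, assuming the function above is importable: a Gaussian bump on a log10 time axis whose peak position and height the fit should recover (the numbers are made up for illustration):

```python
import numpy as np

t = np.logspace(-1, 3, 200)
C = 0.05 + 0.40 * np.exp(-((np.log10(t) - 1.2) ** 2) / (2 * 0.5**2))

tau_peak, height = quicknongaussfit(t, C)
# tau_peak should be close to 10**1.2 ≈ 15.8 and height close to 0.45
```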
def susceptibility(
    time: NDArray, correlation: NDArray, **kwargs
) -> tuple[NDArray, NDArray]: