Compare commits: 02fed343f0...mdeval_dev
31 commits:
adae920527, 11793d7960, 55b06fa61d, 3aa91d7482, 7d8c5d849d, c09549902a,
b7bb8cb379, 33c4756e34, 7b9f8b6773, c89cead81c, 31eb145a13, af3758cbef,
93d020a4de, b5395098ce, 5e80701562, 363e420cd8, 6b77ef78e1, 0c940115af,
b0f29907df, 37bf496b21, befaef2dfa, 8ea7da5d2f, b405842452, f5cf453d61,
4394f70530, 298da3818d, d9278eed83, 787882810c, f527d25864, 77771738ab,
3e8fd04726
@@ -4,11 +4,12 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mdevaluate"
-version = "24.01"
+version = "24.06"
 dependencies = [
     "mdanalysis",
     "pandas",
     "dask",
     "pathos",
-    "tables"
+    "tables",
+    "pyedr"
 ]
@@ -4,3 +4,4 @@ dask
 pathos
 tables
 pytest
+pyedr
@@ -218,7 +218,7 @@ class Coordinates:
         self.get_frame.clear_cache()
 
     def __iter__(self):
-        for i in range(len(self))[self._slice]:
+        for i in range(len(self.frames))[self._slice]:
             yield self[i]
 
     @singledispatchmethod
@@ -232,7 +232,7 @@ class Coordinates:
         return sliced
 
     def __len__(self):
-        return len(self.frames)
+        return len(self.frames[self._slice])
 
     def __checksum__(self):
        return checksum(self.frames, self.atom_filter, self._slice, self.mode)
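
Note: the two `Coordinates` changes are complementary: `__len__` now reports the length of the sliced frame range, so `__iter__` must index the unsliced `self.frames` to avoid applying the slice twice. A sketch of the intended semantics, assuming `traj` is a `Coordinates` object over 1000 frames and that slicing stores the slice internally:

```python
sub = traj[::10]      # assumed to keep slice(None, None, 10) in sub._slice
len(sub)              # 100: len(self.frames[self._slice])
for frame in sub:     # iterates range(len(self.frames))[self._slice],
    ...               # i.e. underlying frames 0, 10, 20, ...
```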
@@ -692,10 +692,6 @@ def number_of_neighbors(
     elif not distinct:
         dnn = 1
 
-    print(atoms[:5])
-    print(query_atoms[:5])
-    print(" ")
-
     box = atoms.box
     if np.all(np.diag(np.diag(box)) == box):
         atoms = atoms % np.diag(box)
@@ -2,7 +2,7 @@ from typing import Callable, Optional
 
 import numpy as np
 from numpy.typing import ArrayLike
-from scipy.special import legendre
+from scipy.special import legendre, jn
 import dask.array as darray
 from functools import partial
 from scipy.spatial import KDTree
@@ -13,9 +13,95 @@ from .pbc import pbc_diff, pbc_points
 from .coordinates import Coordinates, CoordinateFrame, displacements_without_drift
 
 
-def log_indices(first: int, last: int, num: int = 100) -> np.ndarray:
-    ls = np.logspace(0, np.log10(last - first + 1), num=num)
-    return np.unique(np.int_(ls) - 1 + first)
+def _is_multi_selector(selection):
+    if len(selection) == 0:
+        return False
+    elif (
+        isinstance(selection[0], int)
+        or isinstance(selection[0], bool)
+        or isinstance(selection[0], np.integer)
+        or isinstance(selection[0], np.bool_)
+    ):
+        return False
+    else:
+        for indices in selection:
+            if len(indices) == 0:
+                continue
+            elif (
+                isinstance(indices[0], int)
+                or isinstance(indices[0], bool)
+                or isinstance(indices[0], np.integer)
+                or isinstance(indices[0], np.bool_)
+            ):
+                return True
+            else:
+                raise ValueError(
+                    "selector has more than two dimensions or does not "
+                    "contain int or bool types"
+                )
+
+
+def _calc_correlation(
+    frames: Coordinates,
+    start_frame: CoordinateFrame,
+    function: Callable,
+    selection: np.ndarray,
+    shifted_idx: np.ndarray,
+) -> np.ndarray:
+    if len(selection) == 0:
+        correlation = np.zeros(len(shifted_idx))
+    else:
+        start = start_frame[selection]
+        correlation = np.array(
+            [
+                function(start, frames[frame_index][selection])
+                for frame_index in shifted_idx
+            ]
+        )
+    return correlation
+
+
+def _calc_correlation_multi(
+    frames: Coordinates,
+    start_frame: CoordinateFrame,
+    function: Callable,
+    selection: np.ndarray,
+    shifted_idx: np.ndarray,
+) -> np.ndarray:
+    correlations = np.zeros((len(selection), len(shifted_idx)))
+    for i, frame_index in enumerate(shifted_idx):
+        frame = frames[frame_index]
+        for j, current_selection in enumerate(selection):
+            if len(current_selection) == 0:
+                correlations[j, i] = 0
+            else:
+                correlations[j, i] = function(
+                    start_frame[current_selection], frame[current_selection]
+                )
+    return correlations
+
+
+def _average_correlation(result):
+    averaged_result = []
+    for n in range(result.shape[1]):
+        clean_result = []
+        for entry in result[:, n]:
+            if np.all(entry == 0):
+                continue
+            else:
+                clean_result.append(entry)
+        averaged_result.append(np.average(np.array(clean_result), axis=0))
+    return np.array(averaged_result)
+
+
+def _average_correlation_multi(result):
+    clean_result = []
+    for entry in result:
+        if np.all(entry == 0):
+            continue
+        else:
+            clean_result.append(entry)
+    return np.average(np.array(clean_result), axis=0)
 
 
 @autosave_data(2)
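
Note: `_is_multi_selector` distinguishes a single selection set from a list of selection sets by inspecting the first element. A sketch with invented data:

```python
import numpy as np

single = np.array([0, 3, 7])                   # first element is an integer
multi = [np.array([0, 3]), np.array([5, 7])]   # first element is itself an index array

# _is_multi_selector(single) -> False: correlate one set per window
# _is_multi_selector(multi)  -> True:  correlate each set separately per window
```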
@@ -28,113 +114,82 @@ def shifted_correlation(
     window: float = 0.5,
     average: bool = True,
     points: int = 100,
-) -> (np.ndarray, np.ndarray):
-    """
-    Calculate the time series for a correlation function.
-
-    The times at which the correlation is calculated are determined by
-    a logarithmic distribution.
-
-    Args:
-        function: The function that should be correlated
-        frames: The coordinates of the simulation data
-        selector (opt.):
-            A function that returns the indices depending on
-            the starting frame for which particles the
-            correlation should be calculated.
-        segments (int, opt.):
-            The number of segments the time window will be
-            shifted
-        skip (float, opt.):
-            The fraction of the trajectory that will be skipped
-            at the beginning, if this is None the start index
-            of the frames slice will be used, which defaults
-            to 0.1.
-        window (float, opt.):
-            The fraction of the simulation the time series will
-            cover
-        average (bool, opt.):
-            If True, returns averaged correlation function
-        points (int, opt.):
-            The number of timeshifts for which the correlation
-            should be calculated
-    Returns:
-        tuple:
-            A list of length N that contains the timeshifts of the frames at
-            which the time series was calculated and a numpy array of shape
-            (segments, N) that holds the (non-averaged) correlation data
-
-    Example:
-        Calculating the mean square displacement of a coordinate object
-        named ``coords``:
-
-        >>> time, data = shifted_correlation(msd, coords)
-    """
-
-    def get_correlation(
-        frames: CoordinateFrame,
-        start_frame: CoordinateFrame,
-        index: np.ndarray,
-        shifted_idx: np.ndarray,
-    ) -> np.ndarray:
-        if len(index) == 0:
-            correlation = np.zeros(len(shifted_idx))
-        else:
-            start = frames[start_frame][index]
-            correlation = np.array(
-                [function(start, frames[frame][index]) for frame in shifted_idx]
-            )
-        return correlation
-
-    def apply_selector(
-        start_frame: CoordinateFrame,
-        frames: CoordinateFrame,
-        idx: np.ndarray,
-        selector: Optional[Callable] = None,
-    ):
-        shifted_idx = idx + start_frame
-
-        if selector is None:
-            index = np.arange(len(frames[start_frame]))
-            return get_correlation(frames, start_frame, index, shifted_idx)
-        else:
-            index = selector(frames[start_frame])
-            if len(index) == 0:
-                return np.zeros(len(shifted_idx))
-            elif (
-                isinstance(index[0], int)
-                or isinstance(index[0], bool)
-                or isinstance(index[0], np.integer)
-                or isinstance(index[0], np.bool_)
-            ):
-                return get_correlation(frames, start_frame, index, shifted_idx)
-            else:
-                correlations = []
-                for ind in index:
-                    if len(ind) == 0:
-                        correlations.append(np.zeros(len(shifted_idx)))
-                    elif (
-                        isinstance(ind[0], int)
-                        or isinstance(ind[0], bool)
-                        or isinstance(ind[0], np.integer)
-                        or isinstance(ind[0], np.bool_)
-                    ):
-                        correlations.append(
-                            get_correlation(frames, start_frame, ind, shifted_idx)
-                        )
-                    else:
-                        raise ValueError(
-                            "selector has more than two dimensions or does not "
-                            "contain int or bool types"
-                        )
-                return correlations
+) -> tuple[np.ndarray, np.ndarray]:
+    """Compute a time-dependent correlation function for a given trajectory.
+
+    To improve statistics, multiple (possibly overlapping) windows are laid
+    over the whole trajectory and the correlation is computed for them
+    separately. The start frames of the windows are spaced linearly over the
+    valid region of the trajectory (skipping the fraction of frames at the
+    beginning given by the skip parameter).
+
+    The points within each window are spaced logarithmically.
+
+    Only a certain subset of the given atoms may be selected for each window
+    individually using a selector function.
+
+    Note that this function is specifically optimized for multi selectors,
+    which select multiple selection sets per window, for which the
+    correlation is computed separately.
+
+    Arguments
+    ---------
+    function:
+        The (correlation) function to evaluate.
+        Should be of the form (CoordinateFrame, CoordinateFrame) -> float
+    frames:
+        Trajectory to evaluate on
+    selector: (optional)
+        Selection function used to select certain selection sets for each
+        start frame. Should be of the form
+        (CoordinateFrame) -> list[A]
+        where A is something you can index an ndarray with, for example a
+        list of indices or a bool array.
+        Must return the same number of selection sets for every frame.
+    segments:
+        Number of start frames
+    skip:
+        Fraction of the trajectory to skip from the start
+    window:
+        Length of each segment, given as a fraction of the trajectory
+    average:
+        Whether to return averaged results.
+        See below for details on the returned ndarray.
+    points:
+        Number of points per segment
+
+    Returns
+    -------
+    times: ndarray
+        1d array of time differences to the start frame
+    result: ndarray
+        2d ndarray of averaged (or non-averaged) correlations.
+
+        When average==True (default) the returned array has shape (S, P),
+        where S is the number of selection sets and P the number of points
+        per window. For selection sets that were empty for all start frames,
+        all data points are zero.
+
+        When average==False the returned array has shape (W, S) with
+        dtype=object. The elements are either ndarrays of shape (P,) holding
+        the correlation data for the specific window and selection set, or
+        None if the corresponding selection set was empty. W is the number
+        of segments (windows); S and P are the same as for average==True.
+    """
 
     if 1 - skip < window:
         window = 1 - skip
 
-    start_frames = np.unique(
+    start_frame_indices = np.unique(
         np.linspace(
             len(frames) * skip,
             len(frames) * (1 - window),
@@ -144,28 +199,44 @@ def shifted_correlation(
         )
     )
 
-    num_frames = int(len(frames) * window)
-    ls = np.logspace(0, np.log10(num_frames + 1), num=points)
-    idx = np.unique(np.int_(ls) - 1)
-    t = np.array([frames[i].time for i in idx]) - frames[0].time
-
-    result = np.array(
-        [
-            apply_selector(start_frame, frames=frames, idx=idx, selector=selector)
-            for start_frame in start_frames
-        ]
-    )
+    num_frames_per_window = int(len(frames) * window)
+    logspaced_indices = np.logspace(0, np.log10(num_frames_per_window + 1), num=points)
+    logspaced_indices = np.unique(np.int_(logspaced_indices) - 1)
+    logspaced_time = (
+        np.array([frames[i].time for i in logspaced_indices]) - frames[0].time
+    )
+
+    if selector is None:
+        multi_selector = False
+    else:
+        selection = selector(frames[0])
+        multi_selector = _is_multi_selector(selection)
+
+    result = []
+    for start_frame_index in start_frame_indices:
+        shifted_idx = logspaced_indices + start_frame_index
+        start_frame = frames[start_frame_index]
+        if selector is None:
+            selection = np.arange(len(start_frame))
+        else:
+            selection = selector(start_frame)
+        if multi_selector:
+            result_segment = _calc_correlation_multi(
+                frames, start_frame, function, selection, shifted_idx
+            )
+        else:
+            result_segment = _calc_correlation(
+                frames, start_frame, function, selection, shifted_idx
+            )
+        result.append(result_segment)
+    result = np.array(result)
 
     if average:
-        clean_result = []
-        for entry in result:
-            if np.all(entry == 0):
-                continue
-            else:
-                clean_result.append(entry)
-        result = np.array(clean_result)
-        result = np.average(result, axis=0)
-    return t, result
+        if multi_selector:
+            result = _average_correlation_multi(result)
+        else:
+            result = _average_correlation(result)
+    return logspaced_time, result
 
 
 def msd(
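
Note: a minimal usage sketch of the reworked function, following the old docstring's example; `coords` is an assumed `Coordinates` object:

```python
from mdevaluate.correlation import shifted_correlation, isf

# Averaged ISF over 10 windows with 100 log-spaced points each:
times, data = shifted_correlation(isf, coords, segments=10, points=100)

# Per-window results instead of the average:
times, per_window = shifted_correlation(isf, coords, average=False)
```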
@@ -183,6 +254,12 @@ def msd(
         displacements = displacements_without_drift(start_frame, end_frame, trajectory)
     if axis == "all":
         return (displacements**2).sum(axis=1).mean()
+    elif axis == "xy" or axis == "yx":
+        return (displacements[:, [0, 1]] ** 2).sum(axis=1).mean()
+    elif axis == "xz" or axis == "zx":
+        return (displacements[:, [0, 2]] ** 2).sum(axis=1).mean()
+    elif axis == "yz" or axis == "zy":
+        return (displacements[:, [1, 2]] ** 2).sum(axis=1).mean()
     elif axis == "x":
         return (displacements[:, 0] ** 2).mean()
     elif axis == "y":
@@ -211,6 +288,15 @@ def isf(
     if axis == "all":
         distance = (displacements**2).sum(axis=1) ** 0.5
         return np.sinc(distance * q / np.pi).mean()
+    elif axis == "xy" or axis == "yx":
+        distance = (displacements[:, [0, 1]] ** 2).sum(axis=1) ** 0.5
+        return np.real(jn(0, distance * q)).mean()
+    elif axis == "xz" or axis == "zx":
+        distance = (displacements[:, [0, 2]] ** 2).sum(axis=1) ** 0.5
+        return np.real(jn(0, distance * q)).mean()
+    elif axis == "yz" or axis == "zy":
+        distance = (displacements[:, [1, 2]] ** 2).sum(axis=1) ** 0.5
+        return np.real(jn(0, distance * q)).mean()
     elif axis == "x":
         distance = np.abs(displacements[:, 0])
         return np.mean(np.cos(np.abs(q * distance)))
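
Note: the new planar variants rely on the 2D orientational average of exp(iq.r) being the Bessel function J0(qr), just as the 3D average is sinc(qr); that is why `jn` was added to the imports. A quick numerical check (a sketch, not part of the diff):

```python
import numpy as np
from scipy.special import jn

q, r = 22.7, 0.3
phi = np.random.default_rng(0).uniform(0, 2 * np.pi, 100_000)

# The 2D average of cos(q*r*cos(phi)) approaches J0(q*r):
print(np.cos(q * r * np.cos(phi)).mean(), jn(0, q * r))
```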
@@ -262,6 +348,12 @@ def van_hove_self(
         vectors = displacements_without_drift(start_frame, end_frame, trajectory)
     if axis == "all":
         delta_r = (vectors**2).sum(axis=1) ** 0.5
+    elif axis == "xy" or axis == "yx":
+        delta_r = (vectors[:, [0, 1]] ** 2).sum(axis=1) ** 0.5
+    elif axis == "xz" or axis == "zx":
+        delta_r = (vectors[:, [0, 2]] ** 2).sum(axis=1) ** 0.5
+    elif axis == "yz" or axis == "zy":
+        delta_r = (vectors[:, [1, 2]] ** 2).sum(axis=1) ** 0.5
     elif axis == "x":
         delta_r = np.abs(vectors[:, 0])
     elif axis == "y":
@@ -423,6 +515,15 @@ def non_gaussian_parameter(
     if axis == "all":
         r = (vectors**2).sum(axis=1)
         dimensions = 3
+    elif axis == "xy" or axis == "yx":
+        r = (vectors[:, [0, 1]] ** 2).sum(axis=1)
+        dimensions = 2
+    elif axis == "xz" or axis == "zx":
+        r = (vectors[:, [0, 2]] ** 2).sum(axis=1)
+        dimensions = 2
+    elif axis == "yz" or axis == "zy":
+        r = (vectors[:, [1, 2]] ** 2).sum(axis=1)
+        dimensions = 2
     elif axis == "x":
         r = vectors[:, 0] ** 2
         dimensions = 1
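
Note: the `dimensions` value presumably feeds the dimension-dependent prefactor of the non-Gaussian parameter, which in d dimensions reads alpha_2 = d<r^4> / ((d + 2)<r^2>^2) - 1 (the standard form; the part of the function carrying it is not shown in this diff). As a sketch:

```python
import numpy as np

def alpha_2(r_squared: np.ndarray, dimensions: int) -> float:
    # Standard non-Gaussian parameter in `dimensions` dimensions (assumed form).
    return (
        dimensions * np.mean(r_squared**2)
        / ((dimensions + 2) * np.mean(r_squared) ** 2)
        - 1
    )
```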
@@ -126,9 +126,6 @@ def rdf(
     particles_in_volume = int(
         np.max(number_of_neighbors(atoms_a, query_atoms=atoms_b, r_max=bins[-1])) * 1.1
     )
-    print(atoms_a[:5])
-    print(atoms_b[:5])
-    print(" ")
     distances, indices = next_neighbors(
         atoms_a,
         atoms_b,
@@ -309,7 +306,7 @@ def next_neighbor_distribution(
     )[1]
     resname_nn = reference.residue_names[nn]
     count_nn = (resname_nn == atoms.residue_names.reshape(-1, 1)).sum(axis=1)
-    return np.histogram(count_nn, bins=bins, normed=normed)[0]
+    return np.histogram(count_nn, bins=bins, density=normed)[0]
 
 
 def hbonds(
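
Note: the `normed` keyword of `np.histogram` was deprecated for years and removed in NumPy 1.24; `density` is the supported replacement and normalizes the histogram so it integrates to one:

```python
import numpy as np

counts = np.array([1, 2, 2, 3, 3, 3])
hist, edges = np.histogram(counts, bins=3, density=True)
print((hist * np.diff(edges)).sum())  # 1.0
```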
@@ -11,7 +11,7 @@ from mdevaluate.coordinates import CoordinateFrame, Coordinates
 from mdevaluate.pbc import pbc_points
 
 
-def a_ij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]:
+def calc_aij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]:
     tree = KDTree(atoms)
 
     dist, indices = tree.query(atoms, N + 1)
@@ -84,18 +84,18 @@ def count_ice_types(iceTypes: NDArray) -> NDArray:
 
 
 def selector_ice(
-    start_frame: CoordinateFrame,
-    traj: Coordinates,
+    oxygen_atoms_water: CoordinateFrame,
     chosen_ice_types: ArrayLike,
     combined: bool = True,
+    next_neighbor_distance: float = 0.35,
 ) -> NDArray:
-    oxygen = traj.subset(atom_name="OW")
-    atoms = oxygen[start_frame.step]
-    atoms = atoms % np.diag(atoms.box)
-    atoms_PBC = pbc_points(atoms, thickness=1)
-    aij, indices = a_ij(atoms_PBC)
+    atoms = oxygen_atoms_water
+    atoms_PBC = pbc_points(atoms, thickness=next_neighbor_distance * 2.2)
+    aij, indices = calc_aij(atoms_PBC)
     tree = KDTree(atoms_PBC)
-    neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True)
+    neighbors = tree.query_ball_point(
+        atoms_PBC, next_neighbor_distance, return_length=True
+    ) - 1
     index_SOL = atoms_PBC.tolist().index(atoms[0].tolist())
     index_SOL = np.arange(index_SOL, index_SOL + len(atoms))
     ice_Types = classify_ice(aij, indices, neighbors, index_SOL)
@@ -117,9 +117,9 @@ def selector_ice(
 def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
     def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray:
         atoms_PBC = pbc_points(frame, thickness=1)
-        aij, indices = a_ij(atoms_PBC)
+        aij, indices = calc_aij(atoms_PBC)
         tree = KDTree(atoms_PBC)
-        neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True)
+        neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) - 1
         index = selector(frame, atoms_PBC)
         ice_types_data = classify_ice(aij, indices, neighbors, index)
         ice_parts_data = count_ice_types(ice_types_data)
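
Note: when the query points are the tree's own points, `query_ball_point(..., return_length=True)` counts each point as its own neighbor, so the added `- 1` turns the result into a true neighbor count. A quick check:

```python
import numpy as np
from scipy.spatial import KDTree

pts = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
tree = KDTree(pts)
print(tree.query_ball_point(pts, 0.5, return_length=True))      # [1 1] (self-counts)
print(tree.query_ball_point(pts, 0.5, return_length=True) - 1)  # [0 0]
```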
@@ -161,8 +161,8 @@ def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
 def ice_clusters_traj(
     traj: Coordinates, segments: int = 10000, skip: float = 0.1
 ) -> list:
-    def ice_clusters(frame: CoordinateFrame, traj: Coordinates) -> Tuple[float, list]:
-        selection = selector_ice(frame, traj, [0, 1, 2])
+    def ice_clusters(frame: CoordinateFrame) -> Tuple[float, list]:
+        selection = selector_ice(frame, [0, 1, 2])
         if len(selection) == 0:
             return frame.time, []
         else:
@@ -194,6 +194,6 @@ def ice_clusters_traj(
         np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments))
     )
     all_clusters = [
-        ice_clusters(traj[frame_index], traj) for frame_index in frame_indices
+        ice_clusters(traj[frame_index]) for frame_index in frame_indices
     ]
     return all_clusters
@@ -1,9 +1,9 @@
 from functools import partial
+from typing import Optional
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from numpy.polynomial.polynomial import Polynomial as Poly
-import math
 from scipy.spatial import KDTree
 import pandas as pd
 import multiprocessing as mp
@@ -11,6 +11,56 @@ import multiprocessing as mp
 from ..coordinates import Coordinates
 
 
+def _pbc_points_reduced(
+    coordinates: ArrayLike,
+    pore_geometry: str,
+    box: Optional[NDArray] = None,
+    thickness: Optional[float] = None,
+) -> tuple[NDArray, NDArray]:
+    if box is None:
+        box = coordinates.box
+    if pore_geometry == "cylindrical":
+        grid = np.array([[i, j, k] for k in [-1, 0, 1] for j in [0] for i in [0]])
+        indices = np.tile(np.arange(len(coordinates)), 3)
+    elif pore_geometry == "slit":
+        grid = np.array(
+            [[i, j, k] for k in [0] for j in [1, 0, -1] for i in [-1, 0, 1]]
+        )
+        indices = np.tile(np.arange(len(coordinates)), 9)
+    else:
+        raise ValueError(
+            f"pore_geometry is {pore_geometry}, should either be "
+            f"'cylindrical' or 'slit'"
+        )
+    coordinates_pbc = np.concatenate([coordinates + v @ box for v in grid], axis=0)
+    size = np.diag(box)
+
+    if thickness is not None:
+        mask = np.all(coordinates_pbc > -thickness, axis=1)
+        coordinates_pbc = coordinates_pbc[mask]
+        indices = indices[mask]
+        mask = np.all(coordinates_pbc < size + thickness, axis=1)
+        coordinates_pbc = coordinates_pbc[mask]
+        indices = indices[mask]
+
+    return coordinates_pbc, indices
+
+
+def _build_tree(points, box, r_max, pore_geometry):
+    if np.all(np.diag(np.diag(box)) == box):
+        tree = KDTree(points % box, boxsize=box)
+        points_pbc_index = None
+    else:
+        points_pbc, points_pbc_index = _pbc_points_reduced(
+            points,
+            pore_geometry,
+            box,
+            thickness=r_max + 0.01,
+        )
+        tree = KDTree(points_pbc)
+    return tree, points_pbc_index
+
+
 def occupation_matrix(
     trajectory: Coordinates,
     edge_length: float = 0.05,
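
Note: `_pbc_points_reduced` replicates only the periodic images a pore geometry actually needs, three along z for a cylindrical pore and nine in the plane for a slit, instead of a full 27-image tiling. The grid logic in isolation:

```python
import numpy as np

# slit: replicate in the xy-plane only (9 images including the original)
grid_slit = np.array([[i, j, k] for k in [0] for j in [1, 0, -1] for i in [-1, 0, 1]])
# cylindrical: replicate along z only (3 images)
grid_cyl = np.array([[i, j, k] for k in [-1, 0, 1] for j in [0] for i in [0]])
print(grid_slit.shape, grid_cyl.shape)  # (9, 3) (3, 3)
```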
@@ -28,11 +78,7 @@ def occupation_matrix(
     z_bins = np.arange(0, box[2][2] + edge_length, edge_length)
     bins = [x_bins, y_bins, z_bins]
     # Trajectory is split for parallel computing
-    size = math.ceil(len(frame_indices) / nodes)
-    indices = [
-        np.arange(len(frame_indices))[i : i + size]
-        for i in range(0, len(frame_indices), size)
-    ]
+    indices = np.array_split(frame_indices, nodes)
     pool = mp.Pool(nodes)
     results = pool.map(
         partial(_calc_histogram, trajectory=trajectory, bins=bins), indices
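
Note: `np.array_split` tolerates chunk counts that do not divide the array evenly, which is exactly the bookkeeping the removed `math.ceil` construction did by hand:

```python
import numpy as np

frame_indices = np.arange(10)
print([chunk.tolist() for chunk in np.array_split(frame_indices, 4)])
# [[0, 1, 2], [3, 4, 5], [6, 7], [8, 9]]
```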
@@ -72,19 +118,19 @@ def _calc_histogram(
 
 
 def find_maxima(
-    occupation_df: pd.DataFrame, box: ArrayLike, edge_length: float = 0.05
+    occupation_df: pd.DataFrame, box: ArrayLike, radius: float, pore_geometry: str
 ) -> pd.DataFrame:
     maxima_df = occupation_df.copy()
     maxima_df["maxima"] = None
     points = np.array(maxima_df[["x", "y", "z"]])
-    tree = KDTree(points, boxsize=box)
-    all_neighbors = tree.query_ball_point(
-        points, edge_length * 3 ** (1 / 2) + edge_length / 100
-    )
+    tree, points_pbc_index = _build_tree(points, box, radius, pore_geometry)
     for i in range(len(maxima_df)):
         if maxima_df.loc[i, "maxima"] is not None:
             continue
-        neighbors = np.array(all_neighbors[i])
+        maxima_pos = maxima_df.loc[i, ["x", "y", "z"]]
+        neighbors = np.array(tree.query_ball_point(maxima_pos, radius))
+        if points_pbc_index is not None:
+            neighbors = points_pbc_index[neighbors]
         neighbors = neighbors[neighbors != i]
         if len(neighbors) == 0:
             maxima_df.loc[i, "maxima"] = True
@@ -104,23 +150,39 @@ def _calc_energies(
     maxima_df: pd.DataFrame,
     bins: ArrayLike,
     box: NDArray,
+    pore_geometry: str,
     T: float,
+    nodes: int = 8,
 ) -> NDArray:
     points = np.array(maxima_df[["x", "y", "z"]])
-    tree = KDTree(points, boxsize=box)
+    tree, points_pbc_index = _build_tree(points, box, bins[-1], pore_geometry)
     maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]]
     maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"])
     num_of_neighbors = np.max(
         tree.query_ball_point(maxima, bins[-1], return_length=True)
     )
-    distances, indices = tree.query(
-        maxima, k=num_of_neighbors, distance_upper_bound=bins[-1]
-    )
+    split_maxima = []
+    for i in range(0, len(maxima), 1000):
+        split_maxima.append(maxima[i : i + 1000])
+
+    distances = []
+    indices = []
+    for maxima in split_maxima:
+        distances_step, indices_step = tree.query(
+            maxima, k=num_of_neighbors, distance_upper_bound=bins[-1], workers=nodes
+        )
+        distances.append(distances_step)
+        indices.append(indices_step)
+    distances = np.concatenate(distances)
+    indices = np.concatenate(indices)
     all_energy_hist = []
     all_occupied_bins_hist = []
     if distances.ndim == 1:
         current_distances = distances[1:][distances[1:] <= bins[-1]]
-        current_indices = indices[1:][distances[1:] <= bins[-1]]
+        if points_pbc_index is None:
+            current_indices = indices[1:][distances[1:] <= bins[-1]]
+        else:
+            current_indices = points_pbc_index[indices[1:][distances[1:] <= bins[-1]]]
         energy = (
             -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations)
             * T
@@ -131,8 +193,12 @@ def _calc_energies(
         return result
     for i, maxima_occupation in enumerate(maxima_occupations):
         current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]]
-        current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]]
+
+        if points_pbc_index is None:
+            current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]]
+        else:
+            current_indices = points_pbc_index[
+                indices[i, 1:][distances[i, 1:] <= bins[-1]]
+            ]
         energy = (
             -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation)
             * T
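
Note: querying the tree in blocks of 1000 maxima bounds the memory taken by the distance and index arrays, and `workers=nodes` runs each query in parallel threads (the `workers` argument of `KDTree.query` in recent SciPy). The same pattern in isolation:

```python
import numpy as np
from scipy.spatial import KDTree

pts = np.random.default_rng(1).random((5000, 3))
tree = KDTree(pts)

distances = []
for i in range(0, len(pts), 1000):                        # block-wise queries
    d, _ = tree.query(pts[i : i + 1000], k=4, workers=2)  # threaded query
    distances.append(d)
print(np.concatenate(distances).shape)  # (5000, 4)
```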
@@ -168,9 +234,12 @@ def distance_resolved_energies(
     maxima_df: pd.DataFrame,
     distance_bins: ArrayLike,
     r_bins: ArrayLike,
     box: NDArray,
+    pore_geometry: str,
     temperature: float,
+    nodes: int = 8,
 ) -> pd.DataFrame:
     results = []
+    distances = []
     for i in range(len(distance_bins) - 1):
         maxima_indices = np.array(
             maxima_df.index[
@@ -179,11 +248,22 @@ def distance_resolved_energies(
                 * (maxima_df["maxima"] == True)
             ]
         )
-        results.append(
-            _calc_energies(maxima_indices, maxima_df, r_bins, box, temperature)
-        )
+        try:
+            results.append(
+                _calc_energies(
+                    maxima_indices,
+                    maxima_df,
+                    r_bins,
+                    box,
+                    pore_geometry,
+                    temperature,
+                    nodes,
+                )
+            )
+            distances.append((distance_bins[i] + distance_bins[i + 1]) / 2)
+        except ValueError:
+            pass
 
-    distances = (distance_bins[:-1] + distance_bins[1:]) / 2
     radii = (r_bins[:-1] + r_bins[1:]) / 2
     d = np.array([d for d in distances for r in radii])
     r = np.array([r for d in distances for r in radii])
@@ -192,7 +272,11 @@
 
 
 def find_energy_maxima(
-    energy_df: pd.DataFrame, r_min: float, r_max: float
+    energy_df: pd.DataFrame,
+    r_min: float,
+    r_max: float,
+    r_eval: float = None,
+    degree: int = 2,
 ) -> pd.DataFrame:
     distances = []
     energies = []
@@ -201,6 +285,9 @@ def find_energy_maxima(
         x = np.array(data_d["r"])
         y = np.array(data_d["energy"])
         mask = (x >= r_min) * (x <= r_max)
-        p3 = Poly.fit(x[mask], y[mask], deg=2)
-        energies.append(np.max(p3(np.linspace(r_min, r_max, 1000))))
+        p3 = Poly.fit(x[mask], y[mask], deg=degree)
+        if r_eval is None:
+            energies.append(np.max(p3(np.linspace(r_min, r_max, 1000))))
+        else:
+            energies.append(p3(r_eval))
     return pd.DataFrame({"d": distances, "energy": energies})
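
Note: NumPy's `Polynomial.fit` fits in a scaled internal domain and maps back automatically when the returned object is evaluated, so calling `p3(r_eval)` directly is sound. A standalone sketch of the new `r_eval` path:

```python
import numpy as np
from numpy.polynomial.polynomial import Polynomial as Poly

x = np.linspace(0.05, 0.15, 20)
y = -((x - 0.1) ** 2)          # toy energy curve with its maximum at r = 0.1
p = Poly.fit(x, y, deg=2)

print(p(0.1))                                    # evaluate at a fixed r_eval
print(np.max(p(np.linspace(0.05, 0.15, 1000))))  # or scan for the maximum
```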
@@ -265,7 +265,7 @@ def LSI(
     )
     distributions = np.array(
         [
-            LSI_distribution(trajectory[frame_index], trajectory, bins, selector=None)
+            LSI_distribution(trajectory[frame_index], bins, selector=None)
             for frame_index in frame_indices
         ]
     )
@@ -156,7 +156,7 @@ def nojump(frame: CoordinateFrame, usecache: bool = True) -> CoordinateFrame:
                     [m[i0 : abstep + 1].sum(axis=0) for m in reader.nojump_matrices]
                 ).T
             )
-            * frame.box.diagonal()
+            @ frame.box
         )
 
         reader._nojump_cache[abstep] = delta
@@ -173,7 +173,7 @@ def nojump(frame: CoordinateFrame, usecache: bool = True) -> CoordinateFrame:
                 ]
             ).T
         )
-        * frame.box.diagonal()
+        @ frame.box
     )
     return frame - delta
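
Note: multiplying jump counts by `frame.box.diagonal()` is only correct for rectangular boxes; the matrix product with the full box matrix also handles triclinic cells, where box vectors have off-diagonal components. A sketch of the difference:

```python
import numpy as np

jumps = np.array([[1.0, 0.0, 0.0], [0.0, -1.0, 1.0]])  # jumps in box-vector units

box = np.array([[2.0, 0.0, 0.0],   # triclinic box matrix (rows are box vectors)
                [0.5, 2.0, 0.0],
                [0.0, 0.3, 2.0]])

print(jumps * box.diagonal())  # rectangular-box approximation
print(jumps @ box)             # full triclinic displacement
```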
@@ -152,7 +152,7 @@ def nojump_load_filename(reader: BaseReader):
     )
     if os.path.exists(full_path_fallback):
         return full_path_fallback
-    if os.path.exists(fname) or is_writeable(directory):
+    if os.path.exists(full_path) or is_writeable(directory):
         return full_path
     else:
         user_data_dir = os.path.join("/data/", os.environ["HOME"].split("/")[-1])
@@ -187,19 +187,33 @@ def nojump_save_filename(reader: BaseReader):
 
 def parse_jumps(trajectory: Coordinates):
     prev = trajectory[0].whole
-    box = prev.box.diagonal()
+    box = prev.box
     SparseData = namedtuple("SparseData", ["data", "row", "col"])
     jump_data = (
         SparseData(data=array("b"), row=array("l"), col=array("l")),
         SparseData(data=array("b"), row=array("l"), col=array("l")),
         SparseData(data=array("b"), row=array("l"), col=array("l")),
     )
 
     for i, curr in enumerate(trajectory):
         if i % 500 == 0:
             logger.debug("Parse jumps Step: %d", i)
-        delta = ((curr - prev) / box).round().astype(np.int8)
+        r3 = np.subtract(curr, prev)
+        delta_z = np.array(np.rint(np.divide(r3[:, 2], box[2][2])), dtype=np.int8)
+        r2 = np.subtract(
+            r3,
+            (np.rint(np.divide(r3[:, 2], box[2][2])))[:, np.newaxis]
+            * box[2][np.newaxis, :],
+        )
+        delta_y = np.array(np.rint(np.divide(r2[:, 1], box[1][1])), dtype=np.int8)
+        r1 = np.subtract(
+            r2,
+            (np.rint(np.divide(r2[:, 1], box[1][1])))[:, np.newaxis]
+            * box[1][np.newaxis, :],
+        )
+        delta_x = np.array(np.rint(np.divide(r1[:, 0], box[0][0])), dtype=np.int8)
+        delta = np.array([delta_x, delta_y, delta_z]).T
         prev = curr
+        box = prev.box
         for d in range(3):
             (col,) = np.where(delta[:, d] != 0)
             jump_data[d].col.extend(col)
@@ -177,7 +177,7 @@ def coherent_sum(
     func: Callable[[ArrayLike, ArrayLike], float],
     coord_a: ArrayLike,
     coord_b: ArrayLike,
-) -> float:
+) -> NDArray:
     """
     Perform a coherent sum over two arrays :math:`A, B`.
test/test_correlation.py (new file, +57 lines)
@@ -0,0 +1,57 @@
+import os
+import pytest
+
+import mdevaluate
+from mdevaluate import correlation
+import numpy as np
+
+
+@pytest.fixture
+def trajectory(request):
+    return mdevaluate.open(os.path.join(os.path.dirname(__file__), "data/water"))
+
+
+def test_shifted_correlation(trajectory):
+    test_array = np.array([100, 82, 65, 49, 39, 29, 20, 13, 7])
+    OW = trajectory.subset(atom_name="OW")
+    t, result = correlation.shifted_correlation(
+        correlation.isf, OW, segments=10, skip=0.1, points=10
+    )
+    assert (np.array(result * 100, dtype=int) == test_array).all()
+
+
+def test_shifted_correlation_no_average(trajectory):
+    t, result = correlation.shifted_correlation(
+        correlation.isf, trajectory, segments=10, skip=0.1, points=5, average=False
+    )
+    assert result.shape == (10, 5)
+
+
+def test_shifted_correlation_selector(trajectory):
+    test_array = np.array([100, 82, 64, 48, 37, 28, 19, 11, 5])
+
+    def selector(frame):
+        index = np.argwhere((frame[:, 0] >= 0) * (frame[:, 0] < 1))
+        return index.flatten()
+
+    OW = trajectory.subset(atom_name="OW")
+    t, result = correlation.shifted_correlation(
+        correlation.isf, OW, segments=10, skip=0.1, points=10, selector=selector
+    )
+    assert (np.array(result * 100, dtype=int) == test_array).all()
+
+
+def test_shifted_correlation_multi_selector(trajectory):
+    def selector(frame):
+        indices = []
+        for i in range(3):
+            x = frame[:, 0].flatten()
+            index = np.argwhere((x >= i) * (x < i + 1))
+            indices.append(index.flatten())
+        return indices
+
+    OW = trajectory.subset(atom_name="OW")
+    t, result = correlation.shifted_correlation(
+        correlation.isf, OW, segments=10, skip=0.1, points=10, selector=selector
+    )
+    assert result.shape == (3, 9)
@@ -13,42 +13,24 @@ def trajectory(request):
 
 
 def test_get_fel(trajectory):
-    test_array = np.array(
-        [
-            174.46253634,
-            174.60905476,
-            178.57658092,
-            182.43001192,
-            180.57916378,
-            176.49886217,
-            178.96018547,
-            181.13561782,
-            178.31026314,
-            176.08903996,
-            180.71215345,
-            181.59703135,
-            180.34329368,
-            187.02474488,
-            197.99167477,
-            214.05788031,
-            245.58571282,
-            287.52457507,
-            331.53492965,
-        ]
-    )
+    test_array = np.array([210., 214., 209., 192., 200., 193., 230., 218., 266.])
 
     OW = trajectory.subset(atom_name="OW")
 
-    box = np.diag(trajectory[0].box)
-    box_voxels = (box // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05]
-    occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=1000)
-    maxima_matrix = fel.find_maxima(occupation_matrix, box=box_voxels, edge_length=0.05)
-    maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", box / 2)
-    r_bins = np.arange(0, 2, 0.02)
-    distance_bins = np.arange(0.05, 2.05, 0.1)
+    box = trajectory[0].box
+    box_voxels = (np.diag(box) // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05]
+    occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=10)
+    radius_maxima = 0.05 * 3 ** (1 / 2) + 0.05 / 100
+    maxima_matrix = fel.find_maxima(
+        occupation_matrix,
+        box=box_voxels,
+        radius=radius_maxima,
+        pore_geometry="cylindrical",
+    )
+    maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", np.diag(box) / 2)
+    r_bins = np.arange(0, 0.5, 0.02)
+    distance_bins = np.arange(1.8, 1.9, 0.01)
     energy_df = fel.distance_resolved_energies(
-        maxima_matrix, distance_bins, r_bins, box, 225
+        maxima_matrix, distance_bins, r_bins, box, "cylindrical", 225
     )
     result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15)
 
     assert (np.round(np.array(result["energy"])) == np.round(test_array)).all()