Compare commits: 02fed343f0...mdeval_dev
31 commits:
adae920527, 11793d7960, 55b06fa61d, 3aa91d7482, 7d8c5d849d, c09549902a,
b7bb8cb379, 33c4756e34, 7b9f8b6773, c89cead81c, 31eb145a13, af3758cbef,
93d020a4de, b5395098ce, 5e80701562, 363e420cd8, 6b77ef78e1, 0c940115af,
b0f29907df, 37bf496b21, befaef2dfa, 8ea7da5d2f, b405842452, f5cf453d61,
4394f70530, 298da3818d, d9278eed83, 787882810c, f527d25864, 77771738ab,
3e8fd04726
@@ -4,11 +4,12 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mdevaluate"
-version = "24.01"
+version = "24.06"
 dependencies = [
     "mdanalysis",
     "pandas",
     "dask",
     "pathos",
-    "tables"
+    "tables",
+    "pyedr"
 ]
@@ -4,3 +4,4 @@ dask
 pathos
 tables
 pytest
+pyedr
@@ -218,7 +218,7 @@ class Coordinates:
         self.get_frame.clear_cache()
 
     def __iter__(self):
-        for i in range(len(self))[self._slice]:
+        for i in range(len(self.frames))[self._slice]:
             yield self[i]
 
     @singledispatchmethod
@@ -232,7 +232,7 @@ class Coordinates:
         return sliced
 
     def __len__(self):
-        return len(self.frames)
+        return len(self.frames[self._slice])
 
     def __checksum__(self):
        return checksum(self.frames, self.atom_filter, self._slice, self.mode)
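
Note: the two `Coordinates` changes are complementary: `__len__` now reports the length of the sliced frame range, so `__iter__` must index the unsliced `self.frames` to avoid applying the slice twice. A sketch of the intended semantics, assuming `traj` is a `Coordinates` object over 1000 frames and that slicing stores the slice internally:

```python
sub = traj[::10]      # assumed to keep slice(None, None, 10) in sub._slice
len(sub)              # 100: len(self.frames[self._slice])
for frame in sub:     # iterates range(len(self.frames))[self._slice],
    ...               # i.e. underlying frames 0, 10, 20, ...
```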
@@ -692,10 +692,6 @@ def number_of_neighbors(
     elif not distinct:
         dnn = 1
 
-    print(atoms[:5])
-    print(query_atoms[:5])
-    print(" ")
-
     box = atoms.box
     if np.all(np.diag(np.diag(box)) == box):
         atoms = atoms % np.diag(box)
@@ -2,7 +2,7 @@ from typing import Callable, Optional
 
 import numpy as np
 from numpy.typing import ArrayLike
-from scipy.special import legendre
+from scipy.special import legendre, jn
 import dask.array as darray
 from functools import partial
 from scipy.spatial import KDTree
@@ -13,9 +13,95 @@ from .pbc import pbc_diff, pbc_points
 from .coordinates import Coordinates, CoordinateFrame, displacements_without_drift
 
 
-def log_indices(first: int, last: int, num: int = 100) -> np.ndarray:
-    ls = np.logspace(0, np.log10(last - first + 1), num=num)
-    return np.unique(np.int_(ls) - 1 + first)
+def _is_multi_selector(selection):
+    if len(selection) == 0:
+        return False
+    elif (
+        isinstance(selection[0], int)
+        or isinstance(selection[0], bool)
+        or isinstance(selection[0], np.integer)
+        or isinstance(selection[0], np.bool_)
+    ):
+        return False
+    else:
+        for indices in selection:
+            if len(indices) == 0:
+                continue
+            elif (
+                isinstance(indices[0], int)
+                or isinstance(indices[0], bool)
+                or isinstance(indices[0], np.integer)
+                or isinstance(indices[0], np.bool_)
+            ):
+                return True
+            else:
+                raise ValueError(
+                    "selector has more than two dimensions or does not "
+                    "contain int or bool types"
+                )
+
+
+def _calc_correlation(
+    frames: Coordinates,
+    start_frame: CoordinateFrame,
+    function: Callable,
+    selection: np.ndarray,
+    shifted_idx: np.ndarray,
+) -> np.ndarray:
+    if len(selection) == 0:
+        correlation = np.zeros(len(shifted_idx))
+    else:
+        start = start_frame[selection]
+        correlation = np.array(
+            [
+                function(start, frames[frame_index][selection])
+                for frame_index in shifted_idx
+            ]
+        )
+    return correlation
+
+
+def _calc_correlation_multi(
+    frames: Coordinates,
+    start_frame: CoordinateFrame,
+    function: Callable,
+    selection: np.ndarray,
+    shifted_idx: np.ndarray,
+) -> np.ndarray:
+    correlations = np.zeros((len(selection), len(shifted_idx)))
+    for i, frame_index in enumerate(shifted_idx):
+        frame = frames[frame_index]
+        for j, current_selection in enumerate(selection):
+            if len(current_selection) == 0:
+                correlations[j, i] = 0
+            else:
+                correlations[j, i] = function(
+                    start_frame[current_selection], frame[current_selection]
+                )
+    return correlations
+
+
+def _average_correlation(result):
+    averaged_result = []
+    for n in range(result.shape[1]):
+        clean_result = []
+        for entry in result[:, n]:
+            if np.all(entry == 0):
+                continue
+            else:
+                clean_result.append(entry)
+        averaged_result.append(np.average(np.array(clean_result), axis=0))
+    return np.array(averaged_result)
+
+
+def _average_correlation_multi(result):
+    clean_result = []
+    for entry in result:
+        if np.all(entry == 0):
+            continue
+        else:
+            clean_result.append(entry)
+    return np.average(np.array(clean_result), axis=0)
 
 
 @autosave_data(2)
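
Note: `_is_multi_selector` distinguishes a single selection set from a list of selection sets by inspecting the first element. A sketch with invented data:

```python
import numpy as np

single = np.array([0, 3, 7])                   # first element is an integer
multi = [np.array([0, 3]), np.array([5, 7])]   # first element is itself an index array

# _is_multi_selector(single) -> False: correlate one set per window
# _is_multi_selector(multi)  -> True:  correlate each set separately per window
```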
@@ -28,113 +114,82 @@ def shifted_correlation(
     window: float = 0.5,
     average: bool = True,
     points: int = 100,
-) -> (np.ndarray, np.ndarray):
-    """
-    Calculate the time series for a correlation function.
-
-    The times at which the correlation is calculated are determined by
-    a logarithmic distribution.
-
-    Args:
-        function: The function that should be correlated
-        frames: The coordinates of the simulation data
-        selector (opt.):
-            A function that returns the indices depending on
-            the starting frame for which particles the
-            correlation should be calculated.
-        segments (int, opt.):
-            The number of segments the time window will be
-            shifted
-        skip (float, opt.):
-            The fraction of the trajectory that will be skipped
-            at the beginning, if this is None the start index
-            of the frames slice will be used, which defaults
-            to 0.1.
-        window (float, opt.):
-            The fraction of the simulation the time series will
-            cover
-        average (bool, opt.):
-            If True, returns averaged correlation function
-        points (int, opt.):
-            The number of timeshifts for which the correlation
-            should be calculated
-    Returns:
-        tuple:
-            A list of length N that contains the timeshifts of the frames at
-            which the time series was calculated and a numpy array of shape
-            (segments, N) that holds the (non-averaged) correlation data
-
-    Example:
-        Calculating the mean square displacement of a coordinate object
-        named ``coords``:
-
-        >>> time, data = shifted_correlation(msd, coords)
-    """
-
-    def get_correlation(
-        frames: CoordinateFrame,
-        start_frame: CoordinateFrame,
-        index: np.ndarray,
-        shifted_idx: np.ndarray,
-    ) -> np.ndarray:
-        if len(index) == 0:
-            correlation = np.zeros(len(shifted_idx))
-        else:
-            start = frames[start_frame][index]
-            correlation = np.array(
-                [function(start, frames[frame][index]) for frame in shifted_idx]
-            )
-        return correlation
-
-    def apply_selector(
-        start_frame: CoordinateFrame,
-        frames: CoordinateFrame,
-        idx: np.ndarray,
-        selector: Optional[Callable] = None,
-    ):
-        shifted_idx = idx + start_frame
-
-        if selector is None:
-            index = np.arange(len(frames[start_frame]))
-            return get_correlation(frames, start_frame, index, shifted_idx)
-        else:
-            index = selector(frames[start_frame])
-            if len(index) == 0:
-                return np.zeros(len(shifted_idx))
-            elif (
-                isinstance(index[0], int)
-                or isinstance(index[0], bool)
-                or isinstance(index[0], np.integer)
-                or isinstance(index[0], np.bool_)
-            ):
-                return get_correlation(frames, start_frame, index, shifted_idx)
-            else:
-                correlations = []
-                for ind in index:
-                    if len(ind) == 0:
-                        correlations.append(np.zeros(len(shifted_idx)))
-                    elif (
-                        isinstance(ind[0], int)
-                        or isinstance(ind[0], bool)
-                        or isinstance(ind[0], np.integer)
-                        or isinstance(ind[0], np.bool_)
-                    ):
-                        correlations.append(
-                            get_correlation(frames, start_frame, ind, shifted_idx)
-                        )
-                    else:
-                        raise ValueError(
-                            "selector has more than two dimensions or does not "
-                            "contain int or bool types"
-                        )
-                return correlations
+) -> tuple[np.ndarray, np.ndarray]:
+    """Compute a time-dependent correlation function for a given trajectory.
+
+    To improve statistics, multiple (possibly overlapping) windows are laid
+    over the whole trajectory and the correlation is computed for them
+    separately. The start frames of the windows are spaced linearly over the
+    valid region of the trajectory (skipping the fraction of frames at the
+    beginning given by the skip parameter).
+
+    The points within each window are spaced logarithmically.
+
+    Only a certain subset of the given atoms may be selected for each window
+    individually using a selector function.
+
+    Note that this function is specifically optimized for multi selectors,
+    which select multiple selection sets per window, for which the
+    correlation is computed separately.
+
+    Arguments
+    ---------
+    function:
+        The (correlation) function to evaluate.
+        Should be of the form (CoordinateFrame, CoordinateFrame) -> float
+    frames:
+        Trajectory to evaluate on
+    selector: (optional)
+        Selection function used to select certain selection sets for each
+        start frame. Should be of the form
+        (CoordinateFrame) -> list[A]
+        where A is something you can index an ndarray with, for example a
+        list of indices or a bool array.
+        Must return the same number of selection sets for every frame.
+    segments:
+        Number of start frames
+    skip:
+        Fraction of the trajectory to skip from the start
+    window:
+        Length of each segment, given as a fraction of the trajectory
+    average:
+        Whether to return averaged results.
+        See below for details on the returned ndarray.
+    points:
+        Number of points per segment
+
+    Returns
+    -------
+    times: ndarray
+        1d array of time differences to the start frame
+    result: ndarray
+        2d ndarray of averaged (or non-averaged) correlations.
+
+        When average==True (default) the returned array has shape (S, P),
+        where S is the number of selection sets and P the number of points
+        per window. For selection sets that were empty for all start frames,
+        all data points are zero.
+
+        When average==False the returned array has shape (W, S) with
+        dtype=object. The elements are either ndarrays of shape (P,) holding
+        the correlation data for the specific window and selection set, or
+        None if the corresponding selection set was empty. W is the number
+        of segments (windows); S and P are the same as for average==True.
+    """
 
     if 1 - skip < window:
         window = 1 - skip
 
-    start_frames = np.unique(
+    start_frame_indices = np.unique(
         np.linspace(
             len(frames) * skip,
             len(frames) * (1 - window),
@@ -144,28 +199,44 @@ def shifted_correlation(
         )
     )
 
-    num_frames = int(len(frames) * window)
-    ls = np.logspace(0, np.log10(num_frames + 1), num=points)
-    idx = np.unique(np.int_(ls) - 1)
-    t = np.array([frames[i].time for i in idx]) - frames[0].time
-
-    result = np.array(
-        [
-            apply_selector(start_frame, frames=frames, idx=idx, selector=selector)
-            for start_frame in start_frames
-        ]
-    )
+    num_frames_per_window = int(len(frames) * window)
+    logspaced_indices = np.logspace(0, np.log10(num_frames_per_window + 1), num=points)
+    logspaced_indices = np.unique(np.int_(logspaced_indices) - 1)
+    logspaced_time = (
+        np.array([frames[i].time for i in logspaced_indices]) - frames[0].time
+    )
+
+    if selector is None:
+        multi_selector = False
+    else:
+        selection = selector(frames[0])
+        multi_selector = _is_multi_selector(selection)
+
+    result = []
+    for start_frame_index in start_frame_indices:
+        shifted_idx = logspaced_indices + start_frame_index
+        start_frame = frames[start_frame_index]
+        if selector is None:
+            selection = np.arange(len(start_frame))
+        else:
+            selection = selector(start_frame)
+        if multi_selector:
+            result_segment = _calc_correlation_multi(
+                frames, start_frame, function, selection, shifted_idx
+            )
+        else:
+            result_segment = _calc_correlation(
+                frames, start_frame, function, selection, shifted_idx
+            )
+        result.append(result_segment)
+    result = np.array(result)
 
     if average:
-        clean_result = []
-        for entry in result:
-            if np.all(entry == 0):
-                continue
-            else:
-                clean_result.append(entry)
-        result = np.array(clean_result)
-        result = np.average(result, axis=0)
-    return t, result
+        if multi_selector:
+            result = _average_correlation_multi(result)
+        else:
+            result = _average_correlation(result)
+    return logspaced_time, result
 
 
 def msd(
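
Note: a minimal usage sketch of the reworked function, following the old docstring's example; `coords` is an assumed `Coordinates` object:

```python
from mdevaluate.correlation import shifted_correlation, isf

# Averaged ISF over 10 windows with 100 log-spaced points each:
times, data = shifted_correlation(isf, coords, segments=10, points=100)

# Per-window results instead of the average:
times, per_window = shifted_correlation(isf, coords, average=False)
```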
@@ -183,6 +254,12 @@ def msd(
         displacements = displacements_without_drift(start_frame, end_frame, trajectory)
     if axis == "all":
         return (displacements**2).sum(axis=1).mean()
+    elif axis == "xy" or axis == "yx":
+        return (displacements[:, [0, 1]] ** 2).sum(axis=1).mean()
+    elif axis == "xz" or axis == "zx":
+        return (displacements[:, [0, 2]] ** 2).sum(axis=1).mean()
+    elif axis == "yz" or axis == "zy":
+        return (displacements[:, [1, 2]] ** 2).sum(axis=1).mean()
     elif axis == "x":
         return (displacements[:, 0] ** 2).mean()
     elif axis == "y":
@@ -211,6 +288,15 @@ def isf(
     if axis == "all":
         distance = (displacements**2).sum(axis=1) ** 0.5
         return np.sinc(distance * q / np.pi).mean()
+    elif axis == "xy" or axis == "yx":
+        distance = (displacements[:, [0, 1]] ** 2).sum(axis=1) ** 0.5
+        return np.real(jn(0, distance * q)).mean()
+    elif axis == "xz" or axis == "zx":
+        distance = (displacements[:, [0, 2]] ** 2).sum(axis=1) ** 0.5
+        return np.real(jn(0, distance * q)).mean()
+    elif axis == "yz" or axis == "zy":
+        distance = (displacements[:, [1, 2]] ** 2).sum(axis=1) ** 0.5
+        return np.real(jn(0, distance * q)).mean()
     elif axis == "x":
         distance = np.abs(displacements[:, 0])
         return np.mean(np.cos(np.abs(q * distance)))
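
Note: the new planar variants rely on the 2D orientational average of exp(iq.r) being the Bessel function J0(qr), just as the 3D average is sinc(qr); that is why `jn` was added to the imports. A quick numerical check (a sketch, not part of the diff):

```python
import numpy as np
from scipy.special import jn

q, r = 22.7, 0.3
phi = np.random.default_rng(0).uniform(0, 2 * np.pi, 100_000)

# The 2D average of cos(q*r*cos(phi)) approaches J0(q*r):
print(np.cos(q * r * np.cos(phi)).mean(), jn(0, q * r))
```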
@@ -262,6 +348,12 @@ def van_hove_self(
         vectors = displacements_without_drift(start_frame, end_frame, trajectory)
     if axis == "all":
         delta_r = (vectors**2).sum(axis=1) ** 0.5
+    elif axis == "xy" or axis == "yx":
+        delta_r = (vectors[:, [0, 1]] ** 2).sum(axis=1) ** 0.5
+    elif axis == "xz" or axis == "zx":
+        delta_r = (vectors[:, [0, 2]] ** 2).sum(axis=1) ** 0.5
+    elif axis == "yz" or axis == "zy":
+        delta_r = (vectors[:, [1, 2]] ** 2).sum(axis=1) ** 0.5
     elif axis == "x":
         delta_r = np.abs(vectors[:, 0])
     elif axis == "y":
@@ -423,6 +515,15 @@ def non_gaussian_parameter(
     if axis == "all":
         r = (vectors**2).sum(axis=1)
         dimensions = 3
+    elif axis == "xy" or axis == "yx":
+        r = (vectors[:, [0, 1]] ** 2).sum(axis=1)
+        dimensions = 2
+    elif axis == "xz" or axis == "zx":
+        r = (vectors[:, [0, 2]] ** 2).sum(axis=1)
+        dimensions = 2
+    elif axis == "yz" or axis == "zy":
+        r = (vectors[:, [1, 2]] ** 2).sum(axis=1)
+        dimensions = 2
     elif axis == "x":
         r = vectors[:, 0] ** 2
         dimensions = 1
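
Note: the `dimensions` value presumably feeds the dimension-dependent prefactor of the non-Gaussian parameter, which in d dimensions reads alpha_2 = d<r^4> / ((d + 2)<r^2>^2) - 1 (the standard form; the part of the function carrying it is not shown in this diff). As a sketch:

```python
import numpy as np

def alpha_2(r_squared: np.ndarray, dimensions: int) -> float:
    # Standard non-Gaussian parameter in `dimensions` dimensions (assumed form).
    return (
        dimensions * np.mean(r_squared**2)
        / ((dimensions + 2) * np.mean(r_squared) ** 2)
        - 1
    )
```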
@@ -126,9 +126,6 @@ def rdf(
     particles_in_volume = int(
         np.max(number_of_neighbors(atoms_a, query_atoms=atoms_b, r_max=bins[-1])) * 1.1
     )
-    print(atoms_a[:5])
-    print(atoms_b[:5])
-    print(" ")
     distances, indices = next_neighbors(
         atoms_a,
         atoms_b,
@@ -309,7 +306,7 @@ def next_neighbor_distribution(
     )[1]
     resname_nn = reference.residue_names[nn]
     count_nn = (resname_nn == atoms.residue_names.reshape(-1, 1)).sum(axis=1)
-    return np.histogram(count_nn, bins=bins, normed=normed)[0]
+    return np.histogram(count_nn, bins=bins, density=normed)[0]
 
 
 def hbonds(
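
Note: the `normed` keyword of `np.histogram` was deprecated for years and removed in NumPy 1.24; `density` is the supported replacement and normalizes the histogram so it integrates to one:

```python
import numpy as np

counts = np.array([1, 2, 2, 3, 3, 3])
hist, edges = np.histogram(counts, bins=3, density=True)
print((hist * np.diff(edges)).sum())  # 1.0
```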
@@ -11,7 +11,7 @@ from mdevaluate.coordinates import CoordinateFrame, Coordinates
 from mdevaluate.pbc import pbc_points
 
 
-def a_ij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]:
+def calc_aij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]:
     tree = KDTree(atoms)
 
     dist, indices = tree.query(atoms, N + 1)
@@ -84,18 +84,18 @@ def count_ice_types(iceTypes: NDArray) -> NDArray:
 
 
 def selector_ice(
-    start_frame: CoordinateFrame,
-    traj: Coordinates,
+    oxygen_atoms_water: CoordinateFrame,
     chosen_ice_types: ArrayLike,
     combined: bool = True,
+    next_neighbor_distance: float = 0.35,
 ) -> NDArray:
-    oxygen = traj.subset(atom_name="OW")
-    atoms = oxygen[start_frame.step]
-    atoms = atoms % np.diag(atoms.box)
-    atoms_PBC = pbc_points(atoms, thickness=1)
-    aij, indices = a_ij(atoms_PBC)
+    atoms = oxygen_atoms_water
+    atoms_PBC = pbc_points(atoms, thickness=next_neighbor_distance * 2.2)
+    aij, indices = calc_aij(atoms_PBC)
     tree = KDTree(atoms_PBC)
-    neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True)
+    neighbors = tree.query_ball_point(
+        atoms_PBC, next_neighbor_distance, return_length=True
+    ) - 1
     index_SOL = atoms_PBC.tolist().index(atoms[0].tolist())
     index_SOL = np.arange(index_SOL, index_SOL + len(atoms))
     ice_Types = classify_ice(aij, indices, neighbors, index_SOL)
@@ -117,9 +117,9 @@ def selector_ice(
 def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
     def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray:
         atoms_PBC = pbc_points(frame, thickness=1)
-        aij, indices = a_ij(atoms_PBC)
+        aij, indices = calc_aij(atoms_PBC)
         tree = KDTree(atoms_PBC)
-        neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True)
+        neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) - 1
         index = selector(frame, atoms_PBC)
         ice_types_data = classify_ice(aij, indices, neighbors, index)
         ice_parts_data = count_ice_types(ice_types_data)
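
Note: when the query points are the tree's own points, `query_ball_point(..., return_length=True)` counts each point as its own neighbor, so the added `- 1` turns the result into a true neighbor count. A quick check:

```python
import numpy as np
from scipy.spatial import KDTree

pts = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
tree = KDTree(pts)
print(tree.query_ball_point(pts, 0.5, return_length=True))      # [1 1] (self-counts)
print(tree.query_ball_point(pts, 0.5, return_length=True) - 1)  # [0 0]
```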
@@ -161,8 +161,8 @@ def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
 def ice_clusters_traj(
     traj: Coordinates, segments: int = 10000, skip: float = 0.1
 ) -> list:
-    def ice_clusters(frame: CoordinateFrame, traj: Coordinates) -> Tuple[float, list]:
-        selection = selector_ice(frame, traj, [0, 1, 2])
+    def ice_clusters(frame: CoordinateFrame) -> Tuple[float, list]:
+        selection = selector_ice(frame, [0, 1, 2])
         if len(selection) == 0:
             return frame.time, []
         else:
@@ -194,6 +194,6 @@ def ice_clusters_traj(
         np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments))
     )
     all_clusters = [
-        ice_clusters(traj[frame_index], traj) for frame_index in frame_indices
+        ice_clusters(traj[frame_index]) for frame_index in frame_indices
     ]
     return all_clusters
@@ -1,9 +1,9 @@
 from functools import partial
+from typing import Optional
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from numpy.polynomial.polynomial import Polynomial as Poly
-import math
 from scipy.spatial import KDTree
 import pandas as pd
 import multiprocessing as mp
@@ -11,6 +11,56 @@ import multiprocessing as mp
 from ..coordinates import Coordinates
 
 
+def _pbc_points_reduced(
+    coordinates: ArrayLike,
+    pore_geometry: str,
+    box: Optional[NDArray] = None,
+    thickness: Optional[float] = None,
+) -> tuple[NDArray, NDArray]:
+    if box is None:
+        box = coordinates.box
+    if pore_geometry == "cylindrical":
+        grid = np.array([[i, j, k] for k in [-1, 0, 1] for j in [0] for i in [0]])
+        indices = np.tile(np.arange(len(coordinates)), 3)
+    elif pore_geometry == "slit":
+        grid = np.array(
+            [[i, j, k] for k in [0] for j in [1, 0, -1] for i in [-1, 0, 1]]
+        )
+        indices = np.tile(np.arange(len(coordinates)), 9)
+    else:
+        raise ValueError(
+            f"pore_geometry is {pore_geometry}, should either be "
+            f"'cylindrical' or 'slit'"
+        )
+    coordinates_pbc = np.concatenate([coordinates + v @ box for v in grid], axis=0)
+    size = np.diag(box)
+
+    if thickness is not None:
+        mask = np.all(coordinates_pbc > -thickness, axis=1)
+        coordinates_pbc = coordinates_pbc[mask]
+        indices = indices[mask]
+        mask = np.all(coordinates_pbc < size + thickness, axis=1)
+        coordinates_pbc = coordinates_pbc[mask]
+        indices = indices[mask]
+
+    return coordinates_pbc, indices
+
+
+def _build_tree(points, box, r_max, pore_geometry):
+    if np.all(np.diag(np.diag(box)) == box):
+        tree = KDTree(points % box, boxsize=box)
+        points_pbc_index = None
+    else:
+        points_pbc, points_pbc_index = _pbc_points_reduced(
+            points,
+            pore_geometry,
+            box,
+            thickness=r_max + 0.01,
+        )
+        tree = KDTree(points_pbc)
+    return tree, points_pbc_index
+
+
 def occupation_matrix(
     trajectory: Coordinates,
     edge_length: float = 0.05,
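
Note: `_pbc_points_reduced` replicates only the periodic images a pore geometry actually needs, three along z for a cylindrical pore and nine in the plane for a slit, instead of a full 27-image tiling. The grid logic in isolation:

```python
import numpy as np

# slit: replicate in the xy-plane only (9 images including the original)
grid_slit = np.array([[i, j, k] for k in [0] for j in [1, 0, -1] for i in [-1, 0, 1]])
# cylindrical: replicate along z only (3 images)
grid_cyl = np.array([[i, j, k] for k in [-1, 0, 1] for j in [0] for i in [0]])
print(grid_slit.shape, grid_cyl.shape)  # (9, 3) (3, 3)
```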
@@ -28,11 +78,7 @@ def occupation_matrix(
     z_bins = np.arange(0, box[2][2] + edge_length, edge_length)
     bins = [x_bins, y_bins, z_bins]
     # Trajectory is split for parallel computing
-    size = math.ceil(len(frame_indices) / nodes)
-    indices = [
-        np.arange(len(frame_indices))[i : i + size]
-        for i in range(0, len(frame_indices), size)
-    ]
+    indices = np.array_split(frame_indices, nodes)
     pool = mp.Pool(nodes)
     results = pool.map(
         partial(_calc_histogram, trajectory=trajectory, bins=bins), indices
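
Note: `np.array_split` tolerates chunk counts that do not divide the array evenly, which is exactly the bookkeeping the removed `math.ceil` construction did by hand:

```python
import numpy as np

frame_indices = np.arange(10)
print([chunk.tolist() for chunk in np.array_split(frame_indices, 4)])
# [[0, 1, 2], [3, 4, 5], [6, 7], [8, 9]]
```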
@@ -72,19 +118,19 @@ def _calc_histogram(
 
 
 def find_maxima(
-    occupation_df: pd.DataFrame, box: ArrayLike, edge_length: float = 0.05
+    occupation_df: pd.DataFrame, box: ArrayLike, radius: float, pore_geometry: str
 ) -> pd.DataFrame:
     maxima_df = occupation_df.copy()
     maxima_df["maxima"] = None
     points = np.array(maxima_df[["x", "y", "z"]])
-    tree = KDTree(points, boxsize=box)
-    all_neighbors = tree.query_ball_point(
-        points, edge_length * 3 ** (1 / 2) + edge_length / 100
-    )
+    tree, points_pbc_index = _build_tree(points, box, radius, pore_geometry)
     for i in range(len(maxima_df)):
         if maxima_df.loc[i, "maxima"] is not None:
             continue
-        neighbors = np.array(all_neighbors[i])
+        maxima_pos = maxima_df.loc[i, ["x", "y", "z"]]
+        neighbors = np.array(tree.query_ball_point(maxima_pos, radius))
+        if points_pbc_index is not None:
+            neighbors = points_pbc_index[neighbors]
         neighbors = neighbors[neighbors != i]
         if len(neighbors) == 0:
             maxima_df.loc[i, "maxima"] = True
@@ -104,23 +150,39 @@ def _calc_energies(
     maxima_df: pd.DataFrame,
     bins: ArrayLike,
     box: NDArray,
+    pore_geometry: str,
     T: float,
+    nodes: int = 8,
 ) -> NDArray:
     points = np.array(maxima_df[["x", "y", "z"]])
-    tree = KDTree(points, boxsize=box)
+    tree, points_pbc_index = _build_tree(points, box, bins[-1], pore_geometry)
     maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]]
     maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"])
     num_of_neighbors = np.max(
         tree.query_ball_point(maxima, bins[-1], return_length=True)
     )
-    distances, indices = tree.query(
-        maxima, k=num_of_neighbors, distance_upper_bound=bins[-1]
-    )
+    split_maxima = []
+    for i in range(0, len(maxima), 1000):
+        split_maxima.append(maxima[i : i + 1000])
+
+    distances = []
+    indices = []
+    for maxima in split_maxima:
+        distances_step, indices_step = tree.query(
+            maxima, k=num_of_neighbors, distance_upper_bound=bins[-1], workers=nodes
+        )
+        distances.append(distances_step)
+        indices.append(indices_step)
+    distances = np.concatenate(distances)
+    indices = np.concatenate(indices)
     all_energy_hist = []
     all_occupied_bins_hist = []
     if distances.ndim == 1:
         current_distances = distances[1:][distances[1:] <= bins[-1]]
-        current_indices = indices[1:][distances[1:] <= bins[-1]]
+        if points_pbc_index is None:
+            current_indices = indices[1:][distances[1:] <= bins[-1]]
+        else:
+            current_indices = points_pbc_index[indices[1:][distances[1:] <= bins[-1]]]
         energy = (
             -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations)
             * T
@@ -131,8 +193,12 @@ def _calc_energies(
         return result
     for i, maxima_occupation in enumerate(maxima_occupations):
         current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]]
-        current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]]
+
+        if points_pbc_index is None:
+            current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]]
+        else:
+            current_indices = points_pbc_index[
+                indices[i, 1:][distances[i, 1:] <= bins[-1]]
+            ]
         energy = (
             -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation)
             * T
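
Note: querying the tree in blocks of 1000 maxima bounds the memory taken by the distance and index arrays, and `workers=nodes` runs each query in parallel threads (the `workers` argument of `KDTree.query` in recent SciPy). The same pattern in isolation:

```python
import numpy as np
from scipy.spatial import KDTree

pts = np.random.default_rng(1).random((5000, 3))
tree = KDTree(pts)

distances = []
for i in range(0, len(pts), 1000):                        # block-wise queries
    d, _ = tree.query(pts[i : i + 1000], k=4, workers=2)  # threaded query
    distances.append(d)
print(np.concatenate(distances).shape)  # (5000, 4)
```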
@@ -168,9 +234,12 @@ def distance_resolved_energies(
     maxima_df: pd.DataFrame,
     distance_bins: ArrayLike,
     r_bins: ArrayLike,
     box: NDArray,
+    pore_geometry: str,
     temperature: float,
+    nodes: int = 8,
 ) -> pd.DataFrame:
     results = []
+    distances = []
     for i in range(len(distance_bins) - 1):
         maxima_indices = np.array(
             maxima_df.index[
@@ -179,11 +248,22 @@ def distance_resolved_energies(
                 * (maxima_df["maxima"] == True)
             ]
         )
-        results.append(
-            _calc_energies(maxima_indices, maxima_df, r_bins, box, temperature)
-        )
+        try:
+            results.append(
+                _calc_energies(
+                    maxima_indices,
+                    maxima_df,
+                    r_bins,
+                    box,
+                    pore_geometry,
+                    temperature,
+                    nodes,
+                )
+            )
+            distances.append((distance_bins[i] + distance_bins[i + 1]) / 2)
+        except ValueError:
+            pass
 
-    distances = (distance_bins[:-1] + distance_bins[1:]) / 2
     radii = (r_bins[:-1] + r_bins[1:]) / 2
     d = np.array([d for d in distances for r in radii])
     r = np.array([r for d in distances for r in radii])
@@ -192,7 +272,11 @@
 
 
 def find_energy_maxima(
-    energy_df: pd.DataFrame, r_min: float, r_max: float
+    energy_df: pd.DataFrame,
+    r_min: float,
+    r_max: float,
+    r_eval: float = None,
+    degree: int = 2,
 ) -> pd.DataFrame:
     distances = []
     energies = []
@@ -201,6 +285,9 @@ def find_energy_maxima(
         x = np.array(data_d["r"])
         y = np.array(data_d["energy"])
         mask = (x >= r_min) * (x <= r_max)
-        p3 = Poly.fit(x[mask], y[mask], deg=2)
-        energies.append(np.max(p3(np.linspace(r_min, r_max, 1000))))
+        p3 = Poly.fit(x[mask], y[mask], deg=degree)
+        if r_eval is None:
+            energies.append(np.max(p3(np.linspace(r_min, r_max, 1000))))
+        else:
+            energies.append(p3(r_eval))
     return pd.DataFrame({"d": distances, "energy": energies})
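
Note: NumPy's `Polynomial.fit` fits in a scaled internal domain and maps back automatically when the returned object is evaluated, so calling `p3(r_eval)` directly is sound. A standalone sketch of the new `r_eval` path:

```python
import numpy as np
from numpy.polynomial.polynomial import Polynomial as Poly

x = np.linspace(0.05, 0.15, 20)
y = -((x - 0.1) ** 2)          # toy energy curve with its maximum at r = 0.1
p = Poly.fit(x, y, deg=2)

print(p(0.1))                                    # evaluate at a fixed r_eval
print(np.max(p(np.linspace(0.05, 0.15, 1000))))  # or scan for the maximum
```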
@@ -265,7 +265,7 @@ def LSI(
     )
     distributions = np.array(
         [
-            LSI_distribution(trajectory[frame_index], trajectory, bins, selector=None)
+            LSI_distribution(trajectory[frame_index], bins, selector=None)
             for frame_index in frame_indices
         ]
     )
@@ -156,7 +156,7 @@ def nojump(frame: CoordinateFrame, usecache: bool = True) -> CoordinateFrame:
                     [m[i0 : abstep + 1].sum(axis=0) for m in reader.nojump_matrices]
                 ).T
             )
-            * frame.box.diagonal()
+            @ frame.box
         )
 
         reader._nojump_cache[abstep] = delta
@@ -173,7 +173,7 @@ def nojump(frame: CoordinateFrame, usecache: bool = True) -> CoordinateFrame:
                 ]
             ).T
         )
-        * frame.box.diagonal()
+        @ frame.box
     )
     return frame - delta
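
Note: multiplying jump counts by `frame.box.diagonal()` is only correct for rectangular boxes; the matrix product with the full box matrix also handles triclinic cells, where box vectors have off-diagonal components. A sketch of the difference:

```python
import numpy as np

jumps = np.array([[1.0, 0.0, 0.0], [0.0, -1.0, 1.0]])  # jumps in box-vector units

box = np.array([[2.0, 0.0, 0.0],   # triclinic box matrix (rows are box vectors)
                [0.5, 2.0, 0.0],
                [0.0, 0.3, 2.0]])

print(jumps * box.diagonal())  # rectangular-box approximation
print(jumps @ box)             # full triclinic displacement
```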
@@ -152,7 +152,7 @@ def nojump_load_filename(reader: BaseReader):
     )
     if os.path.exists(full_path_fallback):
         return full_path_fallback
-    if os.path.exists(fname) or is_writeable(directory):
+    if os.path.exists(full_path) or is_writeable(directory):
         return full_path
     else:
         user_data_dir = os.path.join("/data/", os.environ["HOME"].split("/")[-1])
@@ -187,19 +187,33 @@ def nojump_save_filename(reader: BaseReader):
 
 def parse_jumps(trajectory: Coordinates):
     prev = trajectory[0].whole
-    box = prev.box.diagonal()
+    box = prev.box
     SparseData = namedtuple("SparseData", ["data", "row", "col"])
     jump_data = (
         SparseData(data=array("b"), row=array("l"), col=array("l")),
         SparseData(data=array("b"), row=array("l"), col=array("l")),
         SparseData(data=array("b"), row=array("l"), col=array("l")),
     )
 
     for i, curr in enumerate(trajectory):
         if i % 500 == 0:
             logger.debug("Parse jumps Step: %d", i)
-        delta = ((curr - prev) / box).round().astype(np.int8)
+        r3 = np.subtract(curr, prev)
+        delta_z = np.array(np.rint(np.divide(r3[:, 2], box[2][2])), dtype=np.int8)
+        r2 = np.subtract(
+            r3,
+            (np.rint(np.divide(r3[:, 2], box[2][2])))[:, np.newaxis]
+            * box[2][np.newaxis, :],
+        )
+        delta_y = np.array(np.rint(np.divide(r2[:, 1], box[1][1])), dtype=np.int8)
+        r1 = np.subtract(
+            r2,
+            (np.rint(np.divide(r2[:, 1], box[1][1])))[:, np.newaxis]
+            * box[1][np.newaxis, :],
+        )
+        delta_x = np.array(np.rint(np.divide(r1[:, 0], box[0][0])), dtype=np.int8)
+        delta = np.array([delta_x, delta_y, delta_z]).T
         prev = curr
+        box = prev.box
         for d in range(3):
             (col,) = np.where(delta[:, d] != 0)
             jump_data[d].col.extend(col)
@@ -177,7 +177,7 @@ def coherent_sum(
     func: Callable[[ArrayLike, ArrayLike], float],
     coord_a: ArrayLike,
     coord_b: ArrayLike,
-) -> float:
+) -> NDArray:
     """
     Perform a coherent sum over two arrays :math:`A, B`.
test/test_correlation.py (new file, +57 lines)
@@ -0,0 +1,57 @@
+import os
+import pytest
+
+import mdevaluate
+from mdevaluate import correlation
+import numpy as np
+
+
+@pytest.fixture
+def trajectory(request):
+    return mdevaluate.open(os.path.join(os.path.dirname(__file__), "data/water"))
+
+
+def test_shifted_correlation(trajectory):
+    test_array = np.array([100, 82, 65, 49, 39, 29, 20, 13, 7])
+    OW = trajectory.subset(atom_name="OW")
+    t, result = correlation.shifted_correlation(
+        correlation.isf, OW, segments=10, skip=0.1, points=10
+    )
+    assert (np.array(result * 100, dtype=int) == test_array).all()
+
+
+def test_shifted_correlation_no_average(trajectory):
+    t, result = correlation.shifted_correlation(
+        correlation.isf, trajectory, segments=10, skip=0.1, points=5, average=False
+    )
+    assert result.shape == (10, 5)
+
+
+def test_shifted_correlation_selector(trajectory):
+    test_array = np.array([100, 82, 64, 48, 37, 28, 19, 11, 5])
+
+    def selector(frame):
+        index = np.argwhere((frame[:, 0] >= 0) * (frame[:, 0] < 1))
+        return index.flatten()
+
+    OW = trajectory.subset(atom_name="OW")
+    t, result = correlation.shifted_correlation(
+        correlation.isf, OW, segments=10, skip=0.1, points=10, selector=selector
+    )
+    assert (np.array(result * 100, dtype=int) == test_array).all()
+
+
+def test_shifted_correlation_multi_selector(trajectory):
+    def selector(frame):
+        indices = []
+        for i in range(3):
+            x = frame[:, 0].flatten()
+            index = np.argwhere((x >= i) * (x < i + 1))
+            indices.append(index.flatten())
+        return indices
+
+    OW = trajectory.subset(atom_name="OW")
+    t, result = correlation.shifted_correlation(
+        correlation.isf, OW, segments=10, skip=0.1, points=10, selector=selector
+    )
+    assert result.shape == (3, 9)
@@ -13,42 +13,24 @@ def trajectory(request):
 
 
 def test_get_fel(trajectory):
-    test_array = np.array(
-        [
-            174.46253634,
-            174.60905476,
-            178.57658092,
-            182.43001192,
-            180.57916378,
-            176.49886217,
-            178.96018547,
-            181.13561782,
-            178.31026314,
-            176.08903996,
-            180.71215345,
-            181.59703135,
-            180.34329368,
-            187.02474488,
-            197.99167477,
-            214.05788031,
-            245.58571282,
-            287.52457507,
-            331.53492965,
-        ]
-    )
+    test_array = np.array([210., 214., 209., 192., 200., 193., 230., 218., 266.])
 
     OW = trajectory.subset(atom_name="OW")
 
-    box = np.diag(trajectory[0].box)
-    box_voxels = (box // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05]
-    occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=1000)
-    maxima_matrix = fel.find_maxima(occupation_matrix, box=box_voxels, edge_length=0.05)
-    maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", box / 2)
-    r_bins = np.arange(0, 2, 0.02)
-    distance_bins = np.arange(0.05, 2.05, 0.1)
+    box = trajectory[0].box
+    box_voxels = (np.diag(box) // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05]
+    occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=10)
+    radius_maxima = 0.05 * 3 ** (1 / 2) + 0.05 / 100
+    maxima_matrix = fel.find_maxima(
+        occupation_matrix,
+        box=box_voxels,
+        radius=radius_maxima,
+        pore_geometry="cylindrical",
+    )
+    maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", np.diag(box) / 2)
+    r_bins = np.arange(0, 0.5, 0.02)
+    distance_bins = np.arange(1.8, 1.9, 0.01)
     energy_df = fel.distance_resolved_energies(
-        maxima_matrix, distance_bins, r_bins, box, 225
+        maxima_matrix, distance_bins, r_bins, box, "cylindrical", 225
     )
     result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15)
 
     assert (np.round(np.array(result["energy"])) == np.round(test_array)).all()