27 Commits

Author SHA1 Message Date
adae920527 Fixed formatting 2024-06-14 10:12:50 +02:00
11793d7960 Added autosave to shifted_correlation 2024-06-14 10:12:18 +02:00
55b06fa61d Improved performance and split up shifted_correlation 2024-06-14 10:11:20 +02:00
3aa91d7482 Updated version number and type hint 2024-06-14 10:10:58 +02:00
7d8c5d849d Added tests for shifted_correlation 2024-06-14 10:10:37 +02:00
c09549902a Added Robins adjustments for FEL 2024-05-27 14:27:09 +02:00
b7bb8cb379 Merge branch 'refs/heads/mdeval_dev' 2024-05-02 16:18:18 +02:00
33c4756e34 Fixed frame-index selection in occupancy calculation. 2024-05-02 16:17:54 +02:00
7b9f8b6773 Merge branch 'mdeval_dev' 2024-03-05 13:58:15 +01:00
c89cead81c Moved KDTree build up in its own function and made find_maxima query for each point separately. 2024-03-05 13:57:55 +01:00
31eb145a13 Merge branch 'mdeval_dev' (conflicts: src/mdevaluate/coordinates.py) 2024-02-26 14:20:12 +01:00
af3758cbef Fixed iter in Coordinates 2024-02-26 14:18:23 +01:00
93d020a4de Updated fel test to run faster 2024-02-26 14:14:29 +01:00
b5395098ce Fixed iter of Coordinates after last change 2024-02-26 13:57:44 +01:00
5e80701562 Coordinates returns now correct length after slicing 2024-02-26 13:50:46 +01:00
363e420cd8 Fixed chill ice_cluster_traj() function 2024-02-16 11:11:17 +01:00
6b77ef78e1 Fixed chill selector_ice() function 2024-02-15 11:49:20 +01:00
0c940115af Fixed LSI 2024-02-08 17:24:44 +01:00
b0f29907df Updated fel to triclinic box case 2024-02-08 17:24:22 +01:00
37bf496b21 Fixed reading nojump_matrices from non-writable directories 2024-02-06 13:29:38 +01:00
befaef2dfa Fixed van Hove calculation in plane 2024-02-06 09:38:38 +01:00
8ea7da5d2f Removed prints from number_of_neighbors 2024-01-31 15:36:51 +01:00
b405842452 Changed version number 2024-01-31 11:32:52 +01:00
f5cf453d61 Fixed isf for 2d case 2024-01-31 11:32:29 +01:00
4394f70530 Fixed pyproject.toml 2024-01-31 09:02:57 +01:00
298da3818d Changed parameter normed to density in np.histogram 2024-01-31 09:02:36 +01:00
d9278eed83 Removed prints from rdf 2024-01-31 08:59:39 +01:00
11 changed files with 418 additions and 228 deletions

View File

@@ -4,12 +4,12 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "mdevaluate" name = "mdevaluate"
version = "24.01" version = "24.06"
dependencies = [ dependencies = [
"mdanalysis", "mdanalysis",
"pandas", "pandas",
"dask", "dask",
"pathos", "pathos",
"tables" "tables",
"pyedr" "pyedr"
] ]

View File

@@ -218,7 +218,7 @@ class Coordinates:
self.get_frame.clear_cache() self.get_frame.clear_cache()
def __iter__(self): def __iter__(self):
for i in range(len(self))[self._slice]: for i in range(len(self.frames))[self._slice]:
yield self[i] yield self[i]
@singledispatchmethod @singledispatchmethod
@@ -232,7 +232,7 @@ class Coordinates:
return sliced return sliced
def __len__(self): def __len__(self):
return len(self.frames) return len(self.frames[self._slice])
def __checksum__(self): def __checksum__(self):
return checksum(self.frames, self.atom_filter, self._slice, self.mode) return checksum(self.frames, self.atom_filter, self._slice, self.mode)
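These two fixes make length and iteration agree on a sliced Coordinates view: __iter__ walks the underlying frames through the stored slice, and __len__ reports the sliced count. A minimal sketch of the intended invariant, assuming coords is an opened trajectory (the path is illustrative):

import mdevaluate

coords = mdevaluate.open("path/to/simulation")  # illustrative path
sliced = coords[::10]        # __getitem__ stores the slice on a copy
# With the fixes above, the reported length matches the frames yielded:
assert len(sliced) == len(list(sliced))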
@@ -692,10 +692,6 @@ def number_of_neighbors(
elif not distinct: elif not distinct:
dnn = 1 dnn = 1
print(atoms[:5])
print(query_atoms[:5])
print(" ")
box = atoms.box box = atoms.box
if np.all(np.diag(np.diag(box)) == box): if np.all(np.diag(np.diag(box)) == box):
atoms = atoms % np.diag(box) atoms = atoms % np.diag(box)

View File

@@ -13,9 +13,95 @@ from .pbc import pbc_diff, pbc_points
from .coordinates import Coordinates, CoordinateFrame, displacements_without_drift from .coordinates import Coordinates, CoordinateFrame, displacements_without_drift
def log_indices(first: int, last: int, num: int = 100) -> np.ndarray: def _is_multi_selector(selection):
ls = np.logspace(0, np.log10(last - first + 1), num=num) if len(selection) == 0:
return np.unique(np.int_(ls) - 1 + first) return False
elif (
isinstance(selection[0], int)
or isinstance(selection[0], bool)
or isinstance(selection[0], np.integer)
or isinstance(selection[0], np.bool_)
):
return False
else:
for indices in selection:
if len(indices) == 0:
continue
elif (
isinstance(indices[0], int)
or isinstance(indices[0], bool)
or isinstance(indices[0], np.integer)
or isinstance(indices[0], np.bool_)
):
return True
else:
raise ValueError(
"selector has more than two dimensions or does not "
"contain int or bool types"
)
def _calc_correlation(
frames: Coordinates,
start_frame: CoordinateFrame,
function: Callable,
selection: np.ndarray,
shifted_idx: np.ndarray,
) -> np.ndarray:
if len(selection) == 0:
correlation = np.zeros(len(shifted_idx))
else:
start = start_frame[selection]
correlation = np.array(
[
function(start, frames[frame_index][selection])
for frame_index in shifted_idx
]
)
return correlation
def _calc_correlation_multi(
frames: Coordinates,
start_frame: CoordinateFrame,
function: Callable,
selection: np.ndarray,
shifted_idx: np.ndarray,
) -> np.ndarray:
correlations = np.zeros((len(selection), len(shifted_idx)))
for i, frame_index in enumerate(shifted_idx):
frame = frames[frame_index]
for j, current_selection in enumerate(selection):
if len(current_selection) == 0:
correlations[j, i] = 0
else:
correlations[j, i] = function(
start_frame[current_selection], frame[current_selection]
)
return correlations
def _average_correlation(result):
averaged_result = []
for n in range(result.shape[1]):
clean_result = []
for entry in result[:, n]:
if np.all(entry == 0):
continue
else:
clean_result.append(entry)
averaged_result.append(np.average(np.array(clean_result), axis=0))
return np.array(averaged_result)
def _average_correlation_multi(result):
clean_result = []
for entry in result:
if np.all(entry == 0):
continue
else:
clean_result.append(entry)
return np.average(np.array(clean_result), axis=0)
@autosave_data(2) @autosave_data(2)
@@ -28,113 +114,82 @@ def shifted_correlation(
window: float = 0.5, window: float = 0.5,
average: bool = True, average: bool = True,
points: int = 100, points: int = 100,
) -> (np.ndarray, np.ndarray): ) -> tuple[np.ndarray, np.ndarray]:
"""Compute a time-dependent correlation function for a given trajectory.
To improve statistics, multiple (possibly overlapping) windows will be
laid over the whole trajectory, and the correlation is computed for each window separately.
The start frames of the windows are spaced linearly over the valid region of
the trajectory (skipping frames in the beginning given by skip parameter).
The points within each window are spaced logarithmically.
Only a certain subset of the given atoms may be selected for each window
individually using a selector function.
Note that this function is specifically optimized for multi selectors, which select
multiple selection sets per window; the correlation is then computed separately
for each set.
Arguments
---------
function:
The (correlation) function to evaluate.
Should be of the form (CoordinateFrame, CoordinateFrame) -> float
frames:
Trajectory to evaluate on
selector: (optional)
Selection function to select only certain selection sets for each start frame.
Should be of the form
(CoordinateFrame) -> list[A]
where A is something you can index an ndarray with.
For example a list of indices or a bool array.
Must return the same number of selection sets for every frame.
segments:
Number of start frames
skip:
Percentage of trajectory to skip from the start
window:
Length of each segment given as percentage of trajectory
average:
Whether to return averaged results.
See below for details on the returned ndarray.
points:
Number of points per segment
Returns
-------
times: ndarray
1d array of time differences to start frame
result: ndarray
2d ndarray of averaged (or non-averaged) correlations.
When average==True (default) the returned array will be of the shape (S, P)
where S is the number of selection sets and P the number of points per window.
For selection sets that were empty for all start frames, all data points will be
zero.
When average==False the returned array will be of shape (W, S) with
dtype=object. The elements are either ndarrays of shape (P,) containing the
correlation data for the specific window and selection set or None if the
corresponding selection set was empty.
W is the number of segments (windows).
S and P are the same as for average==True.
""" """
Calculate the time series for a correlation function.
The times at which the correlation is calculated are determined by
a logarithmic distribution.
Args:
function: The function that should be correlated
frames: The coordinates of the simulation data
selector (opt.):
A function that returns the indices depending on
the staring frame for which particles the
correlation should be calculated.
segments (int, opt.):
The number of segments the time window will be
shifted
skip (float, opt.):
The fraction of the trajectory that will be skipped
at the beginning, if this is None the start index
of the frames slice will be used, which defaults
to 0.1.
window (float, opt.):
The fraction of the simulation the time series will
cover
average (bool, opt.):
If True, returns averaged correlation function
points (int, opt.):
The number of timeshifts for which the correlation
should be calculated
Returns:
tuple:
A list of length N that contains the timeshiftes of the frames at which
the time series was calculated and a numpy array of shape (segments, N)
that holds the (non-avaraged) correlation data
Example:
Calculating the mean square displacement of a coordinate object
named ``coords``:
>>> time, data = shifted_correlation(msd, coords)
"""
def get_correlation(
frames: CoordinateFrame,
start_frame: CoordinateFrame,
index: np.ndarray,
shifted_idx: np.ndarray,
) -> np.ndarray:
if len(index) == 0:
correlation = np.zeros(len(shifted_idx))
else:
start = frames[start_frame][index]
correlation = np.array(
[function(start, frames[frame][index]) for frame in shifted_idx]
)
return correlation
def apply_selector(
start_frame: CoordinateFrame,
frames: CoordinateFrame,
idx: np.ndarray,
selector: Optional[Callable] = None,
):
shifted_idx = idx + start_frame
if selector is None:
index = np.arange(len(frames[start_frame]))
return get_correlation(frames, start_frame, index, shifted_idx)
else:
index = selector(frames[start_frame])
if len(index) == 0:
return np.zeros(len(shifted_idx))
elif (
isinstance(index[0], int)
or isinstance(index[0], bool)
or isinstance(index[0], np.integer)
or isinstance(index[0], np.bool_)
):
return get_correlation(frames, start_frame, index, shifted_idx)
else:
correlations = []
for ind in index:
if len(ind) == 0:
correlations.append(np.zeros(len(shifted_idx)))
elif (
isinstance(ind[0], int)
or isinstance(ind[0], bool)
or isinstance(ind[0], np.integer)
or isinstance(ind[0], np.bool_)
):
correlations.append(
get_correlation(frames, start_frame, ind, shifted_idx)
)
else:
raise ValueError(
"selector has more than two dimensions or does not "
"contain int or bool types"
)
return correlations
if 1 - skip < window: if 1 - skip < window:
window = 1 - skip window = 1 - skip
start_frames = np.unique( start_frame_indices = np.unique(
np.linspace( np.linspace(
len(frames) * skip, len(frames) * skip,
len(frames) * (1 - window), len(frames) * (1 - window),
@@ -144,28 +199,44 @@
) )
) )
num_frames = int(len(frames) * window) num_frames_per_window = int(len(frames) * window)
ls = np.logspace(0, np.log10(num_frames + 1), num=points) logspaced_indices = np.logspace(0, np.log10(num_frames_per_window + 1), num=points)
idx = np.unique(np.int_(ls) - 1) logspaced_indices = np.unique(np.int_(logspaced_indices) - 1)
t = np.array([frames[i].time for i in idx]) - frames[0].time logspaced_time = (
np.array([frames[i].time for i in logspaced_indices]) - frames[0].time
result = np.array(
[
apply_selector(start_frame, frames=frames, idx=idx, selector=selector)
for start_frame in start_frames
]
) )
if average: if selector is None:
clean_result = [] multi_selector = False
for entry in result:
if np.all(entry == 0):
continue
else: else:
clean_result.append(entry) selection = selector(frames[0])
result = np.array(clean_result) multi_selector = _is_multi_selector(selection)
result = np.average(result, axis=0)
return t, result result = []
for start_frame_index in start_frame_indices:
shifted_idx = logspaced_indices + start_frame_index
start_frame = frames[start_frame_index]
if selector is None:
selection = np.arange(len(start_frame))
else:
selection = selector(start_frame)
if multi_selector:
result_segment = _calc_correlation_multi(
frames, start_frame, function, selection, shifted_idx
)
else:
result_segment = _calc_correlation(
frames, start_frame, function, selection, shifted_idx
)
result.append(result_segment)
result = np.array(result)
if average:
if multi_selector:
result = _average_correlation_multi(result)
else:
result = _average_correlation(result)
return logspaced_time, result
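A usage sketch of the rewritten shifted_correlation, contrasting a plain selector (one index set per start frame) with a multi selector (a list of index sets, routed through _calc_correlation_multi above); the trajectory path and z-cutoffs are made up for illustration:

import numpy as np
import mdevaluate
from mdevaluate.correlation import shifted_correlation, isf

coords = mdevaluate.open("path/to/simulation").subset(atom_name="OW")  # illustrative

def bottom_layer(frame):             # plain selector -> averaged result shape (points,)
    return np.argwhere(frame[:, 2] < 1.0).flatten()

def layers(frame):                   # multi selector -> averaged result shape (n_sets, points)
    z = frame[:, 2]
    return [np.argwhere((z >= lo) & (z < lo + 1.0)).flatten() for lo in (0.0, 1.0, 2.0)]

time, single = shifted_correlation(isf, coords, selector=bottom_layer, segments=10)
time, per_layer = shifted_correlation(isf, coords, selector=layers, segments=10)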
def msd( def msd(
@@ -219,13 +290,13 @@ def isf(
return np.sinc(distance * q / np.pi).mean() return np.sinc(distance * q / np.pi).mean()
elif axis == "xy" or axis == "yx": elif axis == "xy" or axis == "yx":
distance = (displacements[:, [0, 1]] ** 2).sum(axis=1) ** 0.5 distance = (displacements[:, [0, 1]] ** 2).sum(axis=1) ** 0.5
return np.real(jn(0, distance * q / np.pi)).mean() return np.real(jn(0, distance * q)).mean()
elif axis == "xz" or axis == "zx": elif axis == "xz" or axis == "zx":
distance = (displacements[:, [0, 2]] ** 2).sum(axis=1) ** 0.5 distance = (displacements[:, [0, 2]] ** 2).sum(axis=1) ** 0.5
return np.real(jn(0, distance * q / np.pi)).mean() return np.real(jn(0, distance * q)).mean()
elif axis == "yz" or axis == "zy": elif axis == "yz" or axis == "zy":
distance = (displacements[:, [1, 2]] ** 2).sum(axis=1) ** 0.5 distance = (displacements[:, [1, 2]] ** 2).sum(axis=1) ** 0.5
return np.real(jn(0, distance * q / np.pi)).mean() return np.real(jn(0, distance * q)).mean()
elif axis == "x": elif axis == "x":
distance = np.abs(displacements[:, 0]) distance = np.abs(displacements[:, 0])
return np.mean(np.cos(np.abs(q * distance))) return np.mean(np.cos(np.abs(q * distance)))
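Context for the in-plane fix: averaging exp(i q·Δr) over directions in a plane gives the zeroth-order Bessel function J0(q·Δr) with no extra factor; the removed /np.pi belongs only in the 3D branch, because np.sinc(x) is sin(πx)/(πx), so np.sinc(distance * q / np.pi) equals sin(q·r)/(q·r). A small numerical sanity check (q and displacements are made up):

import numpy as np
from scipy.special import jn

rng = np.random.default_rng(0)
q, dr = 22.7, np.abs(rng.normal(0.1, 0.03, 10_000))   # illustrative values
# Direct angular average of cos(q * dr * cos(phi)) over in-plane directions:
phi = rng.uniform(0, 2 * np.pi, dr.size)
direct = np.cos(q * dr * np.cos(phi)).mean()
bessel = np.real(jn(0, dr * q)).mean()                # the corrected expression
assert abs(direct - bessel) < 0.05                    # agree up to sampling noise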
@@ -278,11 +349,11 @@ def van_hove_self(
if axis == "all": if axis == "all":
delta_r = (vectors**2).sum(axis=1) ** 0.5 delta_r = (vectors**2).sum(axis=1) ** 0.5
elif axis == "xy" or axis == "yx": elif axis == "xy" or axis == "yx":
return (vectors[:, [0, 1]]**2).sum(axis=1) ** 0.5 delta_r = (vectors[:, [0, 1]] ** 2).sum(axis=1) ** 0.5
elif axis == "xz" or axis == "zx": elif axis == "xz" or axis == "zx":
return (vectors[:, [0, 2]]**2).sum(axis=1) ** 0.5 delta_r = (vectors[:, [0, 2]] ** 2).sum(axis=1) ** 0.5
elif axis == "yz" or axis == "zy": elif axis == "yz" or axis == "zy":
return (vectors[:, [1, 2]]**2).sum(axis=1) ** 0.5 delta_r = (vectors[:, [1, 2]] ** 2).sum(axis=1) ** 0.5
elif axis == "x": elif axis == "x":
delta_r = np.abs(vectors[:, 0]) delta_r = np.abs(vectors[:, 0])
elif axis == "y": elif axis == "y":

View File

@@ -126,9 +126,6 @@ def rdf(
particles_in_volume = int( particles_in_volume = int(
np.max(number_of_neighbors(atoms_a, query_atoms=atoms_b, r_max=bins[-1])) * 1.1 np.max(number_of_neighbors(atoms_a, query_atoms=atoms_b, r_max=bins[-1])) * 1.1
) )
print(atoms_a[:5])
print(atoms_b[:5])
print(" ")
distances, indices = next_neighbors( distances, indices = next_neighbors(
atoms_a, atoms_a,
atoms_b, atoms_b,
@@ -309,7 +306,7 @@ def next_neighbor_distribution(
)[1] )[1]
resname_nn = reference.residue_names[nn] resname_nn = reference.residue_names[nn]
count_nn = (resname_nn == atoms.residue_names.reshape(-1, 1)).sum(axis=1) count_nn = (resname_nn == atoms.residue_names.reshape(-1, 1)).sum(axis=1)
return np.histogram(count_nn, bins=bins, normed=normed)[0] return np.histogram(count_nn, bins=bins, density=normed)[0]
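The normed keyword of np.histogram was deprecated long ago and has been removed in recent NumPy releases; density is the drop-in replacement and normalizes the counts so the histogram integrates to one. A quick illustration:

import numpy as np

counts, edges = np.histogram([0.0, 1.0, 1.0, 2.0], bins=3, density=True)
# density=True divides each count by (total samples * bin width):
assert np.isclose((counts * np.diff(edges)).sum(), 1.0)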
def hbonds( def hbonds(

View File

@ -11,7 +11,7 @@ from mdevaluate.coordinates import CoordinateFrame, Coordinates
from mdevaluate.pbc import pbc_points from mdevaluate.pbc import pbc_points
def a_ij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]: def calc_aij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]:
tree = KDTree(atoms) tree = KDTree(atoms)
dist, indices = tree.query(atoms, N + 1) dist, indices = tree.query(atoms, N + 1)
@@ -84,18 +84,18 @@ def count_ice_types(iceTypes: NDArray) -> NDArray:
def selector_ice( def selector_ice(
start_frame: CoordinateFrame, oxygen_atoms_water: CoordinateFrame,
traj: Coordinates,
chosen_ice_types: ArrayLike, chosen_ice_types: ArrayLike,
combined: bool = True, combined: bool = True,
next_neighbor_distance: float = 0.35,
) -> NDArray: ) -> NDArray:
oxygen = traj.subset(atom_name="OW") atoms = oxygen_atoms_water
atoms = oxygen[start_frame.step] atoms_PBC = pbc_points(atoms, thickness=next_neighbor_distance * 2.2)
atoms = atoms % np.diag(atoms.box) aij, indices = calc_aij(atoms_PBC)
atoms_PBC = pbc_points(atoms, thickness=1)
aij, indices = a_ij(atoms_PBC)
tree = KDTree(atoms_PBC) tree = KDTree(atoms_PBC)
neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) neighbors = tree.query_ball_point(
atoms_PBC, next_neighbor_distance, return_length=True
) - 1
index_SOL = atoms_PBC.tolist().index(atoms[0].tolist()) index_SOL = atoms_PBC.tolist().index(atoms[0].tolist())
index_SOL = np.arange(index_SOL, index_SOL + len(atoms)) index_SOL = np.arange(index_SOL, index_SOL + len(atoms))
ice_Types = classify_ice(aij, indices, neighbors, index_SOL) ice_Types = classify_ice(aij, indices, neighbors, index_SOL)
@@ -117,9 +117,9 @@ def selector_ice(
def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame: def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray: def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray:
atoms_PBC = pbc_points(frame, thickness=1) atoms_PBC = pbc_points(frame, thickness=1)
aij, indices = a_ij(atoms_PBC) aij, indices = calc_aij(atoms_PBC)
tree = KDTree(atoms_PBC) tree = KDTree(atoms_PBC)
neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) - 1
index = selector(frame, atoms_PBC) index = selector(frame, atoms_PBC)
ice_types_data = classify_ice(aij, indices, neighbors, index) ice_types_data = classify_ice(aij, indices, neighbors, index)
ice_parts_data = count_ice_types(ice_types_data) ice_parts_data = count_ice_types(ice_types_data)
@@ -161,8 +161,8 @@ def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
def ice_clusters_traj( def ice_clusters_traj(
traj: Coordinates, segments: int = 10000, skip: float = 0.1 traj: Coordinates, segments: int = 10000, skip: float = 0.1
) -> list: ) -> list:
def ice_clusters(frame: CoordinateFrame, traj: Coordinates) -> Tuple[float, list]: def ice_clusters(frame: CoordinateFrame) -> Tuple[float, list]:
selection = selector_ice(frame, traj, [0, 1, 2]) selection = selector_ice(frame, [0, 1, 2])
if len(selection) == 0: if len(selection) == 0:
return frame.time, [] return frame.time, []
else: else:
@@ -194,6 +194,6 @@ def ice_clusters_traj(
np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments)) np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments))
) )
all_clusters = [ all_clusters = [
ice_clusters(traj[frame_index], traj) for frame_index in frame_indices ice_clusters(traj[frame_index]) for frame_index in frame_indices
] ]
return all_clusters return all_clusters
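A sketch of calling the reworked selector_ice, which now takes the water-oxygen frame directly instead of a trajectory plus step and exposes the neighbor cutoff as a parameter (module and trajectory paths are assumptions, not part of this diff):

import mdevaluate
from mdevaluate.extra.chill import selector_ice   # assumed module path

traj = mdevaluate.open("path/to/simulation")      # illustrative path
oxygens = traj.subset(atom_name="OW")

# Indices of oxygens matching the first three ice-type codes, as used by
# ice_clusters_traj above; next_neighbor_distance defaults to 0.35 nm.
ice_indices = selector_ice(oxygens[0], [0, 1, 2])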

View File

@@ -1,9 +1,9 @@
from functools import partial from functools import partial
from typing import Optional
import numpy as np import numpy as np
from numpy.typing import ArrayLike, NDArray from numpy.typing import ArrayLike, NDArray
from numpy.polynomial.polynomial import Polynomial as Poly from numpy.polynomial.polynomial import Polynomial as Poly
import math
from scipy.spatial import KDTree from scipy.spatial import KDTree
import pandas as pd import pandas as pd
import multiprocessing as mp import multiprocessing as mp
@@ -11,6 +11,56 @@ import multiprocessing as mp
from ..coordinates import Coordinates from ..coordinates import Coordinates
def _pbc_points_reduced(
coordinates: ArrayLike,
pore_geometry: str,
box: Optional[NDArray] = None,
thickness: Optional[float] = None,
) -> tuple[NDArray, NDArray]:
if box is None:
box = coordinates.box
if pore_geometry == "cylindrical":
grid = np.array([[i, j, k] for k in [-1, 0, 1] for j in [0] for i in [0]])
indices = np.tile(np.arange(len(coordinates)), 3)
elif pore_geometry == "slit":
grid = np.array(
[[i, j, k] for k in [0] for j in [1, 0, -1] for i in [-1, 0, 1]]
)
indices = np.tile(np.arange(len(coordinates)), 9)
else:
raise ValueError(
f"pore_geometry is {pore_geometry}, should either be "
f"'cylindrical' or 'slit'"
)
coordinates_pbc = np.concatenate([coordinates + v @ box for v in grid], axis=0)
size = np.diag(box)
if thickness is not None:
mask = np.all(coordinates_pbc > -thickness, axis=1)
coordinates_pbc = coordinates_pbc[mask]
indices = indices[mask]
mask = np.all(coordinates_pbc < size + thickness, axis=1)
coordinates_pbc = coordinates_pbc[mask]
indices = indices[mask]
return coordinates_pbc, indices
def _build_tree(points, box, r_max, pore_geometry):
if np.all(np.diag(np.diag(box)) == box):
tree = KDTree(points % box, boxsize=box)
points_pbc_index = None
else:
points_pbc, points_pbc_index = _pbc_points_reduced(
points,
pore_geometry,
box,
thickness=r_max + 0.01,
)
tree = KDTree(points_pbc)
return tree, points_pbc_index
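The two helpers above give a KDTree that is periodic for rectangular boxes (via scipy's boxsize) and falls back to explicitly replicated images for the two pore geometries; points_pbc_index then maps hits on image points back to original indices. A minimal sketch under those assumptions, with a rectangular box given as its edge lengths (import path assumed):

import numpy as np
from mdevaluate.extra.free_energy_landscape import _build_tree  # assumed module path

box = np.array([3.0, 3.0, 3.0])                   # rectangular box edge lengths
points = np.random.default_rng(1).random((100, 3)) * box
tree, points_pbc_index = _build_tree(points, box, r_max=0.5, pore_geometry="slit")
neighbors = tree.query_ball_point(points[0], 0.5)
if points_pbc_index is not None:                  # replicated-image case
    neighbors = points_pbc_index[neighbors]       # map back to original indices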
def occupation_matrix( def occupation_matrix(
trajectory: Coordinates, trajectory: Coordinates,
edge_length: float = 0.05, edge_length: float = 0.05,
@@ -28,11 +78,7 @@ def occupation_matrix(
z_bins = np.arange(0, box[2][2] + edge_length, edge_length) z_bins = np.arange(0, box[2][2] + edge_length, edge_length)
bins = [x_bins, y_bins, z_bins] bins = [x_bins, y_bins, z_bins]
# Trajectory is split for parallel computing # Trajectory is split for parallel computing
size = math.ceil(len(frame_indices) / nodes) indices = np.array_split(frame_indices, nodes)
indices = [
np.arange(len(frame_indices))[i : i + size]
for i in range(0, len(frame_indices), size)
]
pool = mp.Pool(nodes) pool = mp.Pool(nodes)
results = pool.map( results = pool.map(
partial(_calc_histogram, trajectory=trajectory, bins=bins), indices partial(_calc_histogram, trajectory=trajectory, bins=bins), indices
@@ -72,19 +118,19 @@ def _calc_histogram(
def find_maxima( def find_maxima(
occupation_df: pd.DataFrame, box: ArrayLike, edge_length: float = 0.05 occupation_df: pd.DataFrame, box: ArrayLike, radius: float, pore_geometry: str
) -> pd.DataFrame: ) -> pd.DataFrame:
maxima_df = occupation_df.copy() maxima_df = occupation_df.copy()
maxima_df["maxima"] = None maxima_df["maxima"] = None
points = np.array(maxima_df[["x", "y", "z"]]) points = np.array(maxima_df[["x", "y", "z"]])
tree = KDTree(points, boxsize=box) tree, points_pbc_index = _build_tree(points, box, radius, pore_geometry)
all_neighbors = tree.query_ball_point(
points, edge_length * 3 ** (1 / 2) + edge_length / 100
)
for i in range(len(maxima_df)): for i in range(len(maxima_df)):
if maxima_df.loc[i, "maxima"] is not None: if maxima_df.loc[i, "maxima"] is not None:
continue continue
neighbors = np.array(all_neighbors[i]) maxima_pos = maxima_df.loc[i, ["x", "y", "z"]]
neighbors = np.array(tree.query_ball_point(maxima_pos, radius))
if points_pbc_index is not None:
neighbors = points_pbc_index[neighbors]
neighbors = neighbors[neighbors != i] neighbors = neighbors[neighbors != i]
if len(neighbors) == 0: if len(neighbors) == 0:
maxima_df.loc[i, "maxima"] = True maxima_df.loc[i, "maxima"] = True
@@ -104,23 +150,39 @@
maxima_df: pd.DataFrame, maxima_df: pd.DataFrame,
bins: ArrayLike, bins: ArrayLike,
box: NDArray, box: NDArray,
pore_geometry: str,
T: float, T: float,
nodes: int = 8,
) -> NDArray: ) -> NDArray:
points = np.array(maxima_df[["x", "y", "z"]]) points = np.array(maxima_df[["x", "y", "z"]])
tree = KDTree(points, boxsize=box) tree, points_pbc_index = _build_tree(points, box, bins[-1], pore_geometry)
maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]] maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]]
maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"]) maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"])
num_of_neighbors = np.max( num_of_neighbors = np.max(
tree.query_ball_point(maxima, bins[-1], return_length=True) tree.query_ball_point(maxima, bins[-1], return_length=True)
) )
distances, indices = tree.query( split_maxima = []
maxima, k=num_of_neighbors, distance_upper_bound=bins[-1] for i in range(0, len(maxima), 1000):
split_maxima.append(maxima[i : i + 1000])
distances = []
indices = []
for maxima in split_maxima:
distances_step, indices_step = tree.query(
maxima, k=num_of_neighbors, distance_upper_bound=bins[-1], workers=nodes
) )
distances.append(distances_step)
indices.append(indices_step)
distances = np.concatenate(distances)
indices = np.concatenate(indices)
all_energy_hist = [] all_energy_hist = []
all_occupied_bins_hist = [] all_occupied_bins_hist = []
if distances.ndim == 1: if distances.ndim == 1:
current_distances = distances[1:][distances[1:] <= bins[-1]] current_distances = distances[1:][distances[1:] <= bins[-1]]
if points_pbc_index is None:
current_indices = indices[1:][distances[1:] <= bins[-1]] current_indices = indices[1:][distances[1:] <= bins[-1]]
else:
current_indices = points_pbc_index[indices[1:][distances[1:] <= bins[-1]]]
energy = ( energy = (
-np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations) -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations)
* T * T
@@ -131,8 +193,12 @@
return result return result
for i, maxima_occupation in enumerate(maxima_occupations): for i, maxima_occupation in enumerate(maxima_occupations):
current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]] current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]]
if points_pbc_index is None:
current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]] current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]]
else:
current_indices = points_pbc_index[
indices[i, 1:][distances[i, 1:] <= bins[-1]]
]
energy = ( energy = (
-np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation) -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation)
* T * T
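Querying the tree for all maxima at once with a large k can be slow and memory-hungry, so _calc_energies now batches the query in chunks of 1000 and parallelizes each chunk with workers. The same pattern in isolation (sizes are illustrative):

import numpy as np
from scipy.spatial import KDTree

rng = np.random.default_rng(2)
tree = KDTree(rng.random((50_000, 3)))
queries = rng.random((5_000, 3))

distances, indices = [], []
for i in range(0, len(queries), 1000):            # chunk to bound memory use
    d, idx = tree.query(queries[i : i + 1000], k=64, workers=8)
    distances.append(d)
    indices.append(idx)
distances = np.concatenate(distances)
indices = np.concatenate(indices)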
@@ -168,9 +234,12 @@ def distance_resolved_energies(
distance_bins: ArrayLike, distance_bins: ArrayLike,
r_bins: ArrayLike, r_bins: ArrayLike,
box: NDArray, box: NDArray,
pore_geometry: str,
temperature: float, temperature: float,
nodes: int = 8,
) -> pd.DataFrame: ) -> pd.DataFrame:
results = [] results = []
distances = []
for i in range(len(distance_bins) - 1): for i in range(len(distance_bins) - 1):
maxima_indices = np.array( maxima_indices = np.array(
maxima_df.index[ maxima_df.index[
@@ -179,11 +248,22 @@
* (maxima_df["maxima"] == True) * (maxima_df["maxima"] == True)
] ]
) )
try:
results.append( results.append(
_calc_energies(maxima_indices, maxima_df, r_bins, box, temperature) _calc_energies(
maxima_indices,
maxima_df,
r_bins,
box,
pore_geometry,
temperature,
nodes,
) )
)
distances.append((distance_bins[i] + distance_bins[i + 1]) / 2)
except ValueError:
pass
distances = (distance_bins[:-1] + distance_bins[1:]) / 2
radii = (r_bins[:-1] + r_bins[1:]) / 2 radii = (r_bins[:-1] + r_bins[1:]) / 2
d = np.array([d for d in distances for r in radii]) d = np.array([d for d in distances for r in radii])
r = np.array([r for d in distances for r in radii]) r = np.array([r for d in distances for r in radii])
@@ -192,7 +272,11 @@
def find_energy_maxima( def find_energy_maxima(
energy_df: pd.DataFrame, r_min: float, r_max: float energy_df: pd.DataFrame,
r_min: float,
r_max: float,
r_eval: float = None,
degree: int = 2,
) -> pd.DataFrame: ) -> pd.DataFrame:
distances = [] distances = []
energies = [] energies = []
@@ -201,6 +285,9 @@
x = np.array(data_d["r"]) x = np.array(data_d["r"])
y = np.array(data_d["energy"]) y = np.array(data_d["energy"])
mask = (x >= r_min) * (x <= r_max) mask = (x >= r_min) * (x <= r_max)
p3 = Poly.fit(x[mask], y[mask], deg=2) p3 = Poly.fit(x[mask], y[mask], deg=degree)
if r_eval is None:
energies.append(np.max(p3(np.linspace(r_min, r_max, 1000)))) energies.append(np.max(p3(np.linspace(r_min, r_max, 1000))))
else:
energies.append(p3(r_eval))
return pd.DataFrame({"d": distances, "energy": energies}) return pd.DataFrame({"d": distances, "energy": energies})
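find_energy_maxima now accepts the fit degree and an optional fixed evaluation radius r_eval; when r_eval is None it still reports the maximum of the fitted polynomial on [r_min, r_max]. Both branches in isolation (the parabolic sample data is made up):

import numpy as np
from numpy.polynomial.polynomial import Polynomial as Poly

r = np.linspace(0.05, 0.15, 20)
energy = -200 * (r - 0.10) ** 2 + 5.0              # fake profile peaking at r = 0.10
fit = Poly.fit(r, energy, deg=2)
peak = np.max(fit(np.linspace(0.05, 0.15, 1000)))  # r_eval is None branch
assert np.isclose(peak, 5.0, atol=1e-2)
at_fixed_r = fit(0.12)                             # r_eval = 0.12 branch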

View File

@@ -265,7 +265,7 @@ def LSI(
) )
distributions = np.array( distributions = np.array(
[ [
LSI_distribution(trajectory[frame_index], trajectory, bins, selector=None) LSI_distribution(trajectory[frame_index], bins, selector=None)
for frame_index in frame_indices for frame_index in frame_indices
] ]
) )

View File

@@ -152,7 +152,7 @@ def nojump_load_filename(reader: BaseReader):
) )
if os.path.exists(full_path_fallback): if os.path.exists(full_path_fallback):
return full_path_fallback return full_path_fallback
if os.path.exists(fname) or is_writeable(directory): if os.path.exists(full_path) or is_writeable(directory):
return full_path return full_path
else: else:
user_data_dir = os.path.join("/data/", os.environ["HOME"].split("/")[-1]) user_data_dir = os.path.join("/data/", os.environ["HOME"].split("/")[-1])

View File

@@ -177,7 +177,7 @@ def coherent_sum(
func: Callable[[ArrayLike, ArrayLike], float], func: Callable[[ArrayLike, ArrayLike], float],
coord_a: ArrayLike, coord_a: ArrayLike,
coord_b: ArrayLike, coord_b: ArrayLike,
) -> float: ) -> NDArray:
""" """
Perform a coherent sum over two arrays :math:`A, B`. Perform a coherent sum over two arrays :math:`A, B`.

test/test_correlation.py (new file, 57 lines added)
View File

@@ -0,0 +1,57 @@
import os
import pytest
import mdevaluate
from mdevaluate import correlation
import numpy as np
@pytest.fixture
def trajectory(request):
return mdevaluate.open(os.path.join(os.path.dirname(__file__), "data/water"))
def test_shifted_correlation(trajectory):
test_array = np.array([100, 82, 65, 49, 39, 29, 20, 13, 7])
OW = trajectory.subset(atom_name="OW")
t, result = correlation.shifted_correlation(
correlation.isf, OW, segments=10, skip=0.1, points=10
)
assert (np.array(result * 100, dtype=int) == test_array).all()
def test_shifted_correlation_no_average(trajectory):
t, result = correlation.shifted_correlation(
correlation.isf, trajectory, segments=10, skip=0.1, points=5, average=False
)
assert result.shape == (10, 5)
def test_shifted_correlation_selector(trajectory):
test_array = np.array([100, 82, 64, 48, 37, 28, 19, 11, 5])
def selector(frame):
index = np.argwhere((frame[:, 0] >= 0) * (frame[:, 0] < 1))
return index.flatten()
OW = trajectory.subset(atom_name="OW")
t, result = correlation.shifted_correlation(
correlation.isf, OW, segments=10, skip=0.1, points=10, selector=selector
)
assert (np.array(result * 100, dtype=int) == test_array).all()
def test_shifted_correlation_multi_selector(trajectory):
def selector(frame):
indices = []
for i in range(3):
x = frame[:, 0].flatten()
index = np.argwhere((x >= i) * (x < i + 1))
indices.append(index.flatten())
return indices
OW = trajectory.subset(atom_name="OW")
t, result = correlation.shifted_correlation(
correlation.isf, OW, segments=10, skip=0.1, points=10, selector=selector
)
assert result.shape == (3, 9)

View File

@@ -13,42 +13,24 @@ def trajectory(request):
def test_get_fel(trajectory): def test_get_fel(trajectory):
test_array = np.array( test_array = np.array([210., 214., 209., 192., 200., 193., 230., 218., 266.])
[
174.46253634,
174.60905476,
178.57658092,
182.43001192,
180.57916378,
176.49886217,
178.96018547,
181.13561782,
178.31026314,
176.08903996,
180.71215345,
181.59703135,
180.34329368,
187.02474488,
197.99167477,
214.05788031,
245.58571282,
287.52457507,
331.53492965,
]
)
OW = trajectory.subset(atom_name="OW") OW = trajectory.subset(atom_name="OW")
box = trajectory[0].box
box = np.diag(trajectory[0].box) box_voxels = (np.diag(box) // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05]
box_voxels = (box // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05] occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=10)
occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=1000) radius_maxima = 0.05 * 3 ** (1 / 2) + 0.05 / 100
maxima_matrix = fel.find_maxima(occupation_matrix, box=box_voxels, edge_length=0.05) maxima_matrix = fel.find_maxima(
maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", box / 2) occupation_matrix,
r_bins = np.arange(0, 2, 0.02) box=box_voxels,
distance_bins = np.arange(0.05, 2.05, 0.1) radius=radius_maxima,
pore_geometry="cylindrical",
)
maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", np.diag(box) / 2)
r_bins = np.arange(0, 0.5, 0.02)
distance_bins = np.arange(1.8, 1.9, 0.01)
energy_df = fel.distance_resolved_energies( energy_df = fel.distance_resolved_energies(
maxima_matrix, distance_bins, r_bins, box, 225 maxima_matrix, distance_bins, r_bins, box, "cylindrical", 225
) )
result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15) result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15)
assert (np.round(np.array(result["energy"])) == np.round(test_array)).all() assert (np.round(np.array(result["energy"])) == np.round(test_array)).all()