Compare commits
	
		
			29 Commits
		
	
	
		
			77771738ab
			...
			mdeval_dev
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| adae920527 | |||
| 11793d7960 | |||
| 55b06fa61d | |||
| 3aa91d7482 | |||
| 7d8c5d849d | |||
| c09549902a | |||
| b7bb8cb379 | |||
| 33c4756e34 | |||
| 7b9f8b6773 | |||
| c89cead81c | |||
| 31eb145a13 | |||
| af3758cbef | |||
| 93d020a4de | |||
| b5395098ce | |||
| 5e80701562 | |||
| 363e420cd8 | |||
| 6b77ef78e1 | |||
| 0c940115af | |||
| b0f29907df | |||
| 37bf496b21 | |||
| befaef2dfa | |||
| 8ea7da5d2f | |||
| b405842452 | |||
| f5cf453d61 | |||
| 4394f70530 | |||
| 298da3818d | |||
| d9278eed83 | |||
| 787882810c | |||
| f527d25864 | 
| @@ -4,11 +4,12 @@ build-backend = "setuptools.build_meta" | ||||
|  | ||||
| [project] | ||||
| name = "mdevaluate" | ||||
| version = "24.01" | ||||
| version = "24.06" | ||||
| dependencies = [ | ||||
|     "mdanalysis", | ||||
|     "pandas", | ||||
|     "dask", | ||||
|     "pathos", | ||||
|     "tables" | ||||
|     "tables", | ||||
|     "pyedr" | ||||
| ] | ||||
|   | ||||
| @@ -3,4 +3,5 @@ pandas | ||||
| dask | ||||
| pathos | ||||
| tables | ||||
| pytest | ||||
| pytest | ||||
| pyedr | ||||
| @@ -218,7 +218,7 @@ class Coordinates: | ||||
|             self.get_frame.clear_cache() | ||||
|  | ||||
|     def __iter__(self): | ||||
|         for i in range(len(self))[self._slice]: | ||||
|         for i in range(len(self.frames))[self._slice]: | ||||
|             yield self[i] | ||||
|  | ||||
|     @singledispatchmethod | ||||
| @@ -232,7 +232,7 @@ class Coordinates: | ||||
|         return sliced | ||||
|  | ||||
|     def __len__(self): | ||||
|         return len(self.frames) | ||||
|         return len(self.frames[self._slice]) | ||||
|  | ||||
|     def __checksum__(self): | ||||
|         return checksum(self.frames, self.atom_filter, self._slice, self.mode) | ||||
| @@ -692,10 +692,6 @@ def number_of_neighbors( | ||||
|     elif not distinct: | ||||
|         dnn = 1 | ||||
|  | ||||
|     print(atoms[:5]) | ||||
|     print(query_atoms[:5]) | ||||
|     print(" ") | ||||
|  | ||||
|     box = atoms.box | ||||
|     if np.all(np.diag(np.diag(box)) == box): | ||||
|         atoms = atoms % np.diag(box) | ||||
|   | ||||
| @@ -2,7 +2,7 @@ from typing import Callable, Optional | ||||
|  | ||||
| import numpy as np | ||||
| from numpy.typing import ArrayLike | ||||
| from scipy.special import legendre | ||||
| from scipy.special import legendre, jn | ||||
| import dask.array as darray | ||||
| from functools import partial | ||||
| from scipy.spatial import KDTree | ||||
| @@ -13,9 +13,95 @@ from .pbc import pbc_diff, pbc_points | ||||
| from .coordinates import Coordinates, CoordinateFrame, displacements_without_drift | ||||
|  | ||||
|  | ||||
| def log_indices(first: int, last: int, num: int = 100) -> np.ndarray: | ||||
|     ls = np.logspace(0, np.log10(last - first + 1), num=num) | ||||
|     return np.unique(np.int_(ls) - 1 + first) | ||||
| def _is_multi_selector(selection): | ||||
|     if len(selection) == 0: | ||||
|         return False | ||||
|     elif ( | ||||
|         isinstance(selection[0], int) | ||||
|         or isinstance(selection[0], bool) | ||||
|         or isinstance(selection[0], np.integer) | ||||
|         or isinstance(selection[0], np.bool_) | ||||
|     ): | ||||
|         return False | ||||
|     else: | ||||
|         for indices in selection: | ||||
|             if len(indices) == 0: | ||||
|                 continue | ||||
|             elif ( | ||||
|                 isinstance(indices[0], int) | ||||
|                 or isinstance(indices[0], bool) | ||||
|                 or isinstance(indices[0], np.integer) | ||||
|                 or isinstance(indices[0], np.bool_) | ||||
|             ): | ||||
|                 return True | ||||
|             else: | ||||
|                 raise ValueError( | ||||
|                     "selector has more than two dimensions or does not " | ||||
|                     "contain int or bool types" | ||||
|                 ) | ||||
|  | ||||
|  | ||||
| def _calc_correlation( | ||||
|     frames: Coordinates, | ||||
|     start_frame: CoordinateFrame, | ||||
|     function: Callable, | ||||
|     selection: np.ndarray, | ||||
|     shifted_idx: np.ndarray, | ||||
| ) -> np.ndarray: | ||||
|     if len(selection) == 0: | ||||
|         correlation = np.zeros(len(shifted_idx)) | ||||
|     else: | ||||
|         start = start_frame[selection] | ||||
|         correlation = np.array( | ||||
|             [ | ||||
|                 function(start, frames[frame_index][selection]) | ||||
|                 for frame_index in shifted_idx | ||||
|             ] | ||||
|         ) | ||||
|     return correlation | ||||
|  | ||||
|  | ||||
| def _calc_correlation_multi( | ||||
|     frames: Coordinates, | ||||
|     start_frame: CoordinateFrame, | ||||
|     function: Callable, | ||||
|     selection: np.ndarray, | ||||
|     shifted_idx: np.ndarray, | ||||
| ) -> np.ndarray: | ||||
|     correlations = np.zeros((len(selection), len(shifted_idx))) | ||||
|     for i, frame_index in enumerate(shifted_idx): | ||||
|         frame = frames[frame_index] | ||||
|         for j, current_selection in enumerate(selection): | ||||
|             if len(selection) == 0: | ||||
|                 correlations[j, i] = 0 | ||||
|             else: | ||||
|                 correlations[j, i] = function( | ||||
|                     start_frame[current_selection], frame[current_selection] | ||||
|                 ) | ||||
|     return correlations | ||||
|  | ||||
|  | ||||
| def _average_correlation(result): | ||||
|     averaged_result = [] | ||||
|     for n in range(result.shape[1]): | ||||
|         clean_result = [] | ||||
|         for entry in result[:, n]: | ||||
|             if np.all(entry == 0): | ||||
|                 continue | ||||
|             else: | ||||
|                 clean_result.append(entry) | ||||
|         averaged_result.append(np.average(np.array(clean_result), axis=0)) | ||||
|     return np.array(averaged_result) | ||||
|  | ||||
|  | ||||
| def _average_correlation_multi(result): | ||||
|     clean_result = [] | ||||
|     for entry in result: | ||||
|         if np.all(entry == 0): | ||||
|             continue | ||||
|         else: | ||||
|             clean_result.append(entry) | ||||
|     return np.average(np.array(clean_result), axis=0) | ||||
|  | ||||
|  | ||||
| @autosave_data(2) | ||||
| @@ -28,113 +114,82 @@ def shifted_correlation( | ||||
|     window: float = 0.5, | ||||
|     average: bool = True, | ||||
|     points: int = 100, | ||||
| ) -> (np.ndarray, np.ndarray): | ||||
| ) -> tuple[np.ndarray, np.ndarray]: | ||||
|     """Compute a time-dependent correlation function for a given trajectory. | ||||
|  | ||||
|     To improve statistics, multiple (possibly overlapping) windows will be | ||||
|     layed over the whole trajectory and the correlation is computed for them separately. | ||||
|     The start frames of the windows are spaced linearly over the valid region of | ||||
|     the trajectory (skipping frames in the beginning given by skip parameter). | ||||
|  | ||||
|     The points within each window are spaced logarithmically. | ||||
|  | ||||
|     Only a certain subset of the given atoms may be selected for each window | ||||
|     individually using a selector function. | ||||
|  | ||||
|     Note that this function is specifically optimized for multi selectors, which select | ||||
|     multiple selection sets per window, for which the correlation is to be computed | ||||
|     separately. | ||||
|  | ||||
|  | ||||
|     Arguments | ||||
|     --------- | ||||
|     function: | ||||
|         The (correlation) function to evaluate. | ||||
|         Should be of the form (CoordinateFrame, CoordinateFrame) -> float | ||||
|  | ||||
|     frames: | ||||
|         Trajectory to evaluate on | ||||
|  | ||||
|     selector: (optional) | ||||
|         Selection function so select only certain selection sets for each start frame. | ||||
|         Should be of the form | ||||
|             (CoordinateFrame) -> list[A] | ||||
|         where A is something you can index an ndarray with. | ||||
|         For example a list of indices or a bool array. | ||||
|         Must return the same number of selection sets for every frame. | ||||
|  | ||||
|     segments: | ||||
|         Number of start frames | ||||
|  | ||||
|     skip: | ||||
|         Percentage of trajectory to skip from the start | ||||
|  | ||||
|     window: | ||||
|         Length of each segment given as percentage of trajectory | ||||
|  | ||||
|     average: | ||||
|         Whether to return averaged results. | ||||
|         See below for details on the returned ndarray. | ||||
|  | ||||
|     points: | ||||
|         Number of points per segment | ||||
|  | ||||
|  | ||||
|     Returns | ||||
|     ------- | ||||
|     times: ndarray | ||||
|         1d array of time differences to start frame | ||||
|     result: ndarray | ||||
|         2d ndarray of averaged (or non-averaged) correlations. | ||||
|  | ||||
|         When average==True (default) the returned array will be of the shape (S, P) | ||||
|         where S is the number of selection sets and P the number of points per window. | ||||
|         For selection sets that where empty for all start frames all data points will be | ||||
|         zero. | ||||
|  | ||||
|         When average==False the returned array will be of shape (W, S) with | ||||
|         dtype=object. The elements are either ndarrays of shape (P,) containing the | ||||
|         correlation data for the specific window and selection set or None if the | ||||
|         corresponding selection set was empty. | ||||
|         W is the number of segments (windows). | ||||
|         S and P are the same as for average==True. | ||||
|  | ||||
|     """ | ||||
|     Calculate the time series for a correlation function. | ||||
|  | ||||
|     The times at which the correlation is calculated are determined by | ||||
|     a logarithmic distribution. | ||||
|  | ||||
|     Args: | ||||
|         function: The function that should be correlated | ||||
|         frames: The coordinates of the simulation data | ||||
|         selector (opt.): | ||||
|                     A function that returns the indices depending on | ||||
|                     the staring frame for which particles the | ||||
|                     correlation should be calculated. | ||||
|         segments (int, opt.): | ||||
|                     The number of segments the time window will be | ||||
|                     shifted | ||||
|         skip (float, opt.): | ||||
|                     The fraction of the trajectory that will be skipped | ||||
|                     at the beginning, if this is None the start index | ||||
|                     of the frames slice will be used, which defaults | ||||
|                     to 0.1. | ||||
|         window (float, opt.): | ||||
|                     The fraction of the simulation the time series will | ||||
|                     cover | ||||
|         average (bool, opt.): | ||||
|                     If True, returns averaged correlation function | ||||
|         points (int, opt.): | ||||
|                     The number of timeshifts for which the correlation | ||||
|                     should be calculated | ||||
|     Returns: | ||||
|         tuple: | ||||
|             A list of length N that contains the timeshiftes of the frames at which | ||||
|             the time series was calculated and a numpy array of shape (segments, N) | ||||
|             that holds the (non-avaraged) correlation data | ||||
|  | ||||
|     Example: | ||||
|         Calculating the mean square displacement of a coordinate object | ||||
|         named ``coords``: | ||||
|  | ||||
|         >>> time, data = shifted_correlation(msd, coords) | ||||
|     """ | ||||
|  | ||||
|     def get_correlation( | ||||
|         frames: CoordinateFrame, | ||||
|         start_frame: CoordinateFrame, | ||||
|         index: np.ndarray, | ||||
|         shifted_idx: np.ndarray, | ||||
|     ) -> np.ndarray: | ||||
|         if len(index) == 0: | ||||
|             correlation = np.zeros(len(shifted_idx)) | ||||
|         else: | ||||
|             start = frames[start_frame][index] | ||||
|             correlation = np.array( | ||||
|                 [function(start, frames[frame][index]) for frame in shifted_idx] | ||||
|             ) | ||||
|         return correlation | ||||
|  | ||||
|     def apply_selector( | ||||
|         start_frame: CoordinateFrame, | ||||
|         frames: CoordinateFrame, | ||||
|         idx: np.ndarray, | ||||
|         selector: Optional[Callable] = None, | ||||
|     ): | ||||
|         shifted_idx = idx + start_frame | ||||
|  | ||||
|         if selector is None: | ||||
|             index = np.arange(len(frames[start_frame])) | ||||
|             return get_correlation(frames, start_frame, index, shifted_idx) | ||||
|         else: | ||||
|             index = selector(frames[start_frame]) | ||||
|             if len(index) == 0: | ||||
|                 return np.zeros(len(shifted_idx)) | ||||
|  | ||||
|             elif ( | ||||
|                 isinstance(index[0], int) | ||||
|                 or isinstance(index[0], bool) | ||||
|                 or isinstance(index[0], np.integer) | ||||
|                 or isinstance(index[0], np.bool_) | ||||
|             ): | ||||
|                 return get_correlation(frames, start_frame, index, shifted_idx) | ||||
|             else: | ||||
|                 correlations = [] | ||||
|                 for ind in index: | ||||
|                     if len(ind) == 0: | ||||
|                         correlations.append(np.zeros(len(shifted_idx))) | ||||
|  | ||||
|                     elif ( | ||||
|                         isinstance(ind[0], int) | ||||
|                         or isinstance(ind[0], bool) | ||||
|                         or isinstance(ind[0], np.integer) | ||||
|                         or isinstance(ind[0], np.bool_) | ||||
|                     ): | ||||
|                         correlations.append( | ||||
|                             get_correlation(frames, start_frame, ind, shifted_idx) | ||||
|                         ) | ||||
|                     else: | ||||
|                         raise ValueError( | ||||
|                             "selector has more than two dimensions or does not " | ||||
|                             "contain int or bool types" | ||||
|                         ) | ||||
|                 return correlations | ||||
|  | ||||
|     if 1 - skip < window: | ||||
|         window = 1 - skip | ||||
|  | ||||
|     start_frames = np.unique( | ||||
|     start_frame_indices = np.unique( | ||||
|         np.linspace( | ||||
|             len(frames) * skip, | ||||
|             len(frames) * (1 - window), | ||||
| @@ -144,28 +199,44 @@ def shifted_correlation( | ||||
|         ) | ||||
|     ) | ||||
|  | ||||
|     num_frames = int(len(frames) * window) | ||||
|     ls = np.logspace(0, np.log10(num_frames + 1), num=points) | ||||
|     idx = np.unique(np.int_(ls) - 1) | ||||
|     t = np.array([frames[i].time for i in idx]) - frames[0].time | ||||
|  | ||||
|     result = np.array( | ||||
|         [ | ||||
|             apply_selector(start_frame, frames=frames, idx=idx, selector=selector) | ||||
|             for start_frame in start_frames | ||||
|         ] | ||||
|     num_frames_per_window = int(len(frames) * window) | ||||
|     logspaced_indices = np.logspace(0, np.log10(num_frames_per_window + 1), num=points) | ||||
|     logspaced_indices = np.unique(np.int_(logspaced_indices) - 1) | ||||
|     logspaced_time = ( | ||||
|         np.array([frames[i].time for i in logspaced_indices]) - frames[0].time | ||||
|     ) | ||||
|  | ||||
|     if selector is None: | ||||
|         multi_selector = False | ||||
|     else: | ||||
|         selection = selector(frames[0]) | ||||
|         multi_selector = _is_multi_selector(selection) | ||||
|  | ||||
|     result = [] | ||||
|     for start_frame_index in start_frame_indices: | ||||
|         shifted_idx = logspaced_indices + start_frame_index | ||||
|         start_frame = frames[start_frame_index] | ||||
|         if selector is None: | ||||
|             selection = np.arange(len(start_frame)) | ||||
|         else: | ||||
|             selection = selector(start_frame) | ||||
|         if multi_selector: | ||||
|             result_segment = _calc_correlation_multi( | ||||
|                 frames, start_frame, function, selection, shifted_idx | ||||
|             ) | ||||
|         else: | ||||
|             result_segment = _calc_correlation( | ||||
|                 frames, start_frame, function, selection, shifted_idx | ||||
|             ) | ||||
|         result.append(result_segment) | ||||
|     result = np.array(result) | ||||
|  | ||||
|     if average: | ||||
|         clean_result = [] | ||||
|         for entry in result: | ||||
|             if np.all(entry == 0): | ||||
|                 continue | ||||
|             else: | ||||
|                 clean_result.append(entry) | ||||
|         result = np.array(clean_result) | ||||
|         result = np.average(result, axis=0) | ||||
|     return t, result | ||||
|         if multi_selector: | ||||
|             result = _average_correlation_multi(result) | ||||
|         else: | ||||
|             result = _average_correlation(result) | ||||
|     return logspaced_time, result | ||||
|  | ||||
|  | ||||
| def msd( | ||||
| @@ -183,6 +254,12 @@ def msd( | ||||
|         displacements = displacements_without_drift(start_frame, end_frame, trajectory) | ||||
|     if axis == "all": | ||||
|         return (displacements**2).sum(axis=1).mean() | ||||
|     elif axis == "xy" or axis == "yx": | ||||
|         return (displacements[:, [0, 1]] ** 2).sum(axis=1).mean() | ||||
|     elif axis == "xz" or axis == "zx": | ||||
|         return (displacements[:, [0, 2]] ** 2).sum(axis=1).mean() | ||||
|     elif axis == "yz" or axis == "zy": | ||||
|         return (displacements[:, [1, 2]] ** 2).sum(axis=1).mean() | ||||
|     elif axis == "x": | ||||
|         return (displacements[:, 0] ** 2).mean() | ||||
|     elif axis == "y": | ||||
| @@ -211,6 +288,15 @@ def isf( | ||||
|     if axis == "all": | ||||
|         distance = (displacements**2).sum(axis=1) ** 0.5 | ||||
|         return np.sinc(distance * q / np.pi).mean() | ||||
|     elif axis == "xy" or axis == "yx": | ||||
|         distance = (displacements[:, [0, 1]] ** 2).sum(axis=1) ** 0.5 | ||||
|         return np.real(jn(0, distance * q)).mean() | ||||
|     elif axis == "xz" or axis == "zx": | ||||
|         distance = (displacements[:, [0, 2]] ** 2).sum(axis=1) ** 0.5 | ||||
|         return np.real(jn(0, distance * q)).mean() | ||||
|     elif axis == "yz" or axis == "zy": | ||||
|         distance = (displacements[:, [1, 2]] ** 2).sum(axis=1) ** 0.5 | ||||
|         return np.real(jn(0, distance * q)).mean() | ||||
|     elif axis == "x": | ||||
|         distance = np.abs(displacements[:, 0]) | ||||
|         return np.mean(np.cos(np.abs(q * distance))) | ||||
| @@ -262,6 +348,12 @@ def van_hove_self( | ||||
|         vectors = displacements_without_drift(start_frame, end_frame, trajectory) | ||||
|     if axis == "all": | ||||
|         delta_r = (vectors**2).sum(axis=1) ** 0.5 | ||||
|     elif axis == "xy" or axis == "yx": | ||||
|         delta_r = (vectors[:, [0, 1]] ** 2).sum(axis=1) ** 0.5 | ||||
|     elif axis == "xz" or axis == "zx": | ||||
|         delta_r = (vectors[:, [0, 2]] ** 2).sum(axis=1) ** 0.5 | ||||
|     elif axis == "yz" or axis == "zy": | ||||
|         delta_r = (vectors[:, [1, 2]] ** 2).sum(axis=1) ** 0.5 | ||||
|     elif axis == "x": | ||||
|         delta_r = np.abs(vectors[:, 0]) | ||||
|     elif axis == "y": | ||||
| @@ -423,6 +515,15 @@ def non_gaussian_parameter( | ||||
|     if axis == "all": | ||||
|         r = (vectors**2).sum(axis=1) | ||||
|         dimensions = 3 | ||||
|     elif axis == "xy" or axis == "yx": | ||||
|         r = (vectors[:, [0, 1]] ** 2).sum(axis=1) | ||||
|         dimensions = 2 | ||||
|     elif axis == "xz" or axis == "zx": | ||||
|         r = (vectors[:, [0, 2]] ** 2).sum(axis=1) | ||||
|         dimensions = 2 | ||||
|     elif axis == "yz" or axis == "zy": | ||||
|         r = (vectors[:, [1, 2]] ** 2).sum(axis=1) | ||||
|         dimensions = 2 | ||||
|     elif axis == "x": | ||||
|         r = vectors[:, 0] ** 2 | ||||
|         dimensions = 1 | ||||
|   | ||||
| @@ -126,9 +126,6 @@ def rdf( | ||||
|     particles_in_volume = int( | ||||
|         np.max(number_of_neighbors(atoms_a, query_atoms=atoms_b, r_max=bins[-1])) * 1.1 | ||||
|     ) | ||||
|     print(atoms_a[:5]) | ||||
|     print(atoms_b[:5]) | ||||
|     print(" ") | ||||
|     distances, indices = next_neighbors( | ||||
|         atoms_a, | ||||
|         atoms_b, | ||||
| @@ -309,7 +306,7 @@ def next_neighbor_distribution( | ||||
|     )[1] | ||||
|     resname_nn = reference.residue_names[nn] | ||||
|     count_nn = (resname_nn == atoms.residue_names.reshape(-1, 1)).sum(axis=1) | ||||
|     return np.histogram(count_nn, bins=bins, normed=normed)[0] | ||||
|     return np.histogram(count_nn, bins=bins, density=normed)[0] | ||||
|  | ||||
|  | ||||
| def hbonds( | ||||
|   | ||||
| @@ -11,7 +11,7 @@ from mdevaluate.coordinates import CoordinateFrame, Coordinates | ||||
| from mdevaluate.pbc import pbc_points | ||||
|  | ||||
|  | ||||
| def a_ij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]: | ||||
| def calc_aij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]: | ||||
|     tree = KDTree(atoms) | ||||
|  | ||||
|     dist, indices = tree.query(atoms, N + 1) | ||||
| @@ -84,18 +84,18 @@ def count_ice_types(iceTypes: NDArray) -> NDArray: | ||||
|  | ||||
|  | ||||
| def selector_ice( | ||||
|     start_frame: CoordinateFrame, | ||||
|     traj: Coordinates, | ||||
|     oxygen_atoms_water: CoordinateFrame, | ||||
|     chosen_ice_types: ArrayLike, | ||||
|     combined: bool = True, | ||||
|     next_neighbor_distance: float = 0.35, | ||||
| ) -> NDArray: | ||||
|     oxygen = traj.subset(atom_name="OW") | ||||
|     atoms = oxygen[start_frame.step] | ||||
|     atoms = atoms % np.diag(atoms.box) | ||||
|     atoms_PBC = pbc_points(atoms, thickness=1) | ||||
|     aij, indices = a_ij(atoms_PBC) | ||||
|     atoms = oxygen_atoms_water | ||||
|     atoms_PBC = pbc_points(atoms, thickness=next_neighbor_distance * 2.2) | ||||
|     aij, indices = calc_aij(atoms_PBC) | ||||
|     tree = KDTree(atoms_PBC) | ||||
|     neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) | ||||
|     neighbors = tree.query_ball_point( | ||||
|         atoms_PBC, next_neighbor_distance, return_length=True | ||||
|     ) - 1 | ||||
|     index_SOL = atoms_PBC.tolist().index(atoms[0].tolist()) | ||||
|     index_SOL = np.arange(index_SOL, index_SOL + len(atoms)) | ||||
|     ice_Types = classify_ice(aij, indices, neighbors, index_SOL) | ||||
| @@ -117,9 +117,9 @@ def selector_ice( | ||||
| def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame: | ||||
|     def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray: | ||||
|         atoms_PBC = pbc_points(frame, thickness=1) | ||||
|         aij, indices = a_ij(atoms_PBC) | ||||
|         aij, indices = calc_aij(atoms_PBC) | ||||
|         tree = KDTree(atoms_PBC) | ||||
|         neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) | ||||
|         neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) - 1 | ||||
|         index = selector(frame, atoms_PBC) | ||||
|         ice_types_data = classify_ice(aij, indices, neighbors, index) | ||||
|         ice_parts_data = count_ice_types(ice_types_data) | ||||
| @@ -161,8 +161,8 @@ def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame: | ||||
| def ice_clusters_traj( | ||||
|     traj: Coordinates, segments: int = 10000, skip: float = 0.1 | ||||
| ) -> list: | ||||
|     def ice_clusters(frame: CoordinateFrame, traj: Coordinates) -> Tuple[float, list]: | ||||
|         selection = selector_ice(frame, traj, [0, 1, 2]) | ||||
|     def ice_clusters(frame: CoordinateFrame) -> Tuple[float, list]: | ||||
|         selection = selector_ice(frame, [0, 1, 2]) | ||||
|         if len(selection) == 0: | ||||
|             return frame.time, [] | ||||
|         else: | ||||
| @@ -194,6 +194,6 @@ def ice_clusters_traj( | ||||
|         np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments)) | ||||
|     ) | ||||
|     all_clusters = [ | ||||
|         ice_clusters(traj[frame_index], traj) for frame_index in frame_indices | ||||
|         ice_clusters(traj[frame_index]) for frame_index in frame_indices | ||||
|     ] | ||||
|     return all_clusters | ||||
|   | ||||
| @@ -1,9 +1,9 @@ | ||||
| from functools import partial | ||||
| from typing import Optional | ||||
|  | ||||
| import numpy as np | ||||
| from numpy.typing import ArrayLike, NDArray | ||||
| from numpy.polynomial.polynomial import Polynomial as Poly | ||||
| import math | ||||
| from scipy.spatial import KDTree | ||||
| import pandas as pd | ||||
| import multiprocessing as mp | ||||
| @@ -11,6 +11,56 @@ import multiprocessing as mp | ||||
| from ..coordinates import Coordinates | ||||
|  | ||||
|  | ||||
| def _pbc_points_reduced( | ||||
|     coordinates: ArrayLike, | ||||
|     pore_geometry: str, | ||||
|     box: Optional[NDArray] = None, | ||||
|     thickness: Optional[float] = None, | ||||
| ) -> tuple[NDArray, NDArray]: | ||||
|     if box is None: | ||||
|         box = coordinates.box | ||||
|     if pore_geometry == "cylindrical": | ||||
|         grid = np.array([[i, j, k] for k in [-1, 0, 1] for j in [0] for i in [0]]) | ||||
|         indices = np.tile(np.arange(len(coordinates)), 3) | ||||
|     elif pore_geometry == "slit": | ||||
|         grid = np.array( | ||||
|             [[i, j, k] for k in [0] for j in [1, 0, -1] for i in [-1, 0, 1]] | ||||
|         ) | ||||
|         indices = np.tile(np.arange(len(coordinates)), 9) | ||||
|     else: | ||||
|         raise ValueError( | ||||
|             f"pore_geometry is {pore_geometry}, should either be " | ||||
|             f"'cylindrical' or 'slit'" | ||||
|         ) | ||||
|     coordinates_pbc = np.concatenate([coordinates + v @ box for v in grid], axis=0) | ||||
|     size = np.diag(box) | ||||
|  | ||||
|     if thickness is not None: | ||||
|         mask = np.all(coordinates_pbc > -thickness, axis=1) | ||||
|         coordinates_pbc = coordinates_pbc[mask] | ||||
|         indices = indices[mask] | ||||
|         mask = np.all(coordinates_pbc < size + thickness, axis=1) | ||||
|         coordinates_pbc = coordinates_pbc[mask] | ||||
|         indices = indices[mask] | ||||
|  | ||||
|     return coordinates_pbc, indices | ||||
|  | ||||
|  | ||||
| def _build_tree(points, box, r_max, pore_geometry): | ||||
|     if np.all(np.diag(np.diag(box)) == box): | ||||
|         tree = KDTree(points % box, boxsize=box) | ||||
|         points_pbc_index = None | ||||
|     else: | ||||
|         points_pbc, points_pbc_index = _pbc_points_reduced( | ||||
|             points, | ||||
|             pore_geometry, | ||||
|             box, | ||||
|             thickness=r_max + 0.01, | ||||
|         ) | ||||
|         tree = KDTree(points_pbc) | ||||
|     return tree, points_pbc_index | ||||
|  | ||||
|  | ||||
| def occupation_matrix( | ||||
|     trajectory: Coordinates, | ||||
|     edge_length: float = 0.05, | ||||
| @@ -28,11 +78,7 @@ def occupation_matrix( | ||||
|     z_bins = np.arange(0, box[2][2] + edge_length, edge_length) | ||||
|     bins = [x_bins, y_bins, z_bins] | ||||
|     # Trajectory is split for parallel computing | ||||
|     size = math.ceil(len(frame_indices) / nodes) | ||||
|     indices = [ | ||||
|         np.arange(len(frame_indices))[i : i + size] | ||||
|         for i in range(0, len(frame_indices), size) | ||||
|     ] | ||||
|     indices = np.array_split(frame_indices, nodes) | ||||
|     pool = mp.Pool(nodes) | ||||
|     results = pool.map( | ||||
|         partial(_calc_histogram, trajectory=trajectory, bins=bins), indices | ||||
| @@ -72,19 +118,19 @@ def _calc_histogram( | ||||
|  | ||||
|  | ||||
| def find_maxima( | ||||
|     occupation_df: pd.DataFrame, box: ArrayLike, edge_length: float = 0.05 | ||||
|     occupation_df: pd.DataFrame, box: ArrayLike, radius: float, pore_geometry: str | ||||
| ) -> pd.DataFrame: | ||||
|     maxima_df = occupation_df.copy() | ||||
|     maxima_df["maxima"] = None | ||||
|     points = np.array(maxima_df[["x", "y", "z"]]) | ||||
|     tree = KDTree(points, boxsize=box) | ||||
|     all_neighbors = tree.query_ball_point( | ||||
|         points, edge_length * 3 ** (1 / 2) + edge_length / 100 | ||||
|     ) | ||||
|     tree, points_pbc_index = _build_tree(points, box, radius, pore_geometry) | ||||
|     for i in range(len(maxima_df)): | ||||
|         if maxima_df.loc[i, "maxima"] is not None: | ||||
|             continue | ||||
|         neighbors = np.array(all_neighbors[i]) | ||||
|         maxima_pos = maxima_df.loc[i, ["x", "y", "z"]] | ||||
|         neighbors = np.array(tree.query_ball_point(maxima_pos, radius)) | ||||
|         if points_pbc_index is not None: | ||||
|             neighbors = points_pbc_index[neighbors] | ||||
|         neighbors = neighbors[neighbors != i] | ||||
|         if len(neighbors) == 0: | ||||
|             maxima_df.loc[i, "maxima"] = True | ||||
| @@ -104,23 +150,39 @@ def _calc_energies( | ||||
|     maxima_df: pd.DataFrame, | ||||
|     bins: ArrayLike, | ||||
|     box: NDArray, | ||||
|     pore_geometry: str, | ||||
|     T: float, | ||||
|     nodes: int = 8, | ||||
| ) -> NDArray: | ||||
|     points = np.array(maxima_df[["x", "y", "z"]]) | ||||
|     tree = KDTree(points, boxsize=box) | ||||
|     tree, points_pbc_index = _build_tree(points, box, bins[-1], pore_geometry) | ||||
|     maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]] | ||||
|     maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"]) | ||||
|     num_of_neighbors = np.max( | ||||
|         tree.query_ball_point(maxima, bins[-1], return_length=True) | ||||
|     ) | ||||
|     distances, indices = tree.query( | ||||
|         maxima, k=num_of_neighbors, distance_upper_bound=bins[-1] | ||||
|     ) | ||||
|     split_maxima = [] | ||||
|     for i in range(0, len(maxima), 1000): | ||||
|         split_maxima.append(maxima[i : i + 1000]) | ||||
|  | ||||
|     distances = [] | ||||
|     indices = [] | ||||
|     for maxima in split_maxima: | ||||
|         distances_step, indices_step = tree.query( | ||||
|             maxima, k=num_of_neighbors, distance_upper_bound=bins[-1], workers=nodes | ||||
|         ) | ||||
|         distances.append(distances_step) | ||||
|         indices.append(indices_step) | ||||
|     distances = np.concatenate(distances) | ||||
|     indices = np.concatenate(indices) | ||||
|     all_energy_hist = [] | ||||
|     all_occupied_bins_hist = [] | ||||
|     if distances.ndim == 1: | ||||
|         current_distances = distances[1:][distances[1:] <= bins[-1]] | ||||
|         current_indices = indices[1:][distances[1:] <= bins[-1]] | ||||
|         if points_pbc_index is None: | ||||
|             current_indices = indices[1:][distances[1:] <= bins[-1]] | ||||
|         else: | ||||
|             current_indices = points_pbc_index[indices[1:][distances[1:] <= bins[-1]]] | ||||
|         energy = ( | ||||
|             -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations) | ||||
|             * T | ||||
| @@ -131,8 +193,12 @@ def _calc_energies( | ||||
|         return result | ||||
|     for i, maxima_occupation in enumerate(maxima_occupations): | ||||
|         current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]] | ||||
|         current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]] | ||||
|  | ||||
|         if points_pbc_index is None: | ||||
|             current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]] | ||||
|         else: | ||||
|             current_indices = points_pbc_index[ | ||||
|                 indices[i, 1:][distances[i, 1:] <= bins[-1]] | ||||
|             ] | ||||
|         energy = ( | ||||
|             -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation) | ||||
|             * T | ||||
| @@ -168,9 +234,12 @@ def distance_resolved_energies( | ||||
|     distance_bins: ArrayLike, | ||||
|     r_bins: ArrayLike, | ||||
|     box: NDArray, | ||||
|     pore_geometry: str, | ||||
|     temperature: float, | ||||
|     nodes: int = 8, | ||||
| ) -> pd.DataFrame: | ||||
|     results = [] | ||||
|     distances = [] | ||||
|     for i in range(len(distance_bins) - 1): | ||||
|         maxima_indices = np.array( | ||||
|             maxima_df.index[ | ||||
| @@ -179,11 +248,22 @@ def distance_resolved_energies( | ||||
|                 * (maxima_df["maxima"] == True) | ||||
|             ] | ||||
|         ) | ||||
|         results.append( | ||||
|             _calc_energies(maxima_indices, maxima_df, r_bins, box, temperature) | ||||
|         ) | ||||
|         try: | ||||
|             results.append( | ||||
|                 _calc_energies( | ||||
|                     maxima_indices, | ||||
|                     maxima_df, | ||||
|                     r_bins, | ||||
|                     box, | ||||
|                     pore_geometry, | ||||
|                     temperature, | ||||
|                     nodes, | ||||
|                 ) | ||||
|             ) | ||||
|             distances.append((distance_bins[i] + distance_bins[i + 1]) / 2) | ||||
|         except ValueError: | ||||
|             pass | ||||
|  | ||||
|     distances = (distance_bins[:-1] + distance_bins[1:]) / 2 | ||||
|     radii = (r_bins[:-1] + r_bins[1:]) / 2 | ||||
|     d = np.array([d for d in distances for r in radii]) | ||||
|     r = np.array([r for d in distances for r in radii]) | ||||
| @@ -192,7 +272,11 @@ def distance_resolved_energies( | ||||
|  | ||||
|  | ||||
| def find_energy_maxima( | ||||
|     energy_df: pd.DataFrame, r_min: float, r_max: float | ||||
|     energy_df: pd.DataFrame, | ||||
|     r_min: float, | ||||
|     r_max: float, | ||||
|     r_eval: float = None, | ||||
|     degree: int = 2, | ||||
| ) -> pd.DataFrame: | ||||
|     distances = [] | ||||
|     energies = [] | ||||
| @@ -201,6 +285,9 @@ def find_energy_maxima( | ||||
|         x = np.array(data_d["r"]) | ||||
|         y = np.array(data_d["energy"]) | ||||
|         mask = (x >= r_min) * (x <= r_max) | ||||
|         p3 = Poly.fit(x[mask], y[mask], deg=2) | ||||
|         energies.append(np.max(p3(np.linspace(r_min, r_max, 1000)))) | ||||
|         p3 = Poly.fit(x[mask], y[mask], deg=degree) | ||||
|         if r_eval is None: | ||||
|             energies.append(np.max(p3(np.linspace(r_min, r_max, 1000)))) | ||||
|         else: | ||||
|             energies.append(p3(r_eval)) | ||||
|     return pd.DataFrame({"d": distances, "energy": energies}) | ||||
|   | ||||
| @@ -265,7 +265,7 @@ def LSI( | ||||
|     ) | ||||
|     distributions = np.array( | ||||
|         [ | ||||
|             LSI_distribution(trajectory[frame_index], trajectory, bins, selector=None) | ||||
|             LSI_distribution(trajectory[frame_index], bins, selector=None) | ||||
|             for frame_index in frame_indices | ||||
|         ] | ||||
|     ) | ||||
|   | ||||
| @@ -152,7 +152,7 @@ def nojump_load_filename(reader: BaseReader): | ||||
|         ) | ||||
|         if os.path.exists(full_path_fallback): | ||||
|             return full_path_fallback | ||||
|     if os.path.exists(fname) or is_writeable(directory): | ||||
|     if os.path.exists(full_path) or is_writeable(directory): | ||||
|         return full_path | ||||
|     else: | ||||
|         user_data_dir = os.path.join("/data/", os.environ["HOME"].split("/")[-1]) | ||||
|   | ||||
| @@ -177,7 +177,7 @@ def coherent_sum( | ||||
|     func: Callable[[ArrayLike, ArrayLike], float], | ||||
|     coord_a: ArrayLike, | ||||
|     coord_b: ArrayLike, | ||||
| ) -> float: | ||||
| ) -> NDArray: | ||||
|     """ | ||||
|     Perform a coherent sum over two arrays :math:`A, B`. | ||||
|  | ||||
|   | ||||
							
								
								
									
										57
									
								
								test/test_correlation.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								test/test_correlation.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| import os | ||||
| import pytest | ||||
|  | ||||
| import mdevaluate | ||||
| from mdevaluate import correlation | ||||
| import numpy as np | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def trajectory(request): | ||||
|     return mdevaluate.open(os.path.join(os.path.dirname(__file__), "data/water")) | ||||
|  | ||||
|  | ||||
| def test_shifted_correlation(trajectory): | ||||
|     test_array = np.array([100, 82, 65, 49, 39, 29, 20, 13, 7]) | ||||
|     OW = trajectory.subset(atom_name="OW") | ||||
|     t, result = correlation.shifted_correlation( | ||||
|         correlation.isf, OW, segments=10, skip=0.1, points=10 | ||||
|     ) | ||||
|     assert (np.array(result * 100, dtype=int) == test_array).all() | ||||
|  | ||||
|  | ||||
| def test_shifted_correlation_no_average(trajectory): | ||||
|     t, result = correlation.shifted_correlation( | ||||
|         correlation.isf, trajectory, segments=10, skip=0.1, points=5, average=False | ||||
|     ) | ||||
|     assert result.shape == (10, 5) | ||||
|  | ||||
|  | ||||
| def test_shifted_correlation_selector(trajectory): | ||||
|     test_array = np.array([100, 82, 64, 48, 37, 28, 19, 11, 5]) | ||||
|  | ||||
|     def selector(frame): | ||||
|         index = np.argwhere((frame[:, 0] >= 0) * (frame[:, 0] < 1)) | ||||
|         return index.flatten() | ||||
|  | ||||
|     OW = trajectory.subset(atom_name="OW") | ||||
|     t, result = correlation.shifted_correlation( | ||||
|         correlation.isf, OW, segments=10, skip=0.1, points=10, selector=selector | ||||
|     ) | ||||
|     assert (np.array(result * 100, dtype=int) == test_array).all() | ||||
|  | ||||
|  | ||||
| def test_shifted_correlation_multi_selector(trajectory): | ||||
|     def selector(frame): | ||||
|         indices = [] | ||||
|         for i in range(3): | ||||
|             x = frame[:, 0].flatten() | ||||
|             index = np.argwhere((x >= i) * (x < i + 1)) | ||||
|             indices.append(index.flatten()) | ||||
|         return indices | ||||
|  | ||||
|     OW = trajectory.subset(atom_name="OW") | ||||
|     t, result = correlation.shifted_correlation( | ||||
|         correlation.isf, OW, segments=10, skip=0.1, points=10, selector=selector | ||||
|     ) | ||||
|     assert result.shape == (3, 9) | ||||
| @@ -13,42 +13,24 @@ def trajectory(request): | ||||
|  | ||||
|  | ||||
| def test_get_fel(trajectory): | ||||
|     test_array = np.array( | ||||
|         [ | ||||
|             174.46253634, | ||||
|             174.60905476, | ||||
|             178.57658092, | ||||
|             182.43001192, | ||||
|             180.57916378, | ||||
|             176.49886217, | ||||
|             178.96018547, | ||||
|             181.13561782, | ||||
|             178.31026314, | ||||
|             176.08903996, | ||||
|             180.71215345, | ||||
|             181.59703135, | ||||
|             180.34329368, | ||||
|             187.02474488, | ||||
|             197.99167477, | ||||
|             214.05788031, | ||||
|             245.58571282, | ||||
|             287.52457507, | ||||
|             331.53492965, | ||||
|         ] | ||||
|     ) | ||||
|     test_array = np.array([210., 214., 209., 192., 200., 193., 230., 218., 266.]) | ||||
|  | ||||
|     OW = trajectory.subset(atom_name="OW") | ||||
|  | ||||
|     box = np.diag(trajectory[0].box) | ||||
|     box_voxels = (box // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05] | ||||
|     occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=1000) | ||||
|     maxima_matrix = fel.find_maxima(occupation_matrix, box=box_voxels, edge_length=0.05) | ||||
|     maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", box / 2) | ||||
|     r_bins = np.arange(0, 2, 0.02) | ||||
|     distance_bins = np.arange(0.05, 2.05, 0.1) | ||||
|     box = trajectory[0].box | ||||
|     box_voxels = (np.diag(box) // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05] | ||||
|     occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=10) | ||||
|     radius_maxima = 0.05 * 3 ** (1 / 2) + 0.05 / 100 | ||||
|     maxima_matrix = fel.find_maxima( | ||||
|         occupation_matrix, | ||||
|         box=box_voxels, | ||||
|         radius=radius_maxima, | ||||
|         pore_geometry="cylindrical", | ||||
|     ) | ||||
|     maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", np.diag(box) / 2) | ||||
|     r_bins = np.arange(0, 0.5, 0.02) | ||||
|     distance_bins = np.arange(1.8, 1.9, 0.01) | ||||
|     energy_df = fel.distance_resolved_energies( | ||||
|         maxima_matrix, distance_bins, r_bins, box, 225 | ||||
|         maxima_matrix, distance_bins, r_bins, box, "cylindrical", 225 | ||||
|     ) | ||||
|     result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15) | ||||
|  | ||||
|     assert (np.round(np.array(result["energy"])) == np.round(test_array)).all() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user