Files
mdevaluate/src/mdevaluate/distribution.py

441 lines
15 KiB
Python

from typing import Callable, Optional, Union, Tuple, List
import numpy as np
from numpy.typing import ArrayLike, NDArray
from scipy import spatial
from scipy.spatial import KDTree
from scipy.sparse.csgraph import connected_components
from .coordinates import (
rotate_axis,
polar_coordinates,
Coordinates,
CoordinateFrame,
next_neighbors,
number_of_neighbors,
)
from .autosave import autosave_data
from .pbc import pbc_diff, pbc_points
@autosave_data(nargs=2, kwargs_keys=("coordinates_b",))
def time_average(
function: Callable,
coordinates: Coordinates,
coordinates_b: Optional[Coordinates] = None,
skip: float = 0.1,
segments: int = 100,
) -> NDArray:
"""
Compute the time average of a function.
Args:
function:
The function that will be averaged, it has to accept exactly one argument
which is the current atom set (or two if coordinates_b is provided)
coordinates: The coordinates object of the simulation
coordinates_b: Additional coordinates object of the simulation
skip:
segments:
"""
frame_indices = np.unique(
np.int_(
np.linspace(len(coordinates) * skip, len(coordinates) - 1, num=segments)
)
)
if coordinates_b is None:
result = [function(coordinates[frame_index]) for frame_index in frame_indices]
else:
result = [
function(coordinates[frame_index], coordinates_b[frame_index])
for frame_index in frame_indices
]
return np.mean(result, axis=0)
@autosave_data(nargs=2, kwargs_keys=("coordinates_b",))
def time_distribution(
function: Callable,
coordinates: Coordinates,
coordinates_b: Optional[Coordinates] = None,
skip: float = 0,
segments: int = 100,
) -> Tuple[NDArray, List]:
"""
Compute the time distribution of a function.
Args:
function:
The function that will be averaged, it has to accept exactly one argument
which is the current atom set (or two if coordinates_b is provided)
coordinates: The coordinates object of the simulation
coordinates_b: Additional coordinates object of the simulation
skip:
segments:
"""
frame_indices = np.unique(
np.int_(
np.linspace(len(coordinates) * skip, len(coordinates) - 1, num=segments)
)
)
times = np.array([coordinates[frame_index].time for frame_index in frame_indices])
if coordinates_b is None:
result = [function(coordinates[frame_index]) for frame_index in frame_indices]
else:
result = [
function(coordinates[frame_index], coordinates_b[frame_index])
for frame_index in frame_indices
]
return times, result
def rdf(
atoms_a: CoordinateFrame,
atoms_b: Optional[CoordinateFrame] = None,
bins: Optional[ArrayLike] = None,
remove_intra: bool = False,
**kwargs
) -> NDArray:
r"""
Compute the radial pair distribution of one or two sets of atoms.
.. math::
g_{AB}(r) = \frac{1}{\langle \rho_B\rangle N_A}\sum\limits_{i\in A}^{N_A}
\sum\limits_{j\in B}^{N_B}\frac{\delta(r_{ij} -r)}{4\pi r^2}
For use with :func:`time_average`, define bins through the use of
:func:`~functools.partial`, the atom sets are passed to :func:`time_average`, if a
second set of atoms should be used specify it as ``coordinates_b`` and it will be
passed to this function.
Args:
atoms_a: First set of atoms, used internally
atoms_b (opt.): Second set of atoms, used internal
bins: Bins of the radial distribution function
remove_intra: removes contributions from intra molecular pairs
"""
distinct = True
if atoms_b is None:
atoms_b = atoms_a
distinct = False
elif np.array_equal(atoms_a, atoms_b):
distinct = False
if bins is None:
bins = np.arange(0, 1, 0.01)
particles_in_volume = int(
np.max(number_of_neighbors(atoms_a, query_atoms=atoms_b, r_max=bins[-1])) * 1.1
)
distances, indices = next_neighbors(
atoms_a,
atoms_b,
number_of_neighbors=particles_in_volume,
distance_upper_bound=bins[-1],
distinct=distinct,
**kwargs
)
if remove_intra:
new_distances = []
for entry in list(zip(atoms_a.residue_ids, distances, indices)):
mask = entry[1] < np.inf
new_distances.append(
entry[1][mask][atoms_b.residue_ids[entry[2][mask]] != entry[0]]
)
distances = np.concatenate(new_distances)
else:
distances = [d for dist in distances for d in dist]
hist, bins = np.histogram(distances, bins=bins, range=(0, bins[-1]), density=False)
hist = hist / len(atoms_b)
hist = hist / (4 / 3 * np.pi * bins[1:] ** 3 - 4 / 3 * np.pi * bins[:-1] ** 3)
n = len(atoms_a) / np.prod(np.diag(atoms_a.box))
hist = hist / n
return hist
def distance_distribution(
atoms: CoordinateFrame, bins: Union[int, ArrayLike]
) -> NDArray:
connection_vectors = atoms[:-1, :] - atoms[1:, :]
connection_lengths = (connection_vectors**2).sum(axis=1) ** 0.5
return np.histogram(connection_lengths, bins)[0]
def tetrahedral_order(
atoms: CoordinateFrame, reference_atoms: CoordinateFrame = None
) -> NDArray:
if reference_atoms is None:
reference_atoms = atoms
indices = next_neighbors(
reference_atoms,
query_atoms=atoms,
number_of_neighbors=4,
)[1]
neighbors = reference_atoms[indices]
neighbors_1, neighbors_2, neighbors_3, neighbors_4 = (
neighbors[:, 0, :],
neighbors[:, 1, :],
neighbors[:, 2, :],
neighbors[:, 3, :],
)
# Connection vectors
neighbors_1 = pbc_diff(neighbors_1, atoms, box=atoms.box)
neighbors_2 = pbc_diff(neighbors_2, atoms, box=atoms.box)
neighbors_3 = pbc_diff(neighbors_3, atoms, box=atoms.box)
neighbors_4 = pbc_diff(neighbors_4, atoms, box=atoms.box)
# Normed Connection vectors
neighbors_1 /= np.linalg.norm(neighbors_1, axis=-1).reshape(-1, 1)
neighbors_2 /= np.linalg.norm(neighbors_2, axis=-1).reshape(-1, 1)
neighbors_3 /= np.linalg.norm(neighbors_3, axis=-1).reshape(-1, 1)
neighbors_4 /= np.linalg.norm(neighbors_4, axis=-1).reshape(-1, 1)
a_1_2 = ((neighbors_1 * neighbors_2).sum(axis=1) + 1 / 3) ** 2
a_1_3 = ((neighbors_1 * neighbors_3).sum(axis=1) + 1 / 3) ** 2
a_1_4 = ((neighbors_1 * neighbors_4).sum(axis=1) + 1 / 3) ** 2
a_2_3 = ((neighbors_2 * neighbors_3).sum(axis=1) + 1 / 3) ** 2
a_2_4 = ((neighbors_2 * neighbors_4).sum(axis=1) + 1 / 3) ** 2
a_3_4 = ((neighbors_3 * neighbors_4).sum(axis=1) + 1 / 3) ** 2
q = 1 - 3 / 8 * (a_1_2 + a_1_3 + a_1_4 + a_2_3 + a_2_4 + a_3_4)
return q
def tetrahedral_order_distribution(
atoms: CoordinateFrame,
reference_atoms: Optional[CoordinateFrame] = None,
bins: Optional[ArrayLike] = None,
) -> NDArray:
assert bins is not None, "Bin edges of the distribution have to be specified."
Q = tetrahedral_order(atoms, reference_atoms=reference_atoms)
return np.histogram(Q, bins=bins)[0]
def radial_density(
atoms: CoordinateFrame,
bins: Optional[ArrayLike] = None,
symmetry_axis: ArrayLike = (0, 0, 1),
origin: Optional[ArrayLike] = None,
) -> NDArray:
"""
Calculate the radial density distribution.
This function is meant to be used with time_average.
Args:
atoms:
Set of coordinates.
bins (opt.):
Bin specification that is passed to numpy.histogram. This needs to be
a list of bin edges if the function is used within time_average.
symmetry_axis (opt.):
Vector of the symmetry axis, around which the radial density is calculated,
default is z-axis.
origin (opt.):
Origin of the rotational symmetry, e.g. center of the pore.
"""
if origin is None:
origin = np.diag(atoms.box) / 2
if bins is None:
bins = np.arange(0, np.min(np.diag(atoms.box) / 2), 0.01)
length = np.diag(atoms.box) * symmetry_axis
cartesian = rotate_axis(atoms - origin, symmetry_axis)
radius, _ = polar_coordinates(cartesian[:, 0], cartesian[:, 1])
hist = np.histogram(radius, bins=bins)[0]
volume = np.pi * (bins[1:] ** 2 - bins[:-1] ** 2) * length
return hist / volume
def shell_density(
atoms: CoordinateFrame,
shell_radius: float,
bins: ArrayLike,
shell_thickness: float = 0.5,
symmetry_axis: ArrayLike = (0, 0, 1),
origin: Optional[ArrayLike] = None,
) -> NDArray:
"""
Compute the density distribution on a cylindrical shell.
Args:
atoms: The coordinates of the atoms
shell_radius: Inner radius of the shell
bins: Histogramm bins, this has to be a two-dimensional list of bins: [angle, z]
shell_thickness (opt.): Thicknes of the shell, default is 0.5
symmetry_axis (opt.): The symmtery axis of the pore, the coordinates will be
rotated such that this axis is the z-axis
origin (opt.): Origin of the pore, the coordinates will be moved such that this
is the new origin.
Returns:
Two-dimensional density distribution of the atoms in the defined shell.
"""
if origin is None:
origin = np.diag(atoms.box) / 2
cartesian = rotate_axis(atoms - origin, symmetry_axis)
radius, theta = polar_coordinates(cartesian[:, 0], cartesian[:, 1])
shell_indices = (shell_radius <= radius) & (
radius <= shell_radius + shell_thickness
)
hist = np.histogram2d(theta[shell_indices], cartesian[shell_indices, 2], bins)[0]
return hist
def next_neighbor_distribution(
atoms: CoordinateFrame,
reference: Optional[CoordinateFrame] = None,
number_of_neighbors: int = 4,
bins: Optional[ArrayLike] = None,
normed: bool = True,
) -> NDArray:
"""
Compute the distribution of next neighbors with the same residue name.
"""
assert bins is not None, "Bins have to be specified."
if reference is None:
reference = atoms
nn = next_neighbors(
reference, query_atoms=atoms, number_of_neighbors=number_of_neighbors
)[1]
resname_nn = reference.residue_names[nn]
count_nn = (resname_nn == atoms.residue_names.reshape(-1, 1)).sum(axis=1)
return np.histogram(count_nn, bins=bins, density=normed)[0]
def hbonds(
atoms: CoordinateFrame,
donator_indices: ArrayLike,
hydrogen_indices: ArrayLike,
acceptor_indices: ArrayLike,
DA_lim: float = 0.35,
HA_lim: float = 0.35,
max_angle_deg: float = 30,
full_output: bool = False,
) -> Union[NDArray, tuple[NDArray, NDArray, NDArray]]:
"""
Compute h-bond pairs
Args:
atoms: Set of all coordinates for a frame.
donator_indices: Set of indices for donators.
hydrogen_indices: Set of indices for hydrogen atoms. Should have the same
length as D.
acceptor_indices: Set of indices for acceptors.
DA_lim (opt.): Minimum distance between donator and acceptor.
HA_lim (opt.): Minimum distance between hydrogen and acceptor.
max_angle_deg (opt.): Maximum angle in degree for the HDA angle.
full_output (opt.): Returns additionally the cosine of the
angles and the DA distances
Return:
List of (D,A)-pairs in hbonds.
"""
def dist_DltA(
D: CoordinateFrame, A: CoordinateFrame, max_dist: float = 0.35
) -> NDArray:
ppoints, pind = pbc_points(D, thickness=max_dist + 0.1, index=True)
Dtree = spatial.cKDTree(ppoints)
Atree = spatial.cKDTree(A)
pairs = Dtree.sparse_distance_matrix(Atree, max_dist, output_type="ndarray")
pairs = np.asarray(pairs.tolist())
pairs = np.int_(pairs[pairs[:, 2] > 0][:, :2])
pairs[:, 0] = pind[pairs[:, 0]]
return pairs
def dist_AltD(
D: CoordinateFrame, A: CoordinateFrame, max_dist: float = 0.35
) -> NDArray:
ppoints, pind = pbc_points(A, thickness=max_dist + 0.1, index=True)
Atree = spatial.cKDTree(ppoints)
Dtree = spatial.cKDTree(D)
pairs = Atree.sparse_distance_matrix(Dtree, max_dist, output_type="ndarray")
pairs = np.asarray(pairs.tolist())
pairs = np.int_(pairs[pairs[:, 2] > 0][:, :2])
pairs = pairs[:, ::-1]
pairs[:, 1] = pind[pairs[:, 1]]
return pairs
D = atoms[donator_indices]
H = atoms[hydrogen_indices]
A = atoms[acceptor_indices]
min_cos = np.cos(max_angle_deg * np.pi / 180)
box = D.box
if len(D) <= len(A):
pairs = dist_DltA(D, A, DA_lim)
else:
pairs = dist_AltD(D, A, DA_lim)
vDH = pbc_diff(D[pairs[:, 0]], H[pairs[:, 0]], box)
vDA = pbc_diff(D[pairs[:, 0]], A[pairs[:, 1]], box)
vHA = pbc_diff(H[pairs[:, 0]], A[pairs[:, 1]], box)
angles_cos = np.clip(
np.einsum("ij,ij->i", vDH, vDA)
/ np.linalg.norm(vDH, axis=1)
/ np.linalg.norm(vDA, axis=1),
-1,
1,
)
is_bond = (
(angles_cos >= min_cos)
& (np.sum(vHA**2, axis=-1) <= HA_lim**2)
& (np.sum(vDA**2, axis=-1) <= DA_lim**2)
)
if full_output:
return (
pairs[is_bond],
angles_cos[is_bond],
np.sum(vDA[is_bond] ** 2, axis=-1) ** 0.5,
)
else:
return pairs[is_bond]
def calc_cluster_sizes(atoms: CoordinateFrame, r_max: float = 0.35) -> NDArray:
frame_PBC, indices_PBC = pbc_points(atoms, thickness=r_max + 0.1, index=True)
tree = KDTree(frame_PBC)
matrix = tree.sparse_distance_matrix(tree, r_max, output_type="ndarray")
new_matrix = np.zeros((len(atoms), len(atoms)))
for entry in matrix:
if entry[2] > 0:
new_matrix[indices_PBC[entry[0]], indices_PBC[entry[1]]] = 1
n_components, labels = connected_components(new_matrix, directed=False)
cluster_sizes = []
for i in range(0, np.max(labels) + 1):
cluster_sizes.append(np.sum(labels == i))
return np.array(cluster_sizes).flatten()
def gyration_radius(position: CoordinateFrame) -> NDArray:
r"""
Calculates a list of all radii of gyration of all molecules given in the coordinate
frame, weighted with the masses of the individual atoms.
Args:
position: Coordinate frame object
..math::
R_G = \left(\frac{\sum_{i=1}^{n} m_i |\vec{r_i}
- \vec{r_{COM}}|^2 }{\sum_{i=1}^{n} m_i }
\rigth)^{\frac{1}{2}}
"""
gyration_radii = np.array([])
for resid in np.unique(position.residue_ids):
pos = position.whole[position.residue_ids == resid]
mass = position.masses[position.residue_ids == resid][:, np.newaxis]
COM = 1 / mass.sum() * (mass * position).sum(axis=0)
r_sq = ((pbc_diff(pos, COM, pos.box.diagonal())) ** 2).sum(1)[:, np.newaxis]
g_radius = ((r_sq * mass).sum() / mass.sum()) ** 0.5
gyration_radii = np.append(gyration_radii, g_radius)
return gyration_radii