Compare commits

...

14 Commits

5 changed files with 145 additions and 76 deletions

View File

@ -218,7 +218,7 @@ class Coordinates:
self.get_frame.clear_cache() self.get_frame.clear_cache()
def __iter__(self): def __iter__(self):
for i in range(len(self))[self._slice]: for i in range(len(self.frames))[self._slice]:
yield self[i] yield self[i]
@singledispatchmethod @singledispatchmethod
@ -232,7 +232,7 @@ class Coordinates:
return sliced return sliced
def __len__(self): def __len__(self):
return len(self.frames) return len(self.frames[self._slice])
def __checksum__(self): def __checksum__(self):
return checksum(self.frames, self.atom_filter, self._slice, self.mode) return checksum(self.frames, self.atom_filter, self._slice, self.mode)

View File

@ -11,7 +11,7 @@ from mdevaluate.coordinates import CoordinateFrame, Coordinates
from mdevaluate.pbc import pbc_points from mdevaluate.pbc import pbc_points
def a_ij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]: def calc_aij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]:
tree = KDTree(atoms) tree = KDTree(atoms)
dist, indices = tree.query(atoms, N + 1) dist, indices = tree.query(atoms, N + 1)
@ -84,18 +84,18 @@ def count_ice_types(iceTypes: NDArray) -> NDArray:
def selector_ice( def selector_ice(
start_frame: CoordinateFrame, oxygen_atoms_water: CoordinateFrame,
traj: Coordinates,
chosen_ice_types: ArrayLike, chosen_ice_types: ArrayLike,
combined: bool = True, combined: bool = True,
next_neighbor_distance: float = 0.35,
) -> NDArray: ) -> NDArray:
oxygen = traj.subset(atom_name="OW") atoms = oxygen_atoms_water
atoms = oxygen[start_frame.step] atoms_PBC = pbc_points(atoms, thickness=next_neighbor_distance * 2.2)
atoms = atoms % np.diag(atoms.box) aij, indices = calc_aij(atoms_PBC)
atoms_PBC = pbc_points(atoms, thickness=1)
aij, indices = a_ij(atoms_PBC)
tree = KDTree(atoms_PBC) tree = KDTree(atoms_PBC)
neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) neighbors = tree.query_ball_point(
atoms_PBC, next_neighbor_distance, return_length=True
) - 1
index_SOL = atoms_PBC.tolist().index(atoms[0].tolist()) index_SOL = atoms_PBC.tolist().index(atoms[0].tolist())
index_SOL = np.arange(index_SOL, index_SOL + len(atoms)) index_SOL = np.arange(index_SOL, index_SOL + len(atoms))
ice_Types = classify_ice(aij, indices, neighbors, index_SOL) ice_Types = classify_ice(aij, indices, neighbors, index_SOL)
@ -117,9 +117,9 @@ def selector_ice(
def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame: def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray: def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray:
atoms_PBC = pbc_points(frame, thickness=1) atoms_PBC = pbc_points(frame, thickness=1)
aij, indices = a_ij(atoms_PBC) aij, indices = calc_aij(atoms_PBC)
tree = KDTree(atoms_PBC) tree = KDTree(atoms_PBC)
neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) - 1
index = selector(frame, atoms_PBC) index = selector(frame, atoms_PBC)
ice_types_data = classify_ice(aij, indices, neighbors, index) ice_types_data = classify_ice(aij, indices, neighbors, index)
ice_parts_data = count_ice_types(ice_types_data) ice_parts_data = count_ice_types(ice_types_data)
@ -161,8 +161,8 @@ def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
def ice_clusters_traj( def ice_clusters_traj(
traj: Coordinates, segments: int = 10000, skip: float = 0.1 traj: Coordinates, segments: int = 10000, skip: float = 0.1
) -> list: ) -> list:
def ice_clusters(frame: CoordinateFrame, traj: Coordinates) -> Tuple[float, list]: def ice_clusters(frame: CoordinateFrame) -> Tuple[float, list]:
selection = selector_ice(frame, traj, [0, 1, 2]) selection = selector_ice(frame, [0, 1, 2])
if len(selection) == 0: if len(selection) == 0:
return frame.time, [] return frame.time, []
else: else:
@ -194,6 +194,6 @@ def ice_clusters_traj(
np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments)) np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments))
) )
all_clusters = [ all_clusters = [
ice_clusters(traj[frame_index], traj) for frame_index in frame_indices ice_clusters(traj[frame_index]) for frame_index in frame_indices
] ]
return all_clusters return all_clusters

View File

@ -1,9 +1,9 @@
from functools import partial from functools import partial
from typing import Optional
import numpy as np import numpy as np
from numpy.typing import ArrayLike, NDArray from numpy.typing import ArrayLike, NDArray
from numpy.polynomial.polynomial import Polynomial as Poly from numpy.polynomial.polynomial import Polynomial as Poly
import math
from scipy.spatial import KDTree from scipy.spatial import KDTree
import pandas as pd import pandas as pd
import multiprocessing as mp import multiprocessing as mp
@ -11,6 +11,56 @@ import multiprocessing as mp
from ..coordinates import Coordinates from ..coordinates import Coordinates
def _pbc_points_reduced(
coordinates: ArrayLike,
pore_geometry: str,
box: Optional[NDArray] = None,
thickness: Optional[float] = None,
) -> tuple[NDArray, NDArray]:
if box is None:
box = coordinates.box
if pore_geometry == "cylindrical":
grid = np.array([[i, j, k] for k in [-1, 0, 1] for j in [0] for i in [0]])
indices = np.tile(np.arange(len(coordinates)), 3)
elif pore_geometry == "slit":
grid = np.array(
[[i, j, k] for k in [0] for j in [1, 0, -1] for i in [-1, 0, 1]]
)
indices = np.tile(np.arange(len(coordinates)), 9)
else:
raise ValueError(
f"pore_geometry is {pore_geometry}, should either be "
f"'cylindrical' or 'slit'"
)
coordinates_pbc = np.concatenate([coordinates + v @ box for v in grid], axis=0)
size = np.diag(box)
if thickness is not None:
mask = np.all(coordinates_pbc > -thickness, axis=1)
coordinates_pbc = coordinates_pbc[mask]
indices = indices[mask]
mask = np.all(coordinates_pbc < size + thickness, axis=1)
coordinates_pbc = coordinates_pbc[mask]
indices = indices[mask]
return coordinates_pbc, indices
def _build_tree(points, box, r_max, pore_geometry):
if np.all(np.diag(np.diag(box)) == box):
tree = KDTree(points % box, boxsize=box)
points_pbc_index = None
else:
points_pbc, points_pbc_index = _pbc_points_reduced(
points,
pore_geometry,
box,
thickness=r_max + 0.01,
)
tree = KDTree(points_pbc)
return tree, points_pbc_index
def occupation_matrix( def occupation_matrix(
trajectory: Coordinates, trajectory: Coordinates,
edge_length: float = 0.05, edge_length: float = 0.05,
@ -28,11 +78,7 @@ def occupation_matrix(
z_bins = np.arange(0, box[2][2] + edge_length, edge_length) z_bins = np.arange(0, box[2][2] + edge_length, edge_length)
bins = [x_bins, y_bins, z_bins] bins = [x_bins, y_bins, z_bins]
# Trajectory is split for parallel computing # Trajectory is split for parallel computing
size = math.ceil(len(frame_indices) / nodes) indices = np.array_split(frame_indices, nodes)
indices = [
np.arange(len(frame_indices))[i : i + size]
for i in range(0, len(frame_indices), size)
]
pool = mp.Pool(nodes) pool = mp.Pool(nodes)
results = pool.map( results = pool.map(
partial(_calc_histogram, trajectory=trajectory, bins=bins), indices partial(_calc_histogram, trajectory=trajectory, bins=bins), indices
@ -72,19 +118,19 @@ def _calc_histogram(
def find_maxima( def find_maxima(
occupation_df: pd.DataFrame, box: ArrayLike, edge_length: float = 0.05 occupation_df: pd.DataFrame, box: ArrayLike, radius: float, pore_geometry: str
) -> pd.DataFrame: ) -> pd.DataFrame:
maxima_df = occupation_df.copy() maxima_df = occupation_df.copy()
maxima_df["maxima"] = None maxima_df["maxima"] = None
points = np.array(maxima_df[["x", "y", "z"]]) points = np.array(maxima_df[["x", "y", "z"]])
tree = KDTree(points, boxsize=box) tree, points_pbc_index = _build_tree(points, box, radius, pore_geometry)
all_neighbors = tree.query_ball_point(
points, edge_length * 3 ** (1 / 2) + edge_length / 100
)
for i in range(len(maxima_df)): for i in range(len(maxima_df)):
if maxima_df.loc[i, "maxima"] is not None: if maxima_df.loc[i, "maxima"] is not None:
continue continue
neighbors = np.array(all_neighbors[i]) maxima_pos = maxima_df.loc[i, ["x", "y", "z"]]
neighbors = np.array(tree.query_ball_point(maxima_pos, radius))
if points_pbc_index is not None:
neighbors = points_pbc_index[neighbors]
neighbors = neighbors[neighbors != i] neighbors = neighbors[neighbors != i]
if len(neighbors) == 0: if len(neighbors) == 0:
maxima_df.loc[i, "maxima"] = True maxima_df.loc[i, "maxima"] = True
@ -104,23 +150,39 @@ def _calc_energies(
maxima_df: pd.DataFrame, maxima_df: pd.DataFrame,
bins: ArrayLike, bins: ArrayLike,
box: NDArray, box: NDArray,
pore_geometry: str,
T: float, T: float,
nodes: int = 8,
) -> NDArray: ) -> NDArray:
points = np.array(maxima_df[["x", "y", "z"]]) points = np.array(maxima_df[["x", "y", "z"]])
tree = KDTree(points, boxsize=box) tree, points_pbc_index = _build_tree(points, box, bins[-1], pore_geometry)
maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]] maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]]
maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"]) maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"])
num_of_neighbors = np.max( num_of_neighbors = np.max(
tree.query_ball_point(maxima, bins[-1], return_length=True) tree.query_ball_point(maxima, bins[-1], return_length=True)
) )
distances, indices = tree.query( split_maxima = []
maxima, k=num_of_neighbors, distance_upper_bound=bins[-1] for i in range(0, len(maxima), 1000):
) split_maxima.append(maxima[i : i + 1000])
distances = []
indices = []
for maxima in split_maxima:
distances_step, indices_step = tree.query(
maxima, k=num_of_neighbors, distance_upper_bound=bins[-1], workers=nodes
)
distances.append(distances_step)
indices.append(indices_step)
distances = np.concatenate(distances)
indices = np.concatenate(indices)
all_energy_hist = [] all_energy_hist = []
all_occupied_bins_hist = [] all_occupied_bins_hist = []
if distances.ndim == 1: if distances.ndim == 1:
current_distances = distances[1:][distances[1:] <= bins[-1]] current_distances = distances[1:][distances[1:] <= bins[-1]]
current_indices = indices[1:][distances[1:] <= bins[-1]] if points_pbc_index is None:
current_indices = indices[1:][distances[1:] <= bins[-1]]
else:
current_indices = points_pbc_index[indices[1:][distances[1:] <= bins[-1]]]
energy = ( energy = (
-np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations) -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations)
* T * T
@ -131,8 +193,12 @@ def _calc_energies(
return result return result
for i, maxima_occupation in enumerate(maxima_occupations): for i, maxima_occupation in enumerate(maxima_occupations):
current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]] current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]]
current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]] if points_pbc_index is None:
current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]]
else:
current_indices = points_pbc_index[
indices[i, 1:][distances[i, 1:] <= bins[-1]]
]
energy = ( energy = (
-np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation) -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation)
* T * T
@ -168,9 +234,12 @@ def distance_resolved_energies(
distance_bins: ArrayLike, distance_bins: ArrayLike,
r_bins: ArrayLike, r_bins: ArrayLike,
box: NDArray, box: NDArray,
pore_geometry: str,
temperature: float, temperature: float,
nodes: int = 8,
) -> pd.DataFrame: ) -> pd.DataFrame:
results = [] results = []
distances = []
for i in range(len(distance_bins) - 1): for i in range(len(distance_bins) - 1):
maxima_indices = np.array( maxima_indices = np.array(
maxima_df.index[ maxima_df.index[
@ -179,11 +248,22 @@ def distance_resolved_energies(
* (maxima_df["maxima"] == True) * (maxima_df["maxima"] == True)
] ]
) )
results.append( try:
_calc_energies(maxima_indices, maxima_df, r_bins, box, temperature) results.append(
) _calc_energies(
maxima_indices,
maxima_df,
r_bins,
box,
pore_geometry,
temperature,
nodes,
)
)
distances.append((distance_bins[i] + distance_bins[i + 1]) / 2)
except ValueError:
pass
distances = (distance_bins[:-1] + distance_bins[1:]) / 2
radii = (r_bins[:-1] + r_bins[1:]) / 2 radii = (r_bins[:-1] + r_bins[1:]) / 2
d = np.array([d for d in distances for r in radii]) d = np.array([d for d in distances for r in radii])
r = np.array([r for d in distances for r in radii]) r = np.array([r for d in distances for r in radii])
@ -192,7 +272,11 @@ def distance_resolved_energies(
def find_energy_maxima( def find_energy_maxima(
energy_df: pd.DataFrame, r_min: float, r_max: float energy_df: pd.DataFrame,
r_min: float,
r_max: float,
r_eval: float = None,
degree: int = 2,
) -> pd.DataFrame: ) -> pd.DataFrame:
distances = [] distances = []
energies = [] energies = []
@ -201,6 +285,9 @@ def find_energy_maxima(
x = np.array(data_d["r"]) x = np.array(data_d["r"])
y = np.array(data_d["energy"]) y = np.array(data_d["energy"])
mask = (x >= r_min) * (x <= r_max) mask = (x >= r_min) * (x <= r_max)
p3 = Poly.fit(x[mask], y[mask], deg=2) p3 = Poly.fit(x[mask], y[mask], deg=degree)
energies.append(np.max(p3(np.linspace(r_min, r_max, 1000)))) if r_eval is None:
energies.append(np.max(p3(np.linspace(r_min, r_max, 1000))))
else:
energies.append(p3(r_eval))
return pd.DataFrame({"d": distances, "energy": energies}) return pd.DataFrame({"d": distances, "energy": energies})

View File

@ -265,7 +265,7 @@ def LSI(
) )
distributions = np.array( distributions = np.array(
[ [
LSI_distribution(trajectory[frame_index], trajectory, bins, selector=None) LSI_distribution(trajectory[frame_index], bins, selector=None)
for frame_index in frame_indices for frame_index in frame_indices
] ]
) )

View File

@ -13,42 +13,24 @@ def trajectory(request):
def test_get_fel(trajectory): def test_get_fel(trajectory):
test_array = np.array( test_array = np.array([210., 214., 209., 192., 200., 193., 230., 218., 266.])
[
174.46253634,
174.60905476,
178.57658092,
182.43001192,
180.57916378,
176.49886217,
178.96018547,
181.13561782,
178.31026314,
176.08903996,
180.71215345,
181.59703135,
180.34329368,
187.02474488,
197.99167477,
214.05788031,
245.58571282,
287.52457507,
331.53492965,
]
)
OW = trajectory.subset(atom_name="OW") OW = trajectory.subset(atom_name="OW")
box = trajectory[0].box
box = np.diag(trajectory[0].box) box_voxels = (np.diag(box) // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05]
box_voxels = (box // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05] occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=10)
occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=1000) radius_maxima = 0.05 * 3 ** (1 / 2) + 0.05 / 100
maxima_matrix = fel.find_maxima(occupation_matrix, box=box_voxels, edge_length=0.05) maxima_matrix = fel.find_maxima(
maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", box / 2) occupation_matrix,
r_bins = np.arange(0, 2, 0.02) box=box_voxels,
distance_bins = np.arange(0.05, 2.05, 0.1) radius=radius_maxima,
pore_geometry="cylindrical",
)
maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", np.diag(box) / 2)
r_bins = np.arange(0, 0.5, 0.02)
distance_bins = np.arange(1.8, 1.9, 0.01)
energy_df = fel.distance_resolved_energies( energy_df = fel.distance_resolved_energies(
maxima_matrix, distance_bins, r_bins, box, 225 maxima_matrix, distance_bins, r_bins, box, "cylindrical", 225
) )
result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15) result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15)
assert (np.round(np.array(result["energy"])) == np.round(test_array)).all() assert (np.round(np.array(result["energy"])) == np.round(test_array)).all()