16 Commits

Author SHA1 Message Date
90bd90a608 Merge pull request 'Added some ordering to checksums from FunctionType since these could depending on input fail to be deterministic' (#2) from fix_nondeterministic_checksum into main
Reviewed-on: #2
2025-06-16 18:45:48 +00:00
67d3e70a66 Added some ordering to checksums from FunctionType since these could depending on input fail to be deterministic 2025-06-16 20:09:50 +02:00
c09549902a Added Robins adjustments for FEL 2024-05-27 14:27:09 +02:00
b7bb8cb379 Merge branch 'refs/heads/mdeval_dev' 2024-05-02 16:18:18 +02:00
33c4756e34 Fixed frame-index selection in occupancy calculation. 2024-05-02 16:17:54 +02:00
7b9f8b6773 Merge branch 'mdeval_dev' 2024-03-05 13:58:15 +01:00
c89cead81c Moved KDTree build up in its own function and made find_maxima query for each point separately. 2024-03-05 13:57:55 +01:00
31eb145a13 Merge branch 'mdeval_dev'
# Conflicts:
#	src/mdevaluate/coordinates.py
2024-02-26 14:20:12 +01:00
af3758cbef Fixed iter in Coordinates 2024-02-26 14:18:23 +01:00
93d020a4de Updated fel test to run faster 2024-02-26 14:14:29 +01:00
b5395098ce Fixed iter of Coordinates after last change 2024-02-26 13:57:44 +01:00
5e80701562 Coordinates returns now correct length after slicing 2024-02-26 13:50:46 +01:00
363e420cd8 Fixed chill ice_cluster_traj() function 2024-02-16 11:11:17 +01:00
6b77ef78e1 Fixed chill selector_ice() function 2024-02-15 11:49:20 +01:00
0c940115af Fixed LSI 2024-02-08 17:24:44 +01:00
b0f29907df Updated fel to triclinic box case 2024-02-08 17:24:22 +01:00
6 changed files with 148 additions and 77 deletions

View File

@ -73,7 +73,9 @@ def checksum(*args, csum=None):
elif isinstance(arg, FunctionType): elif isinstance(arg, FunctionType):
csum.update(strip_comments(inspect.getsource(arg)).encode()) csum.update(strip_comments(inspect.getsource(arg)).encode())
c = inspect.getclosurevars(arg) c = inspect.getclosurevars(arg)
for v in {**c.nonlocals, **c.globals}.values(): merged = {**c.nonlocals, **c.globals}
for key in sorted(merged): # deterministic ordering
v = merged[key]
if v is not arg: if v is not arg:
checksum(v, csum=csum) checksum(v, csum=csum)
elif isinstance(arg, functools.partial): elif isinstance(arg, functools.partial):

View File

@ -218,7 +218,7 @@ class Coordinates:
self.get_frame.clear_cache() self.get_frame.clear_cache()
def __iter__(self): def __iter__(self):
for i in range(len(self))[self._slice]: for i in range(len(self.frames))[self._slice]:
yield self[i] yield self[i]
@singledispatchmethod @singledispatchmethod
@ -232,7 +232,7 @@ class Coordinates:
return sliced return sliced
def __len__(self): def __len__(self):
return len(self.frames) return len(self.frames[self._slice])
def __checksum__(self): def __checksum__(self):
return checksum(self.frames, self.atom_filter, self._slice, self.mode) return checksum(self.frames, self.atom_filter, self._slice, self.mode)

View File

@ -11,7 +11,7 @@ from mdevaluate.coordinates import CoordinateFrame, Coordinates
from mdevaluate.pbc import pbc_points from mdevaluate.pbc import pbc_points
def a_ij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]: def calc_aij(atoms: ArrayLike, N: int = 4, l: int = 3) -> tuple[NDArray, NDArray]:
tree = KDTree(atoms) tree = KDTree(atoms)
dist, indices = tree.query(atoms, N + 1) dist, indices = tree.query(atoms, N + 1)
@ -84,18 +84,18 @@ def count_ice_types(iceTypes: NDArray) -> NDArray:
def selector_ice( def selector_ice(
start_frame: CoordinateFrame, oxygen_atoms_water: CoordinateFrame,
traj: Coordinates,
chosen_ice_types: ArrayLike, chosen_ice_types: ArrayLike,
combined: bool = True, combined: bool = True,
next_neighbor_distance: float = 0.35,
) -> NDArray: ) -> NDArray:
oxygen = traj.subset(atom_name="OW") atoms = oxygen_atoms_water
atoms = oxygen[start_frame.step] atoms_PBC = pbc_points(atoms, thickness=next_neighbor_distance * 2.2)
atoms = atoms % np.diag(atoms.box) aij, indices = calc_aij(atoms_PBC)
atoms_PBC = pbc_points(atoms, thickness=1)
aij, indices = a_ij(atoms_PBC)
tree = KDTree(atoms_PBC) tree = KDTree(atoms_PBC)
neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) neighbors = tree.query_ball_point(
atoms_PBC, next_neighbor_distance, return_length=True
) - 1
index_SOL = atoms_PBC.tolist().index(atoms[0].tolist()) index_SOL = atoms_PBC.tolist().index(atoms[0].tolist())
index_SOL = np.arange(index_SOL, index_SOL + len(atoms)) index_SOL = np.arange(index_SOL, index_SOL + len(atoms))
ice_Types = classify_ice(aij, indices, neighbors, index_SOL) ice_Types = classify_ice(aij, indices, neighbors, index_SOL)
@ -117,9 +117,9 @@ def selector_ice(
def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame: def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray: def ice_types_distribution(frame: CoordinateFrame, selector: Callable) -> NDArray:
atoms_PBC = pbc_points(frame, thickness=1) atoms_PBC = pbc_points(frame, thickness=1)
aij, indices = a_ij(atoms_PBC) aij, indices = calc_aij(atoms_PBC)
tree = KDTree(atoms_PBC) tree = KDTree(atoms_PBC)
neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) neighbors = tree.query_ball_point(atoms_PBC, 0.35, return_length=True) - 1
index = selector(frame, atoms_PBC) index = selector(frame, atoms_PBC)
ice_types_data = classify_ice(aij, indices, neighbors, index) ice_types_data = classify_ice(aij, indices, neighbors, index)
ice_parts_data = count_ice_types(ice_types_data) ice_parts_data = count_ice_types(ice_types_data)
@ -161,8 +161,8 @@ def ice_types(trajectory: Coordinates, segments: int = 10000) -> pd.DataFrame:
def ice_clusters_traj( def ice_clusters_traj(
traj: Coordinates, segments: int = 10000, skip: float = 0.1 traj: Coordinates, segments: int = 10000, skip: float = 0.1
) -> list: ) -> list:
def ice_clusters(frame: CoordinateFrame, traj: Coordinates) -> Tuple[float, list]: def ice_clusters(frame: CoordinateFrame) -> Tuple[float, list]:
selection = selector_ice(frame, traj, [0, 1, 2]) selection = selector_ice(frame, [0, 1, 2])
if len(selection) == 0: if len(selection) == 0:
return frame.time, [] return frame.time, []
else: else:
@ -194,6 +194,6 @@ def ice_clusters_traj(
np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments)) np.int_(np.linspace(len(traj) * skip, len(traj) - 1, num=segments))
) )
all_clusters = [ all_clusters = [
ice_clusters(traj[frame_index], traj) for frame_index in frame_indices ice_clusters(traj[frame_index]) for frame_index in frame_indices
] ]
return all_clusters return all_clusters

View File

@ -1,9 +1,9 @@
from functools import partial from functools import partial
from typing import Optional
import numpy as np import numpy as np
from numpy.typing import ArrayLike, NDArray from numpy.typing import ArrayLike, NDArray
from numpy.polynomial.polynomial import Polynomial as Poly from numpy.polynomial.polynomial import Polynomial as Poly
import math
from scipy.spatial import KDTree from scipy.spatial import KDTree
import pandas as pd import pandas as pd
import multiprocessing as mp import multiprocessing as mp
@ -11,6 +11,56 @@ import multiprocessing as mp
from ..coordinates import Coordinates from ..coordinates import Coordinates
def _pbc_points_reduced(
coordinates: ArrayLike,
pore_geometry: str,
box: Optional[NDArray] = None,
thickness: Optional[float] = None,
) -> tuple[NDArray, NDArray]:
if box is None:
box = coordinates.box
if pore_geometry == "cylindrical":
grid = np.array([[i, j, k] for k in [-1, 0, 1] for j in [0] for i in [0]])
indices = np.tile(np.arange(len(coordinates)), 3)
elif pore_geometry == "slit":
grid = np.array(
[[i, j, k] for k in [0] for j in [1, 0, -1] for i in [-1, 0, 1]]
)
indices = np.tile(np.arange(len(coordinates)), 9)
else:
raise ValueError(
f"pore_geometry is {pore_geometry}, should either be "
f"'cylindrical' or 'slit'"
)
coordinates_pbc = np.concatenate([coordinates + v @ box for v in grid], axis=0)
size = np.diag(box)
if thickness is not None:
mask = np.all(coordinates_pbc > -thickness, axis=1)
coordinates_pbc = coordinates_pbc[mask]
indices = indices[mask]
mask = np.all(coordinates_pbc < size + thickness, axis=1)
coordinates_pbc = coordinates_pbc[mask]
indices = indices[mask]
return coordinates_pbc, indices
def _build_tree(points, box, r_max, pore_geometry):
if np.all(np.diag(np.diag(box)) == box):
tree = KDTree(points % box, boxsize=box)
points_pbc_index = None
else:
points_pbc, points_pbc_index = _pbc_points_reduced(
points,
pore_geometry,
box,
thickness=r_max + 0.01,
)
tree = KDTree(points_pbc)
return tree, points_pbc_index
def occupation_matrix( def occupation_matrix(
trajectory: Coordinates, trajectory: Coordinates,
edge_length: float = 0.05, edge_length: float = 0.05,
@ -28,11 +78,7 @@ def occupation_matrix(
z_bins = np.arange(0, box[2][2] + edge_length, edge_length) z_bins = np.arange(0, box[2][2] + edge_length, edge_length)
bins = [x_bins, y_bins, z_bins] bins = [x_bins, y_bins, z_bins]
# Trajectory is split for parallel computing # Trajectory is split for parallel computing
size = math.ceil(len(frame_indices) / nodes) indices = np.array_split(frame_indices, nodes)
indices = [
np.arange(len(frame_indices))[i : i + size]
for i in range(0, len(frame_indices), size)
]
pool = mp.Pool(nodes) pool = mp.Pool(nodes)
results = pool.map( results = pool.map(
partial(_calc_histogram, trajectory=trajectory, bins=bins), indices partial(_calc_histogram, trajectory=trajectory, bins=bins), indices
@ -72,19 +118,19 @@ def _calc_histogram(
def find_maxima( def find_maxima(
occupation_df: pd.DataFrame, box: ArrayLike, edge_length: float = 0.05 occupation_df: pd.DataFrame, box: ArrayLike, radius: float, pore_geometry: str
) -> pd.DataFrame: ) -> pd.DataFrame:
maxima_df = occupation_df.copy() maxima_df = occupation_df.copy()
maxima_df["maxima"] = None maxima_df["maxima"] = None
points = np.array(maxima_df[["x", "y", "z"]]) points = np.array(maxima_df[["x", "y", "z"]])
tree = KDTree(points, boxsize=box) tree, points_pbc_index = _build_tree(points, box, radius, pore_geometry)
all_neighbors = tree.query_ball_point(
points, edge_length * 3 ** (1 / 2) + edge_length / 100
)
for i in range(len(maxima_df)): for i in range(len(maxima_df)):
if maxima_df.loc[i, "maxima"] is not None: if maxima_df.loc[i, "maxima"] is not None:
continue continue
neighbors = np.array(all_neighbors[i]) maxima_pos = maxima_df.loc[i, ["x", "y", "z"]]
neighbors = np.array(tree.query_ball_point(maxima_pos, radius))
if points_pbc_index is not None:
neighbors = points_pbc_index[neighbors]
neighbors = neighbors[neighbors != i] neighbors = neighbors[neighbors != i]
if len(neighbors) == 0: if len(neighbors) == 0:
maxima_df.loc[i, "maxima"] = True maxima_df.loc[i, "maxima"] = True
@ -104,23 +150,39 @@ def _calc_energies(
maxima_df: pd.DataFrame, maxima_df: pd.DataFrame,
bins: ArrayLike, bins: ArrayLike,
box: NDArray, box: NDArray,
pore_geometry: str,
T: float, T: float,
nodes: int = 8,
) -> NDArray: ) -> NDArray:
points = np.array(maxima_df[["x", "y", "z"]]) points = np.array(maxima_df[["x", "y", "z"]])
tree = KDTree(points, boxsize=box) tree, points_pbc_index = _build_tree(points, box, bins[-1], pore_geometry)
maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]] maxima = maxima_df.loc[maxima_indices, ["x", "y", "z"]]
maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"]) maxima_occupations = np.array(maxima_df.loc[maxima_indices, "occupation"])
num_of_neighbors = np.max( num_of_neighbors = np.max(
tree.query_ball_point(maxima, bins[-1], return_length=True) tree.query_ball_point(maxima, bins[-1], return_length=True)
) )
distances, indices = tree.query( split_maxima = []
maxima, k=num_of_neighbors, distance_upper_bound=bins[-1] for i in range(0, len(maxima), 1000):
split_maxima.append(maxima[i : i + 1000])
distances = []
indices = []
for maxima in split_maxima:
distances_step, indices_step = tree.query(
maxima, k=num_of_neighbors, distance_upper_bound=bins[-1], workers=nodes
) )
distances.append(distances_step)
indices.append(indices_step)
distances = np.concatenate(distances)
indices = np.concatenate(indices)
all_energy_hist = [] all_energy_hist = []
all_occupied_bins_hist = [] all_occupied_bins_hist = []
if distances.ndim == 1: if distances.ndim == 1:
current_distances = distances[1:][distances[1:] <= bins[-1]] current_distances = distances[1:][distances[1:] <= bins[-1]]
if points_pbc_index is None:
current_indices = indices[1:][distances[1:] <= bins[-1]] current_indices = indices[1:][distances[1:] <= bins[-1]]
else:
current_indices = points_pbc_index[indices[1:][distances[1:] <= bins[-1]]]
energy = ( energy = (
-np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations) -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupations)
* T * T
@ -131,8 +193,12 @@ def _calc_energies(
return result return result
for i, maxima_occupation in enumerate(maxima_occupations): for i, maxima_occupation in enumerate(maxima_occupations):
current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]] current_distances = distances[i, 1:][distances[i, 1:] <= bins[-1]]
if points_pbc_index is None:
current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]] current_indices = indices[i, 1:][distances[i, 1:] <= bins[-1]]
else:
current_indices = points_pbc_index[
indices[i, 1:][distances[i, 1:] <= bins[-1]]
]
energy = ( energy = (
-np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation) -np.log(maxima_df.loc[current_indices, "occupation"] / maxima_occupation)
* T * T
@ -168,9 +234,12 @@ def distance_resolved_energies(
distance_bins: ArrayLike, distance_bins: ArrayLike,
r_bins: ArrayLike, r_bins: ArrayLike,
box: NDArray, box: NDArray,
pore_geometry: str,
temperature: float, temperature: float,
nodes: int = 8,
) -> pd.DataFrame: ) -> pd.DataFrame:
results = [] results = []
distances = []
for i in range(len(distance_bins) - 1): for i in range(len(distance_bins) - 1):
maxima_indices = np.array( maxima_indices = np.array(
maxima_df.index[ maxima_df.index[
@ -179,11 +248,22 @@ def distance_resolved_energies(
* (maxima_df["maxima"] == True) * (maxima_df["maxima"] == True)
] ]
) )
try:
results.append( results.append(
_calc_energies(maxima_indices, maxima_df, r_bins, box, temperature) _calc_energies(
maxima_indices,
maxima_df,
r_bins,
box,
pore_geometry,
temperature,
nodes,
) )
)
distances.append((distance_bins[i] + distance_bins[i + 1]) / 2)
except ValueError:
pass
distances = (distance_bins[:-1] + distance_bins[1:]) / 2
radii = (r_bins[:-1] + r_bins[1:]) / 2 radii = (r_bins[:-1] + r_bins[1:]) / 2
d = np.array([d for d in distances for r in radii]) d = np.array([d for d in distances for r in radii])
r = np.array([r for d in distances for r in radii]) r = np.array([r for d in distances for r in radii])
@ -192,7 +272,11 @@ def distance_resolved_energies(
def find_energy_maxima( def find_energy_maxima(
energy_df: pd.DataFrame, r_min: float, r_max: float energy_df: pd.DataFrame,
r_min: float,
r_max: float,
r_eval: float = None,
degree: int = 2,
) -> pd.DataFrame: ) -> pd.DataFrame:
distances = [] distances = []
energies = [] energies = []
@ -201,6 +285,9 @@ def find_energy_maxima(
x = np.array(data_d["r"]) x = np.array(data_d["r"])
y = np.array(data_d["energy"]) y = np.array(data_d["energy"])
mask = (x >= r_min) * (x <= r_max) mask = (x >= r_min) * (x <= r_max)
p3 = Poly.fit(x[mask], y[mask], deg=2) p3 = Poly.fit(x[mask], y[mask], deg=degree)
if r_eval is None:
energies.append(np.max(p3(np.linspace(r_min, r_max, 1000)))) energies.append(np.max(p3(np.linspace(r_min, r_max, 1000))))
else:
energies.append(p3(r_eval))
return pd.DataFrame({"d": distances, "energy": energies}) return pd.DataFrame({"d": distances, "energy": energies})

View File

@ -265,7 +265,7 @@ def LSI(
) )
distributions = np.array( distributions = np.array(
[ [
LSI_distribution(trajectory[frame_index], trajectory, bins, selector=None) LSI_distribution(trajectory[frame_index], bins, selector=None)
for frame_index in frame_indices for frame_index in frame_indices
] ]
) )

View File

@ -13,42 +13,24 @@ def trajectory(request):
def test_get_fel(trajectory): def test_get_fel(trajectory):
test_array = np.array( test_array = np.array([210., 214., 209., 192., 200., 193., 230., 218., 266.])
[
174.46253634,
174.60905476,
178.57658092,
182.43001192,
180.57916378,
176.49886217,
178.96018547,
181.13561782,
178.31026314,
176.08903996,
180.71215345,
181.59703135,
180.34329368,
187.02474488,
197.99167477,
214.05788031,
245.58571282,
287.52457507,
331.53492965,
]
)
OW = trajectory.subset(atom_name="OW") OW = trajectory.subset(atom_name="OW")
box = trajectory[0].box
box = np.diag(trajectory[0].box) box_voxels = (np.diag(box) // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05]
box_voxels = (box // [0.05, 0.05, 0.05] + [1, 1, 1]) * [0.05, 0.05, 0.05] occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=10)
occupation_matrix = fel.occupation_matrix(OW, skip=0, segments=1000) radius_maxima = 0.05 * 3 ** (1 / 2) + 0.05 / 100
maxima_matrix = fel.find_maxima(occupation_matrix, box=box_voxels, edge_length=0.05) maxima_matrix = fel.find_maxima(
maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", box / 2) occupation_matrix,
r_bins = np.arange(0, 2, 0.02) box=box_voxels,
distance_bins = np.arange(0.05, 2.05, 0.1) radius=radius_maxima,
pore_geometry="cylindrical",
)
maxima_matrix = fel.add_distances(maxima_matrix, "cylindrical", np.diag(box) / 2)
r_bins = np.arange(0, 0.5, 0.02)
distance_bins = np.arange(1.8, 1.9, 0.01)
energy_df = fel.distance_resolved_energies( energy_df = fel.distance_resolved_energies(
maxima_matrix, distance_bins, r_bins, box, 225 maxima_matrix, distance_bins, r_bins, box, "cylindrical", 225
) )
result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15) result = fel.find_energy_maxima(energy_df, r_min=0.05, r_max=0.15)
assert (np.round(np.array(result["energy"])) == np.round(test_array)).all() assert (np.round(np.array(result["energy"])) == np.round(test_array)).all()