From d8154d3c38408854e5784c4a0b2ebcfb38e5ef8c Mon Sep 17 00:00:00 2001 From: Sebastian Kloth Date: Tue, 5 Dec 2023 17:08:57 +0100 Subject: [PATCH] Initial commit of free_energy_landscape --- src/mdevaluate/__init__.py | 1 + src/mdevaluate/free_energy_landscape.py | 456 ++++++++++++++++++++++++ 2 files changed, 457 insertions(+) create mode 100644 src/mdevaluate/free_energy_landscape.py diff --git a/src/mdevaluate/__init__.py b/src/mdevaluate/__init__.py index f353f67..dd18ac6 100644 --- a/src/mdevaluate/__init__.py +++ b/src/mdevaluate/__init__.py @@ -13,6 +13,7 @@ from . import autosave from . import reader from . import chill from . import system +from . import free_energy_landscape from .logging import logger diff --git a/src/mdevaluate/free_energy_landscape.py b/src/mdevaluate/free_energy_landscape.py new file mode 100644 index 0000000..93abbb9 --- /dev/null +++ b/src/mdevaluate/free_energy_landscape.py @@ -0,0 +1,456 @@ +import numpy as np +import math +import scipy +import cmath +import pandas as pd + +from functools import partial +import os.path +import multiprocessing as mp +from scipy import spatial as sp + +VALID_GEOMETRY = {"cylindrical", "slab"} + + +def get_fel( + traj, + path, + geometry, + temperature, + edge=0.05, + radiusmin=0.05, + radiusmax=2.05, + z=[-np.inf, np.inf], + overwrite=False, +): + """ + The main method of this script. Will calculate the energy difference based on radius. + + This method will calculate the relative energy of different minima based on radius + to the pore center. + After that it will save those results in a .npy file in the filepath give by the + "path" parameter and will also try to load from there. + + + Parameters: + traj : The trajectory of the system to be evaluated + path : The save and load location of the files + geometry : Either "cylindrical" or "slab". The geometry of your system. Other types + currently not supported + temperature: The temperature of your system. Needed for the energy difference + edge (opt.) : The length of the cubes in which your system will be divided + radiusmin (opt.) : The radius where the calculation begins. Will create a bin of + +- 0.05 of that number. + radiusmax (opt.) : The radius where the calculation ends. Will create a bin of + +- 0.05 of that number. + z (opt.) : The evaluated slice of the trajectory for the energy landscape. + + Returns: + list: A list of the energy difference based on radius + + """ + if geometry not in VALID_GEOMETRY: + raise ValueError("results: status must be one of %r." % VALID_GEOMETRY) + EnergyDifference = [] + + if (os.path.exists(f"{path}/radiiData.npy")) & (not overwrite): + data = np.load(f"{path}/radiiData.npy") + bins = np.load(f"{path}/radiiBins.npy") + # Here the different standard geometries are inserted + else: + if geometry == "cylindrical": + bins, data = shortAllRadii( + traj, path, edge=edge, radiusmin=radiusmin, radiusmax=radiusmax, z=z + ) + if geometry == "slab": + bins, data = shortAllRadiiSlab( + traj, path, edge=edge, radiusmin=radiusmin, radiusmax=radiusmax, z=z + ) + np.save(f"{path}/radiiData", data) + np.save(f"{path}/radiiBins", bins) + + return CalculateEnergyDifference(data, bins, temperature) + + +def fill_bins(traj, path, edge=0.05): + # If available Matrix is directly loaded + if os.path.exists(f"{path}/Matrix{edge}.npy"): + matbin = np.load(f"{path}/Matrix{edge}.npy") + return matbin + + pool = mp.Pool(8) + size = math.ceil(len(traj) / 8) + + # Trajectory is split for parallel computing + indices = list(Chunksplit(np.arange(0, len(traj), 1), size)) + # indices = list(Chunksplit(np.arange(len(traj)-80, len(traj), 1), size)) + fill = partial(help_fill, traj=traj) + results = pool.map(fill, indices) + boxmat = traj[0].box + + a = math.ceil(boxmat[0][0] / 0.05) + b = math.ceil(boxmat[1][1] / 0.05) + c = math.ceil(boxmat[2][2] / 0.05) + matbin = np.zeros((a, b, c)) + + pool.close() + + for mat in results: + matbin = matbin + mat + np.save(file=f"{path}/Matrix{edge}", arr=matbin) + return matbin + + +def help_fill(numberlist, traj, edge=0.05): + boxmat = traj[0].box + + a = math.ceil(boxmat[0][0] / edge) + b = math.ceil(boxmat[1][1] / edge) + c = math.ceil(boxmat[2][2] / edge) + matbin = np.zeros((a, b, c)) + temp = np.array([[]]).reshape(0, 3) + + # Trajectory is split in chunks of 1000 frames to increase efficency while + # keeping ram usage low + h = 1000 + while h < len(numberlist): + temp = np.array([[]]).reshape(0, 3) + x = numberlist[h - 1000] + y = numberlist[h] + for j in traj.pbc[x:y]: + l = np.floor(j / edge).astype("int32") + temp = np.concatenate((temp, np.array(l))) + # Positions are counted for whole chunk + unique, counts = np.unique(temp, return_counts=True, axis=0) + m = 0 + # Count is combined into matrix with position in Matrix corresponding to + # position in system + for z in unique.astype("int"): + a = z[0] + b = z[1] + c = z[2] + matbin[a][b][c] += counts[m] + m += 1 + h += 1000 + # The last few frames of the system are seperately calculated + x = numberlist[h - 1000] + y = numberlist[-1] + temp = np.array([[]]).reshape(0, 3) + for j in traj.pbc[x : y + 1]: + l = np.floor(j / edge).astype("int32") + temp = np.concatenate((temp, np.array(l))) + + unique, counts = np.unique(temp, return_counts=True, axis=0) + m = 0 + for z in unique.astype("int"): + a = z[0] + b = z[1] + c = z[2] + matbin[a][b][c] += counts[m] + m += 1 + return matbin + + +def calculate_maxima(matbin): + i = 0 + j = 0 + k = 0 + maxima = [] + + z = [-1, 0, 1] + + max_i = matbin.shape[0] + max_j = matbin.shape[1] + max_k = matbin.shape[2] + + # For each element in the matrix all sourrounding 26 elements are compared to + # determine the minimum. + # Algorithm can definitely improved but is so fast that its not necessary. + + while i < max_i: + while j < max_j: + while k < max_k: + a = matbin[i][j][k] + b = True + for l in z: + for m in z: + for n in z: + if (l != 0) or (m != 0) or (n != 0): + b = b and ( + a + > matbin[(i + l) % max_i][(j + m) % max_j][ + (k + n) % max_k + ] + ) + + if b: + maxima.append([i, j, k]) + + k += 1 + k = 0 + j += 1 + j = 0 + i += 1 + return maxima + + +def ListOfCoordinates(matbin): + # Matrix elements are translated back into postion vectors + max_i = matbin.shape[0] + max_j = matbin.shape[1] + max_k = matbin.shape[2] + + coord = np.zeros((max_i, max_j, max_k, 3)) + + i = 0 + j = 0 + k = 0 + while i < max_i: + while j < max_j: + while k < max_k: + coord[i][j][k] = [i, j, k] + k += 1 + k = 0 + j += 1 + j = 0 + i += 1 + return coord + + +def calculateTree(coord, box): + coordlist = np.reshape(coord, (-1, 3)) + tree = sp.cKDTree(coordlist, boxsize=box) + return tree + + +def SphereQuotient( + matbin, + maxima, + coordlist, + tree, + radius, + box, + edge, + unitdist, + z=np.array([-np.inf, np.inf]), +): + # Here pore center is assumed to be system center + diff = np.diag(box)[0:2] / 2 + + # Distance to the z-axis + distance = np.linalg.norm(np.array(maxima)[:, 0:2] * edge - diff, axis=1) + + # selection with given parameters + mask = ( + (distance > radius - 0.05) + & (distance < radius + 0.05) + & (np.array(maxima)[:, 2] * edge > z[0]) + & (np.array(maxima)[:, 2] * edge < z[1]) + ) + + maxima_masked = np.array(maxima)[mask] + + # Distances between maxima and other cubes in the system + coordlist = coordlist.reshape(-1, 3) + numOfNeigbour = tree.query_ball_point(maxima[0], unitdist, return_length=True) + d, neighbourlist = tree.query(maxima_masked, k=numOfNeigbour, workers=-1) + + i = 0 + + average = 0 + + y = [] + num = [] + for neighbours in neighbourlist: + current = maxima_masked[i] + + # energy between minimum and all sourrounding cubes is calculated + energy = -np.log( + matbin[current[0], current[1], current[2]] + / matbin.flatten()[neighbours[1:]] + ) + + v, b = np.histogram( + d[i, 1:].flatten()[(energy < np.Inf) & (energy > -np.Inf)], + bins=np.arange(1, 40.5, 0.5), + weights=energy[(energy < np.Inf) & (energy > -np.Inf)], + ) + + # For averaging purposes number weighted is also the calculated + k, l = np.histogram( + d[i, 1:].flatten()[(energy < np.Inf) & (energy > -np.Inf)], + bins=np.arange(1, 40.5, 0.5), + ) + y.append(v) + num.append(k) + i += 1 + + # energy is averaged over minima + quotient = np.sum(y, axis=0) / np.sum(num, axis=0) + if len(neighbourlist) == 0: + return np.arange(1, 40.5, 0.5) * edge, np.arange(1, 40, 0.5) * 0 + return b * edge, quotient + + +def SphereQuotientSlab( + matbin, maxima, coordlist, tree, radius, box, edge, unitdist, z=[-np.inf, np.inf] +): + # Same as SphereQuotient bur for Slabs + diff = box[2, 2] / 2 + distance = abs(np.array(maxima)[:, 2] * edge - diff) + mask = ( + (distance > radius - 0.05) + & (distance < radius + 0.05) + & (np.array(maxima)[:, 2] > z[0]) + & (np.array(maxima)[:, 2] < z[1]) + ) + + maxima_masked = np.array(maxima)[mask] + + coordlist = coordlist.reshape(-1, 3) + numOfNeigbour = tree.query_ball_point(maxima[0], unitdist, return_length=True) + d, neighbourlist = tree.query(maxima_masked, k=numOfNeigbour, workers=-1) + + i = 0 + + average = 0 + + y = [] + num = [] + for neighbours in neighbourlist: + current = maxima_masked[i] + + energy = -np.log( + matbin[current[0], current[1], current[2]] + / matbin.flatten()[neighbours[1:]] + ) + + # Energy is ordered according to distance to the minimum + v, z = np.histogram( + d[i, 1:].flatten()[(energy < np.Inf) & (energy > -np.Inf)], + bins=unitdist * 2 - 2, + weights=energy[(energy < np.Inf) & (energy > -np.Inf)], + ) + + k, l = np.histogram( + d[i, 1:].flatten()[(energy < np.Inf) & (energy > -np.Inf)], + bins=unitdist * 2 - 2, + ) + + y.append(v) + + num.append(k) + i += 1 + + quotient = np.sum(y, axis=0) / np.sum(num, axis=0) + if len(neighbourlist) == 0: + return np.arange(1, 40.5, 0.5), np.arange(1, 40, 0.5) * 0 + return z * edge, quotient + + +def shortAllRadii( + traj, path, edge=0.05, radiusmin=0.05, radiusmax=2.05, z=[-np.inf, np.inf] +): + # Shorthand function cylindrical systems + matbin = fill_bins(traj, path, edge) + maxima = calculate_maxima(matbin) + coordinates = ListOfCoordinates(matbin) + tree = calculateTree(coordinates, box=matbin.shape) + bins = [] + data = [] + for radius in np.arange(radiusmin, radiusmax, 0.1): + b, d = SphereQuotient( + matbin, + maxima, + coordinates, + tree, + radius=radius, + box=traj[0].box, + edge=edge, + unitdist=40, + z=z, + ) + bins.append(b) + data.append(d) + return bins, data + + +def shortAllRadiiSlab( + traj, path, edge=0.05, radiusmin=0.05, radiusmax=2.05, z=[-np.inf, np.inf] +): + # Shorthand function for Slab systems + matbin = fill_bins(traj, path, edge) + maxima = calculate_maxima(matbin) + coordinates = ListOfCoordinates(matbin) + tree = calculateTree(coordinates, box=matbin.shape) + bins = [] + data = [] + for radius in np.arange(radiusmin, radiusmax, 0.1): + c, d = SphereQuotientSlab( + matbin, + maxima, + coordinates, + tree, + radius=radius, + box=traj[0].box, + edge=edge, + unitdist=40, + z=z, + ) + bins.append(c) + data.append(d) + return bins, data + + +def CalculateEnergyDifference(data, bins, temperature): + """ + Calculates the energy difference between local energy minimum and the + minimum in the center + """ + difference = [] + i = 0 + while i < len(data): + # + q = ( + GetMinimum(data[0], bins[0])[1] - GetMinimum(data[i], bins[0])[1] + ) * temperature + difference.append(q) + i += 1 + return difference + + +def Chunksplit(list_a, chunk_size): + for i in range(0, len(list_a), chunk_size): + yield list_a[i : i + chunk_size] + + +def GetMinimum(data, bins): + # Fits a polynom of order 3 to determine the energy minimum and analytically + # calculates the minimum location + y = data[:10] + x = bins[1:11] + + popt, pcov = scipy.optimize.curve_fit(Pol3, x, y, maxfev=80000) + + a, b = SolveQuadratic(3 * popt[0], 2 * popt[1], popt[2]) + + if 6 * popt[0] * a + 2 * popt[1] > 0: + if (np.real(a) < 0) or (np.real(a) > 0.3): + return np.real(a), np.average(y) + return np.real(a), np.real(Pol3(a, *popt)) + else: + if (np.real(b) < 0) or (np.real(b) > 0.3): + return np.real(b), np.average(y) + return np.real(b), np.real(Pol3(b, *popt)) + + +def SolveQuadratic(a, b, c): + d = (b**2) - (4 * a * c) + + sol1 = (-b - cmath.sqrt(d)) / (2 * a) + sol2 = (-b + cmath.sqrt(d)) / (2 * a) + return sol1, sol2 + + +def Pol3(x, a, b, c, d): + return a * x**3 + b * x**2 + c * x + d