diff --git a/src/mdevaluate/extra/free_energy_landscape.py b/src/mdevaluate/extra/free_energy_landscape.py index 94a595a..e9da30e 100644 --- a/src/mdevaluate/extra/free_energy_landscape.py +++ b/src/mdevaluate/extra/free_energy_landscape.py @@ -57,15 +57,15 @@ def occupation_matrix( def _calc_histogram( - numberlist: ArrayLike, trajectory: Coordinates, bins: ArrayLike + indices: ArrayLike, trajectory: Coordinates, bins: ArrayLike ) -> NDArray: matbin = None - for index in range(0, len(numberlist), 1000): + for index in range(0, len(indices), 1000): try: - indices = numberlist[index : index + 1000] + current_indices = indices[index : index + 1000] except IndexError: - indices = numberlist[index:] - frames = np.concatenate(np.array([trajectory.pbc[i] for i in indices])) + current_indices = indices[index:] + frames = np.concatenate(np.array([trajectory.pbc[i] for i in current_indices])) hist, _ = np.histogramdd(frames, bins=bins) if matbin is None: matbin = hist @@ -202,436 +202,6 @@ def find_energy_maxima( x = np.array(data_d["r"]) y = np.array(data_d["energy"]) mask = (x >= r_min) * (x <= r_max) - print(x[mask], y[mask]) p3 = Poly.fit(x[mask], y[mask], deg=2) - print(p3) energies.append(np.max(p3(np.linspace(r_min, r_max, 1000)))) return pd.DataFrame({"d": distances, "energy": energies}) - - -# DEPRECATED CODE STARTING FROM HERE -# LEFT FOR TESTING ONLY -VALID_GEOMETRY = {"cylindrical", "slab"} - - -def get_fel( - traj, - path, - geometry, - temperature, - edge=0.05, - radiusmin=0.05, - radiusmax=2.05, - z=[-np.inf, np.inf], - overwrite=False, -): - """ - The main method of this script. Will calculate the energy difference based on radius. - - This method will calculate the relative energy of different minima based on radius - to the pore center. - After that it will save those results in a .npy file in the filepath give by the - "path" parameter and will also try to load from there. - - - Parameters: - traj : The trajectory of the system to be evaluated - path : The save and load location of the files - geometry : Either "cylindrical" or "slab". The geometry of your system. Other types - currently not supported - temperature: The temperature of your system. Needed for the energy difference - edge (opt.) : The length of the cubes in which your system will be divided - radiusmin (opt.) : The radius where the calculation begins. Will create a bin of - +- 0.05 of that number. - radiusmax (opt.) : The radius where the calculation ends. Will create a bin of - +- 0.05 of that number. - z (opt.) : The evaluated slice of the trajectory for the energy landscape. - - Returns: - list: A list of the energy difference based on radius - - """ - if geometry not in VALID_GEOMETRY: - raise ValueError("results: status must be one of %r." % VALID_GEOMETRY) - - if (os.path.exists(f"{path}/radiiData.npy")) & (not overwrite): - data = np.load(f"{path}/radiiData.npy") - bins = np.load(f"{path}/radiiBins.npy") - # Here the different standard geometries are inserted - else: - if geometry == "cylindrical": - bins, data = short_all_radii( - traj, path, edge=edge, radiusmin=radiusmin, radiusmax=radiusmax, z=z - ) - elif geometry == "slab": - bins, data = short_all_radii_slab( - traj, path, edge=edge, radiusmin=radiusmin, radiusmax=radiusmax, z=z - ) - np.save(f"{path}/radiiData", data) - np.save(f"{path}/radiiBins", bins) - - energy_differences = np.array(calculate_energy_difference(data, bins, temperature)) - d = np.linspace(radiusmin, radiusmax, len(energy_differences)) - return d, energy_differences - - -def fill_bins(traj, path, edge=0.05): - # If available Matrix is directly loaded - if os.path.exists(f"{path}/Matrix{edge}.npy"): - matbin = np.load(f"{path}/Matrix{edge}.npy") - return matbin - - pool = mp.Pool(8) - size = math.ceil(len(traj) / 8) - - # Trajectory is split for parallel computing - indices = list(chunksplit(np.arange(0, len(traj), 1), size)) - # indices = list(Chunksplit(np.arange(len(traj)-80, len(traj), 1), size)) - fill = partial(help_fill, traj=traj) - results = pool.map(fill, indices) - boxmat = traj[0].box - - a = math.ceil(boxmat[0][0] / 0.05) - b = math.ceil(boxmat[1][1] / 0.05) - c = math.ceil(boxmat[2][2] / 0.05) - matbin = np.zeros((a, b, c)) - - pool.close() - - for mat in results: - matbin = matbin + mat - np.save(file=f"{path}/Matrix{edge}", arr=matbin) - return matbin - - -def help_fill(numberlist, traj, edge=0.05): - boxmat = traj[0].box - - a = math.ceil(boxmat[0][0] / edge) - b = math.ceil(boxmat[1][1] / edge) - c = math.ceil(boxmat[2][2] / edge) - matbin = np.zeros((a, b, c)) - temp = np.array([[]]).reshape(0, 3) - - # Trajectory is split in chunks of 1000 frames to increase efficency while - # keeping ram usage low - h = 1000 - while h < len(numberlist): - temp = np.array([[]]).reshape(0, 3) - x = numberlist[h - 1000] - y = numberlist[h] - for j in traj.pbc[x:y]: - l = np.floor(j / edge).astype("int32") - temp = np.concatenate((temp, np.array(l))) - # Positions are counted for whole chunk - unique, counts = np.unique(temp, return_counts=True, axis=0) - m = 0 - # Count is combined into matrix with position in Matrix corresponding to - # position in system - for z in unique.astype("int"): - a = z[0] - b = z[1] - c = z[2] - matbin[a][b][c] += counts[m] - m += 1 - h += 1000 - # The last few frames of the system are seperately calculated - x = numberlist[h - 1000] - y = numberlist[-1] - temp = np.array([[]]).reshape(0, 3) - for j in traj.pbc[x : y + 1]: - l = np.floor(j / edge).astype("int32") - temp = np.concatenate((temp, np.array(l))) - - unique, counts = np.unique(temp, return_counts=True, axis=0) - m = 0 - for z in unique.astype("int"): - a = z[0] - b = z[1] - c = z[2] - matbin[a][b][c] += counts[m] - m += 1 - return matbin - - -def calculate_maxima(matbin): - maxima = [] - - z = [-1, 0, 1] - - max_i = matbin.shape[0] - max_j = matbin.shape[1] - max_k = matbin.shape[2] - - # For each element in the matrix all surrounding 26 elements are compared to - # determine the minimum. - # Algorithm can definitely be improved but is so fast that it's not necessary. - - for i in range(max_i): - for j in range(max_j): - for k in range(max_k): - a = matbin[i][j][k] - b = True - for l in z: - for m in z: - for n in z: - if (l != 0) or (m != 0) or (n != 0): - b = b and ( - a - > matbin[(i + l) % max_i][(j + m) % max_j][ - (k + n) % max_k - ] - ) - - if b: - maxima.append([i, j, k]) - return maxima - - -def list_of_coordinates(matbin): - # Matrix elements are translated back into postion vectors - max_i = matbin.shape[0] - max_j = matbin.shape[1] - max_k = matbin.shape[2] - - coord = np.zeros((max_i, max_j, max_k, 3)) - - for i in range(max_i): - for j in range(max_j): - for k in range(max_k): - coord[i][j][k] = [i, j, k] - - return coord - - -def sphere_quotient( - matbin, - maxima, - coordlist, - tree, - radius, - box, - edge, - unitdist, - z=np.array([-np.inf, np.inf]), -): - # Here pore center is assumed to be system center - diff = np.diag(box)[0:2] / 2 - - # Distance to the z-axis - distance = np.linalg.norm(np.array(maxima)[:, 0:2] * edge - diff, axis=1) - - # selection with given parameters - mask = ( - (distance > radius - 0.05) - & (distance < radius + 0.05) - & (np.array(maxima)[:, 2] * edge > z[0]) - & (np.array(maxima)[:, 2] * edge < z[1]) - ) - - maxima_masked = np.array(maxima)[mask] - - # Distances between maxima and other cubes in the system - coordlist = coordlist.reshape(-1, 3) - numOfNeigbour = np.max(tree.query_ball_point(maxima, unitdist, return_length=True)) - d, neighbourlist = tree.query(maxima_masked, k=numOfNeigbour, workers=-1) - - i = 0 - - average = 0 - - y = [] - num = [] - for neighbours in neighbourlist: - current = maxima_masked[i] - - # energy between minimum and all sourrounding cubes is calculated - energy = -np.log( - matbin[current[0], current[1], current[2]] - / matbin.flatten()[neighbours[1:]] - ) - - v, b = np.histogram( - d[i, 1:].flatten()[(energy < np.Inf) & (energy > -np.Inf)], - bins=np.arange(1, 40.5, 0.5), - weights=energy[(energy < np.Inf) & (energy > -np.Inf)], - ) - - # For averaging purposes number weighted is also the calculated - k, l = np.histogram( - d[i, 1:].flatten()[(energy < np.Inf) & (energy > -np.Inf)], - bins=np.arange(1, 40.5, 0.5), - ) - y.append(v) - num.append(k) - i += 1 - - # energy is averaged over minima - quotient = np.sum(y, axis=0) / np.sum(num, axis=0) - if len(neighbourlist) == 0: - return np.arange(1, 40.5, 0.5) * edge, np.arange(1, 40, 0.5) * 0 - return b * edge, quotient - - -def sphere_quotient_slab( - matbin, maxima, coordlist, tree, radius, box, edge, unitdist, z=[-np.inf, np.inf] -): - # Same as SphereQuotient bur for Slabs - diff = box[2, 2] / 2 - distance = abs(np.array(maxima)[:, 2] * edge - diff) - mask = ( - (distance > radius - 0.05) - & (distance < radius + 0.05) - & (np.array(maxima)[:, 2] > z[0]) - & (np.array(maxima)[:, 2] < z[1]) - ) - - maxima_masked = np.array(maxima)[mask] - - coordlist = coordlist.reshape(-1, 3) - numOfNeigbour = np.max(tree.query_ball_point(maxima, unitdist, return_length=True)) - d, neighbourlist = tree.query(maxima_masked, k=numOfNeigbour, workers=-1) - - i = 0 - - average = 0 - - y = [] - num = [] - for neighbours in neighbourlist: - current = maxima_masked[i] - - energy = -np.log( - matbin[current[0], current[1], current[2]] - / matbin.flatten()[neighbours[1:]] - ) - - # Energy is ordered according to distance to the minimum - v, z = np.histogram( - d[i, 1:].flatten()[(energy < np.Inf) & (energy > -np.Inf)], - bins=unitdist * 2 - 2, - weights=energy[(energy < np.Inf) & (energy > -np.Inf)], - ) - - k, l = np.histogram( - d[i, 1:].flatten()[(energy < np.Inf) & (energy > -np.Inf)], - bins=unitdist * 2 - 2, - ) - - y.append(v) - - num.append(k) - i += 1 - - quotient = np.sum(y, axis=0) / np.sum(num, axis=0) - if len(neighbourlist) == 0: - return np.arange(1, 40.5, 0.5), np.arange(1, 40, 0.5) * 0 - return z * edge, quotient - - -def short_all_radii( - traj, path, edge=0.05, radiusmin=0.05, radiusmax=2.05, z=[-np.inf, np.inf] -): - # Shorthand function cylindrical systems - matbin = fill_bins(traj, path, edge) - maxima = calculate_maxima(matbin) - coordinates = list_of_coordinates(matbin) - tree = KDTree(np.reshape(coordinates, (-1, 3)), boxsize=matbin.shape) - bins = [] - data = [] - for radius in np.arange(radiusmin, radiusmax, 0.1): - b, d = sphere_quotient( - matbin, - maxima, - coordinates, - tree, - radius=radius, - box=traj[0].box, - edge=edge, - unitdist=40, - z=z, - ) - bins.append(b) - data.append(d) - return bins, data - - -def short_all_radii_slab( - traj, path, edge=0.05, radiusmin=0.05, radiusmax=2.05, z=[-np.inf, np.inf] -): - # Shorthand function for Slab systems - matbin = fill_bins(traj, path, edge) - maxima = calculate_maxima(matbin) - coordinates = list_of_coordinates(matbin) - tree = KDTree(np.reshape(coordinates, (-1, 3)), boxsize=matbin.shape) - bins = [] - data = [] - for radius in np.arange(radiusmin, radiusmax, 0.1): - c, d = sphere_quotient_slab( - matbin, - maxima, - coordinates, - tree, - radius=radius, - box=traj[0].box, - edge=edge, - unitdist=40, - z=z, - ) - bins.append(c) - data.append(d) - return bins, data - - -def calculate_energy_difference(data, bins, temperature): - """ - Calculates the energy difference between local energy minimum and the - minimum in the center - """ - difference = [] - i = 0 - while i < len(data): - # - q = ( - get_minimum(data[0], bins[0])[1] - get_minimum(data[i], bins[0])[1] - ) * temperature - difference.append(q) - i += 1 - return difference - - -def chunksplit(list_a, chunk_size): - for i in range(0, len(list_a), chunk_size): - yield list_a[i : i + chunk_size] - - -def get_minimum(data, bins): - # Fits a polynom of order 3 to determine the energy minimum and analytically - # calculates the minimum location - y = data[:10] - x = bins[1:11] - - popt, _ = scipy.optimize.curve_fit(pol3, x, y, maxfev=80000) - - a, b = solve_quadratic(3 * popt[0], 2 * popt[1], popt[2]) - - if 6 * popt[0] * a + 2 * popt[1] > 0: - if (np.real(a) < 0) or (np.real(a) > 0.3): - return np.real(a), np.average(y) - return np.real(a), np.real(pol3(a, *popt)) - else: - if (np.real(b) < 0) or (np.real(b) > 0.3): - return np.real(b), np.average(y) - return np.real(b), np.real(pol3(b, *popt)) - - -def solve_quadratic(a, b, c): - d = (b**2) - (4 * a * c) - - sol1 = (-b - cmath.sqrt(d)) / (2 * a) - sol2 = (-b + cmath.sqrt(d)) / (2 * a) - return sol1, sol2 - - -def pol3(x, a, b, c, d): - return a * x**3 + b * x**2 + c * x + d