Initial version

sebastiankloth
2022-04-20 14:08:38 +02:00
commit 68b8e1a305
48 changed files with 5133 additions and 0 deletions

mdevaluate/__init__.py Normal file

@@ -0,0 +1,75 @@
import os
from glob import glob
from . import atoms
from . import coordinates
from . import correlation
from . import distribution
from . import functions
from . import pbc
from . import autosave
from . import reader
from .logging import logger
__version__ = '2022.1.dev1'
def open(directory='', topology='*.tpr', trajectory='*.xtc', cached=False,
nojump=False):
"""
Open a simulation from a directory.
Args:
directory: Directory of the simulation.
topology (opt.):
Descriptor of the topology file (tpr or gro). By default, a tpr file is
used if there is exactly one in the directory.
trajectory (opt.): Descriptor of the trajectory (xtc file).
cached (opt.):
If the trajectory reader should be cached. Can be True, an integer or None.
If this is True maxsize is 128, otherwise this is used as maxsize for
the cache, None means infinite cache (this is a potential memory leak!).
nojump (opt.): If nojump matrices should be generated. They will always be loaded if present.
Returns:
A Coordinate object of the simulation.
Example:
Open a simulation located in '/path/to/sim', where the trajectory is
located in a sub-directory '/path/to/sim/out' and named, for example,
'nojump_traj.xtc'. All read frames will be cached in memory.
>>> open('/path/to/sim', trajectory='out/nojump*.xtc', cached=None)
The file descriptors can use unix-style pathname expansion to define the filenames.
Patterns may use the recursive placeholder `**`, which matches the base directory or
any subdirectory, so files of a matching type in subdirectories are found too.
For example: 'out/nojump*.xtc' would match xtc files in a subdirectory `out` that
start with `nojump` and end with `.xtc`.
For more details see: https://docs.python.org/3/library/glob.html
"""
top_glob = glob(os.path.join(directory, topology), recursive=True)
if top_glob is not None and len(top_glob) == 1:
top_file, = top_glob
logger.info('Loading topology: {}'.format(top_file))
else:
raise FileNotFoundError('Topology file could not be identified.')
traj_glob = glob(os.path.join(directory, trajectory), recursive=True)
if traj_glob is not None and len(traj_glob) == 1:
traj_file = traj_glob[0]
logger.info('Loading trajectory: {}'.format(traj_file))
else:
raise FileNotFoundError('Trajectory file could not be identified.')
atom_set, frames = reader.open_with_mdanalysis(
top_file, traj_file, cached=cached
)
coords = coordinates.Coordinates(frames, atom_subset=atom_set)
if nojump:
try:
frames.nojump_matrixes
except reader.NojumpError:
reader.generate_nojump_matrixes(coords)
return coords
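
A minimal usage sketch of this entry point (the paths and atom names below are illustrative, not part of the commit):

import mdevaluate as md

# Locate topology and trajectory via the glob patterns and cache read frames.
traj = md.open('/path/to/sim', trajectory='out/*.xtc', cached=True)
# Select a subset of atoms, e.g. all water oxygens.
oxygens = traj.subset(atom_name='OW', residue_name='SOL')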

mdevaluate/atoms.py Normal file

@@ -0,0 +1,270 @@
import re
from scipy.spatial.distance import cdist
from scipy import spatial
from .pbc import pbc_diff, pbc_points
from .checksum import checksum
import numpy as np
import scipy
if scipy.version.version >= '0.17.0':
from scipy.spatial import cKDTree as KDTree
else:
from scipy.spatial import KDTree
def compare_regex(strings, exp):
    """
    Compare a list of strings with a regular expression.
    """
    if not exp.endswith('$'):
        exp += '$'
    regex = re.compile(exp)
    return np.array([regex.match(s) is not None for s in strings])
class Atoms:
"""
Basic container class for atom information.
Args:
atoms: N tuples of residue id, residue name and atom name.
indices (optional): Dictionary of named atom index groups.
Attributes:
residue_ids: Indices of the atoms' residues
residue_names: Names of the atoms' residues
atom_names: Names of the atoms
indices: Dictionary of named atom index groups, if specified
"""
def __init__(self, atoms, indices=None, masses=None, charges=None, reader=None):
self.residue_ids, self.residue_names, self.atom_names = atoms.T
self.residue_ids = np.array([int(m) for m in self.residue_ids])
self.indices = indices
self.masses = masses
self.charges = charges
self.reader = reader
def subset(self, *args, **kwargs):
"""
Return a subset of these atoms with all atoms selected.
All arguments are passed to the :meth:`AtomSubset.subset` method directly.
"""
return AtomSubset(self).subset(*args, **kwargs)
def __len__(self):
return len(self.atom_names)
class AtomMismatch(Exception):
pass
class AtomSubset:
def __init__(self, atoms, selection=None, description=''):
"""
Args:
atoms: Base atom object
selection (opt.): Selected atoms
description (opt.): Descriptive string of the subset.
"""
if selection is None:
selection = np.ones(len(atoms), dtype='bool')
self.selection = selection
self.atoms = atoms
self.description = description
def subset(self, atom_name=None, residue_name=None, residue_id=None, indices=None):
"""
Return a subset of the system. The selection is specified by one or more of
the keywords below. Names are matched as a regular expression with `re.match`.
Args:
    atom_name: Specification of the atom name
    residue_name: Specification of the residue name
    residue_id: Residue ID or list of IDs
    indices: List of atom indices
"""
new_subset = self
if atom_name is not None:
new_subset &= AtomSubset(
self.atoms,
selection=compare_regex(self.atoms.atom_names, atom_name),
description=atom_name
)
if residue_name is not None:
new_subset &= AtomSubset(
self.atoms,
selection=compare_regex(self.atoms.residue_names, residue_name),
description=residue_name
)
if residue_id is not None:
if np.iterable(residue_id):
selection = np.zeros(len(self.selection), dtype='bool')
selection[np.in1d(self.atoms.residue_ids, residue_id)] = True
new_subset &= AtomSubset(self.atoms, selection)
else:
new_subset &= AtomSubset(self.atoms, self.atoms.residue_ids == residue_id)
if indices is not None:
selection = np.zeros(len(self.selection), dtype='bool')
selection[indices] = True
new_subset &= AtomSubset(self.atoms, selection)
return new_subset
@property
def atom_names(self):
return self.atoms.atom_names[self.selection]
@property
def residue_names(self):
return self.atoms.residue_names[self.selection]
@property
def residue_ids(self):
return self.atoms.residue_ids[self.selection]
@property
def indices(self):
return np.where(self.selection)
def __getitem__(self, slice):
if isinstance(slice, str):
indices = self.atoms.indices[slice]
return self.atoms.subset()[indices] & self
return self.subset(indices=self.indices[0].__getitem__(slice))
def __and__(self, other):
if self.atoms != other.atoms:
raise AtomMismatch
selection = (self.selection & other.selection)
description = '{}_{}'.format(self.description, other.description).strip('_')
return AtomSubset(self.atoms, selection, description)
def __or__(self, other):
if self.atoms != other.atoms:
raise AtomMismatch
selection = (self.selection | other.selection)
description = '{}_{}'.format(self.description, other.description).strip('_')
return AtomSubset(self.atoms, selection, description)
def __invert__(self):
selection = ~self.selection
return AtomSubset(self.atoms, selection, self.description)
def __repr__(self):
return 'Subset of Atoms ({} of {})'.format(len(self.atoms.residue_names[self.selection]),
len(self.atoms))
@property
def summary(self):
return "\n".join(["{}{} {}".format(resid, resname, atom_names)
for resid, resname, atom_names in zip(self.residue_ids, self.residue_names, self.atom_names)
])
def __checksum__(self):
return checksum(self.description)
def center_of_mass(position, mass=None):
if mass is not None:
return 1 / mass.sum() * (mass * position).sum(axis=0)
else:
return 1 / len(position) * position.sum(axis=0)
def gyration_radius(position):
r"""
Calculates a list of all radii of gyration of all molecules given in the coordinate frame,
weighted with the masses of the individual atoms.
Args:
position: Coordinate frame object
.. math::
    R_G = \left(\frac{\sum_{i=1}^{n} m_i |\vec{r_i} - \vec{r_{COM}}|^2}{\sum_{i=1}^{n} m_i}\right)^{\frac{1}{2}}
"""
gyration_radii = np.array([])
for resid in np.unique(position.residue_ids):
    pos = position.whole[position.residue_ids == resid]
    mass = position.masses[position.residue_ids == resid][:, np.newaxis]
    COM = center_of_mass(pos, mass)
    r_sq = ((pbc_diff(pos, COM, pos.box.diagonal()))**2).sum(1)[:, np.newaxis]
    g_radius = ((r_sq * mass).sum() / mass.sum())**0.5
    gyration_radii = np.append(gyration_radii, g_radius)
return gyration_radii
def layer_of_atoms(atoms,
thickness,
plane_offset=np.array([0, 0, 0]),
plane_normal=np.array([1, 0, 0])):
p_ = atoms - plane_offset
distance = np.dot(p_, plane_normal)
return abs(distance) <= thickness
def distance_to_atoms(ref, atoms, box=None):
"""Get the minimal distance from atoms to ref.
The result is an array with length == len(atoms).
"""
out = np.empty(atoms.shape[0])
for i, atom in enumerate(atoms):
diff = (pbc_diff(atom, ref, box) ** 2).sum(axis=1).min()
out[i] = np.sqrt(diff)
return out
def distance_to_atoms_cKDtree(ref, atoms, box=None, thickness=None):
"""
Get the minimal distance from atoms to ref.
The result is an array with length == len(atoms).
Can be faster than distance_to_atoms.
Thickness defaults to box/5. If this is too small, results may be wrong.
If box is not given, periodic boundary conditions are not applied!
"""
if thickness is None:
thickness = box/5
if box is not None:
start_coords = np.copy(atoms)%box
all_frame_coords = pbc_points(ref, box, thickness = thickness)
else:
start_coords = atoms
all_frame_coords = ref
tree = spatial.cKDTree(all_frame_coords)
return tree.query(start_coords)[0]
def next_neighbors(atoms, query_atoms=None, number_of_neighbors=1, distance_upper_bound=np.inf, distinct=False):
"""
Find the N next neighbors of a set of atoms.
Args:
atoms: The reference atoms and also the atoms which are queried if `query_atoms` is not provided
query_atoms (opt.): If this is not None, these atoms will be queried
number_of_neighbors (int, opt.): Number of neighboring atoms to find
distance_upper_bound (float, opt.): Upper bound of the distance between neighbors
distinct (bool, opt.): If this is true, the atoms and query atoms are taken as distinct sets of atoms
"""
tree = KDTree(atoms)
dnn = 0
if query_atoms is None:
query_atoms = atoms
elif not distinct:
dnn = 1
dist, indices = tree.query(query_atoms, number_of_neighbors + dnn,
distance_upper_bound=distance_upper_bound)
return indices[:, dnn:]
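
An illustrative sketch of the selection API defined above (`traj` and the atom/residue names are hypothetical, as in the previous sketch):

from mdevaluate.atoms import next_neighbors

# Names are regular expressions: select all atoms whose name starts with 'C'.
carbons = traj.subset(atom_name='C.*')
# AtomSubset objects combine with the boolean operators &, | and ~.
hydrogens = traj.atoms.subset(residue_name='SOL') & ~traj.atoms.subset(atom_name='OW')
# Indices of the four nearest neighbors of every atom in the first frame.
neighbors = next_neighbors(carbons[0], query_atoms=carbons[0], number_of_neighbors=4)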

mdevaluate/autosave.py Normal file

@@ -0,0 +1,183 @@
import os
import numpy as np
import functools
import inspect
from .checksum import checksum
from .logging import logger
autosave_directory = None
load_autosave_data = False
verbose_print = True
user_autosave_directory = os.path.join(os.environ['HOME'], '.mdevaluate/autosave')
def notify(msg):
if verbose_print:
logger.info(msg)
else:
logger.debug(msg)
def enable(dir, load_data=True, verbose=True):
"""
Enable auto saving results of functions decorated with :func:`autosave_data`.
Args:
dir: Directory where the data should be saved.
load_data (opt., bool): If data should also be loaded.
verbose (opt., bool): If info messages about saving and loading should be logged.
"""
global autosave_directory, load_autosave_data, verbose_print
verbose_print = verbose
# absolute = os.path.abspath(dir)
# os.makedirs(absolute, exist_ok=True)
autosave_directory = dir
load_autosave_data = load_data
notify('Enabled autosave in directory: {}'.format(autosave_directory))
def disable():
"""Disable autosave."""
global autosave_directory, load_autosave_data
autosave_directory = None
load_autosave_data = False
class disabled:
"""
A context manager that disables the autosave module within its context.
Example:
import mdevaluate as md
md.autosave.enable('data')
with md.autosave.disabled():
# Autosave functionality is disabled within this context.
md.correlation.shifted_correlation(
...
)
# After the context is exited, autosave will work as before.
"""
def __enter__(self):
self._autosave_directory = autosave_directory
disable()
def __exit__(self, *args):
enable(self._autosave_directory)
def get_directory(reader):
"""Get the autosave directory for a trajectory reader."""
outdir = os.path.dirname(reader.filename)
savedir = os.path.join(outdir, autosave_directory)
if not os.path.exists(savedir):
try:
os.makedirs(savedir)
except PermissionError:
pass
if not os.access(savedir, os.W_OK):
savedir = os.path.join(user_autosave_directory, savedir.lstrip('/'))
logger.info('Switched autosave directory to {}, since original location is not writeable.'.format(savedir))
os.makedirs(savedir, exist_ok=True)
return savedir
def get_filename(function, checksum, description, *args):
"""Get the autosave filename for a specific function call."""
func_desc = function.__name__
for arg in args:
if hasattr(arg, '__name__'):
func_desc += '_{}'.format(arg.__name__)
elif isinstance(arg, functools.partial):
func_desc += '_{}'.format(arg.func.__name__)
if hasattr(arg, 'frames'):
savedir = get_directory(arg.frames)
if hasattr(arg, 'description') and arg.description != '':
description += '_{}'.format(arg.description)
filename = '{}_{}.npz'.format(func_desc.strip('_'), description.strip('_'))
return os.path.join(savedir, filename)
def verify_file(filename, checksum):
"""Verify if the file matches the function call."""
file_checksum = 0
if os.path.exists(filename):
data = np.load(filename)
if 'checksum' in data:
file_checksum = data['checksum']
return file_checksum == checksum
def save_data(filename, checksum, data):
"""Save data and checksum to a file."""
notify('Saving result to file: {}'.format(filename))
try:
data = np.array(data)
except ValueError:
arr = np.empty((len(data),), dtype=object)
arr[:] = data
data = arr
np.savez(filename, checksum=checksum, data=data)
def load_data(filename):
"""Load data from a npz file."""
notify('Loading result from file: {}'.format(filename))
fdata = np.load(filename)
if 'data' in fdata:
return fdata['data']
else:
data = tuple(fdata[k] for k in sorted(fdata) if ('arr' in k))
save_data(filename, fdata['checksum'], data)
return data
def autosave_data(nargs, kwargs_keys=None, version=None):
"""
Enable autosaving of results for a function.
Args:
nargs: Number of args which are relevant for the calculation.
kwargs_keys (opt.): List of keyword arguments which are relevant for the calculation.
version (opt.):
An optional version number of the decorated function, which replaces the checksum of
the function code, hence the checksum does not depend on the function code.
"""
def decorator_function(function):
# make sure to include names of positional arguments in kwargs_keys,
# since otherwise they will be ignored if passed via keyword.
# nonlocal kwargs_keys
posargs_keys = list(inspect.signature(function).parameters)[:nargs]
@functools.wraps(function)
def autosave(*args, **kwargs):
description = kwargs.pop('description', '')
autoload = kwargs.pop('autoload', True) and load_autosave_data
if autosave_directory is not None:
relevant_args = list(args[:nargs])
if kwargs_keys is not None:
for key in [*posargs_keys, *kwargs_keys]:
if key in kwargs:
relevant_args.append(kwargs[key])
if version is None:
csum = legacy_csum = checksum(function, *relevant_args)
else:
csum = checksum(version, *relevant_args)
legacy_csum = checksum(function, *relevant_args)
filename = get_filename(function, csum, description, *relevant_args)
if autoload and (verify_file(filename, csum) or verify_file(filename, legacy_csum)):
result = load_data(filename)
else:
result = function(*args, **kwargs)
save_data(filename, csum, result)
else:
result = function(*args, **kwargs)
return result
return autosave
return decorator_function
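
A sketch of how the decorator is meant to be used (the decorated function is a made-up example):

from mdevaluate import autosave

autosave.enable('analysis-data')

@autosave.autosave_data(nargs=1, version='frame_count-1')
def frame_count(coordinates):
    # The first call computes and saves the result; identical later calls
    # load it from the npz file instead of recomputing.
    return len(coordinates)

n = frame_count(traj, description='all-atoms')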

mdevaluate/checksum.py Executable file

@@ -0,0 +1,91 @@
import functools
import hashlib
from .logging import logger
from types import ModuleType, FunctionType
import inspect
import numpy as np
# This variable is used within the checksum function to salt the sha1 sum.
# May be changed to force a different checksum for similar objects.
SALT = 42
def version(version_nr, calls=[]):
"""Function decorator that assigns a custom checksum to a function."""
def decorator(func):
cs = checksum(func.__name__, version_nr, *calls)
func.__checksum__ = lambda: cs
@functools.wraps(func)
def wrapped(*args, **kwargs):
return func(*args, **kwargs)
return wrapped
return decorator
def strip_comments(s):
"""Strips comment lines and docstring from Python source string."""
o = ''
in_docstring = False
for l in s.split('\n'):
if l.strip().startswith(('#', '"', "'")) or in_docstring:
in_docstring = l.strip().startswith(('"""', "'''")) + in_docstring == 1
continue
o += l + '\n'
return o
def checksum(*args, csum=None):
"""
Calculate a checksum of any object, by sha1 hash.
Input for the hash are some salt bytes and the byte encoding of a string
that depends on the object and its type:
- If a method __checksum__ is available, its return value is converted to bytes
- str or bytes are used as sha1 input directly
- modules use the __name__ attribute
- functions use the function code and any closures of the function
- functools.partial uses the checksum of the function and any arguments that were defined
- numpy.ndarray uses bytes representation of the array (arr.tobytes())
- Anything else is converted to a str
"""
if csum is None:
csum = hashlib.sha1()
csum.update(str(SALT).encode())
for arg in args:
if hasattr(arg, '__checksum__'):
logger.debug('Checksum via __checksum__: %s', str(arg))
csum.update(str(arg.__checksum__()).encode())
elif isinstance(arg, bytes):
csum.update(arg)
elif isinstance(arg, str):
csum.update(arg.encode())
elif isinstance(arg, ModuleType):
csum.update(arg.__name__.encode())
elif isinstance(arg, FunctionType):
csum.update(strip_comments(inspect.getsource(arg)).encode())
c = inspect.getclosurevars(arg)
for v in {**c.nonlocals, **c.globals}.values():
if v is not arg:
checksum(v, csum=csum)
elif isinstance(arg, functools.partial):
logger.debug('Checksum via partial for %s', str(arg))
checksum(arg.func, csum=csum)
for x in arg.args:
checksum(x, csum=csum)
for k in sorted(arg.keywords.keys()):
csum.update(k.encode())
checksum(arg.keywords[k], csum=csum)
elif isinstance(arg, np.ndarray):
csum.update(arg.tobytes())
else:
logger.debug('Checksum via str for %s', str(arg))
csum.update(str(arg).encode())
return int.from_bytes(csum.digest(), 'big')
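
Two properties worth illustrating (a sketch): equal inputs yield equal checksums, and the version decorator pins a function's checksum independent of its source code:

from mdevaluate.checksum import checksum, version

assert checksum('abc') == checksum('abc')  # deterministic
assert checksum('abc') != checksum('abd')  # sensitive to the input

@version(1)
def analysis(x):
    # Edits to this body no longer change the checksum;
    # only bumping the version number above does.
    return x ** 2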

mdevaluate/cli.py Normal file

@@ -0,0 +1,35 @@
import argparse
from . import logging
from . import open as md_open
def run(*args, **kwargs):
parser = argparse.ArgumentParser()
parser.add_argument(
'xtcfile',
help='The xtc file to index.',
)
parser.add_argument(
'--tpr',
help='The tprfile of the trajectory.',
dest='tpr', default=None
)
parser.add_argument(
'--nojump',
help='Generate Nojump Matrices, requires a tpr file.',
dest='nojump', action='store_true', default=False
)
parser.add_argument(
'--debug',
help='Set logging level to debug.',
dest='debug', action='store_true', default=False
)
args = parser.parse_args()
if args.debug:
logging.setlevel('DEBUG')
md_open('', trajectory=args.xtcfile, topology=args.tpr, nojump=args.nojump)
if __name__ == '__main__':
run()
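
Since the module guards run() behind __main__, it can be invoked directly (a hypothetical call; the installed console-script name is not part of this file):

python -m mdevaluate.cli out/traj.xtc --tpr topol.tpr --nojump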

mdevaluate/coordinates.py Executable file

@@ -0,0 +1,563 @@
from functools import partial, lru_cache, wraps
from copy import copy
from .logging import logger
import numpy as np
from scipy.spatial import cKDTree, KDTree
from .atoms import AtomSubset
from .pbc import whole, nojump, pbc_diff
from .utils import mask2indices, singledispatchmethod
from .checksum import checksum
class UnknownCoordinatesMode(Exception):
pass
def rotate_axis(coords, axis):
"""
Rotate a set of coordinates such that the given axis is aligned with the z-axis.
"""
axis = np.array(axis) / np.linalg.norm(axis)
zaxis = np.array([0, 0, 1])
if (axis == zaxis).sum() == 3:
return coords
rotation_axis = np.cross(axis, zaxis)
rotation_axis = rotation_axis / np.linalg.norm(rotation_axis)
theta = np.arccos(axis @ zaxis / np.linalg.norm(axis))
# return theta/pi, rotation_axis
ux, uy, uz = rotation_axis
cross_matrix = np.array([
[0, -uz, uy],
[uz, 0, -ux],
[-uy, ux, 0]
])
rotation_matrix = np.cos(theta) * np.identity(len(axis)) \
+ (1 - np.cos(theta)) * rotation_axis.reshape(-1, 1) @ rotation_axis.reshape(1, -1) \
+ np.sin(theta) * cross_matrix
if len(coords.shape) == 2:
rotated = np.array([rotation_matrix @ xyz for xyz in coords])
else:
rotated = rotation_matrix @ coords
return rotated
def spherical_radius(frame, origin=None):
"""
Transform a frame of cartesian coordinates into the spherical radius.
If origin=None the center of the box is taken as the coordinates origin.
"""
if origin is None:
origin = frame.box.diagonal() / 2
return ((frame - origin)**2).sum(axis=-1)**0.5
def polar_coordinates(x, y):
"""Convert cartesian to polar coordinates."""
radius = (x**2 + y**2)**0.5
phi = np.arctan2(y, x)
return radius, phi
def spherical_coordinates(x, y, z):
"""Convert cartesian to spherical coordinates."""
xy, phi = polar_coordinates(x, y)
radius = (x**2 + y**2 + z**2)**0.5
theta = np.arccos(z / radius)
return radius, phi, theta
def radial_selector(frame, coordinates, rmin, rmax):
"""
Return a selection of all atoms with radius in the interval [rmin, rmax].
"""
crd = coordinates[frame.step]
rad, _ = polar_coordinates(crd[:, 0], crd[:, 1])
selector = (rad >= rmin) & (rad <= rmax)
return mask2indices(selector)
def spatial_selector(frame, transform, rmin, rmax):
"""
Select a subset of atoms which have a radius between rmin and rmax.
Coordinates are filtered by the condition::
rmin <= transform(frame) <= rmax
Args:
frame: The coordinates of the actual trajectory
transform:
A function that transforms the coordinates of the frames into
the one-dimensional spatial coordinate (e.g. radius).
rmin: Minimum value of the radius
rmax: Maximum value of the radius
"""
r = transform(frame)
selector = (rmin <= r) & (rmax >= r)
return mask2indices(selector)
class CoordinateFrame(np.ndarray):
_known_modes = ('pbc', 'whole', 'nojump')
@property
def box(self):
return np.array(self.coordinates.frames[self.step].triclinic_dimensions)
@property
def volume(self):
return self.box.diagonal().cumprod()[-1]
@property
def time(self):
return self.coordinates.frames[self.step].time
@property
def masses(self):
return self.coordinates.atoms.masses[self.coordinates.atom_subset.selection]
@property
def charges(self):
return self.coordinates.atoms.charges[self.coordinates.atom_subset.selection]
@property
def residue_ids(self):
return self.coordinates.atom_subset.residue_ids
@property
def residue_names(self):
return self.coordinates.atom_subset.residue_names
@property
def atom_names(self):
return self.coordinates.atom_subset.atom_names
@property
def indices(self):
return self.coordinates.atom_subset.indices
@property
def selection(self):
return self.coordinates.atom_subset.selection
@property
def whole(self):
frame = whole(self)
frame.mode = 'whole'
return frame
@property
def pbc(self):
frame = self % self.box.diagonal()
frame.mode = 'pbc'
return frame
@property
def nojump(self):
if self.mode != 'nojump':
if self.mode is not None:
logger.warn('Combining Nojump with other Coordinate modes is not supported and may cause unexpected results.')
frame = nojump(self)
frame.mode = 'nojump'
return frame
else:
return self
def __new__(subtype, shape, dtype=float, buffer=None, offset=0, strides=None, order=None,
coordinates=None, step=None, box=None, mode=None):
obj = np.ndarray.__new__(subtype, shape, dtype, buffer, offset, strides)
obj.coordinates = coordinates
obj.step = step
obj.mode = mode
return obj
def __array_finalize__(self, obj):
if obj is None:
return
self.coordinates = getattr(obj, 'coordinates', None)
self.step = getattr(obj, 'step', None)
self.mode = getattr(obj, 'mode', None)
if hasattr(obj, 'reference'):
self.reference = getattr(obj, 'reference')
class Coordinates:
"""
Coordinates represent trajectory data, which is used for evaluation functions.
Atoms may be selected by specifying an atom_subset or an atom_filter.
"""
def get_mode(self, mode):
if self.atom_subset is not None:
return Coordinates(frames=self.frames, atom_subset=self.atom_subset, mode=mode)[self._slice]
else:
return Coordinates(frames=self.frames, atom_filter=self.atom_filter, mode=mode)[self._slice]
@property
def pbc(self):
return self.get_mode('pbc')
@property
def whole(self):
return self.get_mode('whole')
@property
def nojump(self):
return self.get_mode('nojump')
@property
def mode(self):
return self._mode
@mode.setter
def mode(self, val):
if val in CoordinateFrame._known_modes:
logger.warn('Changing the Coordinates mode directly is deprecated. Use Coordinates.%s instead, which returns a copy.', val)
self._mode = val
else:
raise UnknownCoordinatesMode('No such mode: {}'.format(val))
def __init__(self, frames, atom_filter=None, atom_subset: AtomSubset=None, mode=None):
"""
Args:
frames: The trajectory reader
atom_filter (opt.): A mask which selects a subset of the system
atom_subset (opt.): A AtomSubset that selects a subset of the system
mode (opt.): PBC mode of the Coordinates, can be pbc, whole or nojump.
Note:
The caching in Coordinates is deprecated, use the CachedReader or the function open
from the reader module instead.
"""
self._mode = mode
self.frames = frames
self._slice = slice(None)
assert atom_filter is None or atom_subset is None, "Cannot use both: subset and filter"
if atom_filter is not None:
self.atom_filter = atom_filter
self.atom_subset = None
elif atom_subset is not None:
self.atom_filter = atom_subset.selection
self.atom_subset = atom_subset
self.atoms = atom_subset.atoms
else:
self.atom_filter = np.ones(shape=(len(frames[0].coordinates),), dtype=bool)
self.atom_subset = None
def get_frame(self, fnr):
"""Returns the fnr-th frame."""
try:
if self.atom_filter is not None:
frame = self.frames[fnr].positions[self.atom_filter].view(CoordinateFrame)
else:
frame = self.frames.__getitem__(fnr).positions.view(CoordinateFrame)
frame.coordinates = self
frame.step = fnr
if self.mode is not None:
frame = getattr(frame, self.mode)
except EOFError:
raise IndexError
return frame
def clear_cache(self):
"""Clears the frame cache, if it is enabled."""
if hasattr(self.get_frame, 'clear_cache'):
self.get_frame.clear_cache()
def __iter__(self):
for i in range(len(self))[self._slice]:
yield self[i]
@singledispatchmethod
def __getitem__(self, item):
return self.get_frame(item)
@__getitem__.register(slice)
def _(self, item):
sliced = copy(self)
sliced._slice = item
return sliced
def __len__(self):
return len(self.frames)
def __checksum__(self):
return checksum(self.frames, self.atom_filter, self._slice, self.mode)
def __repr__(self):
return "Coordinates <{}>: {}".format(self.frames.filename, self.atom_subset)
@wraps(AtomSubset.subset)
def subset(self, **kwargs):
return Coordinates(self.frames, atom_subset=self.atom_subset.subset(**kwargs), mode=self._mode)
@property
def description(self):
return self.atom_subset.description
@description.setter
def description(self, desc):
self.atom_subset.description = desc
class MeanCoordinates(Coordinates):
def __init__(self, frames, atom_filter=None, mean=1):
super().__init__(frames, atom_filter)
self.mean = mean
assert mean >= 1, "Mean must be positive"
def __getitem__(self, item):
frame = super().__getitem__(item)
for i in range(item + 1, item + self.mean):
frame += super().__getitem__(i)
return frame / self.mean
def __len__(self):
    return len(self.frames) - self.mean + 1
class CoordinatesMap:
def __init__(self, coordinates, function):
self.coordinates = coordinates
self.frames = self.coordinates.frames
self.atom_subset = self.coordinates.atom_subset
self.function = function
if isinstance(function, partial):
self._description = self.function.func.__name__
else:
self._description = self.function.__name__
def __iter__(self):
for frame in self.coordinates:
step = frame.step
frame = self.function(frame)
if not isinstance(frame, CoordinateFrame):
frame = frame.view(CoordinateFrame)
frame.coordinates = self
frame.step = step
yield frame
def __getitem__(self, item):
if isinstance(item, slice):
return self.__class__(self.coordinates[item], self.function)
else:
frame = self.function(self.coordinates.__getitem__(item))
if not isinstance(frame, CoordinateFrame):
frame = frame.view(CoordinateFrame)
frame.coordinates = self
frame.step = item
return frame
def __len__(self):
return len(self.coordinates.frames)
def __checksum__(self):
return checksum(self.coordinates, self.function)
@wraps(Coordinates.subset)
def subset(self, **kwargs):
return CoordinatesMap(self.coordinates.subset(**kwargs), self.function)
@property
def description(self):
return '{}_{}'.format(self._description, self.coordinates.description)
@description.setter
def description(self, desc):
self._description = desc
@property
def nojump(self):
return CoordinatesMap(self.coordinates.nojump, self.function)
@property
def whole(self):
return CoordinatesMap(self.coordinates.whole, self.function)
@property
def pbc(self):
return CoordinatesMap(self.coordinates.pbc, self.function)
class CoordinatesFilter:
@property
def atom_subset(self):
pass
def __init__(self, coordinates, atom_filter):
self.coordinates = coordinates
self.atom_filter = atom_filter
def __getitem__(self, item):
if isinstance(item, slice):
sliced = copy(self)
sliced.coordinates = self.coordinates[item]
return sliced
else:
frame = self.coordinates[item]
return frame[self.atom_filter]
class CoordinatesKDTree:
"""
A KDTree of coordinates frames. The KDtrees are cached by a :func:`functools.lru_cache`.
Uses :class:`scipy.spatial.cKDTree` by default, since it's significantly faster.
Make sure to use scipy 0.17 or later or switch to the normal KDTree, since cKDTree has
a memory leak in earlier versions.
"""
def clear_cache(self):
"""Clear the LRU cache."""
self._get_tree_at_index.cache_clear()
@property
def cache_info(self):
"""Return info about the state of the cache."""
return self._get_tree_at_index.cache_info()
def _get_tree_at_index(self, index):
frame = self.frames[index]
return self.kdtree(frame[self.selector(frame)])
def __init__(self, frames, selector=None, boxsize=None, maxcache=128, ckdtree=True):
"""
Args:
frames: Trajectory of the simulation, can be Coordinates object or reader
selector: Selector function that selects a subset of each frame
maxcache: Maxsize of the :func:`~functools.lru_cache`
ckdtree: Use :class:`~scipy.spatial.cKDTree` or :class:`~scipy.spatial.KDTree` if False
"""
if selector is not None:
self.selector = selector
else:
self.selector = lambda x: slice(None)
self.frames = frames
self.kdtree = cKDTree if ckdtree else KDTree
if boxsize is not None:
self.kdtree = partial(self.kdtree, boxsize=boxsize)
self._get_tree_at_index = lru_cache(maxsize=maxcache)(self._get_tree_at_index)
def __getitem__(self, index):
return self._get_tree_at_index(index)
def __checksum__(self):
return checksum(self.selector, self.frames)
def __eq__(self, other):
return super().__eq__(other)
def map_coordinates(func):
@wraps(func)
def wrapped(coordinates, **kwargs):
return CoordinatesMap(coordinates, partial(func, **kwargs))
return wrapped
@map_coordinates
def centers_of_mass(c, *, masses=None):
"""
A- 1
B- 2
A- 1
C 3
A-
B-
A-
C
A-
B-
A-
C
Example:
rd = XTCReader('t.xtc')
coordinates = Coordinates(rd)
com = centers_of_mass(coordinates, (1.0, 2.0, 1.0, 3.0))
"""
# regroup the coordinate array into chunks of len(masses) atoms
number_of_masses = len(masses)
number_of_coordinates, number_of_dimensions = c.shape
number_of_new_coordinates = number_of_coordinates // number_of_masses
grouped_coordinates = c.reshape(number_of_new_coordinates, number_of_masses, number_of_dimensions)
return np.average(grouped_coordinates, axis=1, weights=masses)
@map_coordinates
def pore_coordinates(coordinates, origin, sym_axis='z'):
"""
Map coordinates of a pore simulation so the pore has cylindrical symmetry.
Args:
coordinates: Coordinates of the simulation
origin: Origin of the pore which will be the coordinates origin after mapping
sym_axis (opt.): Symmetry axis of the pore, may be a literal direction
    'x', 'y' or 'z' or an array of shape (3,)
"""
if sym_axis in ('x', 'y', 'z'):
rot_axis = np.zeros(shape=(3,))
rot_axis[['x', 'y', 'z'].index(sym_axis)] = 1
else:
rot_axis = sym_axis
return rotate_axis(coordinates - origin, rot_axis)
@map_coordinates
def vectors(coordinates, atoms_a, atoms_b, normed=False, box=None):
"""
Compute the vectors between the atoms of two subsets.
Args:
coordinates: The Coordinates object the atoms will be taken from
atoms_a: Mask or indices of the first atom subset
atoms_b: Mask or indices of the second atom subset
normed (opt.): If the vectors should be normalized
box (opt.): If not None, the vectors are calculated with periodic boundary conditions
The definition of atoms_a/b can be any possible subscript of a numpy array.
They can, for example, be given as a masking array of bool values with the
same length as the frames of the coordinates. Or they can be a list of
indices selecting the atoms of these indices from each frame.
It is possible to compute the mean of several atoms before calculating the vectors,
by using a two-dimensional list of indices. The following code computes the vectors
between atoms 0, 3, 6 and the mean coordinate of atoms 1, 4, 7 and 2, 5, 8::
>>> inds_a = [0, 3, 6]
>>> inds_b = [[1, 4, 7], [2, 5, 8]]
>>> vectors(coords, inds_a, inds_b)
array([
coords[0] - (coords[1] + coords[2])/2,
coords[3] - (coords[4] + coords[5])/2,
coords[6] - (coords[7] + coords[8])/2,
])
"""
coords_a = coordinates[atoms_a]
if len(coords_a.shape) > 2:
coords_a = coords_a.mean(axis=0)
coords_b = coordinates[atoms_b]
if len(coords_b.shape) > 2:
coords_b = coords_b.mean(axis=0)
vectors = pbc_diff(coords_a, coords_b, box=box)
norm = np.linalg.norm(vectors, axis=-1).reshape(-1, 1) if normed else 1
vectors.reference = coords_a
return vectors / norm
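
An illustrative pipeline over the mapping helpers above (the masses and the pore origin are made-up values; `traj` as in the earlier sketch):

from mdevaluate.coordinates import centers_of_mass, pore_coordinates

# Center-of-mass trajectory of three-site water molecules (O, H, H).
water = traj.subset(residue_name='SOL')
com = centers_of_mass(water, masses=(15.999, 1.008, 1.008))
# Shift and rotate a pore simulation so the pore axis becomes the z-axis.
pore = pore_coordinates(com, origin=(2.0, 2.0, 0.0))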

mdevaluate/correlation.py Normal file

@@ -0,0 +1,358 @@
import numpy as np
from scipy.special import legendre
from itertools import chain
import dask.array as darray
from .autosave import autosave_data
from .utils import filon_fourier_transformation, coherent_sum, histogram
from .pbc import pbc_diff
from .logging import logger
def set_has_counter(func):
func.has_counter = True
return func
def log_indices(first, last, num=100):
ls = np.logspace(0, np.log10(last - first + 1), num=num)
return np.unique(np.int_(ls) - 1 + first)
def correlation(function, frames):
iterator = iter(frames)
start_frame = next(iterator)
return map(lambda f: function(start_frame, f), chain([start_frame], iterator))
def subensemble_correlation(selector_function, correlation_function=correlation):
def c(function, frames):
iterator = iter(frames)
start_frame = next(iterator)
selector = selector_function(start_frame)
subensemble = map(lambda f: f[selector], chain([start_frame], iterator))
return correlation_function(function, subensemble)
return c
def multi_subensemble_correlation(selector_function):
"""
selector_function has to expect a frame and to
return either valid indices (as with subensemble_correlation)
or a multidimensional array whose entries are valid indices
e.g. slice(10,100,2)
e.g. [1,2,3,4,5]
e.g. [[[0,1],[2],[3]],[[4],[5],[6]]] -> shape: 2,3 with
     lists of indices of varying length
e.g. [slice(1653),slice(1653,None,3)]
e.g. [np.ones(len_of_frames, bool)]
In general, using slices is the most efficient. If the selections are small
subsets of a frame, or when many subsets are empty, using indices will be
more efficient than using masks.
"""
@set_has_counter
def cmulti(function, frames):
iterator = iter(frames)
start_frame = next(iterator)
selectors = np.asarray(selector_function(start_frame))
sel_shape = selectors.shape
if sel_shape[-1] == 0: selectors = np.asarray(selectors,int)
if (selectors.dtype != object): sel_shape = sel_shape[:-1]
f_values = np.zeros(sel_shape + function(start_frame,start_frame).shape,)
count = np.zeros(sel_shape, dtype=int)
is_first_frame_loop = True
def cc(act_frame):
nonlocal is_first_frame_loop
for index in np.ndindex(sel_shape):
sel = selectors[index]
sf_sel = start_frame[sel]
if is_first_frame_loop:
count[index] = len(sf_sel)
f_values[index] = function(sf_sel, act_frame[sel]) if count[index] != 0 else 0
is_first_frame_loop = False
return np.asarray(f_values.copy())
return map(cc, chain([start_frame], iterator)), count
return cmulti
@autosave_data(nargs=2, kwargs_keys=(
'index_distribution', 'correlation', 'segments', 'window', 'skip', 'average'
), version='shifted_correlation-1')
def shifted_correlation(function, frames,
index_distribution=log_indices, correlation=correlation,
segments=10, window=0.5, skip=None,
average=False, ):
"""
Calculate the time series for a correlation function.
The times at which the correlation is calculated are determined automatically by the
function given as ``index_distribution``. The default is a logarithmic distribution.
Args:
function: The function that should be correlated
frames: The coordinates of the simulation data
index_distribution (opt.):
A function that returns the indices for which the timeseries
will be calculated
correlation (function, opt.):
The correlation function
segments (int, opt.):
The number of segments the time window will be shifted
window (float, opt.):
The fraction of the simulation the time series will cover
skip (float, opt.):
The fraction of the trajectory that will be skipped at the beginning,
if this is None the start index of the frames slice will be used,
which defaults to 0.
    average (bool, opt.):
        If True, returns the averaged correlation function
Returns:
    tuple:
        An array of length N with the times at which the correlation was
        calculated and a numpy array of shape (segments, N) that holds the
        (non-averaged) correlation data. If the correlation function defines
        a counter (``has_counter == True``), the number of counts is added to
        the output tuple, and any average is weighted by it.
Example:
Calculating the mean square displacement of a coordinates object named ``coords``:
>>> indices, data = shifted_correlation(msd, coords)
"""
if skip is None:
try:
skip = frames._slice.start / len(frames)
except (TypeError, AttributeError):
skip = 0
assert window + skip < 1
start_frames = np.unique(np.linspace(
len(frames) * skip, len(frames) * (1 - window),
num=segments, endpoint=False, dtype=int
))
num_frames = int(len(frames) * (window))
idx = index_distribution(0, num_frames)
def correlate(start_frame):
shifted_idx = idx + start_frame
return correlation(function, map(frames.__getitem__, shifted_idx))
times = np.array([frames[i].time for i in idx]) - frames[0].time
if getattr(correlation, "has_counter", False):
if average:
for i, start_frame in enumerate(start_frames):
act_result, act_count = correlate(start_frame)
act_result = np.array(list(act_result))
act_count = np.array(act_count)
if i == 0:
count = act_count
cdim = act_count.ndim
rdim = act_result.ndim
bt = np.newaxis,
for i in range(rdim - 1):
if i >= cdim:
bt += np.newaxis,
else:
bt += slice(None),
result = act_result * act_count[bt]
else:
result += act_result * act_count[bt]
count += act_count
np.divide(result, count[bt], out = result, where = count[bt] != 0)
result = np.moveaxis(result,0,cdim)
count = count / len(start_frames)
output = times, result, count
else:
count = []
result = []
for i, start_frame in enumerate(start_frames):
act_result, act_count = correlate(start_frame)
act_result = list(act_result)
result.append(act_result)
count.append(act_count)
count = np.asarray(count)
cdim = count.ndim
result = np.asarray(result)
result = np.moveaxis(result,1,cdim)
output = times, result, count
else:
result = 0 if average else []
for i, start_frame in enumerate(start_frames):
if average:
result += np.array(list(correlate(start_frame)))
else:
result.append(list(correlate(start_frame)))
result = np.array(result)
if average:
result = result / len(start_frames)
output = times, result
return output
def msd(start, frame):
"""
Mean square displacement
"""
vec = start - frame
return (vec ** 2).sum(axis=1).mean()
def isf(start, frame, q, box=None):
"""
Incoherent intermediate scattering function. To specify q, use
water_isf = functools.partial(isf, q=22.77) # q has the value 22.77 nm^-1
:param q: length of scattering vector
"""
vec = start - frame
distance = (vec ** 2).sum(axis=1) ** .5
return np.sinc(distance * q / np.pi).mean()
def rotational_autocorrelation(onset, frame, order=2):
"""
Compute the rotational autocorrelation of the Legendre polynomial for the given vectors.
Args:
onset, frame: CoordinateFrames of vectors
order (opt.): Order of the legendre polynomial.
Returns:
Scalar value of the correlation function.
"""
scalar_prod = (onset * frame).sum(axis=-1)
poly = legendre(order)
return poly(scalar_prod).mean()
def van_hove_self(start, end, bins):
r"""
Compute the self part of the Van Hove autocorrelation function.
.. math::
    G(r, t) = \frac{1}{N} \sum_i \delta(|\vec r_i(0) - \vec r_i(t)| - r)
"""
vec = start - end
delta_r = ((vec)**2).sum(axis=-1)**.5
return 1 / len(start) * histogram(delta_r, bins)[0]
def van_hove_distinct(onset, frame, bins, box=None, use_dask=True, comp=False, bincount=True):
r"""
Compute the distinct part of the Van Hove autocorrelation function.
.. math::
    G(r, t) = \frac{1}{N} \sum_{i, j} \delta(|\vec r_i(0) - \vec r_j(t)| - r)
"""
if box is None:
box = onset.box.diagonal()
dimension = len(box)
N = len(onset)
if use_dask:
onset = darray.from_array(onset, chunks=(500, dimension)).reshape(1, N, dimension)
frame = darray.from_array(frame, chunks=(500, dimension)).reshape(N, 1, dimension)
dist = ((pbc_diff(onset, frame, box)**2).sum(axis=-1)**0.5).ravel()
if np.diff(bins).std() < 1e-6:
    # equidistant bins: the faster bincount can be used
    dx = bins[1] - bins[0]
    hist = darray.bincount((dist // dx).astype(int), minlength=(len(bins) - 1))
else:
hist = darray.histogram(dist, bins=bins)[0]
return hist.compute() / N
else:
if comp:
dx = bins[1] - bins[0]
minlength = len(bins) - 1
def f(x):
d = (pbc_diff(x, frame, box)**2).sum(axis=-1)**0.5
return np.bincount((d // dx).astype(int), minlength=minlength)[:minlength]
hist = sum(f(x) for x in onset)
else:
dist = (pbc_diff(onset.reshape(1, -1, 3), frame.reshape(-1, 1, 3), box)**2).sum(axis=-1)**0.5
hist = histogram(dist, bins=bins)[0]
return hist / N
def overlap(onset, frame, crds_tree, radius):
"""
Compute the overlap with a reference configuration defined in a CoordinatesTree.
Args:
onset: Initial frame, this is only used to get the frame index
frame: The current configuration
crds_tree: A CoordinatesTree of the reference configurations
radius: The cutoff radius for the overlap
This function is intended to be used with :func:`shifted_correlation`.
As usual the first two arguments are used internally and the remaining ones
should be defined with :func:`functools.partial`.
If the overlap of a subset of the system should be calculated, this has to be
defined through a selection of the reference configurations in the CoordinatesTree.
Example:
>>> shifted_correlation(
... partial(overlap, crds_tree=CoordinatesTree(traj), radius=0.11),
... traj
... )
"""
tree = crds_tree[onset.step]
return (tree.query(frame)[0] <= radius).sum() / tree.n
def susceptibility(time, correlation, **kwargs):
"""
Calculate the susceptibility of a correlation function.
Args:
time: Timesteps of the correlation data
correlation: Value of the correlation function
**kwargs (opt.):
Additional keyword arguments will be passed to :func:`filon_fourier_transformation`.
"""
frequencies, fourier = filon_fourier_transformation(time, correlation, imag=False, **kwargs)
return frequencies, frequencies * fourier
def coherent_scattering_function(onset, frame, q):
"""
Calculate the coherent scattering function.
"""
box = onset.box.diagonal()
dimension = len(box)
def scfunc(x, y):
sqdist = 0
for i in range(dimension):
d = x[i] - y[i]
if d > box[i] / 2:
d -= box[i]
if d < -box[i] / 2:
d += box[i]
sqdist += d**2
x = sqdist**0.5 * q
if x == 0:
return 1.0
else:
return np.sin(x) / x
return coherent_sum(scfunc, onset.pbc, frame.pbc) / len(onset)
def non_gaussian(onset, frame):
"""
Calculate the Non-Gaussian parameter :
..math:
\alpha_2 (t) = \frac{3}{5}\frac{\langle r_i^4(t)\rangle}{\langle r_i^2(t)\rangle^2} - 1
"""
r_2 = ((frame - onset)**2).sum(axis=-1)
return 3 / 5 * (r_2**2).mean() / r_2.mean()**2 - 1
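
A sketch tying these pieces together (`oxygens` as in the earlier sketches; the q value is made up):

from functools import partial
from mdevaluate.correlation import shifted_correlation, msd, isf

# Mean square displacement, averaged over the shifted time origins.
times, msd_mean = shifted_correlation(msd, oxygens.nojump, average=True)
# Incoherent intermediate scattering function at q = 22.7 nm^-1.
times, isf_mean = shifted_correlation(partial(isf, q=22.7), oxygens.nojump, average=True)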

mdevaluate/distribution.py Normal file

@@ -0,0 +1,359 @@
import numpy as np
from .coordinates import rotate_axis, polar_coordinates, spherical_coordinates
from .atoms import next_neighbors
from .autosave import autosave_data
from .utils import runningmean
from .pbc import pbc_diff, pbc_points
from .logging import logger
from scipy import spatial
@autosave_data(nargs=2, kwargs_keys=('coordinates_b',), version='time_average-1')
def time_average(function, coordinates, coordinates_b=None, pool=None):
"""
Compute the time average of a function.
Args:
function:
The function that will be averaged, it has to accept exactly one argument
which is the current atom set
coordinates: The coordinates object of the simulation
coordinates_b (opt.): A second coordinates object, for functions of two frames
pool (multiprocessing.Pool, opt.):
    A multiprocessing pool which will be used for concurrent calculation of the
    averaged function
"""
if pool is not None:
_map = pool.imap
else:
_map = map
number_of_averages = 0
result = 0
if coordinates_b is not None:
if coordinates._slice != coordinates_b._slice:
logger.warning("Different slice for coordinates and coordinates_b.")
coordinate_iter = (iter(coordinates), iter(coordinates_b))
else:
coordinate_iter = (iter(coordinates),)
evaluated = _map(function, *coordinate_iter)
for ev in evaluated:
number_of_averages += 1
result += ev
if number_of_averages % 100 == 0:
logger.debug('time_average: %d', number_of_averages)
return result / number_of_averages
def time_histogram(function, coordinates, bins, hist_range, pool=None):
coordinate_iter = iter(coordinates)
if pool is not None:
_map = pool.imap
else:
_map = map
evaluated = _map(function, coordinate_iter)
results = []
hist_results = []
for num, ev in enumerate(evaluated):
results.append(ev)
if num % 100 == 0 and num > 0:
print(num)
r = np.array(results).T
for i, row in enumerate(r):
histo, _ = np.histogram(row, bins=bins, range=hist_range)
if len(hist_results) <= i:
hist_results.append(histo)
else:
hist_results[i] += histo
results = []
return hist_results
def rdf(atoms_a, atoms_b=None, bins=None, box=None, kind=None, chunksize=50000, returnx=False, **kwargs):
r"""
Compute the radial pair distribution of one or two sets of atoms.
.. math::
g_{AB}(r) = \frac{1}{\langle \rho_B\rangle N_A}\sum\limits_{i\in A}^{N_A}
\sum\limits_{j\in B}^{N_B}\frac{\delta(r_{ij} -r)}{4\pi r^2}
For use with :func:`time_average`, define bins through the use of :func:`~functools.partial`,
the atom sets are passed to :func:`time_average`, if a second set of atoms should be used
specify it as ``coordinates_b`` and it will be passed to this function.
Args:
atoms_a: First set of atoms, used internally
atoms_b (opt.): Second set of atoms, used internally
bins: Bins of the radial distribution function
box (opt.): Simulations box, if not specified this is taken from ``atoms_a.box``
kind (opt.): Can be 'inter', 'intra' or None (default).
chunksize (opt.):
For large systems (N > 1000) the distances have to be computed in chunks so
the arrays fit into memory; this parameter controls the size of these chunks.
It should be as large as possible, depending on the available memory.
returnx (opt.): If True the x ordinate of the histogram is returned.
"""
assert bins is not None, 'Bins of the pair distribution have to be defined.'
assert kind in ['intra', 'inter', None], 'Argument kind must be one of the following: intra, inter, None.'
if box is None:
box = atoms_a.box.diagonal()
if atoms_b is None:
atoms_b = atoms_a
nr_of_atoms = len(atoms_a)
indices = np.triu_indices(nr_of_atoms, k=1)
else:
nr_a, dim = atoms_a.shape
nr_b, dim = atoms_b.shape
indices = np.array([(i, j) for i in range(nr_a) for j in range(nr_b)]).T
# compute the histogram in chunks for large systems
hist = 0
nr_of_samples = 0
for chunk in range(0, len(indices[0]), chunksize):
sl = slice(chunk, chunk + chunksize)
diff = pbc_diff(atoms_a[indices[0][sl]], atoms_b[indices[1][sl]], box)
dist = (diff**2).sum(axis=1)**0.5
if kind == 'intra':
mask = atoms_a.residue_ids[indices[0][sl]] == atoms_b.residue_ids[indices[1][sl]]
dist = dist[mask]
elif kind == 'inter':
mask = atoms_a.residue_ids[indices[0][sl]] != atoms_b.residue_ids[indices[1][sl]]
dist = dist[mask]
nr_of_samples += len(dist)
hist += np.histogram(dist, bins)[0]
volume = 4 / 3 * np.pi * (bins[1:]**3 - bins[:-1]**3)
density = nr_of_samples / np.prod(box)
res = hist / volume / density
if returnx:
return np.vstack((runningmean(bins, 2), res))
else:
return res
def pbc_tree_rdf(atoms_a, atoms_b=None, bins=None, box=None, exclude=0, returnx=False, **kwargs):
if box is None:
box = atoms_a.box.diagonal()
all_coords = pbc_points(pbc_diff(atoms_b,box=box), box, thickness=np.amax(bins)+0.1, center=0)
to_tree = spatial.cKDTree(all_coords)
dist = to_tree.query(pbc_diff(atoms_a,box=box),k=len(atoms_b), distance_upper_bound=np.amax(bins)+0.1)[0].flatten()
dist = dist[dist < np.inf]
hist = np.histogram(dist, bins)[0]
volume = 4/3*np.pi*(bins[1:]**3-bins[:-1]**3)
res = (hist) * np.prod(box) / volume / len(atoms_a) / (len(atoms_b)-exclude)
if returnx:
return np.vstack((runningmean(bins, 2), res))
else:
return res
def pbc_spm_rdf(atoms_a, atoms_b=None, bins=None, box=None, exclude=0, returnx=False, **kwargs):
if box is None:
box = atoms_a.box.diagonal()
all_coords = pbc_points(pbc_diff(atoms_b,box=box), box, thickness=np.amax(bins)+0.1, center=0)
to_tree = spatial.cKDTree(all_coords)
if all_coords.nbytes/1024**3 * len(atoms_a) < 2:
from_tree = spatial.cKDTree(pbc_diff(atoms_a,box=box))
dist = to_tree.sparse_distance_matrix(from_tree, max_distance=np.amax(bins)+0.1, output_type='ndarray')
dist = np.asarray(dist.tolist())[:,2]
hist = np.histogram(dist, bins)[0]
else:
chunksize = int(2 * len(atoms_a) / (all_coords.nbytes/1024**3 * len(atoms_a)))
hist = 0
for chunk in range(0, len(atoms_a), chunksize):
sl = slice(chunk, chunk + chunksize)
from_tree = spatial.cKDTree(pbc_diff(atoms_a[sl],box=box))
dist = to_tree.sparse_distance_matrix(from_tree, max_distance=np.amax(bins)+0.1, output_type='ndarray')
dist = np.asarray(dist.tolist())[:,2]
hist += np.histogram(dist, bins)[0]
volume = 4/3*np.pi*(bins[1:]**3-bins[:-1]**3)
res = (hist) * np.prod(box) / volume / len(atoms_a) / (len(atoms_b)-exclude)
if returnx:
return np.vstack((runningmean(bins, 2), res))
else:
return res
@autosave_data(nargs=2, kwargs_keys=('to_coords','times'))
def fast_averaged_rdf(from_coords, bins, to_coords=None, times=10, exclude=0, **kwargs):
if to_coords is None:
to_coords = from_coords
exclude = 1
# first find timings for the different rdf functions
import time
# only consider sparse matrix for this condition
if (len(from_coords[0])*len(to_coords[0]) <= 3000 * 2000 ) & (len(from_coords[0])/len(to_coords[0]) > 5 ):
funcs = [rdf, pbc_tree_rdf, pbc_spm_rdf]
else:
funcs = [rdf, pbc_tree_rdf]
timings = []
for f in funcs:
start = time.time()
f(from_coords[0], atoms_b=to_coords[0], bins=bins, box=np.diag(from_coords[0].box))
end = time.time()
timings.append(end-start)
timings = np.array(timings)
timings[0] = 2*timings[0] # statistics for the other functions is twice as good per frame
logger.debug('rdf function timings: ' + str(timings))
rdffunc = funcs[np.argmin(timings)]
logger.debug('rdf function used: ' + str(rdffunc))
if rdffunc == rdf:
times = times*2 # duplicate times for same statistics
frames = np.array(range(0, len(from_coords), int(len(from_coords)/times)))[:times]
out = np.zeros(len(bins)-1)
for j, i in enumerate(frames):
logger.debug('multi_radial_pair_distribution: %d/%d', j, len(frames))
out += rdffunc(from_coords[i], to_coords[i], bins, box=np.diag(from_coords[i].box), exclude=exclude)
return out/len(frames)
def distance_distribution(atoms, bins):
connection_vectors = atoms[:-1, :] - atoms[1:, :]
connection_lengths = (connection_vectors**2).sum(axis=1)**.5
return np.histogram(connection_lengths, bins)[0]
def tetrahedral_order(atoms, reference_atoms=None):
if reference_atoms is None:
reference_atoms = atoms
indices = next_neighbors(reference_atoms, query_atoms=atoms, number_of_neighbors=4)
neighbors = reference_atoms[indices]
neighbors_1, neighbors_2, neighbors_3, neighbors_4 = \
neighbors[:, 0, :], neighbors[:, 1, :], neighbors[:, 2, :], neighbors[:, 3, :]
# Connection vectors
neighbors_1 -= atoms
neighbors_2 -= atoms
neighbors_3 -= atoms
neighbors_4 -= atoms
# Normed Connection vectors
neighbors_1 /= np.linalg.norm(neighbors_1, axis=-1).reshape(-1, 1)
neighbors_2 /= np.linalg.norm(neighbors_2, axis=-1).reshape(-1, 1)
neighbors_3 /= np.linalg.norm(neighbors_3, axis=-1).reshape(-1, 1)
neighbors_4 /= np.linalg.norm(neighbors_4, axis=-1).reshape(-1, 1)
a_1_2 = ((neighbors_1 * neighbors_2).sum(axis=1) + 1 / 3)**2
a_1_3 = ((neighbors_1 * neighbors_3).sum(axis=1) + 1 / 3)**2
a_1_4 = ((neighbors_1 * neighbors_4).sum(axis=1) + 1 / 3)**2
a_2_3 = ((neighbors_2 * neighbors_3).sum(axis=1) + 1 / 3)**2
a_2_4 = ((neighbors_2 * neighbors_4).sum(axis=1) + 1 / 3)**2
a_3_4 = ((neighbors_3 * neighbors_4).sum(axis=1) + 1 / 3)**2
q = 1 - 3 / 8 * (a_1_2 + a_1_3 + a_1_4 + a_2_3 + a_2_4 + a_3_4)
return q
def tetrahedral_order_distribution(atoms, reference_atoms=None, bins=None):
assert bins is not None, 'Bin edges of the distribution have to be specified.'
Q = tetrahedral_order(atoms, reference_atoms=reference_atoms)
return np.histogram(Q, bins=bins)[0]
def radial_density(atoms, bins, symmetry_axis=(0, 0, 1), origin=(0, 0, 0), height=1, returnx=False):
"""
Calculate the radial density distribution.
This function is meant to be used with time_average.
Args:
atoms:
Set of coordinates.
bins:
Bin specification that is passed to numpy.histogram. This needs to be
a list of bin edges if the function is used within time_average.
symmetry_axis (opt.):
Vector of the symmetry axis, around which the radial density is calculated,
default is z-axis.
origin (opt.):
Origin of the rotational symmetry, e.g. center of the pore.
height (opt.):
Height of the pore, necessary for correct normalization of the density.
returnx (opt.):
If True, the x ordinate of the distribution is returned.
"""
cartesian = rotate_axis(atoms - origin, symmetry_axis)
radius, _ = polar_coordinates(cartesian[:, 0], cartesian[:, 1])
hist = np.histogram(radius, bins=bins)[0]
volume = np.pi * (bins[1:]**2 - bins[:-1]**2) * height
res = hist / volume
if returnx:
return np.vstack((runningmean(bins, 2), res))
else:
return res
def shell_density(atoms, shell_radius, bins, shell_thickness=0.5,
symmetry_axis=(0, 0, 1), origin=(0, 0, 0)):
"""
Compute the density distribution on a cylindrical shell.
Args:
atoms: The coordinates of the atoms
shell_radius: Inner radius of the shell
bins: Histogram bins, this has to be a two-dimensional list of bins: [angle, z]
shell_thickness (opt.): Thickness of the shell, default is 0.5
symmetry_axis (opt.): The symmetry axis of the pore, the coordinates will be
    rotated such that this axis is the z-axis
origin (opt.): Origin of the pore, the coordinates will be moved such that this
    is the new origin.
Returns:
Two-dimensional density distribution of the atoms in the defined shell.
"""
cartesian = rotate_axis(atoms-origin, symmetry_axis)
radius, theta = polar_coordinates(cartesian[:, 0], cartesian[:, 1])
shell_indices = (shell_radius <= radius) & (radius <= shell_radius + shell_thickness)
hist = np.histogram2d(theta[shell_indices], cartesian[shell_indices, 2], bins)[0]
return hist
def spatial_density(atoms, bins, weights=None):
"""
Compute the spatial density distribution.
"""
density, _ = np.histogramdd(atoms, bins=bins, weights=weights)
return density
def mixing_ratio_distribution(atoms_a, atoms_b, bins_ratio, bins_density,
weights_a=None, weights_b=None, weights_ratio=None):
"""
Compute the distribution of the mixing ratio of two sets of atoms.
"""
density_a, _ = np.histogramdd(atoms_a, bins=bins_density, weights=weights_a)
density_b, _ = np.histogramdd(atoms_b, bins=bins_density, weights=weights_b)
mixing_ratio = density_a/(density_a + density_b)
good_inds = (density_a != 0) & (density_b != 0)
hist, _ = np.histogram(mixing_ratio[good_inds], bins=bins_ratio, weights=weights_ratio)
return hist
def next_neighbor_distribution(atoms, reference=None, number_of_neighbors=4, bins=None, normed=True):
"""
Compute the distribution of next neighbors with the same residue name.
"""
assert bins is not None, 'Bins have to be specified.'
if reference is None:
reference = atoms
nn = next_neighbors(reference, query_atoms=atoms, number_of_neighbors=number_of_neighbors)
resname_nn = reference.residue_names[nn]
count_nn = (resname_nn == atoms.residue_names.reshape(-1, 1)).sum(axis=1)
return np.histogram(count_nn, bins=bins, normed=normed)[0]
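
A sketch of a typical radial distribution computation with time_average (the bin range and `oxygens` are illustrative):

import numpy as np
from functools import partial
from mdevaluate.distribution import time_average, rdf

bins = np.arange(0.0, 2.0, 0.01)
# g(r) between the selected atoms, averaged over all frames of the trajectory.
gr = time_average(partial(rdf, bins=bins), oxygens.pbc)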

mdevaluate/functions.py Normal file

@@ -0,0 +1,38 @@
import numpy as np
def kww(t, A, τ, β):
return A * np.exp(-(t / τ)**β)
def kww_1e(A, τ, β):
return τ * (-np.log(1 / (np.e * A)))**(1 / β)
def cole_davidson(w, A, b, t0):
P = np.arctan(w * t0)
return A * np.cos(P)**b * np.sin(b * P)
def cole_cole(w, A, b, t0):
return A * (w * t0)**b * np.sin(np.pi * b / 2) / (1 + 2 * (w * t0)**b * np.cos(np.pi * b / 2) + (w * t0)**(2 * b))
def havriliak_negami(ω, A, β, α, τ):
r"""
Imaginary part of the Havriliak-Negami function.
.. math::
\chi_{HN}(\omega) = \Im\left(\frac{A}{(1 + (i\omega\tau)^\alpha)^\beta}\right)
"""
return -(A / (1 + (1j * ω * τ)**α)**β).imag
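# Example (sketch): the HN function generalizes the two shapes above; it reduces to
# Cole-Cole for β = 1 and to Cole-Davidson for α = 1 (parameters are made up):
# >>> chi_hn = havriliak_negami(np.logspace(-3, 3, 200), A=1.0, β=0.8, α=0.9, τ=1.0)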
# fits the decay of correlation times, e.g. with distance d to the pore walls; t8 is the bulk limit
def colen(d, X, t8, A):
return t8 * np.exp(A*np.exp(-d/X))
# fits decay of the plateau height of the overlap function, e.g. with distance to pore walls
def colenQ(d, X, Qb, g):
return (1-Qb)*np.exp(-(d/X)**g)+Qb

26
mdevaluate/logging.py Normal file
View File

@ -0,0 +1,26 @@
import logging
logger = logging.getLogger('mdevaluate')
logger.setLevel(logging.INFO)
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)
logger.addHandler(stream_handler)
formatter = logging.Formatter('{levelname[0]}{levelname[1]}{levelname[2]}[{asctime}]:{funcName}: {message}', style='{')
stream_handler.setFormatter(formatter)
def setlevel(level, file=None):
"""
Change the level of logging. If `file` is specified, logs are written to this file.
"""
if isinstance(level, str):
level = getattr(logging, level.upper())
logger.setLevel(level)
if file is not None:
handler = logging.FileHandler(file)
handler.setLevel(level)
handler.setFormatter(formatter)
logger.addHandler(handler)
else:
stream_handler.setLevel(level)
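# Example (sketch): switch to debug output and additionally write all logs to a file:
# >>> setlevel('debug', file='mdevaluate.log')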

260
mdevaluate/pbc.py Normal file
View File

@ -0,0 +1,260 @@
from collections import OrderedDict
import os
import numpy as np
from scipy.spatial import cKDTree
from itertools import product
from .logging import logger
def pbc_diff_old(v1, v2, box):
"""
Calculate the difference of two vectors, considering optional periodic boundary conditions.
"""
if box is None:
v = v1 - v2
else:
v = v1 % box - v2 % box
v -= (v > box / 2) * box
v += (v < -box / 2) * box
return v
def pbc_diff(v1, v2=None, box=None):
"""Difference of two vectors; dispatches to the rectangular or triclinic minimum-image variant depending on the box shape."""
if box is None:
out = v1 - v2
elif len(getattr(box, 'shape', [])) == 1:
out = pbc_diff_rect(v1, v2, box)
elif len(getattr(box, 'shape', [])) == 2:
out = pbc_diff_tric(v1, v2, box)
else: raise NotImplementedError("cannot handle box")
return out
def pbc_diff_rect(v1, v2, box):
"""
Calculate the difference of two vectors, considering periodic boundary conditions.
"""
if v2 is None:
v = v1
else:
v = v1 - v2
s = v / box
v = box * (s - s.round())
return v
def pbc_diff_tric(v1, v2=None, box=None):
"""
Minimum-image difference vector for arbitrary (e.g. triclinic) periodic boundary conditions.
Args:
box: The full box matrix, e.g. CoordinateFrame.box
"""
if len(box.shape) == 1: box = np.diag(box)
if v1.shape == (3,): v1 = v1.reshape((1,3)) #quick 'n dirty
if v2.shape == (3,): v2 = v2.reshape((1,3))
if box is not None:
r3 = np.subtract(v1, v2)
r2 = np.subtract(r3, (np.rint(np.divide(r3[:,2],box[2][2])))[:,np.newaxis] * box[2][np.newaxis,:])
r1 = np.subtract(r2, (np.rint(np.divide(r2[:,1],box[1][1])))[:,np.newaxis] * box[1][np.newaxis,:])
v = np.subtract(r1, (np.rint(np.divide(r1[:,0],box[0][0])))[:,np.newaxis] * box[0][np.newaxis,:])
else:
v = v1 - v2
return v
def pbc_dist(a1, a2, box=None):
return ((pbc_diff(a1, a2, box)**2).sum(axis=1))**0.5
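# Example (sketch): minimum-image distance in a rectangular box of length 3:
# >>> box = np.array([3.0, 3.0, 3.0])
# >>> pbc_dist(np.array([[0.1, 0.1, 0.1]]), np.array([[2.9, 2.9, 2.9]]), box)
# array([0.34641016])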
def pbc_extend(c, box):
"""
Args:
c: Coordinates of a frame
box: The box matrix, e.g. frame.box
Returns:
All atoms of the frame together with their periodic images, shape (len(c)*27, 3).
"""
c=np.asarray(c)
if c.shape == (3,): c = c.reshape((1,3)) #quick 'n dirty
comb = np.array([np.asarray(i) for i in product([0,-1,1],[0,-1,1],[0,-1,1])])
b_matrices = comb[:,:,np.newaxis]*box[np.newaxis,:,:]
b_vectors = b_matrices.sum(axis=1)[np.newaxis,:,:]
return (c[:,np.newaxis,:]+b_vectors)
def pbc_kdtree(v1, box, leafsize=32, compact_nodes=False, balanced_tree=False):
"""
Build a cKDTree over the coordinates and all their periodic images.
Args:
box: The full box matrix
leafsize, compact_nodes, balanced_tree: Optional cKDTree tuning parameters
"""
r0 = cKDTree(pbc_extend(v1, box).reshape((-1, 3)), leafsize, compact_nodes, balanced_tree)
return r0
def pbc_kdtree_query(v1, v2, box, n=1):
"""
kd-tree query with periodic images.
"""
r0, r1 = pbc_kdtree(v1, box).query(v2, n)
# map indices of periodic images back to the original atoms (27 images per atom)
r1 = r1 // 27
return r0, r1
def pbc_backfold_rect(act_frame,box_matrix):
"""
mimics "trjconv ... -pbc atom -ur rect"
folds coords of act_frame in cuboid
"""
af=np.asarray(act_frame)
if af.shape == (3,): af = af.reshape((1,3)) #quick 'n dirty
b = box_matrix
c = np.diag(b)/2
af = pbc_diff(np.zeros((1,3)),af-c,b)
return af + c
def pbc_backfold_compact(act_frame,box_matrix):
"""
mimics "trjconv ... -pbc atom -ur compact"
folds coords of act_frame in wigner-seitz-cell (e.g. dodecahedron)
"""
c = act_frame
box = box_matrix
ctr = box.sum(0)/2
c=np.asarray(c)
shape = c.shape
if shape == (3,):
c = c.reshape((1,3))
shape = (1,3) #quick 'n dirty
comb = np.array([np.asarray(i) for i in product([0,-1,1],[0,-1,1],[0,-1,1])])
b_matrices = comb[:,:,np.newaxis]*box[np.newaxis,:,:]
b_vectors = b_matrices.sum(axis=1)[np.newaxis,:,:]
sc = c[:,np.newaxis,:]+b_vectors
w = np.argsort((((sc)-ctr)**2).sum(2),1)[:,0]
return sc[range(shape[0]),w]
# Parameter used to switch reference position for whole molecules
# 'com': use center of mass of each molecule
# 'simple': use first atom in each molecule
WHOLEMODE = 'com'
fname = os.path.expanduser('~/.mdevaluate/WHOLEMODE')
if os.path.exists(fname):
with open(fname) as f:
WHOLEMODE = f.read().strip()
logger.info('Setting WHOLEMODE to %s, according to file ~/.mdevaluate/WHOLEMODE', WHOLEMODE)
def whole(frame):
"""
Apply ``-pbc whole`` to a CoordinateFrame.
"""
residue_ids = frame.coordinates.atom_subset.residue_ids
box = frame.box.diagonal()
if WHOLEMODE == 'com':
logger.debug('Using COM as reference for whole.')
coms = np.array([
np.bincount(residue_ids, weights=c * frame.masses)[1:] / np.bincount(residue_ids, weights=frame.masses)[1:]
for c in frame.T
]).T[residue_ids - 1]
else:
# make sure, residue_ids are sorted, then determine indices at which the res id changes
# kind='stable' assures that any existent ordering is preserved
logger.debug('Using first atom as reference for whole.')
sort_ind = residue_ids.argsort(kind='stable')
i = np.concatenate([[0], np.where(np.diff(residue_ids[sort_ind]) > 0)[0] + 1])
coms = frame[sort_ind[i]][residue_ids - 1]
cor = np.zeros_like(frame)
cd = frame - coms
n, d = np.where(cd > box / 2 * 0.9)
cor[n, d] = -box[d]
n, d = np.where(cd < -box / 2 * 0.9)
cor[n, d] = box[d]
# this fix is only necessary when COM is the reference
if WHOLEMODE == 'com':
duomask = np.bincount(residue_ids)[1:][residue_ids - 1] == 2
if np.any(duomask):
duomask[::2] = False
cor[duomask] = 0
return frame + cor
NOJUMP_CACHESIZE = 128
def nojump(frame, usecache=True):
"""
Return the nojump coordinates of a frame, based on a jump matrix.
"""
selection = frame.selection
reader = frame.coordinates.frames
if usecache:
if not hasattr(reader, '_nojump_cache'):
reader._nojump_cache = OrderedDict()
# make sure to use absolute (non negative) index
abstep = frame.step % len(frame.coordinates)
i0s = [x for x in reader._nojump_cache if x <= abstep]
if len(i0s) > 0:
i0 = max(i0s)
delta = reader._nojump_cache[i0]
i0 += 1
else:
i0 = 0
delta = 0
delta = delta + np.array(np.vstack(
[m[i0:abstep + 1].sum(axis=0) for m in reader.nojump_matrixes]
).T) * frame.box.diagonal()
reader._nojump_cache[abstep] = delta
while len(reader._nojump_cache) > NOJUMP_CACHESIZE:
reader._nojump_cache.popitem(last=False)
delta = delta[selection, :]
else:
delta = np.array(np.vstack(
[m[:frame.step + 1, selection].sum(axis=0) for m in reader.nojump_matrixes]
).T) * frame.box.diagonal()
return frame - delta
def pbc_points(coordinates, box, thickness=0, index=False, inclusive=True, center=None):
"""
Returns the points and their first periodic images. Does not fold them back into the box.
With thickness=0 all 27 boxes are returned; positive values return the box plus a shell
of the given thickness, negative values return less than the full box.
index=True also returns the indices of the points, with images carrying the index of their original.
inclusive=False returns only the images; this does not work with thickness <= 0.
"""
if center is None:
center = box/2
allcoordinates = np.copy(coordinates)
indices = np.tile(np.arange(len(coordinates)),(27))
for x in range(-1, 2, 1):
for y in range(-1, 2, 1):
for z in range(-1, 2, 1):
vv = np.array([x, y, z], dtype=float)
if not (vv == 0).all() :
allcoordinates = np.concatenate((allcoordinates, coordinates + vv*box), axis=0)
if thickness != 0:
mask = np.all(allcoordinates < center+box/2+thickness, axis=1)
allcoordinates = allcoordinates[mask]
indices = indices[mask]
mask = np.all(allcoordinates > center-box/2-thickness, axis=1)
allcoordinates = allcoordinates[mask]
indices = indices[mask]
if not inclusive and thickness > 0:
allcoordinates = allcoordinates[len(coordinates):]
indices = indices[len(coordinates):]
if index:
return (allcoordinates, indices)
return allcoordinates
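# Example (sketch): periodic images within a shell of 0.5 around a cubic box;
# `coords` is a hypothetical (N, 3) array, the box is given as its edge lengths:
# >>> box = np.array([3.0, 3.0, 3.0])
# >>> points, ids = pbc_points(coords, box, thickness=0.5, index=True)
# >>> # ids maps every image back to the index of its original point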

283
mdevaluate/reader.py Executable file
View File

@ -0,0 +1,283 @@
"""
Module that provides different readers for trajectory files.
It also provides a common interface layer between the file IO packages,
namely pygmx and mdanalysis, and mdevaluate.
"""
from .checksum import checksum
from .logging import logger
from . import atoms
from functools import lru_cache
from collections import namedtuple
import os
from os import path
from array import array
from zipfile import BadZipFile
import builtins
import warnings
import numpy as np
import MDAnalysis as mdanalysis
from scipy import sparse
from dask import delayed, __version__ as DASK_VERSION
class NojumpError(Exception):
pass
class WrongTopologyError(Exception):
pass
def open_with_mdanalysis(topology, trajectory, cached=False):
"""Open a the topology and trajectory with mdanalysis."""
uni = mdanalysis.Universe(topology, trajectory, convert_units=False)
if cached is not False:
if cached is True:
maxsize = 128
else:
maxsize = cached
reader = CachedReader(uni.trajectory, maxsize)
else:
reader = BaseReader(uni.trajectory)
reader.universe = uni
if topology.endswith('.tpr'):
atms = atoms.Atoms(
np.stack((uni.atoms.resids, uni.atoms.resnames, uni.atoms.names), axis=1),
charges=uni.atoms.charges, masses=uni.atoms.masses
).subset()
elif topology.endswith('.gro'):
atms = atoms.Atoms(
np.stack((uni.atoms.resids, uni.atoms.resnames, uni.atoms.names), axis=1),
charges=None, masses=None
).subset()
else:
raise WrongTopologyError('Topology file should end with ".tpr" or ".gro"')
return atms, reader
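# Example (sketch, hypothetical file names): open a topology/trajectory pair directly
# with this interface layer, caching up to 128 frames:
# >>> atom_set, frames = open_with_mdanalysis('topol.tpr', 'traj.xtc', cached=True)
# >>> len(frames)  # number of frames in the trajectory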
def is_writeable(fname):
"""Test if a directory is actually writeable, by writing a temporary file."""
fdir = os.path.dirname(fname)
ftmp = os.path.join(fdir, str(np.random.randint(999999999)))
while os.path.exists(ftmp):
ftmp = os.path.join(fdir, str(np.random.randint(999999999)))
if os.access(fdir, os.W_OK):
try:
with builtins.open(ftmp, 'w'):
pass
os.remove(ftmp)
return True
except PermissionError:
pass
return False
def nojump_filename(reader):
directory, fname = path.split(reader.filename)
fname = path.join(directory, '.{}.nojump.npz'.format(fname))
if os.path.exists(fname) or is_writeable(directory):
return fname
else:
fname = os.path.join(
os.path.join(os.environ['HOME'], '.mdevaluate/nojump'),
directory.lstrip('/'),
'.{}.nojump.npz'.format(fname)
)
logger.info('Saving nojump to {}, since original location is not writeable.'.format(fname))
os.makedirs(os.path.dirname(fname), exist_ok=True)
return fname
CSR_ATTRS = ('data', 'indices', 'indptr')
NOJUMP_MAGIC = 2016
def parse_jumps(trajectory):
prev = trajectory[0].whole
box = prev.box.diagonal()
SparseData = namedtuple('SparseData', ['data', 'row', 'col'])
jump_data = (
SparseData(data=array('b'), row=array('l'), col=array('l')),
SparseData(data=array('b'), row=array('l'), col=array('l')),
SparseData(data=array('b'), row=array('l'), col=array('l'))
)
for i, curr in enumerate(trajectory):
if i % 500 == 0:
logger.debug('Parse jumps Step: %d', i)
delta = ((curr - prev) / box).round().astype(np.int8)
prev = curr
for d in range(3):
col, = np.where(delta[:, d] != 0)
jump_data[d].col.extend(col)
jump_data[d].row.extend([i] * len(col))
jump_data[d].data.extend(delta[col, d])
return jump_data
def generate_nojump_matrixes(trajectory):
"""
Create the sparse matrices of PBC jumps for a trajectory.
"""
logger.info('Generating nojump matrixes for: {}'.format(trajectory))
jump_data = parse_jumps(trajectory)
N = len(trajectory)
M = len(trajectory[0])
trajectory.frames.nojump_matrixes = tuple(
sparse.csr_matrix((np.array(m.data), (m.row, m.col)), shape=(N, M)) for m in jump_data
)
save_nojump_matrixes(trajectory.frames)
def save_nojump_matrixes(reader, matrixes=None):
if matrixes is None:
matrixes = reader.nojump_matrixes
data = {'checksum': checksum(NOJUMP_MAGIC, checksum(reader))}
for d, mat in enumerate(matrixes):
data['shape'] = mat.shape
for attr in CSR_ATTRS:
data['{}_{}'.format(attr, d)] = getattr(mat, attr)
np.savez(nojump_filename(reader), **data)
def load_nojump_matrixes(reader):
zipname = nojump_filename(reader)
try:
data = np.load(zipname, allow_pickle=True)
except (AttributeError, BadZipFile, OSError):
# npz files can be corrupted, probably a bug for big arrays saved with savez_compressed?
logger.info('Removing zip-File: %s', zipname)
os.remove(nojump_filename(reader))
return
try:
if data['checksum'] == checksum(NOJUMP_MAGIC, checksum(reader)):
reader.nojump_matrixes = tuple(
sparse.csr_matrix(
tuple(data['{}_{}'.format(attr, d)] for attr in CSR_ATTRS),
shape=data['shape']
)
for d in range(3)
)
logger.info('Loaded Nojump Matrixes: {}'.format(nojump_filename(reader)))
else:
logger.info('Invalid Nojump Data: {}'.format(nojump_filename(reader)))
except KeyError:
logger.info('Removing zip-File: %s', zipname)
os.remove(nojump_filename(reader))
return
def correct_nojump_matrixes_for_whole(trajectory):
reader = trajectory.frames
frame = trajectory[0]
box = frame.box.diagonal()
cor = ((frame - frame.whole) / box).round().astype(np.int8)
for d in range(3):
reader.nojump_matrixes[d][0] = cor[:, d]
save_nojump_matrixes(reader)
class BaseReader:
"""Base class for trajectory readers."""
@property
def filename(self):
return self.rd.filename
@property
def nojump_matrixes(self):
if self._nojump_matrixes is None:
raise NojumpError('Nojump Data not available: {}'.format(self.filename))
return self._nojump_matrixes
@nojump_matrixes.setter
def nojump_matrixes(self, mats):
self._nojump_matrixes = mats
def __init__(self, rd):
"""
Args:
rd: The underlying trajectory reader, e.g. an MDAnalysis trajectory.
"""
self.rd = rd
self._nojump_matrixes = None
if path.exists(nojump_filename(self)):
load_nojump_matrixes(self)
def __getitem__(self, item):
return self.rd[item]
def __len__(self):
return len(self.rd)
def __checksum__(self):
if hasattr(self.rd, 'cache'):
# Has a pygmx reader
return checksum(self.filename, str(self.rd.cache))
elif hasattr(self.rd, '_xdr'):
# Has an mdanalysis reader
cache = array('L', self.rd._xdr.offsets.tobytes())
return checksum(self.filename, str(cache))
class CachedReader(BaseReader):
"""A reader that has a least-recently-used cache for frames."""
@property
def cache_info(self):
"""Get Information about the lru cache."""
return self._get_item.cache_info()
def clear_cache(self):
"""Clear the cache of the frames."""
self._get_item.cache_clear()
def __init__(self, rd, maxsize):
"""
Args:
rd: The underlying trajectory reader.
maxsize: Maximum size of the lru_cache or None for infinite cache.
"""
super().__init__(rd)
self._get_item = lru_cache(maxsize=maxsize)(self._get_item)
def _get_item(self, item):
"""Buffer function for lru_cache, since __getitem__ can not be cached."""
return super().__getitem__(item)
def __getitem__(self, item):
return self._get_item(item)
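# Note: DelayedReader relies on the legacy pygmx reader interface (rd.cache,
# read_xtcframe_delayed) and is not compatible with the MDAnalysis-based reader.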
class DelayedReader(BaseReader):
@property
def filename(self):
if self.rd is not None:
return self.rd.filename
else:
return self._filename
def __init__(self, filename, reindex=False, ignore_index_timestamps=False):
super().__init__(filename, reindex=False, ignore_index_timestamps=False)
self.natoms = len(self.rd[0].coordinates)
self.cache = self.rd.cache
self._filename = self.rd.filename
self.rd = None
def __len__(self):
return len(self.cache)
def _get_item(self, frame):
return read_xtcframe_delayed(self.filename, self.cache[frame], self.natoms)
def __getitem__(self, frame):
return self._get_item(frame)

366
mdevaluate/utils.py Normal file
View File

@ -0,0 +1,366 @@
"""
Collection of utility functions.
"""
import functools
from types import FunctionType
import numba  # used to jit-compile the kernels of coherent_sum and coherent_histogram
import numpy as np
import pandas as pd
from scipy.ndimage import uniform_filter1d
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
from .functions import kww, kww_1e
from .logging import logger
def five_point_stencil(xdata, ydata):
"""
Calculate the derivative dy/dx with a five-point stencil.
Args:
xdata: x values of the data points
ydata: y values of the data points
Returns:
Values where the derivative was estimated and the value of the derivative at these points.
This algorithm is only exact for values on a regular grid; for unevenly distributed
data it is only an approximation, albeit a quite good one.
See: https://en.wikipedia.org/wiki/Five-point_stencil
"""
return xdata[2:-2], (
(-ydata[4:] + 8 * ydata[3:-1] - 8 * ydata[1:-3] + ydata[:-4]) /
(3 * (xdata[4:] - xdata[:-4]))
)
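# Example (sketch): derivative of sin(x) on a regular grid, compared to cos(x):
# >>> x = np.linspace(0, 2 * np.pi, 101)
# >>> xs, dy = five_point_stencil(x, np.sin(x))
# >>> np.allclose(dy, np.cos(xs), atol=1e-3)
# True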
def filon_fourier_transformation(time, correlation,
frequencies=None, derivative='linear', imag=True,
):
"""
Fourier transformation for slowly varying functions. The Filon algorithm is
described in detail in ref [Blochowicz]_, ch. 3.2.3.
Args:
time: List of times where the correlation function was sampled.
correlation: Values of the correlation function.
frequencies (opt.):
List of frequencies where the Fourier transformation will be calculated.
If None, the frequencies will be chosen based on the input times.
derivative (opt.):
Approximation algorithm for the derivative of the correlation function.
Possible values are: 'linear', 'stencil' or a list of derivatives.
imag (opt.): If the imaginary part of the integral should be calculated.
If frequencies are not given explicitly, they will be evenly placed on a log scale
in the interval [1/tmax, 0.1/tmin], where tmin and tmax are the smallest and
largest times (greater than 0) of the provided data. The frequencies are cut off
one decade early at high values, since the Fourier transformation deviates quite
strongly in this regime.
.. [Blochowicz]
T. Blochowicz, Broadband dielectric spectroscopy in neat and binary
molecular glass formers, Ph.D. thesis, Universität Bayreuth (2003)
"""
if frequencies is None:
f_min = 1 / max(time)
f_max = 0.05**(1.2 - max(correlation)) / min(time[time > 0])
frequencies = 2 * np.pi * np.logspace(
np.log10(f_min), np.log10(f_max), num=60
)
frequencies = frequencies.reshape(1, -1)
if derivative == 'linear':
derivative = (np.diff(correlation) / np.diff(time)).reshape(-1, 1)
elif derivative == 'stencil':
_, derivative = five_point_stencil(time, correlation)
time = ((time[2:-1] * time[1:-2])**.5).reshape(-1, 1)
derivative = derivative.reshape(-1, 1)
elif np.iterable(derivative) and len(time) == len(derivative):
derivative = np.asarray(derivative).reshape(-1, 1)
else:
raise NotImplementedError(
'Invalid approximation method {}. Possible values are "linear", "stencil" or a list of values.'.format(derivative)
)
time = time.reshape(-1, 1)
integral = (np.cos(frequencies * time[1:]) - np.cos(frequencies * time[:-1])) / frequencies**2
fourier = (derivative * integral).sum(axis=0)
if imag:
integral = 1j * (np.sin(frequencies * time[1:]) - np.sin(frequencies * time[:-1])) / frequencies**2
fourier = fourier + (derivative * integral).sum(axis=0) + 1j * correlation[0] / frequencies
return frequencies.reshape(-1,), fourier
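# Example (sketch): spectrum of a stretched-exponential decay with made-up parameters
# (kww is imported from .functions above):
# >>> t = np.logspace(-3, 3, 100)
# >>> w, chi = filon_fourier_transformation(t, kww(t, 1.0, 1.0, 0.6))
# >>> # chi is complex; its imaginary part contains the loss spectrum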
def mask2indices(mask):
"""
Return the selected indices of an array mask.
If the mask is two-dimensional, the indices will be calculated for the second axis.
Example:
>>> mask2indices([True, False, True, False])
array([0, 2])
>>> mask2indices([[True, True, False], [True, False, True]])
array([[0, 1], [0, 2]])
"""
mask = np.array(mask)
if len(mask.shape) == 1:
indices = np.where(mask)
else:
indices = np.array([np.where(m) for m in mask])
return indices
def superpose(x1, y1, x2, y2, N=100, damping=1.0):
"""
Superpose two overlapping datasets (x1, y1) and (x2, y2) into one curve,
blending the overlap region with logarithmically weighted averages.
"""
if x2[0] == 0:
x2 = x2[1:]
y2 = y2[1:]
reg1 = x1 < x2[0]
reg2 = x2 > x1[-1]
x_ol = np.logspace(
np.log10(max(x1[~reg1][0], x2[~reg2][0]) + 0.001),
np.log10(min(x1[~reg1][-1], x2[~reg2][-1]) - 0.001),
int((sum(~reg1) + sum(~reg2)) / 2)
)
def w(x):
A = x_ol.min()
B = x_ol.max()
return (np.log10(B / x) / np.log10(B / A))**damping
xdata = np.concatenate((x1[reg1], x_ol, x2[reg2]))
y1_interp = interp1d(x1[~reg1], y1[~reg1])
y2_interp = interp1d(x2[~reg2], y2[~reg2])
ydata = np.concatenate((
y1[x1 < x2.min()],
w(x_ol) * y1_interp(x_ol) + (1 - w(x_ol)) * y2_interp(x_ol),
y2[x2 > x1.max()]
))
return xdata, ydata
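# Example (sketch): merge two correlation curves sampled with different time steps
# (t_short, C_short, t_long, C_long are hypothetical arrays with overlapping x ranges):
# >>> x, y = superpose(t_short, C_short, t_long, C_long)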
def runningmean(data, nav):
"""
Compute the running mean of a 1-dimenional array.
Args:
data: Input data of shape (N, )
nav: Number of points over which the data will be averaged
Returns:
Array of shape (N-(nav-1), )
"""
return np.convolve(data, np.ones((nav,)) / nav, mode='valid')
def moving_average(A,n=3):
"""
Compute the running mean of an array.
Uses the second axis if it is of higher dimensionality.
Args:
A: Input data of shape (N, ) or (M, N)
n: Number of points over which the data will be averaged
Returns:
Array of shape (N-(n-1), )
Supports 2D arrays, in which case the average is taken over the second axis.
Slower than runningmean for small n but faster for large n.
"""
k1 = int(n/2)
k2 = int((n-1)/2)
if k2 == 0:
if A.ndim > 1:
return uniform_filter1d(A,n)[:,k1:]
return uniform_filter1d(A,n)[k1:]
if A.ndim > 1:
return uniform_filter1d(A,n)[:,k1:-k2]
return uniform_filter1d(A,n)[k1:-k2]
def coherent_sum(func, coord_a, coord_b):
"""
Perform a coherent sum over two arrays :math:`A, B`.
.. math::
\\sum_i\\sum_j f(A_i, B_j)
For numpy arrays this is equal to::
N, d = x.shape
M, d = y.shape
coherent_sum(f, x, y) == f(x.reshape(N, 1, d), y.reshape(1, M, d)).sum()
Args:
func: The function is called for each two items in both arrays, this should return a scalar value.
coord_a, coord_b: The two arrays.
"""
if isinstance(func, FunctionType):
func = numba.jit(func, nopython=True, cache=True)
def cohsum(coord_a, coord_b):
res = 0
for i in range(len(coord_a)):
for j in range(len(coord_b)):
res += func(coord_a[i], coord_b[j])
return res
return cohsum(coord_a, coord_b)
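# Example (sketch): count pairs closer than a cutoff of 0.5; the pair function is
# jit-compiled by coherent_sum, so it must be numba-compilable (`coords` is a
# hypothetical (N, 3) array):
# >>> def close_pair(a, b):
# ...     return 1.0 if ((a - b)**2).sum() < 0.25 else 0.0
# >>> n_close = coherent_sum(close_pair, coords, coords)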
def coherent_histogram(func, coord_a, coord_b, bins, distinct=False):
"""
Compute a coherent histogram over two arrays, equivalent to coherent_sum.
For numpy arrays this is equal to::
N, d = x.shape
M, d = y.shape
bins = np.arange(1, 5, 0.1)
coherent_histogram(f, x, y, bins) == histogram(f(x.reshape(N, 1, d), y.reshape(1, M, d)), bins=bins)
Args:
func: The function is called for each two items in both arrays, this should return a scalar value.
coord_a, coord_b: The two arrays.
bins: The bins used for the histogram; they must be regularly distributed on a linear scale.
distinct (opt.): If True, diagonal terms (i == j) are excluded from the histogram.
"""
if isinstance(func, FunctionType):
func = numba.jit(func, nopython=True, cache=True)
assert np.isclose(np.diff(bins).mean(), np.diff(bins)).all(), 'A regular distribution of bins is required.'
hmin = bins[0]
hmax = bins[-1]
N = len(bins) - 1
dh = (hmax - hmin) / N
def cohsum(coord_a, coord_b):
res = np.zeros((N,))
for i in range(len(coord_a)):
for j in range(len(coord_b)):
if not (distinct and i == j):
h = func(coord_a[i], coord_b[j])
if hmin <= h < hmax:
res[int((h - hmin) / dh)] += 1
return res
return cohsum(coord_a, coord_b)
def Sq_from_gr(r, gr, q, ρ):
r"""
Compute the static structure factor as Fourier transform of the pair correlation function. [Yarnell]_
.. math::
S(q) - 1 = \\frac{4\\pi \\rho}{q}\\int\\limits_0^\\infty (g(r) - 1)\\,r \\sin(qr) dr
Args:
r: Radii of the pair correlation function
gr: Values of the pair correlation function
q: List of q values
ρ: Average number density
.. [Yarnell]
Yarnell, J. L., Katz, M. J., Wenzel, R. G., & Koenig, S. H. (1973). Physical Review A, 7(6), 2130–2144.
http://doi.org/10.1017/CBO9781107415324.004
"""
ydata = ((gr - 1) * r).reshape(-1, 1) * np.sin(r.reshape(-1, 1) * q.reshape(1, -1))
return np.trapz(x=r, y=ydata, axis=0) * (4 * np.pi * ρ / q) + 1
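# Example (sketch): structure factor from a previously computed pair-correlation
# function; r and gr are hypothetical arrays, ρ is the average number density:
# >>> q = np.linspace(1.0, 50.0, 200)
# >>> Sq = Sq_from_gr(r, gr, q, 33.4)  # e.g. ~33.4 molecules/nm³ for bulk water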
def Fqt_from_Grt(data, q):
"""
Calculate the ISF from the van Hove function for a given q value by Fourier transform.
.. math::
F_q(t) = \\int\\limits_0^\\infty dr \\; G(r, t) \\frac{\\sin(qr)}{qr}
Args:
data:
Input data can be a pandas dataframe with columns 'r', 'time' and 'G'
or an array of shape (N, 3), of tuples (r, t, G).
q: Value of q.
Returns:
If input data was a dataframe the result will be returned as one too, else two arrays
will be returned, which will contain times and values of Fq(t) respectively.
"""
if isinstance(data, pd.DataFrame):
df = data.copy()
else:
df = pd.DataFrame(data, columns=['r', 'time', 'G'])
df['isf'] = df['G'] * np.sinc(q / np.pi * df['r'])
isf = df.groupby('time')['isf'].sum()
if isinstance(data, pd.DataFrame):
return pd.DataFrame({'time': isf.index, 'isf': isf.values, 'q': q})
else:
return isf.index, isf.values
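# Example (sketch): ISF at a single q value from a tabulated van Hove function with
# hypothetical arrays r, t and G:
# >>> df = pd.DataFrame({'r': r, 'time': t, 'G': G})
# >>> isf = Fqt_from_Grt(df, 22.7)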
'''
@numba.jit
def norm(vec):
return (vec**2).sum()**0.5
'''
def singledispatchmethod(func):
"""A decorator to define a genric instance method, analogue to functools.singledispatch."""
dispatcher = functools.singledispatch(func)
def wrapper(*args, **kw):
return dispatcher.dispatch(args[1].__class__)(*args, **kw)
wrapper.register = dispatcher.register
functools.update_wrapper(wrapper, func)
return wrapper
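# Example (sketch): dispatch an instance method on the type of its first argument:
# >>> class Processor:
# ...     @singledispatchmethod
# ...     def process(self, arg):
# ...         raise NotImplementedError
# ...     @process.register(int)
# ...     def _(self, arg):
# ...         return arg * 2
# >>> Processor().process(21)
# 42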
def histogram(data, bins):
"""Compute the histogram of the given data. Uses numpy.bincount function, if possible."""
dbins = np.diff(bins)
dx = dbins.mean()
if bins.min() == 0 and dbins.std() < 1e-6:
logger.debug("Using numpy.bincount for histogramm compuation.")
hist = np.bincount((data // dx).astype(int), minlength=len(dbins))[:len(dbins)]
else:
hist = np.histogram(data, bins=bins)[0]
return hist, runningmean(bins, 2)
def quick1etau(t, C, n=7):
"""
Estimate the time at which a correlation function that decays from 1 to 0 reaches 1/e.
If successful, tau is returned as a fine interpolation from a KWW fit.
The data is reduced to points around 1/e to remove short and long times from the KWW fit!
Args:
t: The time values
C: The correlation function C(t)
n: Minimum number of points around 1/e required for the fit
"""
# first rough estimate, the closest time. This is returned if the interpolation fails!
tau_est = t[np.argmin(np.fabs(C-np.exp(-1)))]
# reduce the data to points around 1/e
k = 0.1
mask = (C < np.exp(-1)+k) & (C > np.exp(-1)-k)
while np.sum(mask) < n:
k += 0.01
mask = (C < np.exp(-1)+k) & (C > np.exp(-1)-k)
if k + np.exp(-1) > 1.0:
break
# if enough points are found, try a curve fit; otherwise, or if the fit fails, keep the rough estimate
if np.sum(mask) >= n:
try:
with np.errstate(invalid='ignore'):
fit, _ = curve_fit(kww, t[mask], C[mask], p0=[0.9, tau_est, 0.9], maxfev=100000)
tau_est = kww_1e(*fit)
except Exception:
pass
return tau_est
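# Example (sketch): recover the decay time from synthetic KWW data (kww is imported
# from .functions above):
# >>> t = np.logspace(-2, 3, 60)
# >>> C = kww(t, 1.0, 10.0, 0.8)
# >>> quick1etau(t, C)  # ≈ 10.0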