Moved gromacs module from mdevaluate to pygmx

This commit is contained in:
Niels Müller
2016-06-03 12:34:36 +02:00
parent eeb5c607f8
commit a601636d55
10 changed files with 10017 additions and 0 deletions

64
pygmx/gromacs/__init__.py Normal file
View File

@ -0,0 +1,64 @@
import numpy as np
from numpy import genfromtxt
import itertools
import re
from .reader import XTCReader, TRRReader
from .xtcindex import index_xtcfile
def atoms_from_grofile(file):
""" Deprecated. Use Atoms.from_grofile"""
t = genfromtxt(file, skip_header=2, delimiter=(10, 5), dtype='U8', autostrip=True, skip_footer=1)
return t
def _atoms_from_grofile(file):
t = genfromtxt(file, skip_header=2, delimiter=(5, 5, 5), dtype='U8', autostrip=True, skip_footer=1)
return t
group_re = re.compile('\[ ([-+\w]+) \]')
def load_indices(indexfile):
indices = {}
index_array = None
with open(indexfile) as idx_file:
for line in idx_file:
m = group_re.search(line)
if m is not None:
group_name = m.group(1)
index_array = indices.get(group_name, [])
indices[group_name] = index_array
else:
elements = line.strip().split('\t')
elements = [x.split(' ') for x in elements]
elements = itertools.chain(*elements) # make a flat iterator
elements = [x for x in elements if x != '']
index_array += [int(x) - 1 for x in elements]
return indices
def load_index(indexfile, index_names):
"""
Load an indexfile, returns tuple of indices for index_names
"""
index_arrays = [[] for _ in index_names]
index_array = [] # This array will contain all indices before the first index_group. Should be empty
with open(indexfile) as idx_file:
for line in idx_file:
if line.startswith("["):
for i, index in enumerate(index_names):
if line.startswith("[ " + index + " ]"):
index_array = index_arrays[i]
break
index_array = None
else:
elements = line.strip().split('\t')
elements = [x.split(' ') for x in elements]
elements = itertools.chain(*elements) # make a flat iterator
elements = [x for x in elements if x != '']
index_array += [int(x) - 1 for x in elements]
return index_arrays

351
pygmx/gromacs/compression.c Normal file
View File

@ -0,0 +1,351 @@
#include <stdio.h>
#include <memory.h>
#include <stdlib.h>
extern void test(int *b) {
b[0] = 1;
b[1] = 0;
b[2] = 2;
}
static const int magicints[] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 10, 12, 16, 20, 25, 32, 40, 50, 64,
80, 101, 128, 161, 203, 256, 322, 406, 512, 645, 812, 1024, 1290,
1625, 2048, 2580, 3250, 4096, 5060, 6501, 8192, 10321, 13003,
16384, 20642, 26007, 32768, 41285, 52015, 65536,82570, 104031,
131072, 165140, 208063, 262144, 330280, 416127, 524287, 660561,
832255, 1048576, 1321122, 1664510, 2097152, 2642245, 3329021,
4194304, 5284491, 6658042, 8388607, 10568983, 13316085, 16777216
};
#define FIRSTIDX 9
/* note that magicints[FIRSTIDX-1] == 0 */
#define LASTIDX (sizeof(magicints) / sizeof(*magicints))
/* Internal support routines for reading/writing compressed coordinates
* sizeofint - calculate smallest number of bits necessary
* to represent a certain integer.
*/
static int
sizeofint(int size) {
unsigned int num = 1;
int num_of_bits = 0;
while (size >= num && num_of_bits < 32)
{
num_of_bits++;
num <<= 1;
}
return num_of_bits;
}
/*
* sizeofints - calculate 'bitsize' of compressed ints
*
* given a number of small unsigned integers and the maximum value
* return the number of bits needed to read or write them with the
* routines encodeints/decodeints. You need this parameter when
* calling those routines.
* (However, in some cases we can just use the variable 'smallidx'
* which is the exact number of bits, and them we dont need to call
* this routine).
*/
static int
sizeofints(int num_of_ints, unsigned int sizes[])
{
int i, num;
unsigned int num_of_bytes, num_of_bits, bytes[32], bytecnt, tmp;
num_of_bytes = 1;
bytes[0] = 1;
num_of_bits = 0;
for (i=0; i < num_of_ints; i++)
{
tmp = 0;
for (bytecnt = 0; bytecnt < num_of_bytes; bytecnt++)
{
tmp = bytes[bytecnt] * sizes[i] + tmp;
bytes[bytecnt] = tmp & 0xff;
tmp >>= 8;
}
while (tmp != 0)
{
bytes[bytecnt++] = tmp & 0xff;
tmp >>= 8;
}
num_of_bytes = bytecnt;
}
num = 1;
num_of_bytes--;
while (bytes[num_of_bytes] >= num)
{
num_of_bits++;
num *= 2;
}
return num_of_bits + num_of_bytes * 8;
}
/*
* decodebits - decode number from buf using specified number of bits
*
* extract the number of bits from the array buf and construct an integer
* from it. Return that value.
*
*/
static int
decodebits(int state[], int _buf[], int num_of_bits)
{
int cnt, num;
unsigned int lastbits, lastbyte;
int mask = (1 << num_of_bits) -1;
unsigned char * cbuf = (unsigned char *)_buf;
cnt = state[0];
lastbits = (unsigned int) state[1];
lastbyte = (unsigned int) state[2];
num = 0;
while (num_of_bits >= 8)
{
lastbyte = ( lastbyte << 8 ) | cbuf[cnt++];
num |= (lastbyte >> lastbits) << (num_of_bits - 8);
num_of_bits -=8;
}
if (num_of_bits > 0)
{
if (lastbits < num_of_bits)
{
lastbits += 8;
lastbyte = (lastbyte << 8) | cbuf[cnt++];
}
lastbits -= num_of_bits;
num |= (lastbyte >> lastbits) & ((1 << num_of_bits) -1);
}
num &= mask;
state[0] = cnt;
state[1] = lastbits;
state[2] = lastbyte;
return num;
}
/*
* decodeints - decode 'small' integers from the buf array
*
* this routine is the inverse from encodeints() and decodes the small integers
* written to buf by calculating the remainder and doing divisions with
* the given sizes[]. You need to specify the total number of bits to be
* used from buf in num_of_bits.
*
*/
static void
decodeints(int state[], int buf[], int num_of_ints, int num_of_bits,
unsigned int sizes[], int nums[])
{
int bytes[32];
int i, j, num_of_bytes, p, num;
bytes[1] = bytes[2] = bytes[3] = 0;
num_of_bytes = 0;
while (num_of_bits > 8)
{
bytes[num_of_bytes++] = decodebits(state, buf, 8);
num_of_bits -= 8;
}
if (num_of_bits > 0)
{
bytes[num_of_bytes++] = decodebits(state, buf, num_of_bits);
}
for (i = num_of_ints-1; i > 0; i--)
{
num = 0;
for (j = num_of_bytes-1; j >=0; j--)
{
num = (num << 8) | bytes[j];
p = num / sizes[i];
bytes[j] = p;
num = num - p * sizes[i];
}
nums[i] = num;
}
nums[0] = bytes[0] | (bytes[1] << 8) | (bytes[2] << 16) | (bytes[3] << 24);
}
extern int
xdrfile_decompress_coord_float(float *coordinates,
int size,
float precision,
int minint[3],
int maxint[3],
int smallidx,
char* compressed_blob_,
size_t blob_len,
size_t * readed_len)
{
int *compressed_blob = (int*)compressed_blob_;
int *lip;
unsigned sizeint[3], sizesmall[3], bitsizeint[3], size3;
int k, *buf1 , flag;
int smallnum, smaller, i, is_smaller, run;
float *lfp, inv_precision;
int tmp, *thiscoord, prevcoord[3];
unsigned int bitsize;
bitsizeint[0] = 0;
bitsizeint[1] = 0;
bitsizeint[2] = 0;
size3 = size * 3;
if((buf1=(int *)malloc(sizeof(int)*size3))==NULL)
{
fprintf(stderr,"Cannot allocate memory for decompressing coordinates.\n");
return -1;
}
/* Dont bother with compression for three atoms or less */
if(size<=9)
{
// TODO...
/* return number of coords, not floats */
}
sizeint[0] = maxint[0] - minint[0]+1;
sizeint[1] = maxint[1] - minint[1]+1;
sizeint[2] = maxint[2] - minint[2]+1;
/* check if one of the sizes is to big to be multiplied */
if ((sizeint[0] | sizeint[1] | sizeint[2] ) > 0xffffff)
{
bitsizeint[0] = sizeofint(sizeint[0]);
bitsizeint[1] = sizeofint(sizeint[1]);
bitsizeint[2] = sizeofint(sizeint[2]);
bitsize = 0; /* flag the use of large sizes */
}
else
{
bitsize = sizeofints(3, sizeint);
}
tmp = smallidx+8;
tmp = smallidx-1;
tmp = (FIRSTIDX>tmp) ? FIRSTIDX : tmp;
smaller = magicints[tmp] / 2;
smallnum = magicints[smallidx] / 2;
sizesmall[0] = sizesmall[1] = sizesmall[2] = magicints[smallidx] ;
lfp = coordinates;
inv_precision = 1.0 / precision;
run = 0;
i = 0;
lip = buf1;
int state[3] = {0,0,0};
while ( i < size )
{
thiscoord = (int *)(lip) + i * 3;
if (bitsize == 0)
{
thiscoord[0] = decodebits(state, compressed_blob, bitsizeint[0]);
thiscoord[1] = decodebits(state, compressed_blob, bitsizeint[1]);
thiscoord[2] = decodebits(state, compressed_blob, bitsizeint[2]);
}
else
{
decodeints(state, compressed_blob, 3, bitsize, sizeint, thiscoord);
}
i++;
thiscoord[0] += minint[0];
thiscoord[1] += minint[1];
thiscoord[2] += minint[2];
prevcoord[0] = thiscoord[0];
prevcoord[1] = thiscoord[1];
prevcoord[2] = thiscoord[2];
flag = decodebits(state, compressed_blob, 1);
is_smaller = 0;
if (flag == 1)
{
run = decodebits(state, compressed_blob, 5);
is_smaller = run % 3;
run -= is_smaller;
is_smaller--;
}
if (run > 0)
{
thiscoord += 3;
for (k = 0; k < run; k+=3)
{
decodeints(state, compressed_blob, 3, smallidx, sizesmall, thiscoord);
i++;
thiscoord[0] += prevcoord[0] - smallnum;
thiscoord[1] += prevcoord[1] - smallnum;
thiscoord[2] += prevcoord[2] - smallnum;
if (k == 0) {
/* interchange first with second atom for better
* compression of water molecules
*/
tmp = thiscoord[0]; thiscoord[0] = prevcoord[0];
prevcoord[0] = tmp;
tmp = thiscoord[1]; thiscoord[1] = prevcoord[1];
prevcoord[1] = tmp;
tmp = thiscoord[2]; thiscoord[2] = prevcoord[2];
prevcoord[2] = tmp;
*lfp++ = prevcoord[0] * inv_precision;
*lfp++ = prevcoord[1] * inv_precision;
*lfp++ = prevcoord[2] * inv_precision;
} else {
prevcoord[0] = thiscoord[0];
prevcoord[1] = thiscoord[1];
prevcoord[2] = thiscoord[2];
}
*lfp++ = thiscoord[0] * inv_precision;
*lfp++ = thiscoord[1] * inv_precision;
*lfp++ = thiscoord[2] * inv_precision;
}
}
else
{
*lfp++ = thiscoord[0] * inv_precision;
*lfp++ = thiscoord[1] * inv_precision;
*lfp++ = thiscoord[2] * inv_precision;
}
smallidx += is_smaller;
if (is_smaller < 0)
{
smallnum = smaller;
if (smallidx > FIRSTIDX)
{
smaller = magicints[smallidx - 1] /2;
}
else
{
smaller = 0;
}
}
else if (is_smaller > 0)
{
smaller = smallnum;
smallnum = magicints[smallidx] / 2;
}
sizesmall[0] = sizesmall[1] = sizesmall[2] = magicints[smallidx] ;
}
free((void*)buf1);
*readed_len = state[0];
return 0;
}

View File

@ -0,0 +1,12 @@
#include <stdio.h>
int
xdrfile_decompress_coord_float(float *coordinates,
int size,
float precision,
int minint[3],
int maxint[3],
int smallidx,
int * compressed_blob,
size_t blob_len,
size_t * readed_len);

6521
pygmx/gromacs/coordinates.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,47 @@
# distutils: sources = mdevaluate/gromacs/compression.c
from cython cimport view
from array import array
import numpy as np
cimport numpy as np
cdef extern from "compression.h":
int xdrfile_decompress_coord_float(float *coordinates,
int size,
float precision,
int minint[3],
int maxint[3],
int smallidx,
char * compressed_blob,
size_t blob_len,
size_t * readed_len)
def decompress(int size, float precision, minint, maxint, int smallidx, bytes blob):
assert len(minint) == 3
assert len(maxint) == 3
cdef int minints[3]
cdef int maxints[3]
for i in range(3):
minints[i] = minint[i]
maxints[i] = maxint[i]
cdef np.ndarray[float, ndim=2] coordinates = np.empty((size,3), dtype=np.float32)
cdef size_t readed
xdrfile_decompress_coord_float(
<float *>coordinates.data,
size,
precision,
maxints,
minints,
smallidx,
blob,
len(blob),
&readed)
return coordinates

2340
pygmx/gromacs/logarithmic.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,37 @@
from libc.math cimport floor, ceil, fabs, log10
def is_log_step(long long step, long long per_decade):
# The first step will never be written
if step == 0:
return True
if step == 1:
return True
cdef double log_step = log10(step)
cdef double log_next = log10(step+1)
cdef double log_prev = log10(step-1)
cdef long long decade = int(log10(step))
# Idea: Is there a number x = decade + n / per_decade
# with 0 <= n < per_decade where
# | log_step - x | <= | log_prev - x |
# | log_step - x | < | log_next - x |
# Calculate the closest numbers
cdef double n_smaller = floor((log_step - decade) * per_decade);
cdef double n_larger = ceil((log_step - decade) * per_decade);
cdef double prev_dist = fabs(log_prev-decade-n_smaller/per_decade);
cdef double dist_to_smaller = fabs(log_step-decade-n_smaller/per_decade);
cdef double next_dist = fabs(log_next-decade-n_larger/per_decade);
cdef double dist_to_larger = fabs(log_step-decade-n_larger/per_decade);
if dist_to_larger < next_dist or dist_to_smaller <= prev_dist:
return True
else:
return False

511
pygmx/gromacs/reader.py Normal file
View File

@ -0,0 +1,511 @@
import xdrlib
import io
import os
from .coordinates import decompress
from ..utils import hash_anything, merge_hashes
from functools import partial
import numpy as np
TRR_MAGIC = 1993
XTC_MAGIC = 1995
INDEX_MAGIC = 2015
def index_filename_for_xtc(xtcfile):
"""
Get the filename for the index file of a xtc file.
In general, the index file is located in the same directory as the xtc file.
If the xtc file is located in a read-only directory (for the current user)
and does not exist, the index file will be instead located in a subdirectory
of ~/.xtcindex, of the current user.
The directory for user index files can be changed by setting the environment
variable 'XTCINDEXDIR'.
Example:
xtcfile = '/data/niels/test.xtc'
# case 1: index file exists, or current user is niels
index_filename_for_xtc(xtcfile) == '/data/niels/.test.xtc.xtcindex'
# case 2: index file doesn't exist, and nor write permission in /data/niels
index_filename_for_xtc(xtcfile) == '~/.xtcindex/data/niels/.test.xtc.xtcindex'
Warning:
At this point, the index file is not checked for validity. If an invalid
index file is present in the xtc files directory, this will be used and an
error will be risen by XTCReader.
Warning:
On most systems, the home directory is on the local drive so that the indexing
has probably to do be done on every system if it can not be saved in the directory
of the xtc file.
"""
base = os.path.basename(xtcfile)
filename = "." + base + ".xtcindex"
dir = os.path.abspath(os.path.dirname(xtcfile))
if not os.path.exists(os.path.join(dir, filename)) and not os.access(dir, os.W_OK):
if 'XTCINDEXDIR' in os.environ:
index_dir = os.environ['XTCINDEXDIR']
else:
index_dir = os.path.join(os.environ['HOME'], '.xtcindex')
dir = os.path.join(index_dir, dir.lstrip('/'))
os.makedirs(dir, exist_ok=True)
return os.path.join(dir, filename)
class NumpyUnpacker(xdrlib.Unpacker):
def unpack_float_array(self, n_items):
i = self.get_position()
j = i + 4 * n_items
self.set_position(j)
data = self.get_buffer()[i:j]
if len(data) < 4:
raise EOFError
ret = np.frombuffer(data, '>f')
if len(ret) != n_items:
raise EOFError
return ret
def unpack_double_array(self, n_items):
i = self.get_position()
j = i + 8 * n_items
self.set_position(j)
data = self.get_buffer()[i:j]
if len(data) < 8:
raise EOFError
ret = np.frombuffer(data, '>d')
if len(ret) != n_items:
raise EOFError
return ret
class InvalidMagicException(Exception):
pass
class InvalidIndexException(Exception):
pass
class UnknownLenError(Exception):
pass
class SubscriptableReader:
def __init__(self, fd):
self.fd = fd
self.len = os.stat(self.fd.fileno()).st_size
def __getitem__(self, r):
if isinstance(r, slice):
if r.start is not None:
if r.start < 0:
self.fd.seek(r.start, io.SEEK_END)
else:
self.fd.seek(r.start, io.SEEK_SET)
if r.step is not None:
raise NotImplementedError
return self.fd.read(r.stop - r.start)
else:
self.fd.seek(r, io.SEEK_SET)
return self.fd.read(1)
def __len__(self):
return self.len
class XTCFrame:
__slots__ = ['_coordinates', 'index', 'time', 'box', '_compressed_coordinates']
def __init__(self):
self._coordinates = None
@property
def coordinates(self):
if self._coordinates is None:
self._coordinates = self._compressed_coordinates()
return self._coordinates
class BaseReader:
def __init__(self, file):
self.fd = open(file, 'rb')
self.filename = file
self.reader = SubscriptableReader(self.fd)
self.xdr_reader = NumpyUnpacker(self.reader)
def __exit__(self, *_):
self.fd.close()
def __enter__(self, *_):
return self
def __getstate__(self):
state = self.__dict__.copy()
del state['fd']
del state['reader']
del state['xdr_reader']
state['filepos'] = self.get_position()
return state
def __setstate__(self, state):
filepos = state.pop('filepos')
self.__dict__.update(state)
self.fd = open(self.filename, 'rb')
self.reader = SubscriptableReader(self.fd)
self.xdr_reader = NumpyUnpacker(self.reader)
self.set_position(filepos)
def get_position(self):
return self.fd.tell()
def set_position(self, pos):
return self.fd.seek(pos)
def skip_frames(self, num):
for i in range(num):
self.skip_frame()
def skip_frame(self):
self._read_header()
self._read_frame()
def dump_frame(self):
raise NotImplemented
def _read_header(self):
raise NotImplemented
def _read_frame(self):
raise NotImplemented
XTC_HEADER_SIZE = 4 * 4
XTC_FRAME_HEADER_SIZE = 9 * 4 + 10 * 4
class XTCReader(BaseReader):
len_available = False
def __init__(self, xtcfile, load_cache_file=True):
super().__init__(xtcfile)
self._cache = [0]
self._times = None
self.current_frame = 0
index_file_name = index_filename_for_xtc(xtcfile)
if load_cache_file:
self.load_index(index_file_name)
def load_index(self, filename):
xtc_stat = os.stat(self.filename)
c_time = int(xtc_stat.st_ctime)
m_time = int(xtc_stat.st_mtime)
size = xtc_stat.st_size
with open(filename, 'rb') as index_file_fd:
# TODO: Is NumpyUnpacker even necessary at this point?
# Seems like xdrlib.Unpacker would be sufficient here ...
reader = NumpyUnpacker(SubscriptableReader(index_file_fd))
if reader.unpack_hyper() != INDEX_MAGIC:
raise InvalidMagicException
if reader.unpack_hyper() != c_time:
raise InvalidIndexException
if reader.unpack_hyper() != m_time:
raise InvalidIndexException
if reader.unpack_hyper() != size:
raise InvalidIndexException
self._cache = []
self._times = []
try:
while True:
self._cache.append(reader.unpack_hyper())
self._times.append(reader.unpack_float())
except EOFError:
pass
self.len_available = True
def _raw_header(self):
if len(self._cache) == self.current_frame:
self._cache.append(self.get_position())
return self.fd.read(XTC_HEADER_SIZE)
def _raw_frame(self):
frame_header = self.fd.read(XTC_FRAME_HEADER_SIZE)
blob_size = xdrlib.Unpacker(frame_header[-4:]).unpack_int()
blob_size = (blob_size + 3) // 4 * 4 # Padding to 4 bytes
frame_blob = self.fd.read(blob_size)
self.current_frame += 1
return frame_header + frame_blob
def _unpack_header(self, raw_header):
unpacker = xdrlib.Unpacker(raw_header)
magic = unpacker.unpack_int()
if magic != XTC_MAGIC:
raise InvalidMagicException
n_atoms = unpacker.unpack_int()
step = unpacker.unpack_int()
time = unpacker.unpack_float()
return n_atoms, step, time
def _read_header(self):
raw_header = self._raw_header()
return self._unpack_header(raw_header)
def _unpack_frame(self, raw_frame):
unpacker = xdrlib.Unpacker(raw_frame)
raw_box = unpacker.unpack_farray(9, unpacker.unpack_float)
box = np.array(raw_box).reshape(3, 3)
num_coords = unpacker.unpack_int()
precision = unpacker.unpack_float()
maxint = unpacker.unpack_farray(3, unpacker.unpack_int)
minint = unpacker.unpack_farray(3, unpacker.unpack_int)
smallindex = unpacker.unpack_int()
blob_len = unpacker.unpack_int()
blob = unpacker.unpack_fopaque(blob_len)
return box, precision, num_coords, minint, maxint, smallindex, blob
def _read_frame(self):
raw_frame = self._raw_frame()
return self._unpack_frame(raw_frame)
def decode_frame(self, header=None, body=None):
n_atoms, step, time = header or self._read_header()
box, precision, num_coords, minint, maxint, smallindex, blob = body or self._read_frame()
coordinates = partial(decompress, num_coords, precision, minint, maxint, smallindex, blob)
frame = XTCFrame()
frame._compressed_coordinates = coordinates
frame.index = step
frame.time = time
frame.box = box
return frame
def dump_frame(self):
"""
:return: Tuple: The binary data of the frame, the frame itself
"""
raw_header = self._raw_header()
header = self._unpack_header(raw_header)
raw_body = self._raw_frame()
body = self._unpack_frame(raw_body)
return (raw_header + raw_body, self.decode_frame(header, body))
def cached_position(self, item):
try:
return self._cache[item]
except IndexError:
return None
def __getitem__(self, item):
position = self.cached_position(item)
if position is not None:
self.set_position(position)
self.current_frame = item
return self.decode_frame()
# TODO: Use elif and one single return at the end
# Also: skip_frames is in fact not implemented at all!
# TODO: Catch EOFError and raise IndexError
if self.current_frame <= item:
self.skip_frames(item - self.current_frame)
return self.decode_frame()
else:
self.set_position(0)
self.current_frame = 0
self.skip_frames(item)
return self.decode_frame()
def __len__(self):
if self.len_available:
return len(self._cache)
raise UnknownLenError
def times_of_indices(self, indices):
return [self[i].time for i in indices]
def __hash__(self):
return merge_hashes(hash_anything(self.filename), hash_anything(self._cache))
class TRRHeader:
__slots__ = ['ir_size', 'e_size', 'box_size', 'vir_size', 'pres_size', 'top_size', 'sym_size', 'x_size', 'v_size',
'f_size', 'n_atoms', 'step', 'nre', 't', '_lambda', 'is_double']
@property
def frame_size(self):
return self.ir_size + \
self.e_size + \
self.box_size + \
self.vir_size + \
self.top_size + \
self.sym_size + \
self.x_size + \
self.v_size + \
self.f_size
class TRRFrame:
__slots__ = ['header', 'box', 'x', 'v', 'f']
class TRRReader(BaseReader):
def __iter__(self):
return TRRIterator(self.filename)
def _unpack_float(self, is_double):
if is_double:
return self.xdr_reader.unpack_double()
else:
return self.xdr_reader.unpack_float()
def _unpack_np_float(self, is_double, *dim):
total = np.product(dim)
if is_double:
box = self.xdr_reader.unpack_double_array(total)
else:
box = self.xdr_reader.unpack_float_array(total)
box = box.reshape(*dim)
return box
def _read_header(self):
data = self.fd.read(8) # 4 Magic + 4 VersionStringLen
unpacker = NumpyUnpacker(data)
magic = unpacker.unpack_int()
if magic != TRR_MAGIC:
raise InvalidMagicException
version_string_len = unpacker.unpack_int()
data = self.fd.read(version_string_len + 55) # 4 Magic + 4 VersionStringLen
unpacker.reset(data)
version = unpacker.unpack_string()
assert version == b'GMX_trn_file'
header = TRRHeader()
header.ir_size = unpacker.unpack_int()
header.e_size = unpacker.unpack_int()
header.box_size = unpacker.unpack_int()
header.vir_size = unpacker.unpack_int()
header.pres_size = unpacker.unpack_int()
header.top_size = unpacker.unpack_int()
header.sym_size = unpacker.unpack_int()
header.x_size = unpacker.unpack_int()
header.v_size = unpacker.unpack_int()
header.f_size = unpacker.unpack_int()
header.n_atoms = unpacker.unpack_int()
header.step = unpacker.unpack_int()
header.nre = unpacker.unpack_int()
if header.x_size is not None:
header.is_double = (header.x_size // header.n_atoms) == 8
elif header.v_size is not None:
header.is_double = (header.v_size // header.n_atoms) == 8
elif header.f_size is not None:
header.is_double = (header.f_size // header.n_atoms) == 8
if header.is_double:
data = self.fd.read(16)
else:
data = self.fd.read(8)
unpacker.reset(data)
self.xdr_reader = unpacker
header.t = self._unpack_float(header.is_double)
header._lambda = self._unpack_float(header.is_double)
return header
def _read_frame(self, header):
frame = TRRFrame()
frame.header = header
data = self.fd.read(header.frame_size)
self.xdr_reader = NumpyUnpacker(data)
frame.box = None
if header.box_size:
frame.box = self._unpack_np_float(header.is_double, 3, 3)
if header.vir_size:
for i in range(9):
self._unpack_float(header.is_double)
if header.pres_size:
for i in range(9):
self._unpack_float(header.is_double)
frame.x = None
if header.x_size:
frame.x = self._unpack_np_float(header.is_double, header.n_atoms, 3)
frame.v = None
if header.v_size:
frame.v = self._unpack_np_float(header.is_double, header.n_atoms, 3)
frame.f = None
if header.f_size:
frame.f = self._unpack_np_float(header.is_double, header.n_atoms, 3)
return frame
def decode_frame(self):
h = self._read_header()
return self._read_frame(h)
def get_position(self):
return self.fd.tell()
def set_position(self, pos):
self.fd.seek(pos)
class TRRIterator():
def __init__(self, file):
self.__reader = TRRReader(file)
self.__reader.__enter__()
def __next__(self):
try:
return self.__reader.decode_frame()
except EOFError as err:
self.__reader.__exit__()
raise StopIteration
def __iter__(self):
return self

71
pygmx/gromacs/xtcdemux.py Normal file
View File

@ -0,0 +1,71 @@
import sys
import os
from .logarithmic import is_log_step
from .reader import XTCReader
def next_log_step(step, per_decade):
step += 1
while not is_log_step(step, per_decade): step += 1
return step
def next_lin_step(step, frequency):
return step+frequency
class LogWriter:
current_offset = 0
log_step = 0
fd = None
def __init__(self, file, offset, log_freq):
self.fd = open(file,'wb')
self.offset = offset
self.log_freq = log_freq
def consume_frame(self, step, raw_frame):
if step > self.log_step+self.offset:
raise Exception("Missing frame {}".format(step))
if step == self.log_step+self.offset:
self.fd.write(raw_frame)
self.log_step = next_log_step(self.log_step, self.log_freq)
def main():
filename = sys.argv[1]
reader = XTCReader(filename, load_cache_file=False)
lin_freq = int(sys.argv[2])
log_freq = int(sys.argv[3])
log_restart = int(sys.argv[4])
base,ext = os.path.splitext(filename)
lin_file = base + '.lin' + ext
lin_step = 0
log_writers = []
with open(lin_file,'wb') as lin_fd:
try:
while True:
raw_frame, frame = reader.dump_frame()
if frame.index % log_restart == 0:
print('Starting new log band: {}'.format(frame.index))
log_filename = '{}.{}.log{}'.format(base, len(log_writers), ext)
log_writers.append(LogWriter(log_filename, frame.index, log_freq))
if frame.index > lin_step:
raise Exception("Missing frames")
if frame.index == lin_step:
lin_fd.write(raw_frame)
lin_step = next_lin_step(lin_step, lin_freq)
for writer in log_writers:
writer.consume_frame(frame.index, raw_frame)
except EOFError:
pass
if __name__ == '__main__':
main()

63
pygmx/gromacs/xtcindex.py Normal file
View File

@ -0,0 +1,63 @@
from .reader import XTCReader, index_filename_for_xtc, INDEX_MAGIC
from xdrlib import Packer
import os
import sys
import logging
def index_xtcfile(filename):
"""
Write an index-file for a xtc-file.
The resulting file will have the same name, except for the new extension .xtcindex.
This function has to be called only once for every xtcfile.
Args:
filename (str): Name of the xtc-file
"""
# TODO: Would it be possible, to index files on the fly?
# How long does indexing take?
index_filename = index_filename_for_xtc(filename)
print('Writing index file: {}'.format(index_filename))
packer = Packer()
xtc_stat = os.stat(filename)
c_time = int(xtc_stat.st_ctime)
m_time = int(xtc_stat.st_mtime)
size = xtc_stat.st_size
with XTCReader(filename, load_cache_file=False) as reader, open(index_filename, 'wb') as idx_fd:
packer.pack_hyper(INDEX_MAGIC)
# TODO Maybe store only the hashsum of the file for identification?
packer.pack_hyper(c_time)
packer.pack_hyper(m_time)
packer.pack_hyper(size)
count = 0
try:
while True:
if count % 300 == 0:
print("Frame {}".format(count), end="\r")
position = reader.get_position()
frame = reader.decode_frame()
packer.pack_hyper(position)
packer.pack_float(frame.time)
count += 1
except EOFError:
pass
idx_fd.write(packer.get_buffer())
packer.reset()
def main():
if len(sys.argv) < 2:
print("Usage {} xtc-file [xtc-file]".format(sys.argv[0]))
return -1
for filename in sys.argv[1:]:
index_xtcfile(filename)
if __name__ == '__main__':
sys.exit(main())