Moved gromacs module from mdevaluate to pygmx
This commit is contained in:
64
pygmx/gromacs/__init__.py
Normal file
64
pygmx/gromacs/__init__.py
Normal file
@ -0,0 +1,64 @@
|
||||
import numpy as np
|
||||
from numpy import genfromtxt
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .reader import XTCReader, TRRReader
|
||||
from .xtcindex import index_xtcfile
|
||||
|
||||
def atoms_from_grofile(file):
|
||||
""" Deprecated. Use Atoms.from_grofile"""
|
||||
t = genfromtxt(file, skip_header=2, delimiter=(10, 5), dtype='U8', autostrip=True, skip_footer=1)
|
||||
return t
|
||||
|
||||
|
||||
def _atoms_from_grofile(file):
|
||||
t = genfromtxt(file, skip_header=2, delimiter=(5, 5, 5), dtype='U8', autostrip=True, skip_footer=1)
|
||||
return t
|
||||
|
||||
|
||||
group_re = re.compile('\[ ([-+\w]+) \]')
|
||||
|
||||
|
||||
def load_indices(indexfile):
|
||||
indices = {}
|
||||
index_array = None
|
||||
with open(indexfile) as idx_file:
|
||||
for line in idx_file:
|
||||
m = group_re.search(line)
|
||||
if m is not None:
|
||||
group_name = m.group(1)
|
||||
index_array = indices.get(group_name, [])
|
||||
indices[group_name] = index_array
|
||||
else:
|
||||
elements = line.strip().split('\t')
|
||||
elements = [x.split(' ') for x in elements]
|
||||
elements = itertools.chain(*elements) # make a flat iterator
|
||||
elements = [x for x in elements if x != '']
|
||||
index_array += [int(x) - 1 for x in elements]
|
||||
return indices
|
||||
|
||||
|
||||
def load_index(indexfile, index_names):
|
||||
"""
|
||||
Load an indexfile, returns tuple of indices for index_names
|
||||
"""
|
||||
index_arrays = [[] for _ in index_names]
|
||||
index_array = [] # This array will contain all indices before the first index_group. Should be empty
|
||||
|
||||
with open(indexfile) as idx_file:
|
||||
for line in idx_file:
|
||||
if line.startswith("["):
|
||||
for i, index in enumerate(index_names):
|
||||
if line.startswith("[ " + index + " ]"):
|
||||
index_array = index_arrays[i]
|
||||
break
|
||||
index_array = None
|
||||
else:
|
||||
elements = line.strip().split('\t')
|
||||
elements = [x.split(' ') for x in elements]
|
||||
|
||||
elements = itertools.chain(*elements) # make a flat iterator
|
||||
elements = [x for x in elements if x != '']
|
||||
index_array += [int(x) - 1 for x in elements]
|
||||
return index_arrays
|
351
pygmx/gromacs/compression.c
Normal file
351
pygmx/gromacs/compression.c
Normal file
@ -0,0 +1,351 @@
|
||||
#include <stdio.h>
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
extern void test(int *b) {
|
||||
b[0] = 1;
|
||||
b[1] = 0;
|
||||
b[2] = 2;
|
||||
}
|
||||
|
||||
static const int magicints[] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 10, 12, 16, 20, 25, 32, 40, 50, 64,
|
||||
80, 101, 128, 161, 203, 256, 322, 406, 512, 645, 812, 1024, 1290,
|
||||
1625, 2048, 2580, 3250, 4096, 5060, 6501, 8192, 10321, 13003,
|
||||
16384, 20642, 26007, 32768, 41285, 52015, 65536,82570, 104031,
|
||||
131072, 165140, 208063, 262144, 330280, 416127, 524287, 660561,
|
||||
832255, 1048576, 1321122, 1664510, 2097152, 2642245, 3329021,
|
||||
4194304, 5284491, 6658042, 8388607, 10568983, 13316085, 16777216
|
||||
};
|
||||
|
||||
#define FIRSTIDX 9
|
||||
/* note that magicints[FIRSTIDX-1] == 0 */
|
||||
#define LASTIDX (sizeof(magicints) / sizeof(*magicints))
|
||||
|
||||
/* Internal support routines for reading/writing compressed coordinates
|
||||
* sizeofint - calculate smallest number of bits necessary
|
||||
* to represent a certain integer.
|
||||
*/
|
||||
static int
|
||||
sizeofint(int size) {
|
||||
unsigned int num = 1;
|
||||
int num_of_bits = 0;
|
||||
|
||||
while (size >= num && num_of_bits < 32)
|
||||
{
|
||||
num_of_bits++;
|
||||
num <<= 1;
|
||||
}
|
||||
return num_of_bits;
|
||||
}
|
||||
|
||||
/*
|
||||
* sizeofints - calculate 'bitsize' of compressed ints
|
||||
*
|
||||
* given a number of small unsigned integers and the maximum value
|
||||
* return the number of bits needed to read or write them with the
|
||||
* routines encodeints/decodeints. You need this parameter when
|
||||
* calling those routines.
|
||||
* (However, in some cases we can just use the variable 'smallidx'
|
||||
* which is the exact number of bits, and them we dont need to call
|
||||
* this routine).
|
||||
*/
|
||||
static int
|
||||
sizeofints(int num_of_ints, unsigned int sizes[])
|
||||
{
|
||||
int i, num;
|
||||
unsigned int num_of_bytes, num_of_bits, bytes[32], bytecnt, tmp;
|
||||
num_of_bytes = 1;
|
||||
bytes[0] = 1;
|
||||
num_of_bits = 0;
|
||||
for (i=0; i < num_of_ints; i++)
|
||||
{
|
||||
tmp = 0;
|
||||
for (bytecnt = 0; bytecnt < num_of_bytes; bytecnt++)
|
||||
{
|
||||
tmp = bytes[bytecnt] * sizes[i] + tmp;
|
||||
bytes[bytecnt] = tmp & 0xff;
|
||||
tmp >>= 8;
|
||||
}
|
||||
while (tmp != 0)
|
||||
{
|
||||
bytes[bytecnt++] = tmp & 0xff;
|
||||
tmp >>= 8;
|
||||
}
|
||||
num_of_bytes = bytecnt;
|
||||
}
|
||||
num = 1;
|
||||
num_of_bytes--;
|
||||
while (bytes[num_of_bytes] >= num)
|
||||
{
|
||||
num_of_bits++;
|
||||
num *= 2;
|
||||
}
|
||||
return num_of_bits + num_of_bytes * 8;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* decodebits - decode number from buf using specified number of bits
|
||||
*
|
||||
* extract the number of bits from the array buf and construct an integer
|
||||
* from it. Return that value.
|
||||
*
|
||||
*/
|
||||
|
||||
static int
|
||||
decodebits(int state[], int _buf[], int num_of_bits)
|
||||
{
|
||||
|
||||
int cnt, num;
|
||||
unsigned int lastbits, lastbyte;
|
||||
int mask = (1 << num_of_bits) -1;
|
||||
unsigned char * cbuf = (unsigned char *)_buf;
|
||||
cnt = state[0];
|
||||
lastbits = (unsigned int) state[1];
|
||||
lastbyte = (unsigned int) state[2];
|
||||
|
||||
num = 0;
|
||||
while (num_of_bits >= 8)
|
||||
{
|
||||
lastbyte = ( lastbyte << 8 ) | cbuf[cnt++];
|
||||
num |= (lastbyte >> lastbits) << (num_of_bits - 8);
|
||||
num_of_bits -=8;
|
||||
}
|
||||
if (num_of_bits > 0)
|
||||
{
|
||||
if (lastbits < num_of_bits)
|
||||
{
|
||||
lastbits += 8;
|
||||
lastbyte = (lastbyte << 8) | cbuf[cnt++];
|
||||
}
|
||||
lastbits -= num_of_bits;
|
||||
num |= (lastbyte >> lastbits) & ((1 << num_of_bits) -1);
|
||||
}
|
||||
num &= mask;
|
||||
state[0] = cnt;
|
||||
state[1] = lastbits;
|
||||
state[2] = lastbyte;
|
||||
return num;
|
||||
}
|
||||
|
||||
/*
|
||||
* decodeints - decode 'small' integers from the buf array
|
||||
*
|
||||
* this routine is the inverse from encodeints() and decodes the small integers
|
||||
* written to buf by calculating the remainder and doing divisions with
|
||||
* the given sizes[]. You need to specify the total number of bits to be
|
||||
* used from buf in num_of_bits.
|
||||
*
|
||||
*/
|
||||
|
||||
static void
|
||||
decodeints(int state[], int buf[], int num_of_ints, int num_of_bits,
|
||||
unsigned int sizes[], int nums[])
|
||||
{
|
||||
|
||||
int bytes[32];
|
||||
int i, j, num_of_bytes, p, num;
|
||||
|
||||
bytes[1] = bytes[2] = bytes[3] = 0;
|
||||
num_of_bytes = 0;
|
||||
while (num_of_bits > 8)
|
||||
{
|
||||
bytes[num_of_bytes++] = decodebits(state, buf, 8);
|
||||
num_of_bits -= 8;
|
||||
}
|
||||
if (num_of_bits > 0)
|
||||
{
|
||||
bytes[num_of_bytes++] = decodebits(state, buf, num_of_bits);
|
||||
}
|
||||
for (i = num_of_ints-1; i > 0; i--)
|
||||
{
|
||||
num = 0;
|
||||
for (j = num_of_bytes-1; j >=0; j--)
|
||||
{
|
||||
num = (num << 8) | bytes[j];
|
||||
p = num / sizes[i];
|
||||
bytes[j] = p;
|
||||
num = num - p * sizes[i];
|
||||
}
|
||||
nums[i] = num;
|
||||
}
|
||||
nums[0] = bytes[0] | (bytes[1] << 8) | (bytes[2] << 16) | (bytes[3] << 24);
|
||||
}
|
||||
|
||||
extern int
|
||||
xdrfile_decompress_coord_float(float *coordinates,
|
||||
int size,
|
||||
float precision,
|
||||
int minint[3],
|
||||
int maxint[3],
|
||||
int smallidx,
|
||||
char* compressed_blob_,
|
||||
size_t blob_len,
|
||||
size_t * readed_len)
|
||||
{
|
||||
int *compressed_blob = (int*)compressed_blob_;
|
||||
int *lip;
|
||||
unsigned sizeint[3], sizesmall[3], bitsizeint[3], size3;
|
||||
int k, *buf1 , flag;
|
||||
int smallnum, smaller, i, is_smaller, run;
|
||||
float *lfp, inv_precision;
|
||||
int tmp, *thiscoord, prevcoord[3];
|
||||
unsigned int bitsize;
|
||||
|
||||
bitsizeint[0] = 0;
|
||||
bitsizeint[1] = 0;
|
||||
bitsizeint[2] = 0;
|
||||
|
||||
|
||||
size3 = size * 3;
|
||||
|
||||
if((buf1=(int *)malloc(sizeof(int)*size3))==NULL)
|
||||
{
|
||||
fprintf(stderr,"Cannot allocate memory for decompressing coordinates.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Dont bother with compression for three atoms or less */
|
||||
if(size<=9)
|
||||
{
|
||||
|
||||
// TODO...
|
||||
/* return number of coords, not floats */
|
||||
}
|
||||
|
||||
|
||||
sizeint[0] = maxint[0] - minint[0]+1;
|
||||
sizeint[1] = maxint[1] - minint[1]+1;
|
||||
sizeint[2] = maxint[2] - minint[2]+1;
|
||||
|
||||
/* check if one of the sizes is to big to be multiplied */
|
||||
if ((sizeint[0] | sizeint[1] | sizeint[2] ) > 0xffffff)
|
||||
{
|
||||
bitsizeint[0] = sizeofint(sizeint[0]);
|
||||
bitsizeint[1] = sizeofint(sizeint[1]);
|
||||
bitsizeint[2] = sizeofint(sizeint[2]);
|
||||
bitsize = 0; /* flag the use of large sizes */
|
||||
}
|
||||
else
|
||||
{
|
||||
bitsize = sizeofints(3, sizeint);
|
||||
}
|
||||
|
||||
|
||||
tmp = smallidx+8;
|
||||
tmp = smallidx-1;
|
||||
tmp = (FIRSTIDX>tmp) ? FIRSTIDX : tmp;
|
||||
smaller = magicints[tmp] / 2;
|
||||
smallnum = magicints[smallidx] / 2;
|
||||
sizesmall[0] = sizesmall[1] = sizesmall[2] = magicints[smallidx] ;
|
||||
|
||||
|
||||
lfp = coordinates;
|
||||
inv_precision = 1.0 / precision;
|
||||
run = 0;
|
||||
i = 0;
|
||||
lip = buf1;
|
||||
|
||||
int state[3] = {0,0,0};
|
||||
|
||||
while ( i < size )
|
||||
{
|
||||
thiscoord = (int *)(lip) + i * 3;
|
||||
|
||||
if (bitsize == 0)
|
||||
{
|
||||
thiscoord[0] = decodebits(state, compressed_blob, bitsizeint[0]);
|
||||
thiscoord[1] = decodebits(state, compressed_blob, bitsizeint[1]);
|
||||
thiscoord[2] = decodebits(state, compressed_blob, bitsizeint[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
decodeints(state, compressed_blob, 3, bitsize, sizeint, thiscoord);
|
||||
}
|
||||
|
||||
i++;
|
||||
thiscoord[0] += minint[0];
|
||||
thiscoord[1] += minint[1];
|
||||
thiscoord[2] += minint[2];
|
||||
|
||||
prevcoord[0] = thiscoord[0];
|
||||
prevcoord[1] = thiscoord[1];
|
||||
prevcoord[2] = thiscoord[2];
|
||||
|
||||
flag = decodebits(state, compressed_blob, 1);
|
||||
is_smaller = 0;
|
||||
if (flag == 1)
|
||||
{
|
||||
run = decodebits(state, compressed_blob, 5);
|
||||
is_smaller = run % 3;
|
||||
run -= is_smaller;
|
||||
is_smaller--;
|
||||
}
|
||||
if (run > 0)
|
||||
{
|
||||
thiscoord += 3;
|
||||
for (k = 0; k < run; k+=3)
|
||||
{
|
||||
decodeints(state, compressed_blob, 3, smallidx, sizesmall, thiscoord);
|
||||
i++;
|
||||
thiscoord[0] += prevcoord[0] - smallnum;
|
||||
thiscoord[1] += prevcoord[1] - smallnum;
|
||||
thiscoord[2] += prevcoord[2] - smallnum;
|
||||
if (k == 0) {
|
||||
/* interchange first with second atom for better
|
||||
* compression of water molecules
|
||||
*/
|
||||
tmp = thiscoord[0]; thiscoord[0] = prevcoord[0];
|
||||
prevcoord[0] = tmp;
|
||||
tmp = thiscoord[1]; thiscoord[1] = prevcoord[1];
|
||||
prevcoord[1] = tmp;
|
||||
tmp = thiscoord[2]; thiscoord[2] = prevcoord[2];
|
||||
prevcoord[2] = tmp;
|
||||
*lfp++ = prevcoord[0] * inv_precision;
|
||||
*lfp++ = prevcoord[1] * inv_precision;
|
||||
*lfp++ = prevcoord[2] * inv_precision;
|
||||
} else {
|
||||
prevcoord[0] = thiscoord[0];
|
||||
prevcoord[1] = thiscoord[1];
|
||||
prevcoord[2] = thiscoord[2];
|
||||
}
|
||||
*lfp++ = thiscoord[0] * inv_precision;
|
||||
*lfp++ = thiscoord[1] * inv_precision;
|
||||
*lfp++ = thiscoord[2] * inv_precision;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*lfp++ = thiscoord[0] * inv_precision;
|
||||
*lfp++ = thiscoord[1] * inv_precision;
|
||||
*lfp++ = thiscoord[2] * inv_precision;
|
||||
}
|
||||
smallidx += is_smaller;
|
||||
if (is_smaller < 0)
|
||||
{
|
||||
smallnum = smaller;
|
||||
|
||||
if (smallidx > FIRSTIDX)
|
||||
{
|
||||
smaller = magicints[smallidx - 1] /2;
|
||||
}
|
||||
else
|
||||
{
|
||||
smaller = 0;
|
||||
}
|
||||
}
|
||||
else if (is_smaller > 0)
|
||||
{
|
||||
smaller = smallnum;
|
||||
smallnum = magicints[smallidx] / 2;
|
||||
}
|
||||
sizesmall[0] = sizesmall[1] = sizesmall[2] = magicints[smallidx] ;
|
||||
}
|
||||
|
||||
free((void*)buf1);
|
||||
*readed_len = state[0];
|
||||
return 0;
|
||||
}
|
12
pygmx/gromacs/compression.h
Normal file
12
pygmx/gromacs/compression.h
Normal file
@ -0,0 +1,12 @@
|
||||
#include <stdio.h>
|
||||
|
||||
int
|
||||
xdrfile_decompress_coord_float(float *coordinates,
|
||||
int size,
|
||||
float precision,
|
||||
int minint[3],
|
||||
int maxint[3],
|
||||
int smallidx,
|
||||
int * compressed_blob,
|
||||
size_t blob_len,
|
||||
size_t * readed_len);
|
6521
pygmx/gromacs/coordinates.c
Normal file
6521
pygmx/gromacs/coordinates.c
Normal file
File diff suppressed because it is too large
Load Diff
47
pygmx/gromacs/coordinates.pyx
Normal file
47
pygmx/gromacs/coordinates.pyx
Normal file
@ -0,0 +1,47 @@
|
||||
# distutils: sources = mdevaluate/gromacs/compression.c
|
||||
|
||||
from cython cimport view
|
||||
from array import array
|
||||
import numpy as np
|
||||
cimport numpy as np
|
||||
|
||||
|
||||
cdef extern from "compression.h":
|
||||
int xdrfile_decompress_coord_float(float *coordinates,
|
||||
int size,
|
||||
float precision,
|
||||
int minint[3],
|
||||
int maxint[3],
|
||||
int smallidx,
|
||||
char * compressed_blob,
|
||||
size_t blob_len,
|
||||
size_t * readed_len)
|
||||
|
||||
|
||||
def decompress(int size, float precision, minint, maxint, int smallidx, bytes blob):
|
||||
|
||||
assert len(minint) == 3
|
||||
assert len(maxint) == 3
|
||||
|
||||
cdef int minints[3]
|
||||
cdef int maxints[3]
|
||||
|
||||
for i in range(3):
|
||||
minints[i] = minint[i]
|
||||
maxints[i] = maxint[i]
|
||||
|
||||
|
||||
cdef np.ndarray[float, ndim=2] coordinates = np.empty((size,3), dtype=np.float32)
|
||||
cdef size_t readed
|
||||
|
||||
xdrfile_decompress_coord_float(
|
||||
<float *>coordinates.data,
|
||||
size,
|
||||
precision,
|
||||
maxints,
|
||||
minints,
|
||||
smallidx,
|
||||
blob,
|
||||
len(blob),
|
||||
&readed)
|
||||
return coordinates
|
2340
pygmx/gromacs/logarithmic.c
Normal file
2340
pygmx/gromacs/logarithmic.c
Normal file
File diff suppressed because it is too large
Load Diff
37
pygmx/gromacs/logarithmic.pyx
Normal file
37
pygmx/gromacs/logarithmic.pyx
Normal file
@ -0,0 +1,37 @@
|
||||
from libc.math cimport floor, ceil, fabs, log10
|
||||
|
||||
def is_log_step(long long step, long long per_decade):
|
||||
|
||||
# The first step will never be written
|
||||
if step == 0:
|
||||
return True
|
||||
|
||||
if step == 1:
|
||||
return True
|
||||
|
||||
|
||||
cdef double log_step = log10(step)
|
||||
cdef double log_next = log10(step+1)
|
||||
cdef double log_prev = log10(step-1)
|
||||
cdef long long decade = int(log10(step))
|
||||
|
||||
# Idea: Is there a number x = decade + n / per_decade
|
||||
# with 0 <= n < per_decade where
|
||||
# | log_step - x | <= | log_prev - x |
|
||||
# | log_step - x | < | log_next - x |
|
||||
|
||||
# Calculate the closest numbers
|
||||
|
||||
cdef double n_smaller = floor((log_step - decade) * per_decade);
|
||||
cdef double n_larger = ceil((log_step - decade) * per_decade);
|
||||
|
||||
cdef double prev_dist = fabs(log_prev-decade-n_smaller/per_decade);
|
||||
cdef double dist_to_smaller = fabs(log_step-decade-n_smaller/per_decade);
|
||||
|
||||
cdef double next_dist = fabs(log_next-decade-n_larger/per_decade);
|
||||
cdef double dist_to_larger = fabs(log_step-decade-n_larger/per_decade);
|
||||
|
||||
if dist_to_larger < next_dist or dist_to_smaller <= prev_dist:
|
||||
return True
|
||||
else:
|
||||
return False
|
511
pygmx/gromacs/reader.py
Normal file
511
pygmx/gromacs/reader.py
Normal file
@ -0,0 +1,511 @@
|
||||
import xdrlib
|
||||
import io
|
||||
import os
|
||||
from .coordinates import decompress
|
||||
from ..utils import hash_anything, merge_hashes
|
||||
from functools import partial
|
||||
import numpy as np
|
||||
|
||||
TRR_MAGIC = 1993
|
||||
XTC_MAGIC = 1995
|
||||
INDEX_MAGIC = 2015
|
||||
|
||||
def index_filename_for_xtc(xtcfile):
|
||||
"""
|
||||
Get the filename for the index file of a xtc file.
|
||||
|
||||
In general, the index file is located in the same directory as the xtc file.
|
||||
If the xtc file is located in a read-only directory (for the current user)
|
||||
and does not exist, the index file will be instead located in a subdirectory
|
||||
of ~/.xtcindex, of the current user.
|
||||
|
||||
The directory for user index files can be changed by setting the environment
|
||||
variable 'XTCINDEXDIR'.
|
||||
|
||||
Example:
|
||||
xtcfile = '/data/niels/test.xtc'
|
||||
|
||||
# case 1: index file exists, or current user is niels
|
||||
index_filename_for_xtc(xtcfile) == '/data/niels/.test.xtc.xtcindex'
|
||||
|
||||
# case 2: index file doesn't exist, and nor write permission in /data/niels
|
||||
index_filename_for_xtc(xtcfile) == '~/.xtcindex/data/niels/.test.xtc.xtcindex'
|
||||
|
||||
Warning:
|
||||
At this point, the index file is not checked for validity. If an invalid
|
||||
index file is present in the xtc files directory, this will be used and an
|
||||
error will be risen by XTCReader.
|
||||
|
||||
Warning:
|
||||
On most systems, the home directory is on the local drive so that the indexing
|
||||
has probably to do be done on every system if it can not be saved in the directory
|
||||
of the xtc file.
|
||||
"""
|
||||
base = os.path.basename(xtcfile)
|
||||
filename = "." + base + ".xtcindex"
|
||||
|
||||
dir = os.path.abspath(os.path.dirname(xtcfile))
|
||||
|
||||
if not os.path.exists(os.path.join(dir, filename)) and not os.access(dir, os.W_OK):
|
||||
if 'XTCINDEXDIR' in os.environ:
|
||||
index_dir = os.environ['XTCINDEXDIR']
|
||||
else:
|
||||
index_dir = os.path.join(os.environ['HOME'], '.xtcindex')
|
||||
dir = os.path.join(index_dir, dir.lstrip('/'))
|
||||
os.makedirs(dir, exist_ok=True)
|
||||
|
||||
return os.path.join(dir, filename)
|
||||
|
||||
|
||||
class NumpyUnpacker(xdrlib.Unpacker):
|
||||
|
||||
def unpack_float_array(self, n_items):
|
||||
i = self.get_position()
|
||||
j = i + 4 * n_items
|
||||
self.set_position(j)
|
||||
data = self.get_buffer()[i:j]
|
||||
|
||||
if len(data) < 4:
|
||||
raise EOFError
|
||||
|
||||
ret = np.frombuffer(data, '>f')
|
||||
if len(ret) != n_items:
|
||||
raise EOFError
|
||||
return ret
|
||||
|
||||
def unpack_double_array(self, n_items):
|
||||
i = self.get_position()
|
||||
j = i + 8 * n_items
|
||||
self.set_position(j)
|
||||
data = self.get_buffer()[i:j]
|
||||
|
||||
if len(data) < 8:
|
||||
raise EOFError
|
||||
|
||||
ret = np.frombuffer(data, '>d')
|
||||
if len(ret) != n_items:
|
||||
raise EOFError
|
||||
return ret
|
||||
|
||||
|
||||
class InvalidMagicException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidIndexException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class UnknownLenError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class SubscriptableReader:
|
||||
|
||||
def __init__(self, fd):
|
||||
self.fd = fd
|
||||
self.len = os.stat(self.fd.fileno()).st_size
|
||||
|
||||
def __getitem__(self, r):
|
||||
if isinstance(r, slice):
|
||||
if r.start is not None:
|
||||
if r.start < 0:
|
||||
self.fd.seek(r.start, io.SEEK_END)
|
||||
else:
|
||||
self.fd.seek(r.start, io.SEEK_SET)
|
||||
|
||||
if r.step is not None:
|
||||
raise NotImplementedError
|
||||
|
||||
return self.fd.read(r.stop - r.start)
|
||||
else:
|
||||
self.fd.seek(r, io.SEEK_SET)
|
||||
return self.fd.read(1)
|
||||
|
||||
def __len__(self):
|
||||
return self.len
|
||||
|
||||
|
||||
class XTCFrame:
|
||||
__slots__ = ['_coordinates', 'index', 'time', 'box', '_compressed_coordinates']
|
||||
|
||||
def __init__(self):
|
||||
self._coordinates = None
|
||||
|
||||
@property
|
||||
def coordinates(self):
|
||||
if self._coordinates is None:
|
||||
self._coordinates = self._compressed_coordinates()
|
||||
return self._coordinates
|
||||
|
||||
|
||||
class BaseReader:
|
||||
|
||||
def __init__(self, file):
|
||||
self.fd = open(file, 'rb')
|
||||
self.filename = file
|
||||
|
||||
self.reader = SubscriptableReader(self.fd)
|
||||
self.xdr_reader = NumpyUnpacker(self.reader)
|
||||
|
||||
def __exit__(self, *_):
|
||||
self.fd.close()
|
||||
|
||||
def __enter__(self, *_):
|
||||
return self
|
||||
|
||||
def __getstate__(self):
|
||||
state = self.__dict__.copy()
|
||||
del state['fd']
|
||||
del state['reader']
|
||||
del state['xdr_reader']
|
||||
state['filepos'] = self.get_position()
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
filepos = state.pop('filepos')
|
||||
self.__dict__.update(state)
|
||||
self.fd = open(self.filename, 'rb')
|
||||
self.reader = SubscriptableReader(self.fd)
|
||||
self.xdr_reader = NumpyUnpacker(self.reader)
|
||||
self.set_position(filepos)
|
||||
|
||||
def get_position(self):
|
||||
return self.fd.tell()
|
||||
|
||||
def set_position(self, pos):
|
||||
return self.fd.seek(pos)
|
||||
|
||||
def skip_frames(self, num):
|
||||
for i in range(num):
|
||||
self.skip_frame()
|
||||
|
||||
def skip_frame(self):
|
||||
self._read_header()
|
||||
self._read_frame()
|
||||
|
||||
def dump_frame(self):
|
||||
raise NotImplemented
|
||||
|
||||
def _read_header(self):
|
||||
raise NotImplemented
|
||||
|
||||
def _read_frame(self):
|
||||
raise NotImplemented
|
||||
|
||||
XTC_HEADER_SIZE = 4 * 4
|
||||
XTC_FRAME_HEADER_SIZE = 9 * 4 + 10 * 4
|
||||
|
||||
|
||||
class XTCReader(BaseReader):
|
||||
|
||||
len_available = False
|
||||
|
||||
def __init__(self, xtcfile, load_cache_file=True):
|
||||
super().__init__(xtcfile)
|
||||
self._cache = [0]
|
||||
self._times = None
|
||||
self.current_frame = 0
|
||||
|
||||
index_file_name = index_filename_for_xtc(xtcfile)
|
||||
if load_cache_file:
|
||||
self.load_index(index_file_name)
|
||||
|
||||
def load_index(self, filename):
|
||||
xtc_stat = os.stat(self.filename)
|
||||
c_time = int(xtc_stat.st_ctime)
|
||||
m_time = int(xtc_stat.st_mtime)
|
||||
size = xtc_stat.st_size
|
||||
|
||||
with open(filename, 'rb') as index_file_fd:
|
||||
# TODO: Is NumpyUnpacker even necessary at this point?
|
||||
# Seems like xdrlib.Unpacker would be sufficient here ...
|
||||
reader = NumpyUnpacker(SubscriptableReader(index_file_fd))
|
||||
|
||||
if reader.unpack_hyper() != INDEX_MAGIC:
|
||||
raise InvalidMagicException
|
||||
if reader.unpack_hyper() != c_time:
|
||||
raise InvalidIndexException
|
||||
if reader.unpack_hyper() != m_time:
|
||||
raise InvalidIndexException
|
||||
if reader.unpack_hyper() != size:
|
||||
raise InvalidIndexException
|
||||
|
||||
self._cache = []
|
||||
self._times = []
|
||||
try:
|
||||
while True:
|
||||
self._cache.append(reader.unpack_hyper())
|
||||
self._times.append(reader.unpack_float())
|
||||
except EOFError:
|
||||
pass
|
||||
self.len_available = True
|
||||
|
||||
def _raw_header(self):
|
||||
if len(self._cache) == self.current_frame:
|
||||
self._cache.append(self.get_position())
|
||||
return self.fd.read(XTC_HEADER_SIZE)
|
||||
|
||||
def _raw_frame(self):
|
||||
frame_header = self.fd.read(XTC_FRAME_HEADER_SIZE)
|
||||
blob_size = xdrlib.Unpacker(frame_header[-4:]).unpack_int()
|
||||
blob_size = (blob_size + 3) // 4 * 4 # Padding to 4 bytes
|
||||
frame_blob = self.fd.read(blob_size)
|
||||
|
||||
self.current_frame += 1
|
||||
return frame_header + frame_blob
|
||||
|
||||
def _unpack_header(self, raw_header):
|
||||
unpacker = xdrlib.Unpacker(raw_header)
|
||||
|
||||
magic = unpacker.unpack_int()
|
||||
|
||||
if magic != XTC_MAGIC:
|
||||
raise InvalidMagicException
|
||||
|
||||
n_atoms = unpacker.unpack_int()
|
||||
step = unpacker.unpack_int()
|
||||
time = unpacker.unpack_float()
|
||||
|
||||
return n_atoms, step, time
|
||||
|
||||
def _read_header(self):
|
||||
raw_header = self._raw_header()
|
||||
return self._unpack_header(raw_header)
|
||||
|
||||
def _unpack_frame(self, raw_frame):
|
||||
unpacker = xdrlib.Unpacker(raw_frame)
|
||||
|
||||
raw_box = unpacker.unpack_farray(9, unpacker.unpack_float)
|
||||
box = np.array(raw_box).reshape(3, 3)
|
||||
num_coords = unpacker.unpack_int()
|
||||
precision = unpacker.unpack_float()
|
||||
maxint = unpacker.unpack_farray(3, unpacker.unpack_int)
|
||||
minint = unpacker.unpack_farray(3, unpacker.unpack_int)
|
||||
smallindex = unpacker.unpack_int()
|
||||
|
||||
blob_len = unpacker.unpack_int()
|
||||
blob = unpacker.unpack_fopaque(blob_len)
|
||||
|
||||
return box, precision, num_coords, minint, maxint, smallindex, blob
|
||||
|
||||
def _read_frame(self):
|
||||
raw_frame = self._raw_frame()
|
||||
return self._unpack_frame(raw_frame)
|
||||
|
||||
def decode_frame(self, header=None, body=None):
|
||||
n_atoms, step, time = header or self._read_header()
|
||||
box, precision, num_coords, minint, maxint, smallindex, blob = body or self._read_frame()
|
||||
coordinates = partial(decompress, num_coords, precision, minint, maxint, smallindex, blob)
|
||||
|
||||
frame = XTCFrame()
|
||||
frame._compressed_coordinates = coordinates
|
||||
frame.index = step
|
||||
frame.time = time
|
||||
frame.box = box
|
||||
return frame
|
||||
|
||||
def dump_frame(self):
|
||||
"""
|
||||
:return: Tuple: The binary data of the frame, the frame itself
|
||||
"""
|
||||
|
||||
raw_header = self._raw_header()
|
||||
header = self._unpack_header(raw_header)
|
||||
|
||||
raw_body = self._raw_frame()
|
||||
body = self._unpack_frame(raw_body)
|
||||
|
||||
return (raw_header + raw_body, self.decode_frame(header, body))
|
||||
|
||||
def cached_position(self, item):
|
||||
try:
|
||||
return self._cache[item]
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
def __getitem__(self, item):
|
||||
position = self.cached_position(item)
|
||||
|
||||
if position is not None:
|
||||
self.set_position(position)
|
||||
self.current_frame = item
|
||||
return self.decode_frame()
|
||||
# TODO: Use elif and one single return at the end
|
||||
# Also: skip_frames is in fact not implemented at all!
|
||||
# TODO: Catch EOFError and raise IndexError
|
||||
if self.current_frame <= item:
|
||||
self.skip_frames(item - self.current_frame)
|
||||
return self.decode_frame()
|
||||
else:
|
||||
self.set_position(0)
|
||||
self.current_frame = 0
|
||||
self.skip_frames(item)
|
||||
return self.decode_frame()
|
||||
|
||||
def __len__(self):
|
||||
if self.len_available:
|
||||
return len(self._cache)
|
||||
raise UnknownLenError
|
||||
|
||||
def times_of_indices(self, indices):
|
||||
return [self[i].time for i in indices]
|
||||
|
||||
def __hash__(self):
|
||||
return merge_hashes(hash_anything(self.filename), hash_anything(self._cache))
|
||||
|
||||
|
||||
class TRRHeader:
|
||||
__slots__ = ['ir_size', 'e_size', 'box_size', 'vir_size', 'pres_size', 'top_size', 'sym_size', 'x_size', 'v_size',
|
||||
'f_size', 'n_atoms', 'step', 'nre', 't', '_lambda', 'is_double']
|
||||
|
||||
@property
|
||||
def frame_size(self):
|
||||
return self.ir_size + \
|
||||
self.e_size + \
|
||||
self.box_size + \
|
||||
self.vir_size + \
|
||||
self.top_size + \
|
||||
self.sym_size + \
|
||||
self.x_size + \
|
||||
self.v_size + \
|
||||
self.f_size
|
||||
|
||||
|
||||
class TRRFrame:
|
||||
__slots__ = ['header', 'box', 'x', 'v', 'f']
|
||||
|
||||
|
||||
class TRRReader(BaseReader):
|
||||
|
||||
def __iter__(self):
|
||||
return TRRIterator(self.filename)
|
||||
|
||||
def _unpack_float(self, is_double):
|
||||
if is_double:
|
||||
return self.xdr_reader.unpack_double()
|
||||
else:
|
||||
return self.xdr_reader.unpack_float()
|
||||
|
||||
def _unpack_np_float(self, is_double, *dim):
|
||||
total = np.product(dim)
|
||||
|
||||
if is_double:
|
||||
box = self.xdr_reader.unpack_double_array(total)
|
||||
else:
|
||||
box = self.xdr_reader.unpack_float_array(total)
|
||||
|
||||
box = box.reshape(*dim)
|
||||
return box
|
||||
|
||||
def _read_header(self):
|
||||
|
||||
data = self.fd.read(8) # 4 Magic + 4 VersionStringLen
|
||||
unpacker = NumpyUnpacker(data)
|
||||
|
||||
magic = unpacker.unpack_int()
|
||||
|
||||
if magic != TRR_MAGIC:
|
||||
raise InvalidMagicException
|
||||
|
||||
version_string_len = unpacker.unpack_int()
|
||||
data = self.fd.read(version_string_len + 55) # 4 Magic + 4 VersionStringLen
|
||||
unpacker.reset(data)
|
||||
|
||||
version = unpacker.unpack_string()
|
||||
assert version == b'GMX_trn_file'
|
||||
|
||||
header = TRRHeader()
|
||||
|
||||
header.ir_size = unpacker.unpack_int()
|
||||
header.e_size = unpacker.unpack_int()
|
||||
header.box_size = unpacker.unpack_int()
|
||||
header.vir_size = unpacker.unpack_int()
|
||||
header.pres_size = unpacker.unpack_int()
|
||||
header.top_size = unpacker.unpack_int()
|
||||
header.sym_size = unpacker.unpack_int()
|
||||
header.x_size = unpacker.unpack_int()
|
||||
header.v_size = unpacker.unpack_int()
|
||||
header.f_size = unpacker.unpack_int()
|
||||
header.n_atoms = unpacker.unpack_int()
|
||||
header.step = unpacker.unpack_int()
|
||||
header.nre = unpacker.unpack_int()
|
||||
|
||||
if header.x_size is not None:
|
||||
header.is_double = (header.x_size // header.n_atoms) == 8
|
||||
elif header.v_size is not None:
|
||||
header.is_double = (header.v_size // header.n_atoms) == 8
|
||||
elif header.f_size is not None:
|
||||
header.is_double = (header.f_size // header.n_atoms) == 8
|
||||
|
||||
if header.is_double:
|
||||
data = self.fd.read(16)
|
||||
else:
|
||||
data = self.fd.read(8)
|
||||
unpacker.reset(data)
|
||||
self.xdr_reader = unpacker
|
||||
|
||||
header.t = self._unpack_float(header.is_double)
|
||||
header._lambda = self._unpack_float(header.is_double)
|
||||
|
||||
return header
|
||||
|
||||
def _read_frame(self, header):
|
||||
frame = TRRFrame()
|
||||
frame.header = header
|
||||
|
||||
data = self.fd.read(header.frame_size)
|
||||
self.xdr_reader = NumpyUnpacker(data)
|
||||
|
||||
frame.box = None
|
||||
if header.box_size:
|
||||
frame.box = self._unpack_np_float(header.is_double, 3, 3)
|
||||
|
||||
if header.vir_size:
|
||||
for i in range(9):
|
||||
self._unpack_float(header.is_double)
|
||||
|
||||
if header.pres_size:
|
||||
for i in range(9):
|
||||
self._unpack_float(header.is_double)
|
||||
|
||||
frame.x = None
|
||||
if header.x_size:
|
||||
frame.x = self._unpack_np_float(header.is_double, header.n_atoms, 3)
|
||||
|
||||
frame.v = None
|
||||
if header.v_size:
|
||||
frame.v = self._unpack_np_float(header.is_double, header.n_atoms, 3)
|
||||
|
||||
frame.f = None
|
||||
if header.f_size:
|
||||
frame.f = self._unpack_np_float(header.is_double, header.n_atoms, 3)
|
||||
|
||||
return frame
|
||||
|
||||
def decode_frame(self):
|
||||
h = self._read_header()
|
||||
return self._read_frame(h)
|
||||
|
||||
def get_position(self):
|
||||
return self.fd.tell()
|
||||
|
||||
def set_position(self, pos):
|
||||
self.fd.seek(pos)
|
||||
|
||||
|
||||
class TRRIterator():
|
||||
|
||||
def __init__(self, file):
|
||||
self.__reader = TRRReader(file)
|
||||
self.__reader.__enter__()
|
||||
|
||||
def __next__(self):
|
||||
try:
|
||||
return self.__reader.decode_frame()
|
||||
except EOFError as err:
|
||||
self.__reader.__exit__()
|
||||
raise StopIteration
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
71
pygmx/gromacs/xtcdemux.py
Normal file
71
pygmx/gromacs/xtcdemux.py
Normal file
@ -0,0 +1,71 @@
|
||||
import sys
|
||||
import os
|
||||
from .logarithmic import is_log_step
|
||||
from .reader import XTCReader
|
||||
|
||||
def next_log_step(step, per_decade):
|
||||
step += 1
|
||||
while not is_log_step(step, per_decade): step += 1
|
||||
return step
|
||||
|
||||
def next_lin_step(step, frequency):
|
||||
return step+frequency
|
||||
|
||||
class LogWriter:
|
||||
current_offset = 0
|
||||
log_step = 0
|
||||
fd = None
|
||||
|
||||
def __init__(self, file, offset, log_freq):
|
||||
self.fd = open(file,'wb')
|
||||
self.offset = offset
|
||||
self.log_freq = log_freq
|
||||
|
||||
def consume_frame(self, step, raw_frame):
|
||||
if step > self.log_step+self.offset:
|
||||
raise Exception("Missing frame {}".format(step))
|
||||
|
||||
if step == self.log_step+self.offset:
|
||||
self.fd.write(raw_frame)
|
||||
self.log_step = next_log_step(self.log_step, self.log_freq)
|
||||
|
||||
|
||||
def main():
|
||||
filename = sys.argv[1]
|
||||
reader = XTCReader(filename, load_cache_file=False)
|
||||
lin_freq = int(sys.argv[2])
|
||||
log_freq = int(sys.argv[3])
|
||||
log_restart = int(sys.argv[4])
|
||||
|
||||
base,ext = os.path.splitext(filename)
|
||||
|
||||
lin_file = base + '.lin' + ext
|
||||
lin_step = 0
|
||||
|
||||
log_writers = []
|
||||
|
||||
with open(lin_file,'wb') as lin_fd:
|
||||
try:
|
||||
while True:
|
||||
raw_frame, frame = reader.dump_frame()
|
||||
|
||||
if frame.index % log_restart == 0:
|
||||
print('Starting new log band: {}'.format(frame.index))
|
||||
log_filename = '{}.{}.log{}'.format(base, len(log_writers), ext)
|
||||
log_writers.append(LogWriter(log_filename, frame.index, log_freq))
|
||||
|
||||
if frame.index > lin_step:
|
||||
raise Exception("Missing frames")
|
||||
|
||||
if frame.index == lin_step:
|
||||
lin_fd.write(raw_frame)
|
||||
lin_step = next_lin_step(lin_step, lin_freq)
|
||||
|
||||
for writer in log_writers:
|
||||
writer.consume_frame(frame.index, raw_frame)
|
||||
|
||||
except EOFError:
|
||||
pass
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
63
pygmx/gromacs/xtcindex.py
Normal file
63
pygmx/gromacs/xtcindex.py
Normal file
@ -0,0 +1,63 @@
|
||||
from .reader import XTCReader, index_filename_for_xtc, INDEX_MAGIC
|
||||
from xdrlib import Packer
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
|
||||
|
||||
def index_xtcfile(filename):
|
||||
"""
|
||||
Write an index-file for a xtc-file.
|
||||
|
||||
The resulting file will have the same name, except for the new extension .xtcindex.
|
||||
This function has to be called only once for every xtcfile.
|
||||
|
||||
Args:
|
||||
filename (str): Name of the xtc-file
|
||||
|
||||
"""
|
||||
# TODO: Would it be possible, to index files on the fly?
|
||||
# How long does indexing take?
|
||||
index_filename = index_filename_for_xtc(filename)
|
||||
print('Writing index file: {}'.format(index_filename))
|
||||
packer = Packer()
|
||||
xtc_stat = os.stat(filename)
|
||||
c_time = int(xtc_stat.st_ctime)
|
||||
m_time = int(xtc_stat.st_mtime)
|
||||
size = xtc_stat.st_size
|
||||
|
||||
with XTCReader(filename, load_cache_file=False) as reader, open(index_filename, 'wb') as idx_fd:
|
||||
packer.pack_hyper(INDEX_MAGIC)
|
||||
# TODO Maybe store only the hashsum of the file for identification?
|
||||
packer.pack_hyper(c_time)
|
||||
packer.pack_hyper(m_time)
|
||||
packer.pack_hyper(size)
|
||||
count = 0
|
||||
try:
|
||||
while True:
|
||||
if count % 300 == 0:
|
||||
print("Frame {}".format(count), end="\r")
|
||||
position = reader.get_position()
|
||||
frame = reader.decode_frame()
|
||||
packer.pack_hyper(position)
|
||||
packer.pack_float(frame.time)
|
||||
|
||||
count += 1
|
||||
|
||||
except EOFError:
|
||||
pass
|
||||
idx_fd.write(packer.get_buffer())
|
||||
packer.reset()
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage {} xtc-file [xtc-file]".format(sys.argv[0]))
|
||||
return -1
|
||||
|
||||
for filename in sys.argv[1:]:
|
||||
index_xtcfile(filename)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
Reference in New Issue
Block a user