Moved gromacs module from mdevaluate to pygmx

2016-06-03 12:34:36 +02:00
parent eeb5c607f8
commit a601636d55
10 changed files with 10017 additions and 0 deletions
--- a/pygmx/gromacs/init.py
+++ b/pygmx/gromacs/init.py
@@ -0,0 +1,64 @@
+import numpy as np
+from numpy import genfromtxt
+import itertools
+import re
+
+from .reader import XTCReader, TRRReader
+from .xtcindex import index_xtcfile
+
+def atoms_from_grofile(file):
+    """ Deprecated. Use Atoms.from_grofile"""
+    t = genfromtxt(file, skip_header=2, delimiter=(10, 5), dtype='U8', autostrip=True, skip_footer=1)
+    return t
+
+
+def _atoms_from_grofile(file):
+    t = genfromtxt(file, skip_header=2, delimiter=(5, 5, 5), dtype='U8', autostrip=True, skip_footer=1)
+    return t
+
+
+group_re = re.compile('\[ ([-+\w]+) \]')
+
+
+def load_indices(indexfile):
+    indices = {}
+    index_array = None
+    with open(indexfile) as idx_file:
+        for line in idx_file:
+            m = group_re.search(line)
+            if m is not None:
+                group_name = m.group(1)
+                index_array = indices.get(group_name, [])
+                indices[group_name] = index_array
+            else:
+                elements = line.strip().split('\t')
+                elements = [x.split(' ') for x in elements]
+                elements = itertools.chain(*elements)  # make a flat iterator
+                elements = [x for x in elements if x != '']
+                index_array += [int(x) - 1 for x in elements]
+    return indices
+
+
+def load_index(indexfile, index_names):
+    """
+    Load an indexfile, returns tuple of indices for index_names
+    """
+    index_arrays = [[] for _ in index_names]
+    index_array = []  # This array will contain all indices before the first index_group. Should be empty
+
+    with open(indexfile) as idx_file:
+        for line in idx_file:
+            if line.startswith("["):
+                for i, index in enumerate(index_names):
+                    if line.startswith("[ " + index + " ]"):
+                        index_array = index_arrays[i]
+                        break
+                    index_array = None
+            else:
+                elements = line.strip().split('\t')
+                elements = [x.split(' ') for x in elements]
+
+                elements = itertools.chain(*elements)  # make a flat iterator
+                elements = [x for x in elements if x != '']
+                index_array += [int(x) - 1 for x in elements]
+    return index_arrays
--- a/pygmx/gromacs/compression.c
+++ b/pygmx/gromacs/compression.c
@@ -0,0 +1,351 @@
+#include <stdio.h>
+#include <memory.h>
+#include <stdlib.h>
+
+
+extern void test(int *b) {
+	b[0] = 1;
+	b[1] = 0;
+	b[2] = 2;
+}
+
+static const int magicints[] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 10, 12, 16, 20, 25, 32, 40, 50, 64,
+    80, 101, 128, 161, 203, 256, 322, 406, 512, 645, 812, 1024, 1290,
+    1625, 2048, 2580, 3250, 4096, 5060, 6501, 8192, 10321, 13003, 
+    16384, 20642, 26007, 32768, 41285, 52015, 65536,82570, 104031, 
+    131072, 165140, 208063, 262144, 330280, 416127, 524287, 660561, 
+    832255, 1048576, 1321122, 1664510, 2097152, 2642245, 3329021, 
+    4194304, 5284491, 6658042, 8388607, 10568983, 13316085, 16777216 
+};
+
+#define FIRSTIDX 9
+/* note that magicints[FIRSTIDX-1] == 0 */
+#define LASTIDX (sizeof(magicints) / sizeof(*magicints))
+
+/* Internal support routines for reading/writing compressed coordinates
+ * sizeofint - calculate smallest number of bits necessary
+ * to represent a certain integer.
+ */
+static int
+sizeofint(int size) {
+    unsigned int num = 1;
+    int num_of_bits = 0;
+    
+    while (size >= num && num_of_bits < 32)
+    {
+        num_of_bits++;
+        num <<= 1;
+    }
+    return num_of_bits;
+}
+
+/*
+ * sizeofints - calculate 'bitsize' of compressed ints
+ *
+ * given a number of small unsigned integers and the maximum value
+ * return the number of bits needed to read or write them with the
+ * routines encodeints/decodeints. You need this parameter when
+ * calling those routines.
+ * (However, in some cases we can just use the variable 'smallidx'
+ * which is the exact number of bits, and them we dont need to call
+ * this routine).
+ */
+static int
+sizeofints(int num_of_ints, unsigned int sizes[])
+{
+    int i, num;
+    unsigned int num_of_bytes, num_of_bits, bytes[32], bytecnt, tmp;
+    num_of_bytes = 1;
+    bytes[0] = 1;
+    num_of_bits = 0;
+    for (i=0; i < num_of_ints; i++)
+    {
+        tmp = 0;
+        for (bytecnt = 0; bytecnt < num_of_bytes; bytecnt++)
+        {
+            tmp = bytes[bytecnt] * sizes[i] + tmp;
+            bytes[bytecnt] = tmp & 0xff;
+            tmp >>= 8;
+        }
+        while (tmp != 0)
+        {
+            bytes[bytecnt++] = tmp & 0xff;
+            tmp >>= 8;
+        }
+        num_of_bytes = bytecnt;
+    }
+    num = 1;
+    num_of_bytes--;
+    while (bytes[num_of_bytes] >= num)
+    {
+        num_of_bits++;
+        num *= 2;
+    }
+    return num_of_bits + num_of_bytes * 8;
+    
+}
+
+/*
+ * decodebits - decode number from buf using specified number of bits
+ *
+ * extract the number of bits from the array buf and construct an integer
+ * from it. Return that value.
+ *
+ */
+
+static int
+decodebits(int state[], int _buf[], int num_of_bits)
+{
+    
+    int cnt, num;
+    unsigned int lastbits, lastbyte;
+    int mask = (1 << num_of_bits) -1;
+    unsigned char * cbuf = (unsigned char *)_buf;
+    cnt = state[0];
+    lastbits = (unsigned int) state[1];
+    lastbyte = (unsigned int) state[2];
+    
+    num = 0;
+    while (num_of_bits >= 8)
+    {
+        lastbyte = ( lastbyte << 8 ) | cbuf[cnt++];
+        num |=  (lastbyte >> lastbits) << (num_of_bits - 8);
+        num_of_bits -=8;
+    }
+    if (num_of_bits > 0)
+    {
+        if (lastbits < num_of_bits)
+        {
+            lastbits += 8;
+            lastbyte = (lastbyte << 8) | cbuf[cnt++];
+        }
+        lastbits -= num_of_bits;
+        num |= (lastbyte >> lastbits) & ((1 << num_of_bits) -1);
+    }
+    num &= mask;
+    state[0] = cnt;
+    state[1] = lastbits;
+    state[2] = lastbyte;
+    return num;
+}
+
+/*
+ * decodeints - decode 'small' integers from the buf array
+ *
+ * this routine is the inverse from encodeints() and decodes the small integers
+ * written to buf by calculating the remainder and doing divisions with
+ * the given sizes[]. You need to specify the total number of bits to be
+ * used from buf in num_of_bits.
+ *
+ */
+
+static void
+decodeints(int state[], int buf[], int num_of_ints, int num_of_bits,
+           unsigned int sizes[], int nums[])
+{
+    
+    int bytes[32];
+    int i, j, num_of_bytes, p, num;
+    
+    bytes[1] = bytes[2] = bytes[3] = 0;
+    num_of_bytes = 0;
+    while (num_of_bits > 8)
+    {
+        bytes[num_of_bytes++] = decodebits(state, buf, 8);
+        num_of_bits -= 8;
+    }
+    if (num_of_bits > 0)
+    {
+        bytes[num_of_bytes++] = decodebits(state, buf, num_of_bits);
+    }
+    for (i = num_of_ints-1; i > 0; i--)
+    {
+        num = 0;
+        for (j = num_of_bytes-1; j >=0; j--)
+        {
+            num = (num << 8) | bytes[j];
+            p = num / sizes[i];
+            bytes[j] = p;
+            num = num - p * sizes[i];
+        }
+        nums[i] = num;
+    }
+    nums[0] = bytes[0] | (bytes[1] << 8) | (bytes[2] << 16) | (bytes[3] << 24);
+}
+
+extern int
+xdrfile_decompress_coord_float(float     *coordinates,
+                               int       size,
+                               float     precision,
+                               int       minint[3],
+                               int       maxint[3],
+                               int       smallidx,
+                               char*     compressed_blob_,
+                               size_t    blob_len,
+                               size_t *     readed_len)
+{
+    int *compressed_blob = (int*)compressed_blob_;
+    int *lip;
+    unsigned sizeint[3], sizesmall[3], bitsizeint[3], size3;
+    int k, *buf1 , flag;
+    int smallnum, smaller, i, is_smaller, run;
+    float *lfp, inv_precision;
+    int tmp, *thiscoord,  prevcoord[3];
+    unsigned int bitsize;
+    
+    bitsizeint[0] = 0;
+    bitsizeint[1] = 0;
+    bitsizeint[2] = 0;
+    
+    
+    size3 = size * 3;
+    
+    if((buf1=(int *)malloc(sizeof(int)*size3))==NULL)
+    {
+        fprintf(stderr,"Cannot allocate memory for decompressing coordinates.\n");
+        return -1;
+    }
+    
+    /* Dont bother with compression for three atoms or less */
+    if(size<=9)
+    {
+        
+        // TODO...
+        /* return number of coords, not floats */
+    }
+    
+    
+    sizeint[0] = maxint[0] - minint[0]+1;
+    sizeint[1] = maxint[1] - minint[1]+1;
+    sizeint[2] = maxint[2] - minint[2]+1;
+    
+    /* check if one of the sizes is to big to be multiplied */
+    if ((sizeint[0] | sizeint[1] | sizeint[2] ) > 0xffffff)
+    {
+        bitsizeint[0] = sizeofint(sizeint[0]);
+        bitsizeint[1] = sizeofint(sizeint[1]);
+        bitsizeint[2] = sizeofint(sizeint[2]);
+        bitsize = 0; /* flag the use of large sizes */
+    }
+    else
+    {
+        bitsize = sizeofints(3, sizeint);
+    }
+    
+    
+    tmp = smallidx+8;
+    tmp = smallidx-1;
+    tmp = (FIRSTIDX>tmp) ? FIRSTIDX : tmp;
+    smaller = magicints[tmp] / 2;
+    smallnum = magicints[smallidx] / 2;
+    sizesmall[0] = sizesmall[1] = sizesmall[2] = magicints[smallidx] ;
+    
+    
+    lfp = coordinates;
+    inv_precision = 1.0 / precision;
+    run = 0;
+    i = 0;
+    lip = buf1;
+    
+    int state[3] = {0,0,0};
+    
+    while ( i < size )
+    {
+        thiscoord = (int *)(lip) + i * 3;
+        
+        if (bitsize == 0)
+        {
+            thiscoord[0] = decodebits(state, compressed_blob, bitsizeint[0]);
+            thiscoord[1] = decodebits(state, compressed_blob, bitsizeint[1]);
+            thiscoord[2] = decodebits(state, compressed_blob, bitsizeint[2]);
+        }
+        else
+        {
+            decodeints(state, compressed_blob, 3, bitsize, sizeint, thiscoord);
+        }
+        
+        i++;
+        thiscoord[0] += minint[0];
+        thiscoord[1] += minint[1];
+        thiscoord[2] += minint[2];
+        
+        prevcoord[0] = thiscoord[0];
+        prevcoord[1] = thiscoord[1];
+        prevcoord[2] = thiscoord[2];
+        
+        flag = decodebits(state, compressed_blob, 1);
+        is_smaller = 0;
+        if (flag == 1)
+        {
+            run = decodebits(state, compressed_blob, 5);
+            is_smaller = run % 3;
+            run -= is_smaller;
+            is_smaller--;
+        }
+        if (run > 0)
+        {
+            thiscoord += 3;
+            for (k = 0; k < run; k+=3)
+            {
+                decodeints(state, compressed_blob, 3, smallidx, sizesmall, thiscoord);
+                i++;
+                thiscoord[0] += prevcoord[0] - smallnum;
+                thiscoord[1] += prevcoord[1] - smallnum;
+                thiscoord[2] += prevcoord[2] - smallnum;
+                if (k == 0) {
+                    /* interchange first with second atom for better
+                     * compression of water molecules
+                     */
+                    tmp = thiscoord[0]; thiscoord[0] = prevcoord[0];
+                    prevcoord[0] = tmp;
+                    tmp = thiscoord[1]; thiscoord[1] = prevcoord[1];
+                    prevcoord[1] = tmp;
+                    tmp = thiscoord[2]; thiscoord[2] = prevcoord[2];
+                    prevcoord[2] = tmp;
+                    *lfp++ = prevcoord[0] * inv_precision;
+                    *lfp++ = prevcoord[1] * inv_precision;
+                    *lfp++ = prevcoord[2] * inv_precision;
+                } else {
+                    prevcoord[0] = thiscoord[0];
+                    prevcoord[1] = thiscoord[1];
+                    prevcoord[2] = thiscoord[2];
+                }
+                *lfp++ = thiscoord[0] * inv_precision;
+                *lfp++ = thiscoord[1] * inv_precision;
+                *lfp++ = thiscoord[2] * inv_precision;
+            }
+        } 
+        else
+        {
+            *lfp++ = thiscoord[0] * inv_precision;
+            *lfp++ = thiscoord[1] * inv_precision;
+            *lfp++ = thiscoord[2] * inv_precision;		
+        }
+        smallidx += is_smaller;
+        if (is_smaller < 0) 
+        {
+            smallnum = smaller;
+            
+            if (smallidx > FIRSTIDX) 
+            {
+                smaller = magicints[smallidx - 1] /2;
+            } 
+            else 
+            {
+                smaller = 0;
+            }
+        } 
+        else if (is_smaller > 0)
+        {
+            smaller = smallnum;
+            smallnum = magicints[smallidx] / 2;
+        }
+        sizesmall[0] = sizesmall[1] = sizesmall[2] = magicints[smallidx] ;
+    }
+    
+    free((void*)buf1);
+    *readed_len = state[0];
+    return 0;
+}
--- a/pygmx/gromacs/compression.h
+++ b/pygmx/gromacs/compression.h
@@ -0,0 +1,12 @@
+#include <stdio.h>
+
+int
+xdrfile_decompress_coord_float(float     *coordinates,
+                               int       size,
+                               float     precision,
+                               int       minint[3],
+                               int       maxint[3],
+                               int       smallidx,
+                               int *     compressed_blob,
+                               size_t    blob_len,
+                               size_t *  readed_len);
--- a/pygmx/gromacs/coordinates.c
+++ b/pygmx/gromacs/coordinates.c
--- a/pygmx/gromacs/coordinates.pyx
+++ b/pygmx/gromacs/coordinates.pyx
@@ -0,0 +1,47 @@
+# distutils: sources = mdevaluate/gromacs/compression.c
+
+from cython cimport view
+from array import array
+import numpy as np
+cimport numpy as np
+
+
+cdef extern from "compression.h":
+    int xdrfile_decompress_coord_float(float     *coordinates,
+                                   int       size,
+                                   float     precision,
+                                   int       minint[3],
+                                   int       maxint[3],
+                                   int       smallidx,
+                                   char *    compressed_blob,
+                                   size_t    blob_len,
+                                   size_t *  readed_len)
+
+
+def decompress(int size, float precision, minint, maxint, int smallidx, bytes blob):
+     
+    assert len(minint) == 3
+    assert len(maxint) == 3
+    
+    cdef int minints[3]
+    cdef int maxints[3]
+    
+    for i in range(3):
+        minints[i] = minint[i]
+        maxints[i] = maxint[i]
+    
+    
+    cdef np.ndarray[float, ndim=2] coordinates = np.empty((size,3), dtype=np.float32)
+    cdef size_t readed
+    
+    xdrfile_decompress_coord_float(
+        <float *>coordinates.data, 
+        size, 
+        precision,
+        maxints,
+        minints, 
+        smallidx, 
+        blob,
+        len(blob),
+        &readed)
+    return coordinates
--- a/pygmx/gromacs/logarithmic.c
+++ b/pygmx/gromacs/logarithmic.c
--- a/pygmx/gromacs/logarithmic.pyx
+++ b/pygmx/gromacs/logarithmic.pyx
@@ -0,0 +1,37 @@
+from libc.math cimport floor, ceil, fabs, log10
+
+def is_log_step(long long step, long long  per_decade):
+
+    # The first step will never be written
+    if step == 0:
+        return True
+
+    if step == 1:
+        return True
+
+
+    cdef double log_step = log10(step)
+    cdef double log_next = log10(step+1)
+    cdef double log_prev = log10(step-1)
+    cdef long long decade = int(log10(step))
+
+    # Idea: Is there a number x = decade + n / per_decade
+    # with 0 <= n < per_decade where
+    # | log_step - x | <= | log_prev - x |
+    # | log_step - x | <  | log_next - x |
+
+    # Calculate the closest numbers
+
+    cdef double n_smaller = floor((log_step - decade) * per_decade);
+    cdef double n_larger = ceil((log_step - decade) * per_decade);
+
+    cdef double prev_dist = fabs(log_prev-decade-n_smaller/per_decade);
+    cdef double dist_to_smaller = fabs(log_step-decade-n_smaller/per_decade);
+
+    cdef double next_dist = fabs(log_next-decade-n_larger/per_decade);
+    cdef double dist_to_larger = fabs(log_step-decade-n_larger/per_decade);
+
+    if dist_to_larger < next_dist or dist_to_smaller <= prev_dist:
+        return True
+    else:
+        return False
--- a/pygmx/gromacs/reader.py
+++ b/pygmx/gromacs/reader.py
@@ -0,0 +1,511 @@
+import xdrlib
+import io
+import os
+from .coordinates import decompress
+from ..utils import hash_anything, merge_hashes
+from functools import partial
+import numpy as np
+
+TRR_MAGIC = 1993
+XTC_MAGIC = 1995
+INDEX_MAGIC = 2015
+
+def index_filename_for_xtc(xtcfile):
+    """
+    Get the filename for the index file of a xtc file.
+
+    In general, the index file is located in the same directory as the xtc file.
+    If the xtc file is located in a read-only directory (for the current user)
+    and does not exist, the index file will be instead located in a subdirectory
+    of ~/.xtcindex, of the current user.
+
+    The directory for user index files can be changed by setting the environment
+    variable 'XTCINDEXDIR'.
+
+    Example:
+        xtcfile = '/data/niels/test.xtc'
+
+        # case 1: index file exists, or current user is niels
+        index_filename_for_xtc(xtcfile) == '/data/niels/.test.xtc.xtcindex'
+
+        # case 2: index file doesn't exist, and nor write permission in /data/niels
+        index_filename_for_xtc(xtcfile) == '~/.xtcindex/data/niels/.test.xtc.xtcindex'
+
+    Warning:
+        At this point, the index file is not checked for validity. If an invalid
+        index file is present in the xtc files directory, this will be used and an
+        error will be risen by XTCReader.
+
+    Warning:
+        On most systems, the home directory is on the local drive so that the indexing
+        has probably to do be done on every system if it can not be saved in the directory
+        of the xtc file.
+    """
+    base = os.path.basename(xtcfile)
+    filename = "." + base + ".xtcindex"
+
+    dir = os.path.abspath(os.path.dirname(xtcfile))
+
+    if not os.path.exists(os.path.join(dir, filename)) and  not os.access(dir, os.W_OK):
+        if 'XTCINDEXDIR' in os.environ:
+            index_dir = os.environ['XTCINDEXDIR']
+        else:
+            index_dir = os.path.join(os.environ['HOME'], '.xtcindex')
+        dir = os.path.join(index_dir, dir.lstrip('/'))
+        os.makedirs(dir, exist_ok=True)
+
+    return os.path.join(dir, filename)
+
+
+class NumpyUnpacker(xdrlib.Unpacker):
+
+    def unpack_float_array(self, n_items):
+        i = self.get_position()
+        j = i + 4 * n_items
+        self.set_position(j)
+        data = self.get_buffer()[i:j]
+
+        if len(data) < 4:
+            raise EOFError
+
+        ret = np.frombuffer(data, '>f')
+        if len(ret) != n_items:
+            raise EOFError
+        return ret
+
+    def unpack_double_array(self, n_items):
+        i = self.get_position()
+        j = i + 8 * n_items
+        self.set_position(j)
+        data = self.get_buffer()[i:j]
+
+        if len(data) < 8:
+            raise EOFError
+
+        ret = np.frombuffer(data, '>d')
+        if len(ret) != n_items:
+            raise EOFError
+        return ret
+
+
+class InvalidMagicException(Exception):
+    pass
+
+
+class InvalidIndexException(Exception):
+    pass
+
+
+class UnknownLenError(Exception):
+    pass
+
+
+class SubscriptableReader:
+
+    def __init__(self, fd):
+        self.fd = fd
+        self.len = os.stat(self.fd.fileno()).st_size
+
+    def __getitem__(self, r):
+        if isinstance(r, slice):
+            if r.start is not None:
+                if r.start < 0:
+                    self.fd.seek(r.start, io.SEEK_END)
+                else:
+                    self.fd.seek(r.start, io.SEEK_SET)
+
+            if r.step is not None:
+                raise NotImplementedError
+
+            return self.fd.read(r.stop - r.start)
+        else:
+            self.fd.seek(r, io.SEEK_SET)
+            return self.fd.read(1)
+
+    def __len__(self):
+        return self.len
+
+
+class XTCFrame:
+    __slots__ = ['_coordinates', 'index', 'time', 'box', '_compressed_coordinates']
+
+    def __init__(self):
+        self._coordinates = None
+
+    @property
+    def coordinates(self):
+        if self._coordinates is None:
+            self._coordinates = self._compressed_coordinates()
+        return self._coordinates
+
+
+class BaseReader:
+
+    def __init__(self, file):
+        self.fd = open(file, 'rb')
+        self.filename = file
+
+        self.reader = SubscriptableReader(self.fd)
+        self.xdr_reader = NumpyUnpacker(self.reader)
+
+    def __exit__(self, *_):
+        self.fd.close()
+
+    def __enter__(self, *_):
+        return self
+
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        del state['fd']
+        del state['reader']
+        del state['xdr_reader']
+        state['filepos'] = self.get_position()
+        return state
+
+    def __setstate__(self, state):
+        filepos = state.pop('filepos')
+        self.__dict__.update(state)
+        self.fd = open(self.filename, 'rb')
+        self.reader = SubscriptableReader(self.fd)
+        self.xdr_reader = NumpyUnpacker(self.reader)
+        self.set_position(filepos)
+
+    def get_position(self):
+        return self.fd.tell()
+
+    def set_position(self, pos):
+        return self.fd.seek(pos)
+
+    def skip_frames(self, num):
+        for i in range(num):
+            self.skip_frame()
+
+    def skip_frame(self):
+        self._read_header()
+        self._read_frame()
+
+    def dump_frame(self):
+        raise NotImplemented
+
+    def _read_header(self):
+        raise NotImplemented
+
+    def _read_frame(self):
+        raise NotImplemented
+
+XTC_HEADER_SIZE = 4 * 4
+XTC_FRAME_HEADER_SIZE = 9 * 4 + 10 * 4
+
+
+class XTCReader(BaseReader):
+
+    len_available = False
+
+    def __init__(self, xtcfile, load_cache_file=True):
+        super().__init__(xtcfile)
+        self._cache = [0]
+        self._times = None
+        self.current_frame = 0
+
+        index_file_name = index_filename_for_xtc(xtcfile)
+        if load_cache_file:
+            self.load_index(index_file_name)
+
+    def load_index(self, filename):
+        xtc_stat = os.stat(self.filename)
+        c_time = int(xtc_stat.st_ctime)
+        m_time = int(xtc_stat.st_mtime)
+        size = xtc_stat.st_size
+
+        with open(filename, 'rb') as index_file_fd:
+            # TODO: Is NumpyUnpacker even necessary at this point?
+            #       Seems like xdrlib.Unpacker would be sufficient here ...
+            reader = NumpyUnpacker(SubscriptableReader(index_file_fd))
+
+            if reader.unpack_hyper() != INDEX_MAGIC:
+                raise InvalidMagicException
+            if reader.unpack_hyper() != c_time:
+                raise InvalidIndexException
+            if reader.unpack_hyper() != m_time:
+                raise InvalidIndexException
+            if reader.unpack_hyper() != size:
+                raise InvalidIndexException
+
+            self._cache = []
+            self._times = []
+            try:
+                while True:
+                    self._cache.append(reader.unpack_hyper())
+                    self._times.append(reader.unpack_float())
+            except EOFError:
+                pass
+        self.len_available = True
+
+    def _raw_header(self):
+        if len(self._cache) == self.current_frame:
+            self._cache.append(self.get_position())
+        return self.fd.read(XTC_HEADER_SIZE)
+
+    def _raw_frame(self):
+        frame_header = self.fd.read(XTC_FRAME_HEADER_SIZE)
+        blob_size = xdrlib.Unpacker(frame_header[-4:]).unpack_int()
+        blob_size = (blob_size + 3) // 4 * 4   # Padding to 4 bytes
+        frame_blob = self.fd.read(blob_size)
+
+        self.current_frame += 1
+        return frame_header + frame_blob
+
+    def _unpack_header(self, raw_header):
+        unpacker = xdrlib.Unpacker(raw_header)
+
+        magic = unpacker.unpack_int()
+
+        if magic != XTC_MAGIC:
+            raise InvalidMagicException
+
+        n_atoms = unpacker.unpack_int()
+        step = unpacker.unpack_int()
+        time = unpacker.unpack_float()
+
+        return n_atoms, step, time
+
+    def _read_header(self):
+        raw_header = self._raw_header()
+        return self._unpack_header(raw_header)
+
+    def _unpack_frame(self, raw_frame):
+        unpacker = xdrlib.Unpacker(raw_frame)
+
+        raw_box = unpacker.unpack_farray(9, unpacker.unpack_float)
+        box = np.array(raw_box).reshape(3, 3)
+        num_coords = unpacker.unpack_int()
+        precision = unpacker.unpack_float()
+        maxint = unpacker.unpack_farray(3, unpacker.unpack_int)
+        minint = unpacker.unpack_farray(3, unpacker.unpack_int)
+        smallindex = unpacker.unpack_int()
+
+        blob_len = unpacker.unpack_int()
+        blob = unpacker.unpack_fopaque(blob_len)
+
+        return box, precision, num_coords, minint, maxint, smallindex, blob
+
+    def _read_frame(self):
+        raw_frame = self._raw_frame()
+        return self._unpack_frame(raw_frame)
+
+    def decode_frame(self, header=None, body=None):
+        n_atoms, step, time = header or self._read_header()
+        box, precision, num_coords, minint, maxint, smallindex, blob = body or self._read_frame()
+        coordinates = partial(decompress, num_coords, precision, minint, maxint, smallindex, blob)
+
+        frame = XTCFrame()
+        frame._compressed_coordinates = coordinates
+        frame.index = step
+        frame.time = time
+        frame.box = box
+        return frame
+
+    def dump_frame(self):
+        """
+        :return: Tuple: The binary data of the frame, the frame itself
+        """
+
+        raw_header = self._raw_header()
+        header = self._unpack_header(raw_header)
+
+        raw_body = self._raw_frame()
+        body = self._unpack_frame(raw_body)
+
+        return (raw_header + raw_body, self.decode_frame(header, body))
+
+    def cached_position(self, item):
+        try:
+            return self._cache[item]
+        except IndexError:
+            return None
+
+    def __getitem__(self, item):
+        position = self.cached_position(item)
+
+        if position is not None:
+            self.set_position(position)
+            self.current_frame = item
+            return self.decode_frame()
+        # TODO: Use elif and one single return at the end
+        # Also: skip_frames is in fact not implemented at all!
+        # TODO: Catch EOFError and raise IndexError
+        if self.current_frame <= item:
+            self.skip_frames(item - self.current_frame)
+            return self.decode_frame()
+        else:
+            self.set_position(0)
+            self.current_frame = 0
+            self.skip_frames(item)
+            return self.decode_frame()
+
+    def __len__(self):
+        if self.len_available:
+            return len(self._cache)
+        raise UnknownLenError
+
+    def times_of_indices(self, indices):
+        return [self[i].time for i in indices]
+
+    def __hash__(self):
+        return merge_hashes(hash_anything(self.filename), hash_anything(self._cache))
+
+
+class TRRHeader:
+    __slots__ = ['ir_size', 'e_size', 'box_size', 'vir_size', 'pres_size', 'top_size', 'sym_size', 'x_size', 'v_size',
+                 'f_size', 'n_atoms', 'step', 'nre', 't', '_lambda', 'is_double']
+
+    @property
+    def frame_size(self):
+        return self.ir_size + \
+            self.e_size + \
+            self.box_size + \
+            self.vir_size + \
+            self.top_size + \
+            self.sym_size + \
+            self.x_size + \
+            self.v_size + \
+            self.f_size
+
+
+class TRRFrame:
+    __slots__ = ['header', 'box', 'x', 'v', 'f']
+
+
+class TRRReader(BaseReader):
+
+    def __iter__(self):
+        return TRRIterator(self.filename)
+
+    def _unpack_float(self, is_double):
+        if is_double:
+            return self.xdr_reader.unpack_double()
+        else:
+            return self.xdr_reader.unpack_float()
+
+    def _unpack_np_float(self, is_double, *dim):
+        total = np.product(dim)
+
+        if is_double:
+            box = self.xdr_reader.unpack_double_array(total)
+        else:
+            box = self.xdr_reader.unpack_float_array(total)
+
+        box = box.reshape(*dim)
+        return box
+
+    def _read_header(self):
+
+        data = self.fd.read(8)  # 4 Magic + 4 VersionStringLen
+        unpacker = NumpyUnpacker(data)
+
+        magic = unpacker.unpack_int()
+
+        if magic != TRR_MAGIC:
+            raise InvalidMagicException
+
+        version_string_len = unpacker.unpack_int()
+        data = self.fd.read(version_string_len + 55)  # 4 Magic + 4 VersionStringLen
+        unpacker.reset(data)
+
+        version = unpacker.unpack_string()
+        assert version == b'GMX_trn_file'
+
+        header = TRRHeader()
+
+        header.ir_size = unpacker.unpack_int()
+        header.e_size = unpacker.unpack_int()
+        header.box_size = unpacker.unpack_int()
+        header.vir_size = unpacker.unpack_int()
+        header.pres_size = unpacker.unpack_int()
+        header.top_size = unpacker.unpack_int()
+        header.sym_size = unpacker.unpack_int()
+        header.x_size = unpacker.unpack_int()
+        header.v_size = unpacker.unpack_int()
+        header.f_size = unpacker.unpack_int()
+        header.n_atoms = unpacker.unpack_int()
+        header.step = unpacker.unpack_int()
+        header.nre = unpacker.unpack_int()
+
+        if header.x_size is not None:
+            header.is_double = (header.x_size // header.n_atoms) == 8
+        elif header.v_size is not None:
+            header.is_double = (header.v_size // header.n_atoms) == 8
+        elif header.f_size is not None:
+            header.is_double = (header.f_size // header.n_atoms) == 8
+
+        if header.is_double:
+            data = self.fd.read(16)
+        else:
+            data = self.fd.read(8)
+        unpacker.reset(data)
+        self.xdr_reader = unpacker
+
+        header.t = self._unpack_float(header.is_double)
+        header._lambda = self._unpack_float(header.is_double)
+
+        return header
+
+    def _read_frame(self, header):
+        frame = TRRFrame()
+        frame.header = header
+
+        data = self.fd.read(header.frame_size)
+        self.xdr_reader = NumpyUnpacker(data)
+
+        frame.box = None
+        if header.box_size:
+            frame.box = self._unpack_np_float(header.is_double, 3, 3)
+
+        if header.vir_size:
+            for i in range(9):
+                self._unpack_float(header.is_double)
+
+        if header.pres_size:
+            for i in range(9):
+                self._unpack_float(header.is_double)
+
+        frame.x = None
+        if header.x_size:
+            frame.x = self._unpack_np_float(header.is_double, header.n_atoms, 3)
+
+        frame.v = None
+        if header.v_size:
+            frame.v = self._unpack_np_float(header.is_double, header.n_atoms, 3)
+
+        frame.f = None
+        if header.f_size:
+            frame.f = self._unpack_np_float(header.is_double, header.n_atoms, 3)
+
+        return frame
+
+    def decode_frame(self):
+        h = self._read_header()
+        return self._read_frame(h)
+
+    def get_position(self):
+        return self.fd.tell()
+
+    def set_position(self, pos):
+        self.fd.seek(pos)
+
+
+class TRRIterator():
+
+    def __init__(self, file):
+        self.__reader = TRRReader(file)
+        self.__reader.__enter__()
+
+    def __next__(self):
+        try:
+            return self.__reader.decode_frame()
+        except EOFError as err:
+            self.__reader.__exit__()
+            raise StopIteration
+
+    def __iter__(self):
+        return self
--- a/pygmx/gromacs/xtcdemux.py
+++ b/pygmx/gromacs/xtcdemux.py
@@ -0,0 +1,71 @@
+import sys
+import os
+from .logarithmic import is_log_step
+from .reader import XTCReader
+
+def next_log_step(step, per_decade):
+    step += 1
+    while not is_log_step(step, per_decade): step += 1
+    return step
+
+def next_lin_step(step, frequency):
+    return step+frequency
+
+class LogWriter:
+    current_offset = 0
+    log_step = 0
+    fd = None
+
+    def __init__(self, file, offset, log_freq):
+        self.fd = open(file,'wb')
+        self.offset = offset
+        self.log_freq = log_freq
+
+    def consume_frame(self, step, raw_frame):
+        if step > self.log_step+self.offset:
+            raise Exception("Missing frame {}".format(step))
+
+        if step == self.log_step+self.offset:
+            self.fd.write(raw_frame)
+            self.log_step = next_log_step(self.log_step, self.log_freq)
+
+
+def main():
+    filename = sys.argv[1]
+    reader = XTCReader(filename, load_cache_file=False)
+    lin_freq = int(sys.argv[2])
+    log_freq = int(sys.argv[3])
+    log_restart = int(sys.argv[4])
+
+    base,ext = os.path.splitext(filename)
+
+    lin_file = base + '.lin' + ext
+    lin_step = 0
+
+    log_writers = []
+
+    with open(lin_file,'wb') as lin_fd:
+        try:
+            while True:
+                raw_frame, frame = reader.dump_frame()
+
+                if frame.index % log_restart == 0:
+                    print('Starting new log band: {}'.format(frame.index))
+                    log_filename = '{}.{}.log{}'.format(base, len(log_writers), ext)
+                    log_writers.append(LogWriter(log_filename, frame.index, log_freq))
+
+                if frame.index > lin_step:
+                    raise Exception("Missing frames")
+
+                if frame.index == lin_step:
+                    lin_fd.write(raw_frame)
+                    lin_step = next_lin_step(lin_step, lin_freq)
+
+                for writer in log_writers:
+                    writer.consume_frame(frame.index, raw_frame)
+
+        except EOFError:
+            pass
+
+if __name__ == '__main__':
+    main()
--- a/pygmx/gromacs/xtcindex.py
+++ b/pygmx/gromacs/xtcindex.py
@@ -0,0 +1,63 @@
+from .reader import XTCReader, index_filename_for_xtc, INDEX_MAGIC
+from xdrlib import Packer
+import os
+import sys
+import logging
+
+
+def index_xtcfile(filename):
+    """
+    Write an index-file for a xtc-file.
+
+    The resulting file will have the same name, except for the new extension .xtcindex.
+    This function has to be called only once for every xtcfile.
+
+    Args:
+        filename (str): Name of the xtc-file
+
+    """
+    # TODO: Would it be possible, to index files on the fly?
+    #       How long does indexing take?
+    index_filename = index_filename_for_xtc(filename)
+    print('Writing index file: {}'.format(index_filename))
+    packer = Packer()
+    xtc_stat = os.stat(filename)
+    c_time = int(xtc_stat.st_ctime)
+    m_time = int(xtc_stat.st_mtime)
+    size = xtc_stat.st_size
+
+    with XTCReader(filename, load_cache_file=False) as reader, open(index_filename, 'wb') as idx_fd:
+        packer.pack_hyper(INDEX_MAGIC)
+        # TODO Maybe store only the hashsum of the file for identification?
+        packer.pack_hyper(c_time)
+        packer.pack_hyper(m_time)
+        packer.pack_hyper(size)
+        count = 0
+        try:
+            while True:
+                if count % 300 == 0:
+                    print("Frame {}".format(count), end="\r")
+                position = reader.get_position()
+                frame = reader.decode_frame()
+                packer.pack_hyper(position)
+                packer.pack_float(frame.time)
+
+                count += 1
+
+        except EOFError:
+            pass
+        idx_fd.write(packer.get_buffer())
+        packer.reset()
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage {} xtc-file [xtc-file]".format(sys.argv[0]))
+        return -1
+
+    for filename in sys.argv[1:]:
+        index_xtcfile(filename)
+
+
+
+if __name__ == '__main__':
+    sys.exit(main())