def strip_comments(source: str) -> str:
    """Return ``source`` without docstrings, comments and blank lines.

    The text is parsed into an AST, every docstring is removed from the
    tree, and the tree is rendered back with ``ast.unparse``. Comments are
    not part of the AST, so the rendered code never contains them and no
    separate comment-stripping pass is needed. Because the code is
    re-rendered, its spacing and quoting are normalised as well.

    Args:
        source: Python source code. Uniformly indented code (for example
            the source text of a single method) is accepted as well.

    Returns:
        The normalised source, one statement line per line, without a
        trailing newline. A function or class whose body consisted only of
        a docstring is rendered with a ``pass`` body so that the result is
        still valid Python.

    Raises:
        SyntaxError: If ``source`` is not valid Python, even after removing
            common leading indentation.
    """
    import textwrap

    try:
        tree = ast.parse(source)
    except IndentationError:
        # Source of a method or nested function starts indented; retry with
        # the common leading whitespace removed. Real syntax errors are
        # raised again by this second parse.
        tree = ast.parse(textwrap.dedent(source))

    docstring_owners = (
        ast.FunctionDef,
        ast.AsyncFunctionDef,
        ast.ClassDef,
        ast.Module,
    )
    for node in ast.walk(tree):
        if not isinstance(node, docstring_owners) or not node.body:
            continue
        first_stmt = node.body[0]
        # Structural test instead of the truthiness of ast.get_docstring(),
        # so that an empty docstring ("") is removed too.
        is_docstring = (
            isinstance(first_stmt, ast.Expr)
            and isinstance(first_stmt.value, ast.Constant)
            and isinstance(first_stmt.value.value, str)
        )
        if not is_docstring:
            continue
        del node.body[0]
        # A definition needs at least one statement; an empty module does not.
        if not node.body and not isinstance(node, ast.Module):
            node.body.append(ast.Pass())

    # ast.unparse separates definitions with blank lines; drop them.
    rendered = ast.unparse(tree)
    return "\n".join(line for line in rendered.splitlines() if line.strip())
math: \alpha_2 (t) = \frac{3}{5}\frac{\langle r_i^4(t)\rangle}{\langle r_i^2(t)\rangle^2} - 1 """ diff --git a/src/mdevaluate/logging.py b/src/mdevaluate/logging_util.py similarity index 100% rename from src/mdevaluate/logging.py rename to src/mdevaluate/logging_util.py diff --git a/src/mdevaluate/pbc.py b/src/mdevaluate/pbc.py index b44f083..56a5249 100644 --- a/src/mdevaluate/pbc.py +++ b/src/mdevaluate/pbc.py @@ -7,7 +7,7 @@ from numpy.typing import ArrayLike, NDArray from itertools import product -from .logging import logger +from .logging_util import logger if TYPE_CHECKING: from mdevaluate.coordinates import CoordinateFrame diff --git a/src/mdevaluate/reader.py b/src/mdevaluate/reader.py index 46a2b41..91e2a5a 100755 --- a/src/mdevaluate/reader.py +++ b/src/mdevaluate/reader.py @@ -19,13 +19,13 @@ import MDAnalysis from scipy import sparse from .checksum import checksum -from .logging import logger +from .logging_util import logger from . import atoms from .coordinates import Coordinates CSR_ATTRS = ("data", "indices", "indptr") NOJUMP_MAGIC = 2016 -Group_RE = re.compile("\[ ([-+\w]+) \]") +Group_RE = re.compile(r"\[ ([-+\w]+) \]") class NojumpError(Exception): diff --git a/src/mdevaluate/utils.py b/src/mdevaluate/utils.py index cf21285..954590f 100644 --- a/src/mdevaluate/utils.py +++ b/src/mdevaluate/utils.py @@ -14,7 +14,7 @@ from scipy.ndimage import uniform_filter1d from scipy.interpolate import interp1d from scipy.optimize import curve_fit -from .logging import logger +from .logging_util import logger from .functions import kww, kww_1e