extended checksum.strip_comments function to work with prefixed docstrings and other small features

This commit is contained in:
robrobo
2025-06-16 22:15:22 +02:00
parent a2a0ae8d7b
commit 6d8b86c1ef

View File

@ -4,6 +4,10 @@ from .logging_util import logger
from types import ModuleType, FunctionType from types import ModuleType, FunctionType
import inspect import inspect
from typing import Iterable from typing import Iterable
import ast
import io
import tokenize
import re
import numpy as np import numpy as np
@ -28,16 +32,43 @@ def version(version_nr: int, calls: Iterable = ()):
return decorator return decorator
def strip_comments(s: str): def strip_comments(source: str) -> str:
"""Strips comment lines and docstring from Python source string.""" """Removes docstrings, comments, and irrelevant whitespace from Python source code."""
o = ""
in_docstring = False # Step 1: Remove docstrings using AST
for l in s.split("\n"): def remove_docstrings(node):
if l.strip().startswith(("#", '"', "'")) or in_docstring: if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)):
in_docstring = l.strip().startswith(('"""', "'''")) + in_docstring == 1 if (doc := ast.get_docstring(node, clean=False)):
first_stmt = node.body[0]
if isinstance(first_stmt, ast.Expr) and isinstance(first_stmt.value, ast.Constant):
node.body.pop(0) # Remove the docstring entirely
for child in ast.iter_child_nodes(node):
remove_docstrings(child)
tree = ast.parse(source)
remove_docstrings(tree)
code_without_docstrings = ast.unparse(tree)
# Step 2: Remove comments using tokenize
tokens = tokenize.generate_tokens(io.StringIO(code_without_docstrings).readline)
result = []
last_lineno = -1
last_col = 0
for toknum, tokval, (srow, scol), (erow, ecol), line in tokens:
if toknum == tokenize.COMMENT:
continue continue
o += l + "\n" if srow > last_lineno:
return o last_col = 0
if scol > last_col:
result.append(" " * (scol - last_col))
result.append(tokval)
last_lineno, last_col = erow, ecol
code_no_comments = ''.join(result)
# Step 3: Remove empty lines (whitespace-only or truly blank)
return "\n".join([line for line in code_no_comments.splitlines() if line.strip() != ""])
def checksum(*args, csum=None): def checksum(*args, csum=None):