Extended the checksum.strip_comments function to handle prefixed docstrings, along with other small features

This commit is contained in:
robrobo
2025-06-16 22:15:22 +02:00
parent a2a0ae8d7b
commit 6d8b86c1ef

View File

@ -4,6 +4,10 @@ from .logging_util import logger
from types import ModuleType, FunctionType
import inspect
from typing import Iterable
import ast
import io
import tokenize
import re
import numpy as np
@ -28,16 +32,43 @@ def version(version_nr: int, calls: Iterable = ()):
return decorator
def strip_comments(source: str) -> str:
    """Remove docstrings, comments, and blank lines from Python source code.

    The source is parsed with ``ast``; the docstring of the module and of
    every (async) function and class is dropped, and the tree is turned back
    into text with ``ast.unparse``.  The text is then re-assembled token by
    token while skipping comment tokens, and blank lines are removed.

    Args:
        source: Python source text.  Uniformly indented source (for example
            a method cut out of its class body) is accepted as well: it is
            dedented before parsing.

    Returns:
        The normalized source, newline-joined and without a trailing newline.
        A body that consisted only of a docstring is left empty.

    Raises:
        SyntaxError: If ``source`` cannot be parsed, even after its common
            leading indentation has been removed.
    """
    import textwrap  # local import: only needed for the indented-source fallback

    # Step 1: remove docstrings using the AST.
    try:
        tree = ast.parse(source)
    except IndentationError:
        # Source taken from inside a class or function starts indented and
        # cannot be parsed as a module.  Retry without the common margin; the
        # fallback is only reached when the plain parse failed, so input that
        # already parsed is handled exactly as before.
        tree = ast.parse(textwrap.dedent(source))

    docstring_owners = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)
    for node in ast.walk(tree):
        # Compare against None (not truthiness) so that an empty docstring
        # ("") is removed too.  get_docstring only returns a value when the
        # first statement is a string expression, so index 0 is the docstring.
        if isinstance(node, docstring_owners) and ast.get_docstring(node, clean=False) is not None:
            del node.body[0]
    code_without_docstrings = ast.unparse(tree)

    # Step 2: rebuild the text from tokens, skipping comments.
    # NOTE(review): the AST does not carry comments, so the unparsed text
    # should already be comment-free; the pass is kept so the output stays
    # identical to what it was before.
    pieces = []
    last_lineno = -1
    last_col = 0
    readline = io.StringIO(code_without_docstrings).readline
    for toknum, tokval, (srow, scol), (erow, ecol), _line in tokenize.generate_tokens(readline):
        if toknum == tokenize.COMMENT:
            continue
        if srow > last_lineno:
            # First token on a new line: columns restart at zero.
            last_col = 0
        if scol > last_col:
            # Re-create the gap between the previous token and this one.
            pieces.append(" " * (scol - last_col))
        pieces.append(tokval)
        last_lineno, last_col = erow, ecol
    code_no_comments = "".join(pieces)

    # Step 3: remove empty lines (whitespace-only or truly blank).
    return "\n".join(line for line in code_no_comments.splitlines() if line.strip())
def checksum(*args, csum=None):