Extend the checksum.strip_comments function to handle prefixed docstrings, plus other small features
This commit is contained in:
@ -4,6 +4,10 @@ from .logging_util import logger
|
||||
from types import ModuleType, FunctionType
|
||||
import inspect
|
||||
from typing import Iterable
|
||||
import ast
|
||||
import io
|
||||
import tokenize
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
|
||||
@ -28,16 +32,43 @@ def version(version_nr: int, calls: Iterable = ()):
|
||||
return decorator
|
||||
|
||||
|
||||
def strip_comments(s: str):
|
||||
"""Strips comment lines and docstring from Python source string."""
|
||||
o = ""
|
||||
in_docstring = False
|
||||
for l in s.split("\n"):
|
||||
if l.strip().startswith(("#", '"', "'")) or in_docstring:
|
||||
in_docstring = l.strip().startswith(('"""', "'''")) + in_docstring == 1
|
||||
def strip_comments(source: str) -> str:
|
||||
"""Removes docstrings, comments, and irrelevant whitespace from Python source code."""
|
||||
|
||||
# Step 1: Remove docstrings using AST
|
||||
def remove_docstrings(node):
|
||||
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)):
|
||||
if (doc := ast.get_docstring(node, clean=False)):
|
||||
first_stmt = node.body[0]
|
||||
if isinstance(first_stmt, ast.Expr) and isinstance(first_stmt.value, ast.Constant):
|
||||
node.body.pop(0) # Remove the docstring entirely
|
||||
for child in ast.iter_child_nodes(node):
|
||||
remove_docstrings(child)
|
||||
|
||||
tree = ast.parse(source)
|
||||
remove_docstrings(tree)
|
||||
code_without_docstrings = ast.unparse(tree)
|
||||
|
||||
# Step 2: Remove comments using tokenize
|
||||
tokens = tokenize.generate_tokens(io.StringIO(code_without_docstrings).readline)
|
||||
result = []
|
||||
last_lineno = -1
|
||||
last_col = 0
|
||||
|
||||
for toknum, tokval, (srow, scol), (erow, ecol), line in tokens:
|
||||
if toknum == tokenize.COMMENT:
|
||||
continue
|
||||
o += l + "\n"
|
||||
return o
|
||||
if srow > last_lineno:
|
||||
last_col = 0
|
||||
if scol > last_col:
|
||||
result.append(" " * (scol - last_col))
|
||||
result.append(tokval)
|
||||
last_lineno, last_col = erow, ecol
|
||||
|
||||
code_no_comments = ''.join(result)
|
||||
|
||||
# Step 3: Remove empty lines (whitespace-only or truly blank)
|
||||
return "\n".join([line for line in code_no_comments.splitlines() if line.strip() != ""])
|
||||
|
||||
|
||||
def checksum(*args, csum=None):
|
||||
|
Reference in New Issue
Block a user