36 lines
1.3 KiB
Python
36 lines
1.3 KiB
Python
def remove_docstrings_and_comments_from_code(code, parser):
    """Return *code* with comments and docstring-style strings deleted.

    The source is parsed with *parser* and the resulting syntax tree is
    walked once. Two kinds of node are cut out of the text:

    * every node whose type is ``"comment"``;
    * every ``"string"`` node that is a direct child of an
      ``"expression_statement"`` and is written as a triple-quoted literal
      (``\"\"\"`` or ``'''``, optionally prefixed with ``r``/``u``).
      f-strings and bytes literals are left alone because an f-string may
      evaluate expressions.

    Only the matched spans are removed; surrounding whitespace and newlines
    are kept as they are, and no ``pass`` is inserted when a docstring was
    the only statement of a body.

    Args:
        code: Source text as ``str``.
        parser: Object exposing ``parse(bytes)`` that returns a tree with a
            ``walk()`` cursor (``node``, ``goto_first_child``,
            ``goto_next_sibling``, ``goto_parent``), i.e. the tree-sitter
            cursor interface.

    Returns:
        The source text with the selected spans removed.
    """
    # Node offsets are *byte* offsets into the UTF-8 encoding, so all slicing
    # must happen on the encoded bytes; slicing the str directly would cut at
    # the wrong place as soon as a non-ASCII character precedes a span.
    source = code.encode("utf8")
    tree = parser.parse(source)
    cursor = tree.walk()

    triple_quotes = (b'"""', b"'''")
    spans = []

    # Iterative pre-order walk. ``ancestor_types`` mirrors the cursor's depth
    # so the parent type is available without recursion (deeply nested
    # expressions could otherwise exceed the interpreter's recursion limit).
    ancestor_types = []
    descending = True
    while True:
        if descending:
            node = cursor.node
            node_type = node.type
            parent_type = ancestor_types[-1] if ancestor_types else None

            if node_type == "comment":
                spans.append((node.start_byte, node.end_byte))
            elif node_type == "string" and parent_type == "expression_statement":
                literal = source[node.start_byte:node.end_byte]
                unprefixed = literal.lstrip(b"rRuU")
                if (
                    len(unprefixed) >= 6
                    and unprefixed.startswith(triple_quotes)
                    and unprefixed.endswith(unprefixed[:3])
                ):
                    spans.append((node.start_byte, node.end_byte))

            if cursor.goto_first_child():
                ancestor_types.append(node_type)
            else:
                descending = False
        elif cursor.goto_next_sibling():
            descending = True
        elif cursor.goto_parent():
            ancestor_types.pop()
        else:
            break

    # Stitch together everything that lies outside the removed spans in a
    # single pass; ``position`` also guards against overlapping spans.
    kept = []
    position = 0
    for start, end in sorted(spans):
        if start > position:
            kept.append(source[position:start])
        position = max(position, end)
    kept.append(source[position:])

    return b"".join(kept).decode("utf8")