msc-patryk-bartkowiak/code/utils.py

36 lines
1.3 KiB
Python

def remove_docstrings_and_comments_from_code(code, parser):
    """Return ``code`` with comments and triple-quoted docstrings cut out.

    The source is parsed with ``parser`` and two kinds of nodes are removed:

    * every node of type ``comment``;
    * every node of type ``string`` whose parent is an
      ``expression_statement`` and whose text both starts and ends with
      three double quotes.

    Only the node spans themselves are removed; surrounding whitespace and
    newlines are left in place. NOTE(review): a body consisting solely of a
    docstring is therefore left empty -- confirm callers tolerate that.

    Args:
        code: Source code as a ``str``.
        parser: Object whose ``parse(bytes)`` returns a tree with a
            ``walk()`` cursor (presumably a tree-sitter ``Parser`` configured
            for Python -- confirm against the caller).

    Returns:
        The source as a ``str`` with the selected spans removed.
    """
    # Node positions are byte offsets into the buffer given to the parser.
    # Slice that same buffer (not the str) so multi-byte characters ahead of
    # a removed span cannot shift the cut.
    source = code.encode("utf8")
    tree = parser.parse(source)

    spans = []
    for node, parent_type in _iter_nodes_with_parent_type(tree.walk()):
        if node.type == "comment":
            spans.append((node.start_byte, node.end_byte))
        elif node.type == "string" and parent_type == "expression_statement":
            # Only look at the text of candidate nodes; decoding every node
            # (including the root, i.e. the whole file) is wasted work.
            text = source[node.start_byte:node.end_byte]
            if text.startswith(b'"""') and text.endswith(b'"""'):
                spans.append((node.start_byte, node.end_byte))

    # Stitch together everything between the removed spans in one pass.
    kept = []
    position = 0
    for start, end in sorted(spans):
        if start > position:
            kept.append(source[position:start])
        position = max(position, end)
    kept.append(source[position:])
    return b"".join(kept).decode("utf8")


def _iter_nodes_with_parent_type(cursor):
    """Yield ``(node, parent_type)`` for each node reachable from ``cursor``.

    Nodes are produced in pre-order; ``parent_type`` is ``None`` for the node
    the cursor starts on. The walk is iterative so that deeply nested syntax
    trees cannot exhaust the interpreter's recursion limit.
    """
    ancestor_types = []
    while True:
        node = cursor.node
        yield node, (ancestor_types[-1] if ancestor_types else None)

        if cursor.goto_first_child():
            ancestor_types.append(node.type)
            continue

        # Leaf reached: move to the next sibling, climbing up as needed.
        while not cursor.goto_next_sibling():
            if not ancestor_types:
                # Back at the starting node with nothing left to visit.
                return
            cursor.goto_parent()
            ancestor_types.pop()