"""Extract the names and docstrings of Python definitions into a CSV file.

Every ``.py`` file below ``IN_DIR`` is parsed with :mod:`ast`; each class,
function and method definition found becomes one row of ``OUT_FILE`` with the
columns ``name``, ``file``, ``line``, ``type`` and ``comment``.
"""
import ast
import os
import sys

import pandas as pd

SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
IN_DIR = os.path.join(SCRIPT_DIR, "tensorflow")
OUT_FILE = os.path.join(SCRIPT_DIR, "data.csv")

# Column order of the generated CSV file.
_COLUMNS = ["name", "file", "line", "type", "comment"]


def find_py_files(dir):
    """Yield the path of every ``.py`` file found below ``dir``.

    Directories and files are visited in sorted order so that the sequence of
    yielded paths (and therefore the row order of the CSV) is reproducible
    across runs and file systems.

    Args:
        dir: Root directory to search recursively. (The name shadows the
            builtin but is kept so keyword callers keep working.)

    Yields:
        Paths built by joining the walked directory with the file name.
    """
    for cwd, subdirs, filenames in os.walk(dir):
        # Sorting in place also fixes the order in which os.walk descends.
        subdirs.sort()
        for filename in sorted(filenames):
            if filename.endswith(".py"):
                yield os.path.join(cwd, filename)


class FeatureVisitor(ast.NodeVisitor):
    """Collect one row per class, function and method definition in an AST.

    After ``visit(tree)`` has run, ``rows`` holds a list of dicts with the
    keys ``name``, ``file``, ``line``, ``type`` and ``comment``. ``type`` is
    ``"class"``, ``"method"`` (a function defined directly in a class body)
    or ``"function"`` (any other function, including nested ones).
    """

    def __init__(self, filename):
        """Create a visitor for ``filename``.

        Args:
            filename: Path of the file being analysed; it is stored relative
                to ``SCRIPT_DIR`` and copied into every row.
        """
        self.filename = os.path.relpath(filename, SCRIPT_DIR)
        self.rows = []
        # Kinds ("class" / "function") of the definitions currently enclosing
        # the visited node, innermost last.
        self._scopes = []

    def _record(self, node, kind):
        """Append a row describing definition ``node`` with type ``kind``."""
        self.rows.append({
            "name": node.name,
            "file": self.filename,
            "line": node.lineno,
            "type": kind,
            "comment": ast.get_docstring(node),
        })

    def _descend(self, node, scope):
        """Visit the children of ``node`` with ``scope`` as innermost scope."""
        self._scopes.append(scope)
        try:
            self.generic_visit(node)
        finally:
            self._scopes.pop()

    def visit_FunctionDef(self, node):
        """Record a function or method, then visit the definitions inside it.

        The ``ast`` module has no dedicated node for methods: a method is a
        ``FunctionDef`` whose innermost enclosing definition is a class.
        """
        in_class = bool(self._scopes) and self._scopes[-1] == "class"
        self._record(node, "method" if in_class else "function")
        self._descend(node, "function")

    # ``async def`` definitions are reported exactly like plain ``def`` ones.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_MethodDef(self, node):
        """Record ``node`` as a method.

        ``NodeVisitor`` never dispatches here because there is no
        ``MethodDef`` node type; methods are detected in
        ``visit_FunctionDef``. Kept only for callers invoking it directly.
        """
        self._record(node, "method")

    def visit_ClassDef(self, node):
        """Record a class, then visit its methods and nested definitions."""
        self._record(node, "class")
        self._descend(node, "class")


def _collect_rows(path):
    """Return the definition rows of the Python file at ``path``.

    The file is read as bytes so that ``ast.parse`` honours a PEP 263
    encoding declaration. A file that cannot be parsed is skipped with a
    warning on stderr and contributes no rows.
    """
    with open(path, "rb") as f:
        py_source = f.read()
    try:
        py_ast = ast.parse(py_source, filename=path)
    except (SyntaxError, ValueError) as err:
        print(f"warning: skipping {path}: {err}", file=sys.stderr)
        return []
    visitor = FeatureVisitor(path)
    visitor.visit(py_ast)
    return visitor.rows


def main():
    """Scan ``IN_DIR`` and write all collected definitions to ``OUT_FILE``."""
    rows = []
    for path in find_py_files(IN_DIR):
        rows.extend(_collect_rows(path))
    # Build the frame once; concatenating per file is quadratic in row count.
    df = pd.DataFrame(rows, columns=_COLUMNS)
    df.to_csv(OUT_FILE)


if __name__ == "__main__":
    main()