"""Extract function, class and method definitions from a Python source tree.

Every ``.py`` file below ``IN_DIR`` is parsed with :mod:`ast`; one row per
retained definition (name, file, line, kind, docstring) is written to
``OUT_FILE`` as CSV.
"""

import ast
import os

import pandas as pd

SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
IN_DIR = os.path.join(SCRIPT_DIR, "tensorflow")
OUT_FILE = os.path.join(SCRIPT_DIR, "data.csv")

# Column order of the generated CSV; also the keys of every collected row.
_COLUMNS = ["name", "file", "line", "type", "comment"]


def find_py_files(dir):
    """Yield the path of every ``.py`` file found under *dir*, recursively.

    Paths are produced by joining the directory reported by ``os.walk`` with
    the file name, so they are relative or absolute exactly as *dir* is.
    """
    for cwd, _dirs, files in os.walk(dir):
        for file in files:
            if file.endswith(".py"):
                yield os.path.join(cwd, file)


def keep_name(name):
    """Return True if *name* should be kept in the output.

    A name is dropped when it starts with an underscore or when it contains
    "main" or "test" (case-insensitively).
    """
    lowered = str(name).lower()
    return (
        not name.startswith("_")
        and "main" not in lowered
        and "test" not in lowered
    )


class FeatureVisitor(ast.NodeVisitor):
    """Collect one row per retained function, class and method of a module.

    After ``visit(tree)`` the rows are available in ``self.rows`` as dicts
    with the keys "name", "file", "line", "type" and "comment".
    """

    def __init__(self, filename):
        """Remember *filename* relative to ``SCRIPT_DIR`` and start empty."""
        self.filename = os.path.relpath(filename, SCRIPT_DIR)
        self.rows = []

    def _add_row(self, node, kind):
        """Append a row describing *node*, labelled with *kind*."""
        self.rows.append({
            "name": node.name,
            "file": self.filename,
            "line": node.lineno,
            "type": kind,
            "comment": ast.get_docstring(node),
        })

    def visit_FunctionDef(self, node):
        """Record a function definition if its name passes ``keep_name``.

        ``generic_visit`` is not called, so definitions nested inside the
        function body are not visited.
        """
        if keep_name(node.name):
            self._add_row(node, "function")

    def visit_ClassDef(self, node):
        """Record a class and the function definitions found inside it.

        ``ast.walk`` yields every descendant of the class node, so functions
        nested inside methods (or inside nested classes) are also recorded
        with the type "method".
        """
        # NOTE(review): methods are only scanned when the class itself is
        # kept; confirm this nesting matches the intended filter.
        if not keep_name(node.name):
            return

        self._add_row(node, "class")
        for nd in ast.walk(node):
            if isinstance(nd, ast.FunctionDef) and keep_name(nd.name):
                self._add_row(nd, "method")


def main():
    """Parse every Python file under ``IN_DIR`` and write ``OUT_FILE``."""
    rows = []

    for file in find_py_files(IN_DIR):
        # Read raw bytes so ast.parse applies Python's own source-encoding
        # rules instead of the platform's locale encoding.
        with open(file, "rb") as f:
            py_source = f.read()

        visitor = FeatureVisitor(file)
        visitor.visit(ast.parse(py_source, filename=file))
        rows.extend(visitor.rows)

    # Build the frame once; concatenating per file re-copies all earlier rows.
    pd.DataFrame(rows, columns=_COLUMNS).to_csv(OUT_FILE)


if __name__ == "__main__":
    main()