import ast
import pandas as pd
import os

# Absolute directory containing this script; every other path hangs off it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Root of the source tree that is scanned for ``.py`` files.
IN_DIR = os.path.join(SCRIPT_DIR, "tensorflow")
# CSV file that receives the extracted rows.
OUT_FILE = os.path.join(SCRIPT_DIR, "data.csv")


def find_py_files(dir):
    """Lazily yield the path of every ``.py`` file found below ``dir``.

    The tree is traversed with ``os.walk``; each yielded path is the
    directory reported by the walk joined with the file name.
    """
    for folder, _subdirs, filenames in os.walk(dir):
        yield from (
            os.path.join(folder, filename)
            for filename in filenames
            if filename.endswith(".py")
        )


def keep_name(name):
    """Return True when ``name`` should be included in the output.

    A name is rejected if it starts with an underscore, or if it contains
    "main" or "test" anywhere (case-insensitive).
    """
    if name.startswith("_"):
        return False
    lowered = str(name).lower()
    return "main" not in lowered and "test" not in lowered


class FeatureVisitor(ast.NodeVisitor):
    """Collect one row per kept function, class and method of a parsed module.

    After ``visit(tree)`` has run, ``rows`` holds a list of dicts with the
    keys ``name``, ``file``, ``line``, ``type`` and ``comment``.  ``type`` is
    one of ``"function"``, ``"class"`` or ``"method"``; ``comment`` is the
    docstring (``None`` when there is none).  Names rejected by
    ``keep_name`` are skipped.

    Traversal notes:
      * The body of a function is not descended into, so definitions nested
        inside a function produce no rows.
      * When a class is kept, every function definition found anywhere
        beneath it (via ``ast.walk``) is reported as a ``"method"``.  When a
        class is rejected, nothing beneath it is reported.
    """

    # ``async def`` is a distinct node type from ``def``; both are treated as
    # functions/methods so that coroutines are not silently dropped.
    _FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

    def __init__(self, filename):
        """Store ``filename`` relative to SCRIPT_DIR and start with no rows."""
        self.filename = os.path.relpath(filename, SCRIPT_DIR)
        self.rows = []

    def _add_row(self, node, kind):
        """Append a row for ``node`` if its name is kept; return whether it was."""
        if not keep_name(node.name):
            return False
        self.rows.append({
            "name": node.name,
            "file": self.filename,
            "line": node.lineno,
            "type": kind,
            "comment": ast.get_docstring(node),
        })
        return True

    def visit_FunctionDef(self, node):
        """Record a function definition that is not nested in a class."""
        self._add_row(node, "function")

    # Coroutine functions are recorded exactly like ordinary functions.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_ClassDef(self, node):
        """Record a class and, if it is kept, every function beneath it."""
        if not self._add_row(node, "class"):
            return
        for child in ast.walk(node):
            if isinstance(child, self._FUNCTION_NODES):
                self._add_row(child, "method")


def main():
    """Scan every ``.py`` file under IN_DIR and write the rows to OUT_FILE.

    Each file is parsed with ``ast`` and fed through a ``FeatureVisitor``;
    the rows from all files are combined into a single DataFrame with the
    columns ``name``, ``file``, ``line``, ``type`` and ``comment`` and saved
    as CSV (including the default integer index).

    Raises:
        SyntaxError: if a file cannot be parsed; the message names the file.
    """
    columns = ["name", "file", "line", "type", "comment"]
    rows = []

    for file in find_py_files(IN_DIR):
        # Read raw bytes so that ast.parse applies the file's own encoding
        # declaration (UTF-8 by default) rather than the platform's locale
        # encoding, which a text-mode open() would use.
        with open(file, "rb") as f:
            py_source = f.read()

        # Passing the filename makes parse errors point at the offending file.
        py_ast = ast.parse(py_source, filename=file)

        visitor = FeatureVisitor(file)
        visitor.visit(py_ast)
        rows.extend(visitor.rows)

    # Build the frame once at the end: concatenating inside the loop copies
    # all previously collected rows again for every file.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(OUT_FILE)


# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()