part 1 almost done (no filtering on filenames / identifiers)
This commit is contained in:
parent
1122cdd8b0
commit
57cf6164f4
3 changed files with 116359 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
env/
|
69
extract-data.py
Normal file
69
extract-data.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
import ast
|
||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Directory containing this script; every other path is derived from it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Root of the source tree that is scanned for Python files.
IN_DIR = os.path.join(SCRIPT_DIR, "tensorflow")
# CSV file that receives the extracted rows.
OUT_FILE = os.path.join(SCRIPT_DIR, "data.csv")
|
||||||
|
|
||||||
|
|
||||||
|
def find_py_files(dir):
    """Lazily yield the path of every ``.py`` file found beneath *dir*.

    The tree is traversed with ``os.walk``; each yielded path is the
    walked directory joined with the file name.
    """
    for root, _subdirs, names in os.walk(dir):
        yield from (
            os.path.join(root, name)
            for name in names
            if name.endswith(".py")
        )
|
||||||
|
|
||||||
|
|
||||||
|
class FeatureVisitor(ast.NodeVisitor):
    """Collect one row per function, method and class definition of a module.

    After ``visit(tree)`` has run, ``rows`` holds one dict per definition
    with the keys ``name``, ``file``, ``line``, ``type`` and ``comment``
    (the docstring, or ``None`` when there is none).

    Function bodies are not traversed, so definitions nested inside a
    function are not recorded.
    """

    # Number of class bodies currently being traversed. Declared at class
    # level so that every instance starts at zero.
    _class_depth = 0

    def __init__(self, filename):
        """Store *filename* relative to ``SCRIPT_DIR`` and start with no rows."""
        self.filename = os.path.relpath(filename, SCRIPT_DIR)
        self.rows = []

    def _record(self, node, kind):
        """Append the row describing definition *node*, tagged with *kind*."""
        self.rows.append({
            "name": node.name,
            "file": self.filename,
            "line": node.lineno,
            "type": kind,
            "comment": ast.get_docstring(node),
        })

    def visit_FunctionDef(self, node):
        """Record *node* as a method inside a class, else as a function."""
        # The ast module has no ``MethodDef`` node type, so NodeVisitor never
        # dispatches to visit_MethodDef by itself: methods arrive here as
        # plain FunctionDef nodes and have to be re-routed explicitly.
        if self._class_depth > 0:
            self.visit_MethodDef(node)
        else:
            self._record(node, "function")

    # ``async def`` definitions are recorded exactly like regular ones.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_MethodDef(self, node):
        """Record *node* as a method."""
        self._record(node, "method")

    def visit_ClassDef(self, node):
        """Record the class *node*, then visit its body to collect methods."""
        self._record(node, "class")
        self._class_depth += 1
        try:
            self.generic_visit(node)
        finally:
            # Restore the depth even if a nested visit raises.
            self._class_depth -= 1
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Extract definitions from every Python file in ``IN_DIR`` into ``OUT_FILE``.

    Each file returned by ``find_py_files(IN_DIR)`` is parsed with ``ast``
    and walked by a ``FeatureVisitor``. The collected rows are written as a
    CSV with the columns ``name``, ``file``, ``line``, ``type`` and
    ``comment``.

    Raises:
        SyntaxError: if a file cannot be parsed as Python source.
    """
    columns = ["name", "file", "line", "type", "comment"]
    rows = []

    for file in find_py_files(IN_DIR):
        # Read raw bytes so that ast.parse detects the source encoding itself
        # (PEP 263 cookie, UTF-8 otherwise) instead of depending on the
        # platform's default text encoding.
        with open(file, "rb") as f:
            py_source = f.read()

        py_ast = ast.parse(py_source, filename=file)

        visitor = FeatureVisitor(file)
        visitor.visit(py_ast)
        rows.extend(visitor.rows)

    # Build the frame once; concatenating a DataFrame per file copies all
    # previously collected rows on every iteration.
    pd.DataFrame(rows, columns=columns).to_csv(OUT_FILE)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the extraction only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue