wip part 2

This commit is contained in:
Claudio Maggioni 2023-10-11 13:59:07 +02:00
parent 57cf6164f4
commit d964c138a6
3 changed files with 34380 additions and 24819 deletions

59121
data.csv

File diff suppressed because it is too large Load diff

View file

@ -14,6 +14,11 @@ def find_py_files(dir):
yield os.path.join(cwd, file) yield os.path.join(cwd, file)
def keep_name(name):
    """Return True when a definition name should be kept.

    A name is rejected when it starts with an underscore, or when it
    contains "main" or "test" in any letter case.

    The name is coerced with ``str`` once, so the prefix check and the
    substring checks all operate on the same value.
    """
    text = str(name)
    if text.startswith("_"):
        return False
    # Lower-case once and reuse it for both substring checks.
    lowered = text.lower()
    return "main" not in lowered and "test" not in lowered
class FeatureVisitor(ast.NodeVisitor): class FeatureVisitor(ast.NodeVisitor):
def __init__(self, filename): def __init__(self, filename):
@ -21,31 +26,35 @@ class FeatureVisitor(ast.NodeVisitor):
self.rows = [] self.rows = []
def visit_FunctionDef(self, node): def visit_FunctionDef(self, node):
self.rows.append({ if keep_name(node.name):
"name": node.name, self.rows.append({
"file": self.filename, "name": node.name,
"line": node.lineno, "file": self.filename,
"type": "function", "line": node.lineno,
"comment": ast.get_docstring(node) "type": "function",
}) "comment": ast.get_docstring(node)
})
def visit_MethodDef(self, node):
self.rows.append({
"name": node.name,
"file": self.filename,
"line": node.lineno,
"type": "method",
"comment": ast.get_docstring(node)
})
def visit_ClassDef(self, node): def visit_ClassDef(self, node):
self.rows.append({ if keep_name(node.name):
"name": node.name, self.rows.append({
"file": self.filename, "name": node.name,
"line": node.lineno, "file": self.filename,
"type": "class", "line": node.lineno,
"comment": ast.get_docstring(node) "type": "class",
}) "comment": ast.get_docstring(node)
})
for nd in ast.walk(node):
if isinstance(nd, ast.FunctionDef):
if keep_name(nd.name):
self.rows.append({
"name": nd.name,
"file": self.filename,
"line": nd.lineno,
"type": "method",
"comment": ast.get_docstring(nd)
})
def main(): def main():

23
search-data.py Normal file
View file

@ -0,0 +1,23 @@
import re
import argparse
import os
import pandas as pd
# Absolute path of the directory that contains this script.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
# CSV file loaded by search(); resolved relative to this script's directory.
IN_DATASET = os.path.join(SCRIPT_DIR, "data.csv")
def search(query):
    """Load the CSV at ``IN_DATASET`` into a pandas DataFrame.

    NOTE: work in progress. ``query`` is accepted but not used yet, and
    the loaded frame is neither filtered nor returned (the function
    returns ``None``).
    """
    dataset = pd.read_csv(IN_DATASET)
def main():
    """Parse the command line and run a search with the given query.

    Takes one positional command-line argument, ``query``, and passes it
    to ``search``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("query", help="the query to search the corpus with", type=str)
    args = parser.parse_args()
    # The parsed value lives on the namespace; a bare ``query`` is not
    # defined in this scope and raised NameError.
    search(args.query)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()