project work part 1 done
This commit is contained in:
parent
5af7724e0d
commit
6ecca28c13
5 changed files with 85 additions and 1 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1 +1,2 @@
|
|||
virtualenv/
|
||||
env/
|
||||
__pycache__/
|
||||
|
|
74
find_god_classes.py
Executable file
74
find_god_classes.py
Executable file
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python3
|
||||
import javalang
|
||||
import os
|
||||
import pandas as pd
|
||||
import glob
|
||||
|
||||
# A class C is flagged as a god class if:
#     |M(C)| > E(M) + 6*V(M)
# i.e. its number of methods is greater than the average number of methods
# across all classes plus 6 times the standard deviation.
|
||||
|
||||
|
||||
# Absolute directory containing this script (symlinks resolved); the other
# paths below are built relative to it.
DIR: str = os.path.dirname(os.path.realpath(__file__))
# Root of the Java source tree that is scanned for classes.
SOURCES: str = f'{DIR}/resources/xerces2-j-src'
# Directory that receives the generated CSV report(s).
OUT_DIR: str = f'{DIR}/god_classes'
|
||||
|
||||
|
||||
def clean_output():
    """Delete every ``*.csv`` file located directly inside ``OUT_DIR``.

    Files that do not match the pattern, and the directory itself, are left
    untouched.
    """
    pattern = OUT_DIR + '/*.csv'
    for csv_path in glob.glob(pattern):
        os.remove(csv_path)
|
||||
|
||||
|
||||
def parse(path: str) -> list[tuple[str, int]]:
    """Count the methods of every class declared in one Java source file.

    Args:
        path: Path of the ``.java`` file to parse.

    Returns:
        One ``(class_name, method_count)`` tuple per class declaration that
        javalang reports for the file. ``class_name`` is the package name and
        the class's simple name joined by a dot, or just the simple name when
        the file has no package declaration.

    Errors raised while opening the file or while javalang parses it are not
    caught here and propagate to the caller.
    """
    # Get the AST of the file
    with open(path) as file:
        tree = javalang.parse.parse(file.read())

    # Fetch package name from the package declaration;
    # if the node is missing, assume the default package ('').
    package_name = ''
    for _, node in tree.filter(javalang.tree.PackageDeclaration):
        package_name = node.name
        break

    # Get all classes and the number of methods for each one
    rows: list[tuple[str, int]] = []
    for _, node in tree.filter(javalang.tree.ClassDeclaration):
        # Avoid a bogus leading dot ('.Foo') for default-package classes.
        fqdn = f'{package_name}.{node.name}' if package_name else node.name
        rows.append((fqdn, len(node.methods)))

    return rows
|
||||
|
||||
|
||||
def create_df(root: str) -> pd.DataFrame:
    """Build a table of method counts for all Java classes found under *root*.

    The directory tree is walked recursively and every file whose name ends
    in ``.java`` is handed to ``parse``.

    Args:
        root: Directory to scan.

    Returns:
        A DataFrame with the columns ``class_name`` and ``method_num`` and one
        row per entry returned by ``parse``, indexed 0..n-1 in the order the
        entries were encountered. The frame is empty (but still has both
        columns) when no ``.java`` file is found.
    """
    rows = []
    for path, _, files in os.walk(root):
        for f in files:
            if f.endswith('.java'):
                # for each java file, collect all entries found
                rows.extend(parse(path + '/' + f))

    # Build the frame once instead of growing it row by row with ``.loc``,
    # which re-allocates on every insertion.
    return pd.DataFrame(rows, columns=['class_name', 'method_num'])
|
||||
|
||||
|
||||
def main():
    """Find god classes in ``SOURCES`` and write them to a CSV report.

    Existing CSV output is removed first. A class is reported when its method
    count is strictly greater than the mean method count plus six times the
    standard deviation, both computed over all classes found. The matching
    rows are written to ``god_classes.csv`` inside ``OUT_DIR``.
    """
    clean_output()
    df = create_df(SOURCES)

    method_counts = df['method_num']
    threshold = method_counts.mean() + 6 * method_counts.std()

    is_god_class = method_counts > threshold
    df[is_god_class].to_csv(OUT_DIR + '/god_classes.csv')


# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
0
god_classes/.gitkeep
Normal file
0
god_classes/.gitkeep
Normal file
5
god_classes/god_classes.csv
Normal file
5
god_classes/god_classes.csv
Normal file
|
@ -0,0 +1,5 @@
|
|||
,class_name,method_num
|
||||
250,org.apache.xerces.impl.xs.traversers.XSDHandler,118
|
||||
300,org.apache.xerces.impl.dtd.DTDGrammar,101
|
||||
406,org.apache.xerces.xinclude.XIncludeHandler,116
|
||||
602,org.apache.xerces.dom.CoreDocumentImpl,125
|
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
|||
javalang==0.13.0
|
||||
pandas==1.5.2
|
||||
scikit_learn==1.2.1
|
||||
shrek==0.0.2
|
Reference in a new issue