#!/usr/bin/env python3
"""Find "god classes" in the Xerces2 Java sources.

A class C is reported as a god class when

    |M(C)| > E(M) + 6 * V(M)

i.e. its number of methods is greater than the average number of methods
across all classes plus 6 times the standard deviation.

Setup notes:

* The Xerces2 sources are expected in ``xerces2/src`` next to this script.
  They were downloaded from
  https://dlcdn.apache.org//xerces/j/source/Xerces-J-src.2.12.2.zip
* Dependencies (see ``requirements.txt``): ``javalang==0.13.0`` and
  ``pandas==1.5.2``.
"""
import os

import pandas as pd

DIR: str = os.path.dirname(os.path.realpath(__file__))
SOURCES: str = os.path.join(DIR, 'xerces2', 'src')


def parse(path: str) -> list[tuple[str, int]]:
    """Return ``(fully_qualified_class_name, method_count)`` per class.

    Args:
        path: Path of the ``.java`` file to parse.

    Returns:
        One tuple for every ``ClassDeclaration`` node found in the file's
        AST. Classes of a file without a package declaration are reported
        with their bare name (no leading dot).
    """
    # Imported here rather than at module level so the pandas-only helpers
    # below stay importable without the Java parser installed.
    import javalang

    # Get the AST of the file
    with open(path, encoding='utf-8') as file:
        tree = javalang.parse.parse(file.read())

    # Fetch the package name from the first package declaration; if the node
    # is missing, assume the default package ('').
    package_name = next(
        (node.name
         for _, node in tree.filter(javalang.tree.PackageDeclaration)),
        '',
    )
    prefix = package_name + '.' if package_name else ''

    # Collect every class together with its number of methods
    return [
        (prefix + node.name, len(node.methods))
        for _, node in tree.filter(javalang.tree.ClassDeclaration)
    ]


def create_df(root) -> pd.DataFrame:
    """Parse every ``.java`` file below *root* into a single DataFrame.

    Args:
        root: Directory that is walked recursively.

    Returns:
        A DataFrame with the columns ``class_name`` and ``method_num``, one
        row per class found. It is empty (but has both columns) when no
        ``.java`` file is found.
    """
    rows: list[tuple[str, int]] = []
    for path, _dirs, files in os.walk(root):
        for f in files:
            if f.endswith('.java'):
                rows.extend(parse(os.path.join(path, f)))

    # Build the frame once: appending row by row with ``frame.loc[i] = row``
    # re-allocates the frame on every insertion.
    return pd.DataFrame(rows, columns=['class_name', 'method_num'])


def select_god_classes(df: pd.DataFrame, k: float = 6) -> pd.DataFrame:
    """Return the rows of *df* whose method count marks a god class.

    Args:
        df: Frame with a numeric ``method_num`` column, as produced by
            ``create_df``.
        k: Number of standard deviations above the mean a class must exceed.

    Returns:
        The rows with ``method_num > mean + k * std``. An empty frame is
        returned unchanged. With a single row the standard deviation is NaN,
        so no row is selected.
    """
    if df.empty:
        return df

    counts = df['method_num']
    threshold = counts.mean() + k * counts.std()
    return df[counts > threshold]


def main():
    """Print the god classes found in the Xerces2 source tree."""
    # os.walk silently yields nothing for a missing directory, which would
    # print an empty result indistinguishable from "no god classes".
    if not os.path.isdir(SOURCES):
        raise SystemExit(
            f'Xerces2 sources not found at {SOURCES}; see README.md')

    print(select_god_classes(create_df(SOURCES)))


if __name__ == '__main__':
    main()