51 lines
1.2 KiB
Python
51 lines
1.2 KiB
Python
|
#!/usr/bin/env python3
|
||
|
import javalang
|
||
|
import os
|
||
|
import pandas as pd
|
||
|
import glob
|
||
|
import re
|
||
|
|
||
|
# God class if:
#   |M(C)| > E(M) + 6*V(M)
# (i.e. the number of methods in class C is greater than the average number
# of methods across all classes plus 6 times the standard deviation)
|
||
|
|
||
|
# Absolute, symlink-resolved directory that contains this script; every
# other path below is built relative to it.
DIR: str = os.path.dirname(os.path.realpath(__file__))
# Directory that is globbed for '*.src' label files.
LABEL_DIR: str = f'{DIR}/defects4j/framework/projects/Closure/modified_classes'
# Input and output deliberately resolve to the same 'metrics' directory.
IN_DIR: str = f'{DIR}/metrics'
OUT_DIR: str = f'{DIR}/metrics'
|
||
|
|
||
|
|
||
|
def clean_output():
    """Delete every '*_labeled.csv' file located directly inside OUT_DIR.

    Returns nothing. Files are removed with os.remove, so any OS-level
    error raised while deleting propagates to the caller.
    """
    stale_pattern = OUT_DIR + '/*_labeled.csv'
    for stale_csv in glob.glob(stale_pattern):
        os.remove(stale_csv)
|
||
|
|
||
|
|
||
|
def get_is_buggy_df() -> set[str]:
    """Collect the distinct entries listed in the '*.src' files of LABEL_DIR.

    Despite the ``_df`` suffix in its name, this function has always
    returned a plain set rather than a DataFrame; the annotations now say so.

    Returns:
        The set of all lines found across every matching file, with
        surrounding whitespace removed. A blank line contributes the
        empty string, exactly as before.
    """
    labels: set[str] = set()

    for src_path in glob.glob(LABEL_DIR + '/*.src'):
        # Explicit encoding keeps the result independent of the platform's
        # default locale.
        with open(src_path, 'r', encoding='utf-8') as fobj:
            # str.strip() already removes the trailing newline, so no
            # second strip('\n') is needed; iterating the file object
            # avoids building an intermediate list of lines.
            labels.update(line.strip() for line in fobj)

    return labels
|
||
|
|
||
|
|
||
|
def is_class_buggy(fqdn: str, se) -> bool:
    """Convert a dotted class name to its slash-separated form (stub).

    NOTE(review): this function looks unfinished. It builds the path form
    of ``fqdn`` but never uses it, never reads ``se``, and has no return
    statement, so it yields None instead of the annotated bool. Confirm
    the intended behaviour before relying on it.
    """
    # Same result as replacing every '.' with '/'; currently unused.
    clazz_path = '/'.join(fqdn.split('.'))
|
||
|
|
||
|
|
||
|
def main():
    """Add a 0/1 'buggy' column to the feature vectors and save the result.

    Reads 'feature_vectors.csv' from IN_DIR, sets 'buggy' to 1 for each row
    whose 'class_name' value is among the entries returned by
    get_is_buggy_df() (0 otherwise), and writes the table to
    'feature_vectors_labeled.csv' in OUT_DIR without the index column.
    """
    buggy_names = get_is_buggy_df()

    def to_label(fqdn) -> int:
        # Membership test collapsed to the 0/1 integer stored in the CSV.
        return int(fqdn in buggy_names)

    features = pd.read_csv(f'{IN_DIR}/feature_vectors.csv')
    features['buggy'] = features['class_name'].apply(to_label)
    features.to_csv(f'{OUT_DIR}/feature_vectors_labeled.csv', index=False)
|
||
|
|
||
|
|
||
|
# Run the labelling step only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|