47 lines
1.1 KiB
Python
Executable file
47 lines
1.1 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
import glob
|
|
import os
|
|
import re
|
|
|
|
import javalang
|
|
import pandas as pd
|
|
|
|
# God class if:
#     |M(C)| > E(M) + 6*V(M)
# i.e. the number of methods of class C is greater than the average number of
# methods across all classes plus 6 times the standard deviation.
|
|
|
|
# Absolute directory that contains this script (symlinks resolved by realpath).
DIR: str = os.path.dirname(os.path.realpath(__file__))
# Directory whose ``*.src`` files get_is_buggy_set() reads.
LABEL_DIR: str = f'{DIR}/defects4j/framework/projects/Closure/modified_classes'
# Directory main() reads ``feature_vectors.csv`` from.
IN_DIR: str = f'{DIR}/metrics'
# Directory main() writes ``feature_vectors_labeled.csv`` to, and the one
# clean_output() removes ``*_labeled.csv`` files from.
OUT_DIR: str = f'{DIR}/metrics'
|
|
|
|
|
|
def clean_output():
    """Delete every ``*_labeled.csv`` file located directly inside ``OUT_DIR``."""
    labeled_pattern = OUT_DIR + '/*_labeled.csv'
    for stale_csv in glob.glob(labeled_pattern):
        os.remove(stale_csv)
|
|
|
|
|
|
def get_is_buggy_set(label_dir: "str | None" = None) -> set[str]:
    """Collect the distinct non-blank lines of every ``*.src`` file in a directory.

    Each line is stripped of surrounding whitespace before it is stored, so
    the result holds one entry per distinct stripped line across all files.
    main() uses these entries as the class names to flag as buggy.

    Args:
        label_dir: Directory to scan for ``*.src`` files. When omitted or
            ``None``, the module-level ``LABEL_DIR`` is used.

    Returns:
        The set of stripped, non-empty lines. Empty when the directory
        contains no ``*.src`` files.
    """
    if label_dir is None:
        label_dir = LABEL_DIR

    names: set[str] = set()
    for src_path in glob.glob(label_dir + '/*.src'):
        # Explicit encoding keeps the result independent of the host locale.
        with open(src_path, 'r', encoding='utf-8') as file_obj:
            # Iterate the file lazily instead of materializing readlines().
            for line in file_obj:
                name = line.strip()
                # A blank line (e.g. a trailing newline at end of file) is not
                # a class name; skipping it keeps '' out of the set.
                if name:
                    names.add(name)
    return names
|
|
|
|
|
|
def main():
    """Add a 0/1 ``buggy`` column to the feature vectors and save the result.

    Reads ``feature_vectors.csv`` from ``IN_DIR``, sets ``buggy`` to 1 for
    every row whose ``class_name`` value is in the set returned by
    get_is_buggy_set() (0 otherwise), and writes the table without its index
    to ``feature_vectors_labeled.csv`` in ``OUT_DIR``.
    """
    buggy_classes = get_is_buggy_set()

    def to_label(class_name) -> int:
        # 1 when the class is listed as buggy, 0 otherwise.
        if class_name in buggy_classes:
            return 1
        return 0

    features = pd.read_csv(IN_DIR + '/feature_vectors.csv')
    features['buggy'] = features['class_name'].map(to_label)
    features.to_csv(OUT_DIR + '/feature_vectors_labeled.csv', index=False)
|
|
|
|
|
|
# Run the labeling step only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|