#!/usr/bin/env python3
"""Add a binary ``buggy`` label to the extracted class feature vectors.

Reads ``metrics/feature_vectors.csv``, marks every row whose ``class_name``
is listed in one of the ``*.src`` files under the Defects4J
``modified_classes`` directory of the Closure project, and writes the result
to ``metrics/feature_vectors_labeled.csv``.
"""
import glob
import os
import re

import javalang
import pandas as pd

# God class if:
#   |M(C)| > E(M) + 6*V(M)
# (number of methods greater than average across all classes plus 6 times the
# standard deviation)
# NOTE(review): nothing in this script computes the rule above; it looks like
# it was carried over from a sibling script -- confirm and relocate if so.

# Directory containing this script; every other path is anchored to it.
DIR: str = os.path.dirname(os.path.realpath(__file__))
# Directory holding the ``*.src`` label files (one class name per line).
LABEL_DIR: str = DIR + '/defects4j/framework/projects/Closure/modified_classes'
# Input and output currently share the same ``metrics`` directory.
IN_DIR: str = DIR + '/metrics'
OUT_DIR: str = DIR + '/metrics'


def clean_output() -> None:
    """Delete every ``*_labeled.csv`` file found in ``OUT_DIR``.

    This helper is not called by ``main``; it is available for callers that
    want to clear previously generated output.
    """
    for path in glob.glob(OUT_DIR + '/*_labeled.csv'):
        os.remove(path)


def get_is_buggy_set() -> set[str]:
    """Collect the class names listed in the ``*.src`` files of ``LABEL_DIR``.

    Each non-blank line of each file contributes one entry, with surrounding
    whitespace (including the trailing newline) removed.

    Returns:
        The set of distinct names found; empty when no ``*.src`` file exists.
    """
    names: set[str] = set()
    for path in glob.glob(LABEL_DIR + '/*.src'):
        # Explicit encoding so the result does not depend on the locale.
        with open(path, 'r', encoding='utf-8') as file_obj:
            for line in file_obj:
                name = line.strip()
                # Blank lines carry no class name; keep '' out of the set.
                if name:
                    names.add(name)
    return names


def main() -> None:
    """Label the feature vectors and write them to the output CSV.

    Adds an integer ``buggy`` column that is 1 when the row's ``class_name``
    is in the set returned by ``get_is_buggy_set`` and 0 otherwise.

    Raises:
        FileNotFoundError: if ``feature_vectors.csv`` is missing.
        KeyError: if the CSV has no ``class_name`` column.
    """
    is_buggy_set = get_is_buggy_set()
    df = pd.read_csv(IN_DIR + '/feature_vectors.csv')
    # Vectorised membership test; presumably class_name holds fully qualified
    # class names matching the label files -- verify against the extractor.
    df['buggy'] = df['class_name'].isin(is_buggy_set).astype(int)
    df.to_csv(OUT_DIR + '/feature_vectors_labeled.csv', index=False)


if __name__ == '__main__':
    main()