This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
ima-preparation/bug-2022/label_feature_vectors.py

48 lines
1.1 KiB
Python
Raw Normal View History

2023-02-19 13:20:13 +00:00
#!/usr/bin/env python3
import glob
2023-02-19 17:18:51 +00:00
import os
2023-02-19 13:20:13 +00:00
import re
2023-02-19 17:18:51 +00:00
import javalang
import pandas as pd
2023-02-19 13:20:13 +00:00
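# NOTE(review): the "god class" rule below is not applied anywhere in this
# script; it looks like a leftover from a sibling script (confirm before
# removing). This script only adds a 0/1 `buggy` column to the feature vectors.
# God class if:
# |M(C)| > E(M) + 6*V(M)
# (number of methods greater than the average across all classes plus 6 times
# the standard deviation)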
# Absolute directory of this script (symlinks resolved); every other path
# below is anchored to it, so the script works from any working directory.
DIR: str = os.path.dirname(os.path.realpath(__file__))
# Directory scanned for `*.src` label files (Defects4J checkout, Closure project).
LABEL_DIR: str = f'{DIR}/defects4j/framework/projects/Closure/modified_classes'
# Input and output currently share the same `metrics` directory.
IN_DIR: str = f'{DIR}/metrics'
OUT_DIR: str = f'{DIR}/metrics'
def clean_output():
    """Delete every file matching ``*_labeled.csv`` directly inside ``OUT_DIR``.

    Files in sub-directories are not touched (the glob pattern is not
    recursive). Errors raised by ``os.remove`` propagate to the caller.
    """
    # The match list is built in full before anything is deleted.
    stale_outputs = glob.glob(OUT_DIR + '/*_labeled.csv')
    for stale_path in stale_outputs:
        os.remove(stale_path)
2023-02-19 17:18:51 +00:00
def get_is_buggy_set() -> set[str]:
    """Collect the lines of every ``*.src`` file found in ``LABEL_DIR``.

    Each line is stripped of surrounding whitespace (including the trailing
    newline) and added to a set, so duplicates across files collapse into
    one entry. A blank line contributes the empty string ``''``.

    Returns:
        The set of stripped lines; presumably fully qualified class names
        (verify against the ``*.src`` file format).
    """
    buggy_names: set[str] = set()
    for src_path in glob.glob(LABEL_DIR + '/*.src'):
        with open(src_path, 'r') as handle:
            # Iterating the handle yields the same lines as readlines().
            buggy_names.update(entry.strip() for entry in handle)
    return buggy_names
def main():
    """Add a ``buggy`` column to the feature vectors and write them back out.

    Reads ``feature_vectors.csv`` from ``IN_DIR``, sets ``buggy`` to 1 for
    every row whose ``class_name`` is in the set returned by
    ``get_is_buggy_set()`` and to 0 otherwise, then writes the result to
    ``feature_vectors_labeled.csv`` in ``OUT_DIR`` without the index column.
    """
    buggy_names = get_is_buggy_set()

    features = pd.read_csv(IN_DIR + '/feature_vectors.csv')
    # Membership test over the whole column; True/False is cast to 1/0.
    features['buggy'] = features['class_name'].isin(buggy_names).astype(int)
    features.to_csv(OUT_DIR + '/feature_vectors_labeled.csv', index=False)
# Run the labelling step only when executed as a script, not when imported.
if __name__ == "__main__":
    main()