This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
ima-preparation/bug-2022/label_feature_vectors.py

51 lines
1.2 KiB
Python
Raw Normal View History

2023-02-19 13:20:13 +00:00
#!/usr/bin/env python3
import javalang
import os
import pandas as pd
import glob
import re
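# Labels every row of metrics/feature_vectors.csv with a 'buggy' flag: 1 when
# the row's class_name is listed in one of the *.src files under the Closure
# modified_classes directory, 0 otherwise. The labeled table is written to
# metrics/feature_vectors_labeled.csv.
#
# NOTE: the God-class rule below is not used by the code visible in this
# script; it appears to be carried over from another script.
# God class if:
# |M(C)| > E(M) + 6*V(M)
# (number of methods greater than the average across all classes plus 6 times
# the standard deviation)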
# Absolute directory that contains this script (symlinks resolved).
DIR: str = os.path.split(os.path.realpath(__file__))[0]
# Directory holding the *.src files that list the modified classes.
LABEL_DIR: str = f'{DIR}/defects4j/framework/projects/Closure/modified_classes'
# Input and output CSVs live in the same metrics directory.
IN_DIR: str = f'{DIR}/metrics'
OUT_DIR: str = f'{DIR}/metrics'
def clean_output():
    """Delete every ``*_labeled.csv`` file located directly in ``OUT_DIR``."""
    pattern = OUT_DIR + '/*_labeled.csv'
    # The match list is built in full before anything is removed.
    for labeled_csv in glob.glob(pattern):
        os.remove(labeled_csv)
def get_is_buggy_df(label_dir: 'str | None' = None) -> set[str]:
    """Collect the entries listed in the ``*.src`` label files.

    Despite the ``_df`` suffix in its name, this function returns a plain
    ``set`` of strings, not a DataFrame.

    Args:
        label_dir: Directory to scan for ``*.src`` files. When ``None`` (the
            default) the module-level ``LABEL_DIR`` is used.

    Returns:
        The distinct, whitespace-stripped, non-empty lines found across all
        ``*.src`` files directly inside the directory. The set is empty when
        no such file exists.
    """
    if label_dir is None:
        label_dir = LABEL_DIR
    class_names: set[str] = set()
    for path in glob.glob(label_dir + '/*.src'):
        with open(path, 'r') as fobj:
            # Iterate the file lazily instead of materialising every line.
            for line in fobj:
                name = line.strip()
                # A blank line names no class; do not store '' as an entry.
                if name:
                    class_names.add(name)
    return class_names
def is_class_buggy(fqdn: str, se) -> bool:
    """Tell whether the class named *fqdn* is contained in *se*.

    Args:
        fqdn: Dot-separated class name, e.g. ``a.b.C``.
        se: Container of known buggy classes; anything supporting ``in``.

    Returns:
        ``True`` when *se* contains *fqdn* itself or its slash-separated path
        form (``a/b/C``), ``False`` otherwise.
    """
    # Some label sources list classes as paths rather than dotted names, so
    # the path form is accepted as well.
    clazz_path = fqdn.replace('.', '/')
    return fqdn in se or clazz_path in se
def main():
    """Write a copy of the feature-vector CSV with an added ``buggy`` column.

    Reads ``feature_vectors.csv`` from ``IN_DIR``, sets ``buggy`` to 1 for
    rows whose ``class_name`` is in the set returned by ``get_is_buggy_df``
    and to 0 otherwise, then saves the table (without the index) as
    ``feature_vectors_labeled.csv`` in ``OUT_DIR``.
    """
    buggy_classes = get_is_buggy_df()
    features = pd.read_csv(IN_DIR + '/feature_vectors.csv')
    # Vectorised membership test; the boolean result is stored as 1/0.
    features['buggy'] = features['class_name'].isin(buggy_classes).astype(int)
    features.to_csv(OUT_DIR + '/feature_vectors_labeled.csv', index=False)


if __name__ == '__main__':
    main()