This repository has been archived on 2023-06-18. You can view files and clone it, but cannot push or open issues or pull requests.
ima02/label_feature_vectors.py

48 lines
1.1 KiB
Python
Executable File

#!/usr/bin/env python3
import glob
import os
import re
import javalang
import pandas as pd
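# NOTE(review): the god-class rule below is not applied anywhere in the
# visible code, which only labels feature vectors as buggy/non-buggy; it
# looks carried over from another script -- confirm before relying on it.
# God class if:
# |M(C)| > E(M) + 6*V(M)
# (i.e. the number of methods of class C is greater than the average across
# all classes plus 6 times the standard deviation)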
# Absolute directory containing this script (symlinks resolved); every other
# path below is derived from it.
DIR: str = os.path.dirname(os.path.realpath(__file__))
# Directory that is scanned for the ``*.src`` label files.
LABEL_DIR: str = f'{DIR}/resources/modified_classes'
# Input and output CSVs live in the same ``metrics`` directory.
IN_DIR: str = f'{DIR}/metrics'
OUT_DIR: str = f'{DIR}/metrics'
def clean_output():
    """Delete every ``*_labeled.csv`` file found directly in ``OUT_DIR``."""
    pattern = OUT_DIR + '/*_labeled.csv'
    for stale_csv in glob.glob(pattern):
        os.remove(stale_csv)
def get_is_buggy_set(label_dir: 'str | None' = None) -> set[str]:
    """Collect the entries listed in the ``*.src`` label files.

    Every ``*.src`` file directly inside the label directory is read as
    UTF-8 text, one entry per line. Surrounding whitespace (including the
    line terminator) is stripped and blank lines are skipped, so the result
    never contains an empty string.

    Args:
        label_dir: Directory to scan for ``*.src`` files. When omitted or
            ``None``, the module-level ``LABEL_DIR`` is used.

    Returns:
        The set of distinct, stripped, non-empty lines found across all
        label files; an empty set if no ``*.src`` file exists.
    """
    if label_dir is None:
        label_dir = LABEL_DIR
    entries: set[str] = set()
    for path in glob.glob(os.path.join(label_dir, '*.src')):
        # Explicit encoding keeps decoding independent of the platform locale.
        with open(path, 'r', encoding='utf-8') as file_obj:
            # Iterate the file lazily instead of materialising all lines.
            for line in file_obj:
                entry = line.strip()  # strip() already removes the newline
                if entry:
                    entries.add(entry)
    return entries
def main():
    """Add a binary ``buggy`` column to the feature vectors and save them.

    Reads ``feature_vectors.csv`` from ``IN_DIR``, sets ``buggy`` to 1 for
    each row whose ``class_name`` value is in the set returned by
    ``get_is_buggy_set()`` and to 0 otherwise, then writes the frame
    (without the index) to ``feature_vectors_labeled.csv`` in ``OUT_DIR``.
    """
    buggy_names = get_is_buggy_set()
    features = pd.read_csv(IN_DIR + '/feature_vectors.csv')
    # Vectorised membership test; the booleans are cast to 1/0 labels.
    features['buggy'] = features['class_name'].isin(buggy_names).astype(int)
    features.to_csv(OUT_DIR + '/feature_vectors_labeled.csv', index=False)


# Run the labelling step only when executed as a script, not on import.
if __name__ == '__main__':
    main()