import os
from typing import Any, Optional

import numpy as np
import pandas as pd

from scraper.top100_extractor import programming_crime_list

ROOT_PATH: str = os.path.join(os.path.dirname(__file__), '..', '..')
COMPANIES_CSV_PATH: str = os.path.join('scraper', 'companies.csv')
COMPANY_DATA_CSV_PATH: str = os.path.join('Elaborated_Data', 'normalized_data.csv')


def non_nan(values: list[Any]) -> list[Any]:
    """Drops NaN entries from a list, keeping all strings."""
    return [v for v in values if isinstance(v, str) or not np.isnan(v)]


def get_companies(tickers: Optional[list[str]] = None) -> list[dict]:
    """
    Reads the companies.csv file and returns it as a JSON-ifiable object
    to return to the frontend.
    """
    df = pd.read_csv(os.path.join(ROOT_PATH, COMPANIES_CSV_PATH), index_col='ticker')

    # Default to the full ticker list when no filter is supplied.
    tickers = pd.Series(programming_crime_list if tickers is None else tickers)
    df = df.loc[df.index.isin(tickers), :]

    # Collapse the three tag columns into a single list column, dropping NaNs.
    df['tags'] = df[['tag 1', 'tag 2', 'tag 3']].values.tolist()
    df['tags'] = df['tags'].apply(non_nan)
    del df['tag 1']
    del df['tag 2']
    del df['tag 3']

    # Include company metrics
    df_data = pd.read_csv(os.path.join(ROOT_PATH, COMPANY_DATA_CSV_PATH), index_col='Ticker') \
        .loc[:, ['Valuation', 'Financial Health', 'Estimated Growth', 'Past Performance']]

    # Compute limits of metrics
    # print(df_data.agg([min, max]).to_dict('records'))

    df = df.join(df_data)

    # Replace NaN with None so the records serialize cleanly to JSON.
    return df.reset_index().replace({np.nan: None}).to_dict('records')
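

# Illustrative usage sketch (not part of the module's public behavior): the
# tickers below are hypothetical, and this assumes companies.csv and
# normalized_data.csv exist at the configured paths. It shows that the
# function returns plain record dicts keyed by the CSV column names.
if __name__ == '__main__':
    for company in get_companies(['AAPL', 'MSFT']):
        print(company['ticker'], company['tags'])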