This repository has been archived on 2023-06-18. You can view files and clone it, but cannot push or open issues or pull requests.
va-project/backend/api/companies.py

38 lines
1.4 KiB
Python
Raw Permalink Normal View History

2023-05-08 10:16:06 +00:00
import os
import pandas as pd
import numpy as np
from scraper.top100_extractor import programming_crime_list
2023-05-21 14:16:18 +00:00
from typing import Optional
2023-05-21 14:16:18 +00:00
# Directory two levels above the directory containing this module; the CSV
# paths below are joined onto it when the files are read.
ROOT_PATH: str = os.path.join(os.path.dirname(__file__), '..', '..')
# Company listing CSV (indexed by its 'ticker' column), relative to ROOT_PATH.
COMPANIES_CSV_PATH: str = os.path.join('scraper', 'companies.csv')
# Per-company metrics CSV (indexed by its 'Ticker' column), relative to ROOT_PATH.
COMPANY_DATA_CSV_PATH: str = os.path.join('Elaborated_Data', 'normalized_data.csv')
2023-05-08 10:16:06 +00:00
2023-05-10 08:33:40 +00:00
def non_nan(a: list[object]) -> list[object]:
    """Return the elements of ``a`` that are not missing values.

    Strings are always kept. Any other element is dropped when pandas
    considers it missing (``NaN``, ``None``, ``pd.NA``, ``NaT``).

    Args:
        a: Scalars to filter, e.g. one row of tag columns read from a CSV.

    Returns:
        A new list with the missing entries removed, original order preserved.
    """
    # pd.isna is used instead of np.isnan because np.isnan raises TypeError
    # on non-float objects such as None.
    return [item for item in a if isinstance(item, str) or not pd.isna(item)]
2023-05-08 10:16:06 +00:00
2023-05-21 14:16:18 +00:00
def get_companies(tickers: Optional[list[str]] = None) -> list[dict]:
    """Read the companies CSV and return it as a JSON-ifiable list of records.

    Each record holds the columns of the companies CSV for one ticker, with
    the three ``tag N`` columns merged into a single ``tags`` list (missing
    tags removed) and the company metric columns joined in by ticker.

    Args:
        tickers: Tickers to include. When ``None``, ``programming_crime_list``
            is used instead.

    Returns:
        One dict per matching company, in the row order of the companies CSV.
        Missing values (NaN) are replaced by ``None`` so that the result can
        be serialised for the frontend.
    """
    tag_columns = ['tag 1', 'tag 2', 'tag 3']
    metric_columns = ['Valuation', 'Financial Health', 'Estimated Growth', 'Past Performance']

    # Keep the caller's argument untouched instead of rebinding it to a Series.
    wanted = list(programming_crime_list if tickers is None else tickers)

    companies = pd.read_csv(os.path.join(ROOT_PATH, COMPANIES_CSV_PATH), index_col='ticker')
    # Explicit copy: columns are added below, and assigning onto a filtered
    # selection of the original frame is what triggers SettingWithCopyWarning.
    companies = companies.loc[companies.index.isin(wanted), :].copy()

    # Collapse the three tag columns into one list per company, dropping NaNs.
    companies['tags'] = companies[tag_columns].values.tolist()
    companies['tags'] = companies['tags'].apply(non_nan)
    companies = companies.drop(columns=tag_columns)

    # Include company metrics (left join on the ticker index).
    metrics = pd.read_csv(os.path.join(ROOT_PATH, COMPANY_DATA_CSV_PATH), index_col='Ticker')
    metrics = metrics.loc[:, metric_columns]
    companies = companies.join(metrics)

    return companies.reset_index().replace({np.nan: None}).to_dict('records')