# va-project/indexer/indexer.py

import sys
sys.path.append('../VISUAL-AN-PROJECT')
import math
import pandas as pd
import os
from scraper.top100_extractor import programming_crime_list
import numpy as np
from sklearn import preprocessing


# Let pandas display up to 500 rows before it truncates printed output.
pd.set_option('display.max_rows', 500)

def get_peg(ticker: str):
    """Return the most recent PEG ratio stored for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv`` (first
    CSV column used as the index), orders the rows by ``asOfDate`` from newest
    to oldest and returns the first ``PegRatio`` that is not NaN.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        The newest non-NaN ``PegRatio`` value, or ``0.0`` when the file has no
        ``PegRatio`` column or no row with a usable PEG value.

    Raises:
        FileNotFoundError: If the ratios CSV does not exist.
        KeyError: If ``PegRatio`` is present but ``asOfDate`` is missing.
    """
    current_ratios = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv',
        index_col=[0],
    )

    # Without the column there is nothing to pick from: use the fallback.
    if 'PegRatio' not in current_ratios.columns:
        return 0.0

    # Convert Object to DateTime so the ordering is chronological.
    current_ratios['asOfDate'] = pd.to_datetime(current_ratios['asOfDate'])

    # Drop only rows lacking the values actually used here. A blanket
    # dropna() would also discard the newest rows whenever any unrelated
    # ratio is missing, and a stale PEG would be returned instead.
    usable = (
        current_ratios
        .dropna(subset=['asOfDate', 'PegRatio'])
        .sort_values('asOfDate', ascending=False)
    )

    if usable.empty:
        return 0.0

    # First row after the descending sort is the latest PEG ratio.
    return usable['PegRatio'].iloc[0]

def get_financial_health(ticker: str):
    """Return the latest ``TotalDebt / TotalAssets`` ratio for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv``
    (first CSV column used as the index), orders the rows by ``asOfDate``
    from newest to oldest and computes the ratio on the newest row that has
    both figures.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        The debt-to-assets ratio of the newest usable row, or the fallback
        value ``2.0`` when either column is missing or no row contains both
        figures.

    Raises:
        FileNotFoundError: If the balance sheet CSV does not exist.
        KeyError: If both figures are present but ``asOfDate`` is missing.
    """
    balance_sheet = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv',
        index_col=[0],
    )

    # Same fallback the function has always used for missing columns.
    if not {'TotalDebt', 'TotalAssets'}.issubset(balance_sheet.columns):
        return 2.0

    # Convert Object to DateTime so the ordering is chronological.
    balance_sheet['asOfDate'] = pd.to_datetime(balance_sheet['asOfDate'])

    # Drop only rows lacking the figures used below. A blanket dropna()
    # removes every row with a gap in any balance sheet item, which can
    # discard the newest rows or leave nothing at all.
    usable = (
        balance_sheet
        .dropna(subset=['asOfDate', 'TotalDebt', 'TotalAssets'])
        .sort_values('asOfDate', ascending=False)
    )

    # Nothing to compute from: return the fallback instead of raising
    # IndexError on an empty selection.
    if usable.empty:
        return 2.0

    financial_health = usable['TotalDebt'] / usable['TotalAssets']

    return financial_health.iloc[0]

def estimated_growth(ticker: str):
    """Return the first ``5Y Growth estimate`` entry stored for ``ticker``.

    The value is taken as-is from
    ``Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv`` (first CSV
    column used as the index); no parsing or conversion is applied here.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        The first element of the ``5Y Growth estimate`` column.
    """
    csv_path = f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv'
    estimates = pd.read_csv(csv_path, index_col=[0])

    # Only the first row of the growth column is of interest.
    growth_column = estimates['5Y Growth estimate']
    return growth_column.values[0]

def past_performance_earnings(ticker: str):
    """Return how far actual EPS beat or missed the estimates, in percent.

    Reads ``Companies_Data/{ticker}_Data/{ticker}earnings.csv`` (first CSV
    column used as the index) and compares the summed ``epsActual`` with the
    summed ``epsEstimate`` over the rows where both values are present.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        ``(actual - estimate) / |estimate| * 100`` rounded to two decimals.
        Positive means the estimates were beaten. ``0.0`` is returned when
        the estimate total is zero (including when no row has both values),
        because no percentage can be derived from a zero baseline.

    Raises:
        FileNotFoundError: If the earnings CSV does not exist.
        KeyError: If ``epsActual`` or ``epsEstimate`` is missing.
    """
    earnings = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}earnings.csv',
        index_col=[0],
    )

    # Compare like with like: a row with only one of the two figures would
    # otherwise contribute to one sum but not the other.
    paired = earnings[['epsActual', 'epsEstimate']].dropna()
    actual_total = paired['epsActual'].sum()
    estimate_total = paired['epsEstimate'].sum()

    # A zero baseline would produce inf/NaN, which later distorts the
    # column mean used for normalization.
    if estimate_total == 0:
        return 0.0

    # Dividing by the magnitude keeps the sign meaningful when the
    # estimates are negative: beating a forecast loss must stay positive.
    performance_index = (actual_total - estimate_total) / abs(estimate_total) * 100

    return round(performance_index, 2)

def normalizer():
    """Rescale the raw indicator table and save it for the main page.

    Input is ``Elaborated_Data/Not_Normalized.csv`` and output is
    ``Elaborated_Data/normalized_data.csv``. The input is read without an
    index column, so an index column saved in that file is carried along as
    a regular column and written back out.

    Per-column treatment:
        * ``Valuation``: logistic curve ``200 / (1 + e**(0.2 * (x - mean)))``,
          so values above the mean score lower.
        * ``Financial Health``: linear scaling that maps the mean to 80.
        * ``Estimated Growth``: ``%`` stripped, converted to float, then
          ``200 / (1 + e**(0.08 * (mean - x)))`` rounded to two decimals.
        * ``Past Performance``: ``200 / (1 + e**(0.08 * (mean - x)))``.
    """
    frame = pd.read_csv('Elaborated_Data/Not_Normalized.csv')

    # Valuation: decreasing logistic centred on the column mean.
    valuation = frame['Valuation']
    valuation_mean = valuation.mean()
    frame['Valuation'] = 200 / (1 + math.e ** (0.2 * (-valuation_mean + valuation)))

    # Financial health: plain rescale, the mean lands on 80.
    health = frame['Financial Health']
    frame['Financial Health'] = (80 / health.mean()) * health

    # Estimated growth arrives as text such as "12.5%".
    growth = frame['Estimated Growth'].str.strip("%").astype("float")
    frame['Estimated Growth'] = growth
    growth_score = 200 / (1 + math.e ** (0.08 * (growth.mean() - growth)))
    frame['Estimated Growth'] = growth_score.round(2)

    # Past performance: increasing logistic centred on the column mean.
    performance = frame['Past Performance']
    performance_mean = performance.mean()
    frame['Past Performance'] = 200 / (1 + math.e ** (0.08 * (performance_mean - performance)))

    # Create normalized dataframe for main page
    frame.to_csv(r'Elaborated_Data/normalized_data.csv')

def create_df(companies_list):
    """Collect the raw indicators of every company and save them as CSV.

    For each ticker, in order, the PEG ratio, financial health, estimated
    growth and past earnings performance are looked up and stored under the
    columns ``Valuation``, ``Financial Health``, ``Estimated Growth`` and
    ``Past Performance``. The table is written to
    ``Elaborated_Data/Not_Normalized.csv``.

    Args:
        companies_list: Iterable of ticker symbols to process.
    """
    # Output column -> function producing its value for one ticker.
    # The order here is the order in which the lookups run per company.
    indicators = (
        ('Valuation', get_peg),
        ('Financial Health', get_financial_health),
        ('Estimated Growth', estimated_growth),
        ('Past Performance', past_performance_earnings),
    )

    table = {'Ticker': []}
    for column, _ in indicators:
        table[column] = []

    for ticker in companies_list:
        table['Ticker'].append(ticker)
        for column, lookup in indicators:
            table[column].append(lookup(ticker))

    raw_frame = pd.DataFrame(data=table)
    raw_frame.to_csv("Elaborated_Data/Not_Normalized.csv")

def main():
    """Build the raw indicator table, then its normalized counterpart.

    Ensures the ``Elaborated_Data`` output directory exists, runs
    ``create_df`` over ``programming_crime_list`` and finally ``normalizer``.
    """
    # exist_ok=True removes the gap between checking for the directory and
    # creating it, so a directory appearing in between is not an error.
    os.makedirs("Elaborated_Data", exist_ok=True)

    create_df(programming_crime_list)
    normalizer()
    # Manual spot checks, with the colour each raw value is read as:
    # print(get_peg('GOOGL'))  # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
    # print(get_financial_health('GOOGL'))  # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
    # print(estimated_growth('GOOGL'))  # < 0 (RED); 0 < x < 8% (ORANGE); > 8% (GREEN)
    # print(past_performance_earnings('GOOGL'), "%")  # -100 < x < 0 (RED); = 0 (ORANGE); 0 < x < 100 (GREEN)

# Run the indexing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()