va-project/indexer/indexer.py

import sys
# Add the sibling '../group-1' directory to the import search path. The entry
# is relative, so it is resolved against the process's working directory.
# Presumably this is what makes the `scraper` package importable -- verify.
sys.path.append('../group-1')

import pandas as pd
from scraper.top100_extractor import programming_crime_list

# NOTE(review): `preprocessing` is not referenced in the visible code.
from sklearn import preprocessing

# Let printed DataFrames show up to 500 rows before pandas truncates them.
pd.set_option('display.max_rows', 500)

def get_peg(ticker: str):
    """Return the most recent PEG ratio stored for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv`` (path
    relative to the current working directory), orders the rows by
    ``asOfDate`` and returns the newest ``PegRatio`` that is not missing.

    Args:
        ticker: Symbol used to build the CSV path.

    Returns:
        The newest non-NaN ``PegRatio`` value, or ``0.0`` when the file has
        no ``PegRatio`` column or no dated row carrying a PEG value.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the CSV has no ``asOfDate`` column.
    """
    current_ratios = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv',
        index_col=[0],
    )

    # Parse the dates so the sort below is chronological, not lexicographic.
    current_ratios['asOfDate'] = pd.to_datetime(current_ratios['asOfDate'])

    # Trace which ticker is being processed.
    print(ticker)

    if 'PegRatio' not in current_ratios.columns:
        return 0.0

    # Only rows lacking a date or a PEG value are discarded. Dropping every
    # row that has a NaN in *any* column would skip the newest PEG ratio
    # whenever an unrelated ratio is missing on that date.
    dated_pegs = (
        current_ratios
        .sort_values('asOfDate', ascending=False)
        .dropna(subset=['asOfDate', 'PegRatio'])
    )
    if dated_pegs.empty:
        return 0.0

    return dated_pegs['PegRatio'].values[0]

def get_financial_health(ticker: str):
    """Return the latest debt-to-assets ratio stored for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv``
    (path relative to the current working directory) and computes
    ``TotalDebt / TotalAssets`` for the newest dated row that has both values.

    Args:
        ticker: Symbol used to build the CSV path.

    Returns:
        The ratio for the newest usable row, or the string ``"NoDebt"`` when
        the ``TotalDebt`` or ``TotalAssets`` column is absent from the file.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the CSV has no ``asOfDate`` column.
        IndexError: If no dated row contains both ``TotalDebt`` and
            ``TotalAssets``.
    """
    balance_sheet = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv',
        index_col=[0],
    )

    # Parse the dates so the sort below is chronological, not lexicographic.
    balance_sheet['asOfDate'] = pd.to_datetime(balance_sheet['asOfDate'])

    ratio_columns = ['TotalDebt', 'TotalAssets']
    if any(column not in balance_sheet.columns for column in ratio_columns):
        return "NoDebt"

    # Keep every row that can actually produce a ratio. Dropping rows with a
    # NaN in *any* column would discard recent statements (or all of them)
    # just because an unrelated balance-sheet item is missing.
    usable = (
        balance_sheet
        .sort_values('asOfDate', ascending=False)
        .dropna(subset=['asOfDate'] + ratio_columns)
    )
    if usable.empty:
        raise IndexError(
            f"no dated row with both TotalDebt and TotalAssets for {ticker}"
        )

    financial_health = usable['TotalDebt'] / usable['TotalAssets']
    return financial_health.values[0]

def estimated_growth(ticker: str):
    """Return the first ``5Y Growth estimate`` value stored for ``ticker``.

    The value is read from
    ``Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv`` (path
    relative to the current working directory), using the first CSV column as
    the index.

    Args:
        ticker: Symbol used to build the CSV path.

    Returns:
        The entry in the first row of the ``5Y Growth estimate`` column.
    """
    csv_path = f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv'
    estimates = pd.read_csv(csv_path, index_col=[0])
    growth_column = estimates['5Y Growth estimate']
    return growth_column.values[0]

def past_performance_earnings(ticker: str):
    """Return the aggregate earnings surprise for ``ticker`` as a percentage.

    Reads ``Companies_Data/{ticker}_Data/{ticker}earnings.csv`` (path relative
    to the current working directory) and compares the summed ``epsActual``
    with the summed ``epsEstimate``.

    Args:
        ticker: Symbol used to build the CSV path.

    Returns:
        ``(actual - estimate) / |estimate| * 100`` rounded to two decimals.
        A positive result means the reported EPS exceeded the estimates.
        A zero estimate total is not handled specially.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If ``epsActual`` or ``epsEstimate`` is missing from the CSV.
    """
    earnings = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}earnings.csv',
        index_col=[0],
    )

    # Compare like with like: a quarter that has only one of the two figures
    # would otherwise add to one total and not the other.
    paired = earnings.dropna(subset=['epsActual', 'epsEstimate'])

    actual_total = paired['epsActual'].sum()
    estimate_total = paired['epsEstimate'].sum()

    # Divide by the magnitude of the estimate so that beating a negative
    # estimate still yields a positive percentage.
    surprise_pct = (actual_total - estimate_total) / abs(estimate_total) * 100

    return round(surprise_pct, 2)

def normalizer():
    """Print the saved indicator table with ``Valuation`` rescaled.

    Loads ``Elaborated_Data/Not_Normalized.csv`` (path relative to the current
    working directory), replaces every ``Valuation`` entry ``v`` with
    ``240 / v`` and prints the resulting DataFrame. Nothing is returned and
    nothing is written back to disk.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the CSV has no ``Valuation`` column.
    """
    not_normalized = pd.read_csv('Elaborated_Data/Not_Normalized.csv')

    # NOTE(review): the meaning of the 240 numerator is not documented in
    # this file -- confirm before reusing it for other indicators.
    not_normalized['Valuation'] = 240 / not_normalized['Valuation']

    # TODO: min-max scaling to 0..100 was drafted for 'Valuation',
    # 'Financial Health' and 'Estimated Growth' but never enabled. The draft
    # wrote `(x - low) / up - low`; the intended form needs parentheses:
    # `100 * (x - low) / (up - low)`.

    print(not_normalized)

def create_df(companies_list):
    """Collect the four indicators for every ticker and save them as CSV.

    For each entry of ``companies_list`` the PEG ratio, financial health,
    estimated growth and past earnings performance are computed, in that
    order, and the resulting table is written to
    ``Elaborated_Data/Not_Normalized.csv`` (path relative to the current
    working directory).

    Args:
        companies_list: Iterable of ticker symbols.
    """
    column_names = (
        'Ticker',
        'Valuation',
        'Financial Health',
        'Estimated Growth',
        'Past Performance',
    )

    # One tuple per company, laid out in the same order as column_names.
    records = [
        (
            symbol,
            get_peg(symbol),
            get_financial_health(symbol),
            estimated_growth(symbol),
            past_performance_earnings(symbol),
        )
        for symbol in companies_list
    ]

    # Transpose the records into one list per column.
    table = {
        name: [record[position] for record in records]
        for position, name in enumerate(column_names)
    }

    frame = pd.DataFrame(data=table)
    frame.to_csv("Elaborated_Data/Not_Normalized.csv")

def main():
    """Script entry point; currently it only runs ``normalizer()``."""
    # Rebuilding the indicator CSV is disabled here, so normalizer() works
    # on whatever file a previous create_df() run left on disk.
    # create_df(programming_crime_list)
    normalizer()
    # Manual spot checks, with the colour thresholds intended for each index:
    # print(get_peg('GOOGL'))  # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
    # print(get_financial_health('GOOGL'))  # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
    # print(estimated_growth('GOOGL'))  # < 0 (RED); 0 < x < 8% (ORANGE); > 8% (GREEN) -- the original note said "< 8 %" for GREEN, presumably a typo
    # print(past_performance_earnings('GOOGL'), "%")  # -100 < x < 0 (RED); = 0 (ORANGE); 0 < x < 100 (GREEN)

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Indexer da finire 2023-05-15 11:37:16 +00:00			`import sys`
			`sys.path.append('../group-1')`

Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`import pandas as pd`
Indexer da finire 2023-05-15 11:37:16 +00:00			`from scraper.top100_extractor import programming_crime_list`

			`from sklearn import preprocessing`

			`pd.set_option('display.max_rows', 500)`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`def get_peg(ticker: str):`
Indexer da finire 2023-05-15 11:37:16 +00:00			`# Read current ratios .csv. Check if it exists`

Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`current_ratios = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv', index_col=[0])`
Indexer da finire 2023-05-15 11:37:16 +00:00
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`# Convert Object to DateTime`
			`current_ratios['asOfDate'] = pd.to_datetime(current_ratios['asOfDate'])`

			`# Sorting per Date`
			`current_ratios = current_ratios.sort_values('asOfDate', ascending=False)`

			`# Drop NaN pandas values`
			`current_ratios = current_ratios.dropna()`

			`# Take first value (the last peg ratio)`
Indexer da finire 2023-05-15 11:37:16 +00:00			`# If it does not exist, it returns 0`
			`print(ticker)`
			`try:`
			`if len(current_ratios['PegRatio']) > 0:`
			`peg_ratio = current_ratios['PegRatio'].iloc[:1]`
			`else:`
			`return 0.0`
			`except KeyError:`
			`return 0.0`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`return peg_ratio.values[0]`

			`def get_financial_health(ticker: str):`
			`# Read balance sheet .csv`
			`balance_sheet = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv', index_col=[0])`

			`# Convert Object to DateTime`
			`balance_sheet['asOfDate'] = pd.to_datetime(balance_sheet['asOfDate'])`

			`# Sorting per Date`
			`balance_sheet = balance_sheet.sort_values('asOfDate', ascending=False)`

			`# Drop NaN pandas values`
			`balance_sheet = balance_sheet.dropna()`

			`# Create financial health column`
Indexer da finire 2023-05-15 11:37:16 +00:00			`try:`
			`balance_sheet['financial_health'] = balance_sheet['TotalDebt'] / balance_sheet['TotalAssets']`
			`except KeyError:`
			`return "NoDebt"`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`# Get financial health`
Indexer da finire 2023-05-15 11:37:16 +00:00			`financial_health = balance_sheet['financial_health'].iloc[:1]`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`return financial_health.values[0]`

			`def estimated_growth(ticker: str):`
Indexer finished with the last index, on monday we will discuss the implementation of the main dashboard with these indexes 2023-05-13 14:52:31 +00:00			`# Read 5 years growth estimates`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`growth_estimated = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv', index_col=[0])['5Y Growth estimate'].values[0]`
Indexer da finire 2023-05-15 11:37:16 +00:00
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`return growth_estimated`

Indexer finished with the last index, on monday we will discuss the implementation of the main dashboard with these indexes 2023-05-13 14:52:31 +00:00			`def past_performance_earnings(ticker: str):`
			`# Read earnings csv`
			`earnings = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}earnings.csv', index_col=[0])`
Indexer da finire 2023-05-15 11:37:16 +00:00
Indexer finished with the last index, on monday we will discuss the implementation of the main dashboard with these indexes 2023-05-13 14:52:31 +00:00			`# Performance`
			`performance_index = round((earnings['epsActual'].sum() - earnings['epsEstimate'].sum()) / earnings['epsEstimate'].sum() * 100, 2)`

			`return performance_index`

Indexer da finire 2023-05-15 11:37:16 +00:00			`def normalizer():`
			`# Read Not_normalized .csv`
			`not_normalized = pd.read_csv('Elaborated_Data/Not_Normalized.csv')`

			`# Takes values for Valuation and compute normalization`
			`v_low, v_up = not_normalized['Valuation'].min(), not_normalized['Valuation'].max()`
			`# v_values = (100 - 0) * ((not_normalized['Valuation'] - v_low) / v_up - v_low) + 0`
			`v_values = 240 / not_normalized['Valuation']`
			`not_normalized['Valuation'] = v_values`

			`# # Takes values for financial health and compute normalization`
			`# fh_low, fh_up = not_normalized['Financial Health'],min(), not_normalized['Financial Health'].max()`
			`# fh_values = (100 - 0) * ((not_normalized['Financial Health'] - fh_low) / fh_up - fh_low) + 0`
			`# not_normalized['Financial Health'] = fh_values`

			`# eg_low, eg_up = not_normalized['Estimated Growth'],min(), not_normalized['Estimated Growth'].max()`
			`# eg_values = (100 - 0) * ((not_normalized['Financial Health'] - fh_low) / fh_up - fh_low) + 0`

			`print(not_normalized)`

			`def create_df(companies_list):`
			`# Dictionary`
			`d = {`
			`'Ticker': [],`
			`'Valuation' : [],`
			`'Financial Health': [],`
			`'Estimated Growth': [],`
			`'Past Performance': []`
			`}`

			`# Loop to get all the data`
			`for company in companies_list:`
			`d['Ticker'].append(company)`
			`d['Valuation'].append(get_peg(company))`
			`d['Financial Health'].append(get_financial_health(company))`
			`d['Estimated Growth'].append(estimated_growth(company))`
			`d['Past Performance'].append(past_performance_earnings(company))`

			`# Dataframe`
			`df = pd.DataFrame(data=d)`

			`# Save to csv`
			`df.to_csv("Elaborated_Data/Not_Normalized.csv")`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
Indexer da finire 2023-05-15 11:37:16 +00:00			`def main():`
			`# create_df(programming_crime_list)`
			`normalizer()`
			`# print(get_peg('GOOGL')) # < 1 ( GREEN); > 1 (RED); = 1 (ORANGE)`
			`# print(get_financial_health('GOOGL')) # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)`
			`# print(estimated_growth('GOOGL')) # < 0 (RED); 0 < x < 8% (ORANGE); < 8 % (GREEN)`
			`# print(past_performance_earnings('GOOGL'), "%") # -100 < x < 0 (RED); = 0 (ORANGE); 0 < x < 100 (GREEN)`

			`if __name__ == '__main__':`
			`main()`