va-project/indexer/indexer.py

import sys
sys.path.append('../group-1')
import math
import pandas as pd
import os
from scraper.top100_extractor import programming_crime_list
import numpy as np 
from sklearn import preprocessing
import random


# Let pandas display up to 500 rows before it switches to a truncated view.
pd.set_option('display.max_rows', 500)

def get_peg(ticker: str):
    """Return the most recent PEG ratio stored for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv``, orders
    the rows from newest to oldest by ``asOfDate`` and discards every row that
    has a missing value in *any* column.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        The ``PegRatio`` of the newest remaining row, or ``0.0`` when the
        column is absent or no row survives the missing-value filter.
    """
    csv_path = f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv'
    ratios = pd.read_csv(csv_path, index_col=[0])

    # Parse the dates so the ordering is chronological rather than textual.
    ratios['asOfDate'] = pd.to_datetime(ratios['asOfDate'])
    ratios = ratios.sort_values('asOfDate', ascending=False).dropna()

    try:
        peg_column = ratios['PegRatio']
    except KeyError:
        # The CSV carries no PEG column at all.
        return 0.0

    if peg_column.empty:
        return 0.0

    # Rows are sorted newest first, so position 0 is the latest value.
    return peg_column.values[0]


def get_financial_health(ticker: str):
    """Return the latest debt-to-assets ratio stored for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv``,
    orders the rows from newest to oldest by ``asOfDate``, discards every row
    that has a missing value in any column and divides ``TotalDebt`` by
    ``TotalAssets`` on the newest remaining row.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        ``TotalDebt / TotalAssets`` of the newest complete row, or the
        fallback value ``2.0`` when either column is missing or when no
        complete row is left (previously the latter raised ``IndexError``).
    """
    csv_path = f'Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv'
    balance_sheet = pd.read_csv(csv_path, index_col=[0])

    # Parse the dates so the ordering is chronological rather than textual.
    balance_sheet['asOfDate'] = pd.to_datetime(balance_sheet['asOfDate'])
    balance_sheet = balance_sheet.sort_values('asOfDate', ascending=False).dropna()

    try:
        debt_to_assets = balance_sheet['TotalDebt'] / balance_sheet['TotalAssets']
    except KeyError:
        # One of the two required columns is not in the CSV.
        return 2.0

    # dropna() may have removed every row; use the same fallback instead of
    # crashing on an out-of-range index.
    if debt_to_assets.empty:
        return 2.0

    # Rows are sorted newest first, so position 0 is the latest ratio.
    return debt_to_assets.values[0]


def estimated_growth(ticker: str):
    """Return the first ``5Y Growth estimate`` entry stored for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv`` with
    its first column as the index.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        The first value of the ``5Y Growth estimate`` column, exactly as
        pandas parsed it (no '%' stripping or numeric conversion happens here).
    """
    csv_path = f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv'
    estimates = pd.read_csv(csv_path, index_col=[0])
    growth_column = estimates['5Y Growth estimate']
    return growth_column.values[0]


def employees_over_time(ticker: str):
    """Build a synthetic 12-point headcount history for ``ticker``.

    Only the current headcount is real: it is read from the first row of
    ``Companies_Data/{ticker}_Data/{ticker}meta_data.csv`` (column
    ``number_employees``). The 11 earlier points are fabricated by walking
    backwards from it, each step adding a uniform random offset between
    -20% and +10% of the previous point, so the series tends to grow over
    time with some random fluctuation.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        A list of 12 values ordered oldest first; the last element is the
        real headcount read from the CSV.
    """
    meta_data = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}meta_data.csv')
    current_headcount = meta_data.at[0, 'number_employees']

    # Generated newest -> oldest; flipped before returning.
    history = [current_headcount]
    for _ in range(11):
        newer = history[-1]
        history.append(newer + random.uniform(-0.2 * newer, 0.1 * newer))

    return history[::-1]


def past_performance_earnings(ticker: str):
    """Return how far actual EPS beat (or missed) the estimates, in percent.

    Reads ``Companies_Data/{ticker}_Data/{ticker}earnings.csv`` and compares
    the summed ``epsActual`` column with the summed ``epsEstimate`` column.

    Args:
        ticker: Company symbol used to build the CSV path.

    Returns:
        ``(sum(actual) - sum(estimate)) / sum(estimate) * 100`` rounded to
        two decimals; positive when the actual figures exceed the estimates.
    """
    earnings = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}earnings.csv', index_col=[0])

    actual_total = earnings['epsActual'].sum()
    estimate_total = earnings['epsEstimate'].sum()

    surprise_pct = (actual_total - estimate_total) / estimate_total * 100
    return round(surprise_pct, 2)


def normalizer():
    """Rescale the raw indicator columns and save them for the main page.

    Reads ``Elaborated_Data/Not_Normalized.csv``, overwrites four columns and
    writes the result to ``Elaborated_Data/normalized_data.csv``:

    * ``Valuation``: ``200 / (1 + e**(0.2 * (value - mean)))``; the column
      mean maps to 100 and larger values get lower scores.
    * ``Financial Health``: linear scaling ``value * 80 / mean``; the column
      mean maps to 80 and the result is not bounded to 0..200.
    * ``Estimated Growth``: '%' is stripped and the text cast to float, then
      ``200 / (1 + e**(0.08 * (mean - value)))`` rounded to two decimals.
    * ``Past Performance``: same curve as the growth column, not rounded.
    """

    def _logistic_200(exponent):
        # 200 / (1 + e**x): yields 100 at x == 0 and stays between 0 and 200.
        return 200 / (1 + math.e ** exponent)

    table = pd.read_csv('Elaborated_Data/Not_Normalized.csv')

    # Valuation: values above the mean are pushed below 100.
    valuation = table['Valuation']
    table['Valuation'] = _logistic_200(0.2 * (-valuation.mean() + valuation))

    # Financial health: plain proportional scaling around the mean.
    health = table['Financial Health']
    table['Financial Health'] = (80 / health.mean()) * health

    # Estimated growth is stored as text such as "12.5%"; make it numeric first.
    table['Estimated Growth'] = table['Estimated Growth'].str.strip("%").astype("float")
    growth = table['Estimated Growth']
    table['Estimated Growth'] = _logistic_200(0.08 * (growth.mean() - growth)).round(2)

    # Past performance: values above the mean are pushed above 100.
    performance = table['Past Performance']
    table['Past Performance'] = _logistic_200(0.08 * (performance.mean() - performance))

    table.to_csv(r'Elaborated_Data/normalized_data.csv')

def create_df(companies_list):
    """Collect the raw indicators of every company and dump them to CSV.

    For each ticker the four indicator functions and the employee-history
    generator are called in a fixed order. Two files are written once all
    companies have been processed:

    * ``Elaborated_Data/Not_Normalized.csv`` -- one row per ticker with the
      columns ``Valuation``, ``Financial Health``, ``Estimated Growth`` and
      ``Past Performance``;
    * ``Elaborated_Data/employees_over_time.csv`` -- one row per ticker with
      its ``Employees_over_time`` list.

    Args:
        companies_list: Iterable of ticker symbols.
    """
    tickers = []
    valuations = []
    health_ratios = []
    growth_estimates = []
    performances = []
    headcount_histories = []

    for ticker in companies_list:
        tickers.append(ticker)
        valuations.append(get_peg(ticker))
        health_ratios.append(get_financial_health(ticker))
        growth_estimates.append(estimated_growth(ticker))
        performances.append(past_performance_earnings(ticker))
        headcount_histories.append(employees_over_time(ticker))

    indicators = pd.DataFrame(data={
        'Ticker': tickers,
        'Valuation': valuations,
        'Financial Health': health_ratios,
        'Estimated Growth': growth_estimates,
        'Past Performance': performances,
    })
    indicators.to_csv("Elaborated_Data/Not_Normalized.csv")

    employees = pd.DataFrame(data={
        'Ticker': list(tickers),
        'Employees_over_time': headcount_histories,
    })
    employees.to_csv(r"Elaborated_Data/employees_over_time.csv")


def main():
    """Run the indexing pipeline.

    Makes sure the ``Elaborated_Data`` output directory exists, builds the
    raw indicator CSVs for every ticker in ``programming_crime_list`` and
    then writes the normalized CSV derived from them.
    """
    # exist_ok=True keeps the call idempotent and avoids the gap between
    # checking for the directory and creating it.
    os.makedirs(r"Elaborated_Data", exist_ok=True)

    create_df(programming_crime_list)
    normalizer()

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Indexer da finire 2023-05-15 11:37:16 +00:00			`import sys`
Primi due csv per i grafici delle comparisons + modifiche a indexer.py + creazione comparison_indexer.py per la creazione dei csvdelle comparisons 2023-05-17 11:42:56 +00:00			`sys.path.append('../group-1')`
normalized_data 2023-05-15 17:16:50 +00:00			`import math`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`import pandas as pd`
normalized_data 2023-05-15 17:16:50 +00:00			`import os`
Indexer da finire 2023-05-15 11:37:16 +00:00			`from scraper.top100_extractor import programming_crime_list`
normalized_data 2023-05-15 17:16:50 +00:00			`import numpy as np`
Indexer da finire 2023-05-15 11:37:16 +00:00			`from sklearn import preprocessing`
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`import random`
normalized_data 2023-05-15 17:16:50 +00:00

Indexer da finire 2023-05-15 11:37:16 +00:00			`pd.set_option('display.max_rows', 500)`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`def get_peg(ticker: str):`
Indexer da finire 2023-05-15 11:37:16 +00:00
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`current_ratios = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv', index_col=[0]) #Read current ratios .csv. Check if it exists`
Indexer da finire 2023-05-15 11:37:16 +00:00
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`current_ratios['asOfDate'] = pd.to_datetime(current_ratios['asOfDate']) #Convert Object to DateTime`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`current_ratios = current_ratios.sort_values('asOfDate', ascending=False) # Sorting per Date`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`current_ratios = current_ratios.dropna()`

			`# Take first value (the last peg ratio)`
Indexer da finire 2023-05-15 11:37:16 +00:00			`# If it does not exist, it returns 0`
			`try:`
			`if len(current_ratios['PegRatio']) > 0:`
			`peg_ratio = current_ratios['PegRatio'].iloc[:1]`
			`else:`
			`return 0.0`
			`except KeyError:`
			`return 0.0`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`return peg_ratio.values[0]`

added eployees data + general fixs 2023-05-19 21:17:19 +00:00

Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`def get_financial_health(ticker: str):`
added eployees data + general fixs 2023-05-19 21:17:19 +00:00
			`balance_sheet = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv', index_col=[0]) # Read balance sheet .csv`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`balance_sheet['asOfDate'] = pd.to_datetime(balance_sheet['asOfDate']) # Convert Object to DateTime`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`balance_sheet = balance_sheet.sort_values('asOfDate', ascending=False) # Sorting per Date`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`balance_sheet = balance_sheet.dropna()`

			`# Create financial health column`
Indexer da finire 2023-05-15 11:37:16 +00:00			`try:`
			`balance_sheet['financial_health'] = balance_sheet['TotalDebt'] / balance_sheet['TotalAssets']`
			`except KeyError:`
normalized_data 2023-05-15 17:16:50 +00:00			`return 2.0`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`# Get financial health`
Indexer da finire 2023-05-15 11:37:16 +00:00			`financial_health = balance_sheet['financial_health'].iloc[:1]`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
			`return financial_health.values[0]`

added eployees data + general fixs 2023-05-19 21:17:19 +00:00

Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`def estimated_growth(ticker: str):`
added eployees data + general fixs 2023-05-19 21:17:19 +00:00
			`growth_estimated = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv', index_col=[0])['5Y Growth estimate'].values[0] # Read 5 years growth estimates`
Indexer da finire 2023-05-15 11:37:16 +00:00
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00			`return growth_estimated`

added eployees data + general fixs 2023-05-19 21:17:19 +00:00
			`def employees_over_time(ticker: str):`

			`employee_df = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}meta_data.csv') #get df to retrieve employee number of the company`

			`employee_number = employee_df.at[0, 'number_employees']`

			`lst = [employee_number] #What does this loop do? --> you start from the actual value of employee number of the company and then create absolutely false values for`
			`# previous years, using uniform distribution subtraction with the number at i. This makes so that the trend, once you reverse the list, is`
			`# growing over the years with some random fluctuations (just like how the number of employees grows over time, it's not like y=x)`

			`for i in range(0, 11):`

			`lst.append(lst[i] + random.uniform(-0.2*lst[i], 0.1*lst[i]))`

			`lst.reverse()`

			`return lst`


Indexer finished with the last index, on monday we will discuss the implementation of the main dashboard with these indexes 2023-05-13 14:52:31 +00:00			`def past_performance_earnings(ticker: str):`
added eployees data + general fixs 2023-05-19 21:17:19 +00:00
			`earnings = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}earnings.csv', index_col=[0]) # Read earnings csv`
Indexer da finire 2023-05-15 11:37:16 +00:00
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`performance_index = round((earnings['epsActual'].sum() - earnings['epsEstimate'].sum()) / earnings['epsEstimate'].sum() * 100, 2) #Performance`
Indexer finished with the last index, on monday we will discuss the implementation of the main dashboard with these indexes 2023-05-13 14:52:31 +00:00
			`return performance_index`

added eployees data + general fixs 2023-05-19 21:17:19 +00:00

Indexer da finire 2023-05-15 11:37:16 +00:00			`def normalizer():`
Indexer cleaned 2023-05-17 08:39:32 +00:00			`''' Normalize the dataframe columns to a range between 0 and 200'''`

added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`not_normalized = pd.read_csv('Elaborated_Data/Not_Normalized.csv') # Read Not_normalized .csv`
normalized_data 2023-05-15 17:16:50 +00:00
			`v_values = (200/(1+math.e**( 0.2*(-not_normalized['Valuation'].mean()+not_normalized['Valuation'])))) #VALUATION STAT`
Indexer da finire 2023-05-15 11:37:16 +00:00			`not_normalized['Valuation'] = v_values`

normalized_data 2023-05-15 17:16:50 +00:00			`fh_values= (80/not_normalized['Financial Health'].mean())*not_normalized['Financial Health'] #FINANCIAL HEALTH STAT`
			`not_normalized['Financial Health'] = fh_values`

			`not_normalized['Estimated Growth'] = not_normalized['Estimated Growth'].str.strip("%").astype("float")`
			`eg_values= (200/(1+math.e**( 0.08*(not_normalized['Estimated Growth'].mean()-not_normalized['Estimated Growth'])))) #ESTIMATED GROWTH STAT`
			`for i in range(len(eg_values)):`
			`eg_values[i] = float(round(eg_values[i],2))`
			`not_normalized['Estimated Growth']= eg_values`
Indexer da finire 2023-05-15 11:37:16 +00:00
normalized_data 2023-05-15 17:16:50 +00:00			`pf_values = (200/(1+math.e**( 0.08*(not_normalized['Past Performance'].mean()-not_normalized['Past Performance'])))) #PAST PERFORMANCE`
			`not_normalized['Past Performance'] = pf_values`
Indexer da finire 2023-05-15 11:37:16 +00:00
Indexer cleaned 2023-05-17 08:39:32 +00:00			`# Create normalized dataframe for main page`
normalized_data 2023-05-15 17:16:50 +00:00			`not_normalized.to_csv(r'Elaborated_Data/normalized_data.csv')`
Indexer da finire 2023-05-15 11:37:16 +00:00
			`def create_df(companies_list):`
added eployees data + general fixs 2023-05-19 21:17:19 +00:00
Indexer da finire 2023-05-15 11:37:16 +00:00			`d = {`
			`'Ticker': [],`
			`'Valuation' : [],`
			`'Financial Health': [],`
			`'Estimated Growth': [],`
			`'Past Performance': []`
			`}`

added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`d_emp = {`
			`'Ticker': [],`
			`'Employees_over_time': []`
			`}`

Indexer da finire 2023-05-15 11:37:16 +00:00			`for company in companies_list:`
			`d['Ticker'].append(company)`
			`d['Valuation'].append(get_peg(company))`
			`d['Financial Health'].append(get_financial_health(company))`
			`d['Estimated Growth'].append(estimated_growth(company))`
			`d['Past Performance'].append(past_performance_earnings(company))`
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`d_emp['Ticker'].append(company)`
			`d_emp['Employees_over_time'].append(employees_over_time(company))`
Indexer da finire 2023-05-15 11:37:16 +00:00
			`df = pd.DataFrame(data=d)`
			`df.to_csv("Elaborated_Data/Not_Normalized.csv")`
Indexer con peg ratio, financial health, estimated_growth, manca past_performance 2023-05-10 11:55:07 +00:00
added eployees data + general fixs 2023-05-19 21:17:19 +00:00			`df_employees = pd.DataFrame(data=d_emp)`
			`df_employees.to_csv(r"Elaborated_Data/employees_over_time.csv")`



Indexer da finire 2023-05-15 11:37:16 +00:00			`def main():`
added eployees data + general fixs 2023-05-19 21:17:19 +00:00
normalized_data 2023-05-15 17:16:50 +00:00			`if not os.path.exists(r"Elaborated_Data"):`
			`os.mkdir(r"Elaborated_Data")`

			`create_df(programming_crime_list)`
Indexer da finire 2023-05-15 11:37:16 +00:00			`normalizer()`

			`if __name__ == '__main__':`
			`main()`
added eployees data + general fixs 2023-05-19 21:17:19 +00:00