146 lines
5 KiB
Python
146 lines
5 KiB
Python
import sys
|
|
sys.path.append('../VISUAL-AN-PROJECT')
|
|
import math
|
|
import pandas as pd
|
|
import os
|
|
from scraper.top100_extractor import programming_crime_list
|
|
import numpy as np
|
|
from sklearn import preprocessing
|
|
|
|
|
|
|
|
# Let pandas display up to 500 rows before truncating printed DataFrames.
pd.set_option('display.max_rows', 500)
|
|
|
|
def get_peg(ticker: str) -> float:
    """Return the most recent PEG ratio stored for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv``, orders
    the rows by ``asOfDate`` (newest first) and returns the ``PegRatio`` of the
    newest row that actually has one.

    Args:
        ticker: Symbol used to build the CSV path.

    Returns:
        The latest available ``PegRatio`` value, or ``0.0`` when the file has
        no ``PegRatio`` column or no row with both a date and a PEG value.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the CSV has no ``asOfDate`` column.
    """
    current_ratios = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv',
        index_col=[0],
    )

    # Parse the dates so that sorting is chronological, not lexicographic.
    current_ratios['asOfDate'] = pd.to_datetime(current_ratios['asOfDate'])

    if 'PegRatio' not in current_ratios.columns:
        return 0.0

    # Only discard rows that lack the values we need.  A blanket dropna()
    # would also throw away the newest row whenever any unrelated ratio
    # column is empty, silently returning a stale PEG (or 0.0).
    peg_by_date = (
        current_ratios
        .dropna(subset=['asOfDate', 'PegRatio'])
        .sort_values('asOfDate', ascending=False)['PegRatio']
    )

    if peg_by_date.empty:
        return 0.0

    return peg_by_date.iloc[0]
|
|
|
|
def get_financial_health(ticker: str) -> float:
    """Return the latest ``TotalDebt / TotalAssets`` ratio for ``ticker``.

    Reads ``Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv``,
    orders the rows by ``asOfDate`` (newest first) and divides ``TotalDebt`` by
    ``TotalAssets`` for the newest row that has both values.

    Args:
        ticker: Symbol used to build the CSV path.

    Returns:
        The debt-to-assets ratio of the most recent usable row, or the
        fallback ``2.0`` when the ratio cannot be computed: a required column
        is missing, no row has both values, or ``TotalAssets`` is zero.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the CSV has no ``asOfDate`` column.
    """
    fallback = 2.0

    balance_sheet = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv',
        index_col=[0],
    )

    # Parse the dates so that sorting is chronological, not lexicographic.
    balance_sheet['asOfDate'] = pd.to_datetime(balance_sheet['asOfDate'])

    if not {'TotalDebt', 'TotalAssets'}.issubset(balance_sheet.columns):
        return fallback

    # Filter on the columns we use only.  A blanket dropna() removes every
    # row with a gap in any unrelated column, which can discard the newest
    # statement or leave nothing at all.
    usable = (
        balance_sheet
        .dropna(subset=['asOfDate', 'TotalDebt', 'TotalAssets'])
        .sort_values('asOfDate', ascending=False)
    )

    if usable.empty:
        return fallback

    total_debt = usable['TotalDebt'].iloc[0]
    total_assets = usable['TotalAssets'].iloc[0]

    # A zero denominator would yield inf/nan and distort later averages.
    if total_assets == 0:
        return fallback

    return total_debt / total_assets
|
|
|
|
def estimated_growth(ticker: str):
    """Return the first ``5Y Growth estimate`` entry stored for ``ticker``.

    The value is read from
    ``Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv`` and handed
    back exactly as pandas parsed it.
    NOTE(review): presumably a percentage string such as "12.5%", since
    ``normalizer`` strips "%" from this column - confirm against the scraper.

    Args:
        ticker: Symbol used to build the CSV path.

    Returns:
        The first value of the ``5Y Growth estimate`` column.
    """
    csv_path = f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv'
    estimates = pd.read_csv(csv_path, index_col=[0])
    growth_column = estimates['5Y Growth estimate']
    return growth_column.values[0]
|
|
|
|
def past_performance_earnings(ticker: str) -> float:
    """Return how far actual EPS beat or missed estimates, in percent.

    Reads ``Companies_Data/{ticker}_Data/{ticker}earnings.csv`` and compares
    the sum of ``epsActual`` with the sum of ``epsEstimate``::

        (sum(actual) - sum(estimate)) / abs(sum(estimate)) * 100

    A positive result means the actual figures exceeded the estimates.

    Args:
        ticker: Symbol used to build the CSV path.

    Returns:
        The surprise percentage rounded to two decimals, or ``0.0`` when the
        estimates sum to zero (including when no row has both values), since
        the percentage is undefined in that case.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If ``epsActual`` or ``epsEstimate`` is missing.
    """
    earnings = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}earnings.csv',
        index_col=[0],
    )

    # Compare like with like: a row that has an estimate but no actual value
    # (or the reverse) would otherwise be counted in only one of the sums.
    reported = earnings.dropna(subset=['epsActual', 'epsEstimate'])

    actual_total = reported['epsActual'].sum()
    estimate_total = reported['epsEstimate'].sum()

    # Avoid inf/nan, which would distort the averages computed downstream.
    if estimate_total == 0:
        return 0.0

    # abs() keeps the sign meaningful when the estimates are negative:
    # losing less than expected must count as a positive surprise.
    surprise = (actual_total - estimate_total) / abs(estimate_total) * 100

    return round(float(surprise), 2)
|
|
|
|
def _logistic_score(values, steepness):
    """Map ``values`` onto (0, 200) with a logistic curve centred on their mean.

    A value equal to the mean scores 100.  A positive ``steepness`` makes the
    score rise with the value; a negative one makes it fall.
    """
    return 200 / (1 + np.exp(-steepness * (values - values.mean())))


def normalizer():
    """Rescale the raw score columns and write the normalized table.

    Reads ``Elaborated_Data/Not_Normalized.csv`` and replaces four columns:

    * ``Valuation`` - decreasing logistic score in (0, 200), steepness 0.2,
      so values below the column mean score above 100.
    * ``Financial Health`` - linear rescale that maps the column mean to 80.
      Unlike the other columns, this one is not bounded by 200.
    * ``Estimated Growth`` - any "%" signs are stripped, the values are parsed
      as floats, then scored with an increasing logistic curve
      (steepness 0.08) and rounded to two decimals.
    * ``Past Performance`` - increasing logistic score, steepness 0.08.

    The result is written to ``Elaborated_Data/normalized_data.csv``.

    Raises:
        FileNotFoundError: If ``Not_Normalized.csv`` does not exist.
        KeyError: If one of the four columns is missing.
        ValueError: If an ``Estimated Growth`` entry is not numeric once the
            "%" signs are removed.
    """
    # NOTE(review): create_df writes Not_Normalized.csv with its index, so it
    # is read back here as an extra "Unnamed: 0" column and written out again.
    # Left as-is because readers of normalized_data.csv may rely on that layout.
    not_normalized = pd.read_csv('Elaborated_Data/Not_Normalized.csv')

    # Valuation: lower raw values score higher, hence the negative steepness.
    not_normalized['Valuation'] = _logistic_score(
        not_normalized['Valuation'], -0.2
    )

    # Financial health: scale so that the column average lands on 80.
    health = not_normalized['Financial Health']
    not_normalized['Financial Health'] = (80 / health.mean()) * health

    # Estimated growth: go through str first so the parsing also works when
    # pandas already inferred a numeric dtype (where .str is unavailable).
    growth = (
        not_normalized['Estimated Growth']
        .astype(str)
        .str.strip("%")
        .astype("float")
    )
    not_normalized['Estimated Growth'] = _logistic_score(growth, 0.08).round(2)

    # Past performance: higher raw values score higher.
    not_normalized['Past Performance'] = _logistic_score(
        not_normalized['Past Performance'], 0.08
    )

    # Persist the normalized table for the main page.
    not_normalized.to_csv(r'Elaborated_Data/normalized_data.csv')
|
|
|
|
def create_df(companies_list):
    """Collect the raw metrics of every company and save them as a CSV.

    For each ticker in ``companies_list`` the four metric functions are
    called in order (``get_peg``, ``get_financial_health``,
    ``estimated_growth``, ``past_performance_earnings``).  The resulting
    table, one row per ticker, is written to
    ``Elaborated_Data/Not_Normalized.csv``.

    Args:
        companies_list: Iterable of ticker symbols.
    """
    column_order = [
        'Ticker',
        'Valuation',
        'Financial Health',
        'Estimated Growth',
        'Past Performance',
    ]

    # One record per company; the dict literal keeps the original call order.
    records = [
        {
            'Ticker': ticker,
            'Valuation': get_peg(ticker),
            'Financial Health': get_financial_health(ticker),
            'Estimated Growth': estimated_growth(ticker),
            'Past Performance': past_performance_earnings(ticker),
        }
        for ticker in companies_list
    ]

    # Explicit columns keep the header intact even for an empty input.
    table = pd.DataFrame(records, columns=column_order)
    table.to_csv("Elaborated_Data/Not_Normalized.csv")
|
|
|
|
def main():
    """Build the raw metrics table for the listed companies and normalize it.

    Ensures the ``Elaborated_Data`` output directory exists, writes
    ``Not_Normalized.csv`` through ``create_df`` for every ticker in
    ``programming_crime_list`` and then runs ``normalizer``.
    """
    # exist_ok=True replaces the exists()/mkdir() pair, which could fail if
    # the directory appeared between the check and the creation.
    os.makedirs("Elaborated_Data", exist_ok=True)

    create_df(programming_crime_list)
    normalizer()

    # Colour thresholds noted by the original author for the raw metrics:
    #   get_peg                   : < 1 green, = 1 orange, > 1 red
    #   get_financial_health      : < 1 green, = 1 orange, > 1 red
    #   estimated_growth          : < 0 red, 0 < x < 8% orange, > 8% green
    #   past_performance_earnings : -100 < x < 0 red, = 0 orange,
    #                               0 < x < 100 green
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|