"""Build per-company metric tables from previously downloaded CSV files.

Reads the per-ticker files under ``Companies_Data/`` and the shared
``Elaborated_Data/eps_comparison.csv``, then writes three CSV files into
``Elaborated_Data/``: ``Not_Normalized.csv``, ``employees_over_time.csv``
and ``normalized_data.csv``.
"""
import math
import os
import random
import sys

import numpy as np
import pandas as pd

# The project package is resolved relative to the working directory, so the
# search path has to be extended *before* the project import below.
sys.path.append('../group-1')

from scraper.top100_extractor import programming_crime_list  # noqa: E402

pd.set_option('display.max_rows', 500)


def get_peg(ticker: str) -> float:
    """Return the most recent PEG ratio stored for *ticker*.

    Rows containing a missing value in any column are discarded before the
    latest row (by ``asOfDate``) is selected.

    Returns:
        The ``PegRatio`` of the newest remaining row, or ``0.0`` when the
        column is absent or no complete row is left.
    """
    current_ratios = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv',
        index_col=[0],
    )
    current_ratios['asOfDate'] = pd.to_datetime(current_ratios['asOfDate'])
    current_ratios = current_ratios.sort_values('asOfDate', ascending=False)
    current_ratios = current_ratios.dropna()

    try:
        peg_column = current_ratios['PegRatio']
    except KeyError:
        return 0.0

    if peg_column.empty:
        return 0.0
    # Rows are sorted newest first, so position 0 is the latest ratio.
    return peg_column.iloc[0]


def get_financial_health(ticker: str) -> float:
    """Return the latest ``TotalAssets / TotalDebt`` ratio for *ticker*.

    Rows containing a missing value in any column are discarded before the
    latest row (by ``asOfDate``) is selected.

    Returns:
        The ratio of the newest remaining row, or ``2.0`` when either
        column is absent or no complete row is left.
    """
    balance_sheet = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}_balance_sheet_4Y+4Q.csv',
        index_col=[0],
    )
    balance_sheet['asOfDate'] = pd.to_datetime(balance_sheet['asOfDate'])
    balance_sheet = balance_sheet.sort_values('asOfDate', ascending=False)
    balance_sheet = balance_sheet.dropna()

    try:
        balance_sheet['financial_health'] = (
            balance_sheet['TotalAssets'] / balance_sheet['TotalDebt']
        )
    except KeyError:
        return 2.0

    # dropna() can remove every row; fall back to the same default used for
    # missing columns instead of failing with IndexError on the lookup below.
    if balance_sheet.empty:
        return 2.0
    return balance_sheet['financial_health'].iloc[0]


def estimated_growth(ticker: str):
    """Return the first ``5Y Growth estimate`` cell stored for *ticker*.

    The value is returned exactly as read from the CSV; ``normalizer``
    later strips a ``%`` sign from this column, so it is presumably a
    percentage string.
    """
    growth_df = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv',
        index_col=[0],
    )
    return growth_df['5Y Growth estimate'].values[0]


def employees_over_time(ticker: str) -> list:
    """Return a synthetic 12-point employee-count history for *ticker*.

    Only the last element is real data (``number_employees`` from the
    company's metadata file). The eleven earlier points are fabricated:
    walking backwards from the real value, each point is the following one
    plus a uniform random change between -20% and +10% of it. The list is
    returned oldest first.
    """
    employee_df = pd.read_csv(
        f'Companies_Data/{ticker}_Data/{ticker}meta_data.csv'
    )
    employee_number = employee_df.at[0, 'number_employees']

    headcounts = [employee_number]
    for _ in range(11):
        latest = headcounts[-1]
        headcounts.append(
            latest + random.uniform(-0.2 * latest, 0.1 * latest)
        )
    # Values were generated newest -> oldest; flip to chronological order.
    headcounts.reverse()
    return headcounts


def past_performance_earnings(ticker: str) -> float:
    """Return the mean ``epsDifferential`` of *ticker*'s rows.

    The rows come from ``Elaborated_Data/eps_comparison.csv``; with no
    matching row the pandas mean of an empty selection (NaN) is returned.
    """
    earnings = pd.read_csv('Elaborated_Data/eps_comparison.csv', index_col=[0])
    selected_rows = earnings[earnings['Ticker'] == ticker]
    return selected_rows['epsDifferential'].mean()


def _logistic_score(values, steepness):
    """Map *values* onto (0, 200) with a logistic curve centred on their mean."""
    return 200 / (1 + math.e ** (-steepness * (values - values.mean())))


def normalizer():
    """Rescale the columns of ``Not_Normalized.csv`` to 0-200 style scores.

    Each metric uses its own curve:

    * Valuation: ``200 / (1 + v**2 / 9)``.
    * Financial Health: ``200 - 200 * e**(-0.138 * fh)``.
    * Estimated Growth: ``%`` stripped, then a logistic curve around the
      column mean (steepness 0.1), rounded to two decimals.
    * Past Performance: logistic curve around the column mean
      (steepness 0.05).

    The result is written to ``Elaborated_Data/normalized_data.csv``.
    """
    not_normalized = pd.read_csv('Elaborated_Data/Not_Normalized.csv')

    valuation = not_normalized['Valuation']
    not_normalized['Valuation'] = 200 / (1 + (1 / 9 * valuation ** 2))

    financial_health = not_normalized['Financial Health']
    not_normalized['Financial Health'] = (
        200 - 200 * math.e ** (-0.138 * financial_health)
    )

    # Growth estimates are stored as text with a trailing percent sign.
    not_normalized['Estimated Growth'] = (
        not_normalized['Estimated Growth'].str.strip("%").astype("float")
    )
    not_normalized['Estimated Growth'] = _logistic_score(
        not_normalized['Estimated Growth'], 0.1
    ).round(2)

    not_normalized['Past Performance'] = _logistic_score(
        not_normalized['Past Performance'], 0.05
    )

    # Normalized dataframe consumed by the main page.
    not_normalized.to_csv(r'Elaborated_Data/normalized_data.csv')


def create_df(companies_list):
    """Collect the raw metrics of every ticker and store them as CSV files.

    Writes ``Elaborated_Data/Not_Normalized.csv`` (one row per ticker with
    Valuation, Financial Health, Estimated Growth and Past Performance) and
    ``Elaborated_Data/employees_over_time.csv`` (one row per ticker with
    its employee-count list).

    Args:
        companies_list: Iterable of ticker symbols to process.
    """
    metrics = {
        'Ticker': [],
        'Valuation': [],
        'Financial Health': [],
        'Estimated Growth': [],
        'Past Performance': [],
    }
    employees = {
        'Ticker': [],
        'Employees_over_time': [],
    }

    for company in companies_list:
        metrics['Ticker'].append(company)
        metrics['Valuation'].append(get_peg(company))
        metrics['Financial Health'].append(get_financial_health(company))
        metrics['Estimated Growth'].append(estimated_growth(company))
        metrics['Past Performance'].append(past_performance_earnings(company))

        employees['Ticker'].append(company)
        employees['Employees_over_time'].append(employees_over_time(company))

    pd.DataFrame(data=metrics).to_csv("Elaborated_Data/Not_Normalized.csv")
    pd.DataFrame(data=employees).to_csv(
        r"Elaborated_Data/employees_over_time.csv"
    )


def main():
    """Create the output directory, build the raw tables, then normalize."""
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(r"Elaborated_Data", exist_ok=True)
    create_df(programming_crime_list)
    normalizer()


if __name__ == '__main__':
    main()