Patch for indexer/indexer.py and scraper/top100_extractor.py.

Reconstruction note: this patch was recovered from a copy whose line breaks
and indentation had been collapsed. Indentation and the position of blank
context lines are inferred (chosen so that every hunk matches the line counts
in its @@ header); verify against the original patch before applying.

Review notes on the added code (the patch content itself is unchanged):
* get_peg() returns 0.0 when the PegRatio column is missing or empty, and
  normalizer() computes 240 / Valuation, so those rows divide by zero.
* get_financial_health() returns the string "NoDebt" on KeyError, so the
  'Financial Health' column can mix strings with numbers.
* create_df() writes Not_Normalized.csv with its index, but normalizer()
  reads it back without index_col, so the index returns as an extra column.
* normalizer() assigns v_low and v_up but never uses them.

diff --git a/indexer/indexer.py b/indexer/indexer.py
index 167a03f..5a8a25b 100644
--- a/indexer/indexer.py
+++ b/indexer/indexer.py
@@ -1,8 +1,18 @@
+import sys
+sys.path.append('../group-1')
+
 import pandas as pd
+from scraper.top100_extractor import programming_crime_list
+
+from sklearn import preprocessing
+
+pd.set_option('display.max_rows', 500)
 
 def get_peg(ticker: str):
-    # Read current ratios .csv
+    # Read current ratios .csv. Check if it exists
+
     current_ratios = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv', index_col=[0])
 
+
     # Convert Object to DateTime
     current_ratios['asOfDate'] = pd.to_datetime(current_ratios['asOfDate'])
@@ -14,7 +24,15 @@ def get_peg(ticker: str):
     current_ratios = current_ratios.dropna()
 
     # Take first value (the last peg ratio)
-    peg_ratio = current_ratios['PegRatio'][:1]
+    # If it does not exist, it returns 0
+    print(ticker)
+    try:
+        if len(current_ratios['PegRatio']) > 0:
+            peg_ratio = current_ratios['PegRatio'].iloc[:1]
+        else:
+            return 0.0
+    except KeyError:
+        return 0.0
 
     return peg_ratio.values[0]
 
@@ -32,31 +50,82 @@ def get_financial_health(ticker: str):
     balance_sheet = balance_sheet.dropna()
 
     # Create financial health column
-    balance_sheet['financial_health'] = balance_sheet['TotalDebt'] / balance_sheet['TotalAssets']
+    try:
+        balance_sheet['financial_health'] = balance_sheet['TotalDebt'] / balance_sheet['TotalAssets']
+    except KeyError:
+        return "NoDebt"
 
     # Get financial health
-    financial_health = balance_sheet['financial_health'][:1]
+    financial_health = balance_sheet['financial_health'].iloc[:1]
 
     return financial_health.values[0]
 
 def estimated_growth(ticker: str):
     # Read 5 years growth estimates
     growth_estimated = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv', index_col=[0])['5Y Growth estimate'].values[0]
-    
+
     return growth_estimated
 
 def past_performance_earnings(ticker: str):
     # Read earnings csv
     earnings = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}earnings.csv', index_col=[0])
-    
+
     # Performance
     performance_index = round((earnings['epsActual'].sum() - earnings['epsEstimate'].sum()) / earnings['epsEstimate'].sum() * 100, 2)
 
     return performance_index
 
 
-if __name__ == '__main__':
-    print(get_peg('GOOGL')) # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
-    print(get_financial_health('GOOGL')) # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
-    print(estimated_growth('GOOGL')) # < 0 (RED); 0 < x < 8% (ORANGE); < 8 % (GREEN)
-    print(past_performance_earnings('GOOGL'), "%") # -100 < x < 0 (RED); = 0 (ORANGE); 0 < x < 100 (GREEN)
+def normalizer():
+    # Read Not_normalized .csv
+    not_normalized = pd.read_csv('Elaborated_Data/Not_Normalized.csv')
+    # Takes values for Valuation and compute normalization
+    v_low, v_up = not_normalized['Valuation'].min(), not_normalized['Valuation'].max()
+    # v_values = (100 - 0) * ((not_normalized['Valuation'] - v_low) / v_up - v_low) + 0
+    v_values = 240 / not_normalized['Valuation']
+    not_normalized['Valuation'] = v_values
+
+    # # Takes values for financial health and compute normalization
+    # fh_low, fh_up = not_normalized['Financial Health'],min(), not_normalized['Financial Health'].max()
+    # fh_values = (100 - 0) * ((not_normalized['Financial Health'] - fh_low) / fh_up - fh_low) + 0
+    # not_normalized['Financial Health'] = fh_values
+
+    # eg_low, eg_up = not_normalized['Estimated Growth'],min(), not_normalized['Estimated Growth'].max()
+    # eg_values = (100 - 0) * ((not_normalized['Financial Health'] - fh_low) / fh_up - fh_low) + 0
+
+    print(not_normalized)
+
+def create_df(companies_list):
+    # Dictionary
+    d = {
+        'Ticker': [],
+        'Valuation' : [],
+        'Financial Health': [],
+        'Estimated Growth': [],
+        'Past Performance': []
+    }
+
+    # Loop to get all the data
+    for company in companies_list:
+        d['Ticker'].append(company)
+        d['Valuation'].append(get_peg(company))
+        d['Financial Health'].append(get_financial_health(company))
+        d['Estimated Growth'].append(estimated_growth(company))
+        d['Past Performance'].append(past_performance_earnings(company))
+
+    # Dataframe
+    df = pd.DataFrame(data=d)
+
+    # Save to csv
+    df.to_csv("Elaborated_Data/Not_Normalized.csv")
+
+def main():
+    # create_df(programming_crime_list)
+    normalizer()
+    # print(get_peg('GOOGL')) # < 1 ( GREEN); > 1 (RED); = 1 (ORANGE)
+    # print(get_financial_health('GOOGL')) # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
+    # print(estimated_growth('GOOGL')) # < 0 (RED); 0 < x < 8% (ORANGE); < 8 % (GREEN)
+    # print(past_performance_earnings('GOOGL'), "%") # -100 < x < 0 (RED); = 0 (ORANGE); 0 < x < 100 (GREEN)
+
+if __name__ == '__main__':
+    main()
diff --git a/scraper/top100_extractor.py b/scraper/top100_extractor.py
index 55e66e4..5eea5ef 100644
--- a/scraper/top100_extractor.py
+++ b/scraper/top100_extractor.py
@@ -7,7 +7,6 @@ programming_crime_list = [
     'AMAT',
     'AMGN',
     'AMZN',
-    'ANTM',
     'APD',
     'AVGO',
     'BA',