Indexer da finire

2023-05-15 13:37:16 +02:00 · 2023-05-15 13:37:16 +02:00 · f7a1e1c2a2
commit f7a1e1c2a2
parent c666126661
2 changed files with 80 additions and 12 deletions
--- a/indexer/indexer.py
+++ b/indexer/indexer.py
@ -1,8 +1,18 @@
+import sys
+sys.path.append('../group-1')
+
 import pandas as pd
+from scraper.top100_extractor import programming_crime_list
+
+from sklearn import preprocessing
+
+pd.set_option('display.max_rows', 500)

 def get_peg(ticker: str):
-    # Read current ratios .csv
+    # Read current ratios .csv. Check if it exists
+    
    current_ratios = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}_current_ratios.csv', index_col=[0])
+    

    # Convert Object to DateTime
    current_ratios['asOfDate'] = pd.to_datetime(current_ratios['asOfDate'])
@ -14,7 +24,15 @@ def get_peg(ticker: str):
    current_ratios = current_ratios.dropna()

    # Take first value (the last peg ratio)
-    peg_ratio = current_ratios['PegRatio'][:1]
+    # If it does not exist, it returns 0
+    print(ticker)
+    try:
+        if len(current_ratios['PegRatio']) > 0:
+            peg_ratio = current_ratios['PegRatio'].iloc[:1]
+        else:
+            return 0.0
+    except KeyError:
+        return 0.0
    
    return peg_ratio.values[0]

@ -32,31 +50,82 @@ def get_financial_health(ticker: str):
    balance_sheet = balance_sheet.dropna()
    
    # Create financial health column
-    balance_sheet['financial_health'] = balance_sheet['TotalDebt'] / balance_sheet['TotalAssets'] 
+    try:
+        balance_sheet['financial_health'] = balance_sheet['TotalDebt'] / balance_sheet['TotalAssets'] 
+    except KeyError:
+        return "NoDebt"

    # Get financial health
-    financial_health = balance_sheet['financial_health'][:1]
+    financial_health = balance_sheet['financial_health'].iloc[:1]

    return financial_health.values[0]

 def estimated_growth(ticker: str):
     # Read 5 years growth estimates
    growth_estimated = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}5YGrowthEstimates.csv', index_col=[0])['5Y Growth estimate'].values[0]
-
+    
    return growth_estimated

 def past_performance_earnings(ticker: str):
    # Read earnings csv
    earnings = pd.read_csv(f'Companies_Data/{ticker}_Data/{ticker}earnings.csv', index_col=[0])
-
+    
    # Performance 
    performance_index = round((earnings['epsActual'].sum() - earnings['epsEstimate'].sum()) / earnings['epsEstimate'].sum() * 100, 2) 

    return performance_index

-if __name__ == '__main__':
-    print(get_peg('GOOGL'))  # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
-    print(get_financial_health('GOOGL'))  # < 1 (GREEN); > 1 (RED); = 1 (ORANGE) 
-    print(estimated_growth('GOOGL'))  # < 0 (RED); 0 < x < 8% (ORANGE); < 8 % (GREEN)
-    print(past_performance_earnings('GOOGL'), "%") # -100 < x < 0 (RED); = 0 (ORANGE); 0 < x < 100 (GREEN)
+def normalizer():
+    # Read Not_normalized .csv
+    not_normalized = pd.read_csv('Elaborated_Data/Not_Normalized.csv')

+    # Takes values for Valuation and compute normalization 
+    v_low, v_up = not_normalized['Valuation'].min(), not_normalized['Valuation'].max()
+    # v_values = (100 - 0) * ((not_normalized['Valuation'] - v_low) / v_up - v_low) + 0
+    v_values = 240 / not_normalized['Valuation']
+    not_normalized['Valuation'] = v_values
+
+    # # Takes values for financial health and compute normalization
+    # fh_low, fh_up = not_normalized['Financial Health'],min(), not_normalized['Financial Health'].max()
+    # fh_values = (100 - 0) * ((not_normalized['Financial Health'] - fh_low) / fh_up - fh_low) + 0
+    # not_normalized['Financial Health'] = fh_values
+    
+    # eg_low, eg_up = not_normalized['Estimated Growth'],min(), not_normalized['Estimated Growth'].max()
+    # eg_values = (100 - 0) * ((not_normalized['Financial Health'] - fh_low) / fh_up - fh_low) + 0
+
+    print(not_normalized)
+
+def create_df(companies_list):
+    # Dictionary 
+    d = {
+        'Ticker': [],
+        'Valuation' : [],
+        'Financial Health': [],
+        'Estimated Growth': [],
+        'Past Performance': []
+    }
+
+    # Loop to get all the data
+    for company in companies_list:
+        d['Ticker'].append(company)
+        d['Valuation'].append(get_peg(company))
+        d['Financial Health'].append(get_financial_health(company))
+        d['Estimated Growth'].append(estimated_growth(company))
+        d['Past Performance'].append(past_performance_earnings(company))
+    
+    # Dataframe
+    df = pd.DataFrame(data=d)
+
+    # Save to csv
+    df.to_csv("Elaborated_Data/Not_Normalized.csv")
+
+def main():
+    """Script entry point: currently only runs normalizer()."""
+    # Building the raw table is switched off; re-enable it to regenerate
+    # 'Elaborated_Data/Not_Normalized.csv'
+    # create_df(programming_crime_list)
+    normalizer()
+    # Manual single-ticker checks, with the colour thresholds for each value:
+    # print(get_peg('GOOGL'))  # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
+    # print(get_financial_health('GOOGL'))  # < 1 (GREEN); > 1 (RED); = 1 (ORANGE)
+    # print(estimated_growth('GOOGL'))  # < 0 (RED); 0 < x < 8 % (ORANGE); > 8 % (GREEN)
+    # print(past_performance_earnings('GOOGL'), "%")  # -100 < x < 0 (RED); = 0 (ORANGE); 0 < x < 100 (GREEN)
+
+if __name__ == '__main__':
+    main()
--- a/scraper/top100_extractor.py
+++ b/scraper/top100_extractor.py
@ -7,7 +7,6 @@ programming_crime_list = [
    'AMAT',
    'AMGN',
    'AMZN',
-    'ANTM',
    'APD',
    'AVGO',
    'BA',