2023-05-08 09:26:25 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
import os
|
|
|
|
from datetime import datetime, timedelta
|
|
|
|
from yahooquery import Ticker
|
|
|
|
from top100_extractor import programming_crime_list
|
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from top100_extractor import programming_crime_list
|
|
|
|
from PIL import Image
|
2023-05-10 09:09:12 +00:00
|
|
|
import yfinance as yf
|
|
|
|
from yahoo_fin import stock_info as si
|
|
|
|
|
|
|
|
|
2023-05-10 09:35:58 +00:00
|
|
|
def get_market_cap(ticker1):
    """Return the market capitalisation yahooquery reports for *ticker1*.

    Args:
        ticker1: Ticker symbol. The response is looked up under the
            upper-cased form of this symbol.

    Returns:
        The ``marketCap`` value from the symbol's summary detail, or
        ``None`` when the response holds no usable entry for the symbol
        or the entry has no ``marketCap`` field.
    """
    summary = Ticker(ticker1).summary_detail

    details = summary.get(ticker1.upper()) if isinstance(summary, dict) else None

    # NOTE(review): yahooquery presumably puts an error string here for
    # unknown symbols -- confirm. Only a mapping can carry 'marketCap', so
    # anything else is reported as "no market cap" instead of crashing
    # with a TypeError on string indexing.
    if not isinstance(details, dict):
        return None

    return details.get('marketCap')
|
|
|
|
|
|
|
|
|
|
|
|
def get_analyst_estimates(ticker):
    """Return one value from the analysts' "Growth Estimates" table for *ticker*.

    Args:
        ticker: Ticker symbol passed to ``si.get_analysts_info``.

    Returns:
        The second non-null entry of the table's fifth row.
        NOTE(review): presumably this is the company's "next 5 years"
        growth estimate; that depends on the table layout returned by
        yahoo_fin -- confirm against a live response.
    """
    estimates = si.get_analysts_info(ticker)

    # Fifth row of the table with empty cells removed, so the positions
    # below count only populated columns.
    next_5_years_estimates = estimates["Growth Estimates"].iloc[4].dropna()

    # Explicit positional access: a plain ``series[1]`` on a label-indexed
    # Series relies on the integer-key positional fallback, which pandas
    # has deprecated.
    return next_5_years_estimates.iloc[1]
|
2023-05-08 09:26:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_company_data(ticker):
    """Look up the descriptive fields for *ticker* in ``companies.csv``.

    The CSV is read from the directory containing this module on every
    call.

    Args:
        ticker: Value matched against the ``ticker`` column.

    Returns:
        A 4-tuple ``(company name, industry, ceo, logo)`` taken from the
        first matching row, or four ``"not available"`` strings when no
        row matches.
    """
    csv_path = os.path.join(os.path.dirname(__file__), r'companies.csv')
    companies = pd.read_csv(csv_path)

    matches = companies[companies['ticker'] == ticker]

    # Unknown ticker: every field gets the same placeholder text.
    if matches.empty:
        placeholder = "not available"
        return placeholder, placeholder, placeholder, placeholder

    first = matches.iloc[0]
    return first['company name'], first['industry'], first['ceo'], first['logo']
|
|
|
|
|
|
|
|
|
|
|
|
def _pick_columns(frame, wanted):
    """Copy the columns listed in *wanted* that exist in *frame* into a new DataFrame."""
    picked = pd.DataFrame({})

    # A non-DataFrame payload has no columns to copy; report it as
    # "no data" instead of failing on ``.columns``.
    if not isinstance(frame, pd.DataFrame):
        return picked

    for column in wanted:
        if column in frame.columns:
            picked[column] = frame[column]

    return picked


def get_stock_data(ticker):
    """Download data for *ticker* and write it as CSV files.

    Fetches price history, valuation measures and annual plus quarterly
    balance sheets through yahooquery, then adds company metadata, the
    analyst growth estimate and the market capitalisation. Six CSV files
    are written to ``Companies_Data/<ticker>_Data``; the directories are
    created when missing.

    Args:
        ticker: Ticker symbol, also used as the prefix of every output
            file name.

    Returns:
        None. Returns early, before creating any directory or file, when
        the annual balance sheet request yields a string instead of a
        DataFrame (presumably an error message -- verify against
        yahooquery).
    """
    stock = Ticker(ticker)
    stock_data = stock.history(period='max', interval='1d')
    ratios = stock.valuation_measures
    earnings_annual = stock.balance_sheet(frequency="a")
    earnings_last_4q = stock.balance_sheet(frequency='q')

    # Process-wide pandas display option; kept because other code may
    # rely on it being set after this function has run.
    pd.options.display.float_format = '{:,.2f}'.format

    if isinstance(earnings_annual, str):
        return

    # Annual rows first, then quarterly rows. The quarterly request can
    # also yield a non-DataFrame, which pd.concat would reject, so it is
    # appended only when it really is a DataFrame.
    statements = [earnings_annual]
    if isinstance(earnings_last_4q, pd.DataFrame):
        statements.append(earnings_last_4q)
    balance_sheet = pd.concat(statements, ignore_index=True)

    All_Data = "Companies_Data"
    folder_name = os.path.join(All_Data, ticker + "_Data")
    # Creates both directory levels and tolerates existing ones, so there
    # is no window between an existence check and the creation.
    os.makedirs(folder_name, exist_ok=True)

    price_data = pd.DataFrame({'Closing Price': stock_data['close']})

    company_name, sector, ceo, logo = get_company_data(ticker)
    company_data = pd.DataFrame({
        'company_name': [company_name],
        'sector': [sector],
        'ceo': [ceo],
        'logo': [logo],
    })

    estimated_growth = get_analyst_estimates(ticker)
    earnings_next5Y = pd.DataFrame({
        'company_name': [ticker],
        '5Y Growth estimate': [estimated_growth],
    })

    market_cap = get_market_cap(ticker)
    market_cap_file = pd.DataFrame({
        'company_name': [ticker],
        'Market capitalization': [market_cap],
    })

    ratios_components = ['asOfDate', 'PeRatio',
                         'PegRatio', 'PsRatio', 'PbRatio']
    balance_sheet_components = ['asOfDate', 'CurrentAssets',
                                'CurrentDebt', 'CashAndCashEquivalents',
                                'TotalAssets', 'TotalDebt']

    balance_sheet_data = _pick_columns(balance_sheet, balance_sheet_components)
    ratios_data = _pick_columns(ratios, ratios_components)

    # File names are kept exactly as before, including the ones without
    # an underscore after the ticker, because other code may read them.
    price_data.to_csv(os.path.join(folder_name, ticker+"_price_history.csv"))
    ratios_data.to_csv(os.path.join(folder_name, ticker+"_current_ratios.csv"))
    balance_sheet_data.to_csv(os.path.join(folder_name, ticker + "_balance_sheet_4Y+4Q.csv"))
    company_data.to_csv(os.path.join(folder_name, ticker + "meta_data.csv"))
    earnings_next5Y.to_csv(os.path.join(folder_name, ticker + "5YGrowthEstimates.csv"))
    market_cap_file.to_csv(os.path.join(folder_name, ticker + "MarketCap.csv"))
|
2023-05-10 09:09:12 +00:00
|
|
|
|
2023-05-08 09:26:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
def download_all():
    """Call ``get_stock_data`` once for every entry of ``programming_crime_list``."""
    for symbol in programming_crime_list:
        get_stock_data(symbol)
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: process every listed company when run directly.
if __name__ == "__main__":
    download_all()
|
2023-05-10 09:09:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|