This repository has been archived on 2023-06-18. You can view files and clone it, but cannot push or open issues or pull requests.
va-project/scraper/retriever.py
Tommaso Verzegnassi ece1915d8f Scraper added
2023-05-08 11:26:25 +02:00

114 lines
3.1 KiB
Python
Executable file

#!/usr/bin/env python3
import pandas as pd
import os
from datetime import datetime, timedelta
from yahooquery import Ticker
from top100_extractor import programming_crime_list
import requests
from bs4 import BeautifulSoup
from top100_extractor import programming_crime_list
from PIL import Image
def get_company_data(ticker, csv_path='companies.csv'):
    """Look up descriptive metadata for *ticker* in the companies CSV.

    Args:
        ticker: Symbol to search for in the ``ticker`` column.
        csv_path: Location of the CSV file. Defaults to ``companies.csv``
            relative to the current working directory, which is the path
            this function always used before the parameter existed.

    Returns:
        A 4-tuple ``(company_name, sector, ceo, logo)`` read from the
        ``company name``, ``industry``, ``ceo`` and ``logo`` columns of the
        first row whose ``ticker`` equals *ticker*.  When no row matches,
        every element is the string ``"not available"``.

    Raises:
        Whatever ``pandas.read_csv`` raises if the file cannot be read, and
        ``KeyError`` if an expected column is missing.
    """
    companies = pd.read_csv(csv_path)

    # Filter once; an empty result means the ticker is unknown.
    matches = companies[companies['ticker'] == ticker]
    if matches.empty:
        return ("not available",) * 4

    # Only the first matching row is used, even if the ticker is duplicated.
    row = matches.iloc[0]
    return row['company name'], row['industry'], row['ceo'], row['logo']
def _pick_columns(frame, wanted):
    """Copy the columns of *frame* listed in *wanted*, skipping absent ones."""
    picked = pd.DataFrame({})
    for column in wanted:
        if column in frame.columns:
            picked[column] = frame[column]
    return picked


def get_stock_data(ticker):
    """Download data for *ticker* and write it as CSV files on disk.

    The price history, valuation measures and annual + quarterly balance
    sheets are requested through ``yahooquery.Ticker``; company metadata
    comes from ``get_company_data``.  Four CSV files are written into
    ``Companies_Data/<ticker>_Data/`` (created if necessary):

    * ``<ticker>_price_history.csv``      - the ``close`` column, renamed
      ``Closing Price``
    * ``<ticker>_current_ratios.csv``     - selected valuation ratios
    * ``<ticker>_balance_sheet_4Y+4Q.csv``- selected balance-sheet items,
      annual rows followed by quarterly rows
    * ``<ticker>meta_data.csv``           - company name, sector, CEO, logo

    Args:
        ticker: Symbol to download.

    Returns:
        ``None``.  The ticker is skipped (nothing is written) when any of
        the requested data sets is not returned as a DataFrame, or when the
        price history has no ``close`` column.
    """
    stock = Ticker(ticker)
    stock_data = stock.history(period='max', interval='1d')
    ratios = stock.valuation_measures
    earnings_annual = stock.balance_sheet(frequency="a")
    earnings_last_4q = stock.balance_sheet(frequency='q')

    # Process-wide pandas display setting, kept from the original code.
    # It governs how frames are printed, not what to_csv writes.
    pd.options.display.float_format = '{:,.2f}'.format

    # The library can hand back something other than a DataFrame (the
    # original code already guarded against a plain string for the annual
    # balance sheet).  Apply the same guard to every data set so that one
    # unavailable ticker is skipped instead of raising mid-way.
    fetched = (stock_data, ratios, earnings_annual, earnings_last_4q)
    if not all(isinstance(item, pd.DataFrame) for item in fetched):
        return
    if 'close' not in stock_data.columns:
        return

    balance_sheet = pd.concat(
        [earnings_annual, earnings_last_4q], ignore_index=True)

    # makedirs creates the parent "Companies_Data" folder too and does not
    # fail if either directory already exists.
    folder_name = os.path.join("Companies_Data", ticker + "_Data")
    os.makedirs(folder_name, exist_ok=True)

    price_data = pd.DataFrame({'Closing Price': stock_data['close']})

    ratios_components = ['asOfDate', 'PeRatio',
                         'PegRatio', 'PsRatio', 'PbRatio']
    balance_sheet_components = ['asOfDate', 'CurrentAssets',
                                'CurrentDebt', 'CashAndCashEquivalents',
                                'TotalAssets', 'TotalDebt']
    balance_sheet_data = _pick_columns(balance_sheet, balance_sheet_components)
    ratios_data = _pick_columns(ratios, ratios_components)

    company_name, sector, ceo, logo = get_company_data(ticker)
    company_data = pd.DataFrame({
        'company_name': [company_name],
        'sector': [sector],
        'ceo': [ceo],
        'logo': [logo],
    })

    price_data.to_csv(os.path.join(folder_name, ticker+"_price_history.csv"))
    ratios_data.to_csv(os.path.join(folder_name, ticker+"_current_ratios.csv"))
    balance_sheet_data.to_csv(os.path.join(
        folder_name, ticker + "_balance_sheet_4Y+4Q.csv"))
    # NOTE(review): unlike the other files this name has no underscore after
    # the ticker; left unchanged because readers of these files may rely on it.
    company_data.to_csv(os.path.join(folder_name, ticker + "meta_data.csv"))
def download_all():
    """Call ``get_stock_data`` once per entry of ``programming_crime_list``.

    Entries are processed sequentially in list order; nothing is returned.
    """
    for symbol in programming_crime_list:
        get_stock_data(symbol)
# Run the full download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    download_all()