This repository has been archived on 2023-06-18. You can view files and clone it, but cannot push or open issues or pull requests.
va-project/indexer/Employees.ipynb
2023-05-24 10:02:09 +02:00

615 lines
66 KiB
Text
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 71,
"id": "d9083f1e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ticker</th>\n",
" <th>Employees_over_time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AAPL</td>\n",
" <td>[91428.92277123446, 102233.67037565738, 115926...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ABBV</td>\n",
" <td>[25197.96677105695, 25004.40890529943, 23750.1...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>ABT</td>\n",
" <td>[39483.100038636265, 44757.71382044369, 51525....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>ACN</td>\n",
" <td>[336961.3689040502, 374390.13833588944, 360097...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ADBE</td>\n",
" <td>[23394.52554785587, 21940.978728008624, 26402....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>V</td>\n",
" <td>[9980.877628021311, 10405.029487238351, 10411....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>VZ</td>\n",
" <td>[88949.17354024998, 97860.29179349614, 121300....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>WFC</td>\n",
" <td>[119806.58574102176, 123482.12611072823, 15253...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>WMT</td>\n",
" <td>[1109806.346506345, 1309312.9509547795, 145320...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>XOM</td>\n",
" <td>[48648.808836794415, 44519.68145644413, 55037....</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>90 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Ticker Employees_over_time\n",
"0 AAPL [91428.92277123446, 102233.67037565738, 115926...\n",
"1 ABBV [25197.96677105695, 25004.40890529943, 23750.1...\n",
"2 ABT [39483.100038636265, 44757.71382044369, 51525....\n",
"3 ACN [336961.3689040502, 374390.13833588944, 360097...\n",
"4 ADBE [23394.52554785587, 21940.978728008624, 26402....\n",
".. ... ...\n",
"85 V [9980.877628021311, 10405.029487238351, 10411....\n",
"86 VZ [88949.17354024998, 97860.29179349614, 121300....\n",
"87 WFC [119806.58574102176, 123482.12611072823, 15253...\n",
"88 WMT [1109806.346506345, 1309312.9509547795, 145320...\n",
"89 XOM [48648.808836794415, 44519.68145644413, 55037....\n",
"\n",
"[90 rows x 2 columns]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"# Load the per-ticker employee table; the first CSV column becomes the row index.\n",
"employees_df = pd.read_csv(\n",
"    '../Elaborated_Data/employees_over_time.csv',\n",
"    index_col=[0],\n",
")\n",
"employees_df"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "9401e797",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ticker</th>\n",
" <th>Employees_over_time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AAPL</td>\n",
" <td>91428.92277123446</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AAPL</td>\n",
" <td>102233.67037565738</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AAPL</td>\n",
" <td>115926.34267742703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AAPL</td>\n",
" <td>137239.8786903178</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AAPL</td>\n",
" <td>139194.57829987502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1075</th>\n",
" <td>XOM</td>\n",
" <td>47325.423122434426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1076</th>\n",
" <td>XOM</td>\n",
" <td>57436.23902499073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1077</th>\n",
" <td>XOM</td>\n",
" <td>53483.04798407412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1078</th>\n",
" <td>XOM</td>\n",
" <td>64366.11240308755</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1079</th>\n",
" <td>XOM</td>\n",
" <td>62000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1080 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Ticker Employees_over_time\n",
"0 AAPL 91428.92277123446\n",
"1 AAPL 102233.67037565738\n",
"2 AAPL 115926.34267742703\n",
"3 AAPL 137239.8786903178\n",
"4 AAPL 139194.57829987502\n",
"... ... ...\n",
"1075 XOM 47325.423122434426\n",
"1076 XOM 57436.23902499073\n",
"1077 XOM 53483.04798407412\n",
"1078 XOM 64366.11240308755\n",
"1079 XOM 62000\n",
"\n",
"[1080 rows x 2 columns]"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Every 'Employees_over_time' cell is a stringified list (\"[v1, v2, ...]\").\n",
"# Split it on commas and explode so each value gets its own row per ticker.\n",
"employees_df_exp = (\n",
"    employees_df\n",
"    .set_index('Ticker')\n",
"    .apply(lambda column: column.str.split(',').explode())\n",
"    .reset_index()\n",
")\n",
"\n",
"# Remove the list brackets as literal characters. regex=False is required:\n",
"# read as a regular expression, '[' is an unterminated character set, which\n",
"# pandas >= 2.0 no longer silently treats as a literal and rejects instead.\n",
"employees_df_exp['Employees_over_time'] = (\n",
"    employees_df_exp['Employees_over_time']\n",
"    .str.replace('[', '', regex=False)\n",
"    .str.replace(']', '', regex=False)\n",
")\n",
"employees_df_exp"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "a273b5a2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1080\n"
]
}
],
"source": [
"# One observation per year, dated 1 January, from 2012 through 2023 inclusive.\n",
"FIRST_YEAR = 2012\n",
"LAST_YEAR = 2023\n",
"yearly_dates = [f'{year}-01-01' for year in range(FIRST_YEAR, LAST_YEAR + 1)]\n",
"\n",
"# Repeat the yearly dates once per row of employees_df (one row per ticker) so\n",
"# the list lines up with the exploded frame. Taking the count from the data\n",
"# replaces the previously hard-coded 90.\n",
"date_list = yearly_dates * len(employees_df)\n",
"\n",
"print(len(date_list))"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "64055950",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ticker</th>\n",
" <th>Employees_over_time</th>\n",
" <th>date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AAPL</td>\n",
" <td>91429.0</td>\n",
" <td>2012-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AAPL</td>\n",
" <td>102234.0</td>\n",
" <td>2013-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AAPL</td>\n",
" <td>115926.0</td>\n",
" <td>2014-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AAPL</td>\n",
" <td>137240.0</td>\n",
" <td>2015-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AAPL</td>\n",
" <td>139195.0</td>\n",
" <td>2016-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1075</th>\n",
" <td>XOM</td>\n",
" <td>47325.0</td>\n",
" <td>2019-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1076</th>\n",
" <td>XOM</td>\n",
" <td>57436.0</td>\n",
" <td>2020-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1077</th>\n",
" <td>XOM</td>\n",
" <td>53483.0</td>\n",
" <td>2021-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1078</th>\n",
" <td>XOM</td>\n",
" <td>64366.0</td>\n",
" <td>2022-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1079</th>\n",
" <td>XOM</td>\n",
" <td>62000.0</td>\n",
" <td>2023-01-01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1080 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Ticker Employees_over_time date\n",
"0 AAPL 91429.0 2012-01-01\n",
"1 AAPL 102234.0 2013-01-01\n",
"2 AAPL 115926.0 2014-01-01\n",
"3 AAPL 137240.0 2015-01-01\n",
"4 AAPL 139195.0 2016-01-01\n",
"... ... ... ...\n",
"1075 XOM 47325.0 2019-01-01\n",
"1076 XOM 57436.0 2020-01-01\n",
"1077 XOM 53483.0 2021-01-01\n",
"1078 XOM 64366.0 2022-01-01\n",
"1079 XOM 62000.0 2023-01-01\n",
"\n",
"[1080 rows x 3 columns]"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attach one date per exploded row. pandas raises ValueError when date_list\n",
"# and the frame differ in length, so a misaligned list cannot pass silently.\n",
"employees_df_exp['date'] = date_list\n",
"\n",
"# Build the final frame from a copy so the string-typed intermediate frame is\n",
"# left as it was, then convert the values to floats rounded to whole numbers.\n",
"employees_final = employees_df_exp.copy()\n",
"employees_final['Employees_over_time'] = employees_final['Employees_over_time'].astype('float64')\n",
"employees_final = employees_final.round({'Employees_over_time': 0})\n",
"employees_final"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "dc7c1efc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ticker</th>\n",
" <th>Employees_over_time</th>\n",
" <th>date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AAPL</td>\n",
" <td>91429.0</td>\n",
" <td>2012-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AAPL</td>\n",
" <td>102234.0</td>\n",
" <td>2013-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AAPL</td>\n",
" <td>115926.0</td>\n",
" <td>2014-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AAPL</td>\n",
" <td>137240.0</td>\n",
" <td>2015-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AAPL</td>\n",
" <td>139195.0</td>\n",
" <td>2016-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>AAPL</td>\n",
" <td>130029.0</td>\n",
" <td>2017-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>AAPL</td>\n",
" <td>129131.0</td>\n",
" <td>2018-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>AAPL</td>\n",
" <td>143143.0</td>\n",
" <td>2019-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>AAPL</td>\n",
" <td>146079.0</td>\n",
" <td>2020-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>AAPL</td>\n",
" <td>141409.0</td>\n",
" <td>2021-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>AAPL</td>\n",
" <td>170843.0</td>\n",
" <td>2022-01-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>AAPL</td>\n",
" <td>164000.0</td>\n",
" <td>2023-01-01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Ticker Employees_over_time date\n",
"0 AAPL 91429.0 2012-01-01\n",
"1 AAPL 102234.0 2013-01-01\n",
"2 AAPL 115926.0 2014-01-01\n",
"3 AAPL 137240.0 2015-01-01\n",
"4 AAPL 139195.0 2016-01-01\n",
"5 AAPL 130029.0 2017-01-01\n",
"6 AAPL 129131.0 2018-01-01\n",
"7 AAPL 143143.0 2019-01-01\n",
"8 AAPL 146079.0 2020-01-01\n",
"9 AAPL 141409.0 2021-01-01\n",
"10 AAPL 170843.0 2022-01-01\n",
"11 AAPL 164000.0 2023-01-01"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Spot-check the reshaped data by listing every row of a single ticker.\n",
"employees_final[employees_final['Ticker'] == 'AAPL']"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "63eb80de",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def employees_time_line(ticker: str, employees_dataframe: pd.DataFrame):\n",
"    \"\"\"Draw a line chart of one ticker's employee count over time.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    ticker : str\n",
"        Symbol compared for equality against the 'Ticker' column.\n",
"    employees_dataframe : pd.DataFrame\n",
"        Frame providing the 'Ticker', 'date' and 'Employees_over_time' columns.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is rendered with ``plt.show()``.\n",
"    \"\"\"\n",
"    ticker_df = employees_dataframe.loc[employees_dataframe['Ticker'] == ticker]\n",
"\n",
"    # Draw on an explicit Axes rather than pyplot's implicit current figure.\n",
"    _, ax = plt.subplots()\n",
"    sns.lineplot(data=ticker_df, x='date', y='Employees_over_time', ax=ax)\n",
"    ax.set(title=f'{ticker} employees over time', xlabel='Date', ylabel='Employees')\n",
"    # Tilt the date labels so neighbouring ticks do not overlap.\n",
"    ax.tick_params(axis='x', labelrotation=30)\n",
"    plt.show()\n",
"\n",
"\n",
"employees_time_line('XOM', employees_final)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f3d3c22",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Visual-Analytics",
"language": "python",
"name": "visual-analytics"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}