bachelorThesis/task_slowdown/task_slowdown_table.ipynb

314 lines
12 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Task slowdown"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import sys\n",
"import gzip\n",
"import pandas\n",
"import seaborn as sns\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import display, HTML"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Legend for columns:\n",
"- *n_fsh*: number of jobs that terminated with status 6 (FINISH)\n",
"- *n_non*: number of jobs that did not terminate with status 6 or did not terminate at all\n",
"- *finished%*: `n_fsh` / (`n_fsh` + `n_non`)\n",
"- *c_zero_end*: count of number of **Finished** jobs that have a last execution time of `0` \n",
"- *s_last*: sum of execution times for last events\n",
"- *m_last*: mean execution time for last event\n",
"- *s_all*: sum of all execution times for all events\n",
"- *m_all*: mean execution time for all events\n",
"- *s_slow*: sum of **slowdown** values computed for each job: `job_slowdown` = sum(`exec_time`) / last(`exec_time`)\n",
"- *m_slow*: mean job-wise **slowdown** value, i.e. `s_slow` / `n_fsh`\n",
"- *m_slow_2*: priority-wise mean **slowdown**, i.e. `s_all` / `s_last`"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\taskslowdown{Cluster A}{\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 212.62\\% & 71108.0 & 14201.0 & 5.17 \\\\\n",
" Free & 0.33\\% & 5769.0 & 1203.0 & 82.97 \\\\\n",
" Mid & 46.22\\% & 8510.0 & 9135.0 & 1.16 \\\\\n",
" Monitoring & 2.82\\% & 1200998.0 & 1054458.0 & 2.86 \\\\\n",
" Production & 27.21\\% & 4546.0 & 16845.0 & 4.12 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n",
"\\taskslowdown{Cluster B}{\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 71.84\\% & 1018454.0 & 550288.0 & 8.47 \\\\\n",
" Free & 45.21\\% & 12047.0 & 5588.0 & 1.18 \\\\\n",
" Mid & 8.82\\% & 225147.0 & 336262.0 & 1.11 \\\\\n",
" Monitoring & 4.12\\% & 2627612.0 & 2024679.0 & 1.51 \\\\\n",
" Production & 30.92\\% & 182604.0 & 466329.0 & 9.71 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n",
"\\taskslowdown{Cluster C}{\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 52.96\\% & 1236666.0 & 997117.0 & 7.40 \\\\\n",
" Free & 73.36\\% & 172214.0 & 5553.0 & 1.12 \\\\\n",
" Mid & 95.4\\% & 579844.0 & 248553.0 & 2.04 \\\\\n",
" Monitoring & 5.88\\% & 2159459.0 & 1761833.0 & 1.74 \\\\\n",
" Production & 3.61\\% & 352603.0 & 357993.0 & 4.14 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n",
"\\taskslowdown{Cluster D}{\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 50.56\\% & 1154060.0 & 1135023.0 & 12.04 \\\\\n",
" Free & 42.82\\% & 22831.0 & 5506.0 & 1.15 \\\\\n",
" Mid & 86.34\\% & 228762.0 & 225269.0 & 2.56 \\\\\n",
" Monitoring & 2.21\\% & 1588844.0 & 913816.0 & 2.16 \\\\\n",
" Production & 6.53\\% & 279565.0 & 349364.0 & 5.51 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n",
"\\taskslowdown{Cluster E}{\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 0.47\\% & 280811.0 & 205838.0 & 8.06 \\\\\n",
" Free & 48.15\\% & 33050.0 & 40073.0 & 1.44 \\\\\n",
" Mid & 0.46\\% & 62123.0 & 83322.0 & 10.31 \\\\\n",
" Monitoring & 37.71\\% & 1415296.0 & 1263746.0 & 2.82 \\\\\n",
" Production & 1.96\\% & 231639.0 & 414149.0 & 8.54 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n",
"\\taskslowdown{Cluster F}{\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 44.29\\% & 1368306.0 & 1563086.0 & 6.14 \\\\\n",
" Free & 45.86\\% & 187447.0 & 37069.0 & 1.09 \\\\\n",
" Mid & 31.36\\% & 200116.0 & 110201.0 & 7.60 \\\\\n",
" Monitoring & 8.42\\% & 2079134.0 & 1682711.0 & 2.08 \\\\\n",
" Production & 3.65\\% & 297168.0 & 492372.0 & 5.94 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n",
"\\taskslowdown{Cluster G}{\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 104.33\\% & 294959.0 & 184724.0 & 19.06 \\\\\n",
" Free & 33.85\\% & 64718.0 & 15473.0 & 1.14 \\\\\n",
" Mid & 49.06\\% & 732532.0 & 706124.0 & 3.86 \\\\\n",
" Monitoring & 4.36\\% & 1991341.0 & 1676276.0 & 1.72 \\\\\n",
" Production & 26.75\\% & 115953.0 & 399050.0 & 14.57 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n",
"\\taskslowdown{Cluster H}{\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 107.03\\% & 947368.0 & 527812.0 & 7.33 \\\\\n",
" Free & 28.79\\% & 310534.0 & 290058.0 & 1.12 \\\\\n",
" Mid & 2.18\\% & 338883.0 & 197440.0 & 6.49 \\\\\n",
" Monitoring & 4.96\\% & 2309296.0 & 1808698.0 & 1.94 \\\\\n",
" Production & 2.7\\% & 298799.0 & 470783.0 & 5.80 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n",
"\\begin{tabular}{llrrr}\n",
"\\toprule\n",
" tier & finished\\% & m\\_last & m\\_all & m\\_slow\\_2 \\\\\n",
"\\midrule\n",
" Best effort batch & 11.06\\% & 4139.0 & 113.0 & 7.843097 \\\\\n",
" Free & 42.85\\% & 1374.0 & 8.0 & 1.145402 \\\\\n",
" Mid & 2.71\\% & 18187.0 & 157.0 & 2.548757 \\\\\n",
" Monitoring & 2.74\\% & 834226.0 & 130.0 & 2.047032 \\\\\n",
" Production & 13.54\\% & 54789.0 & 24.0 & 6.684155 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n",
"}\n"
]
}
],
"source": [
"CLUSTERS = \"abcdefgh\"\n",
"DIR = \"/Users/maggicl/Git/bachelorThesis/task_slowdown/\"\n",
"\n",
"df = {}\n",
"dftotal = None\n",
"\n",
"def tier(p):\n",
" if p <= 99:\n",
" return \"Free\"\n",
" elif p <= 115:\n",
" return \"Best effort batch\"\n",
" elif p <= 119:\n",
" return \"Mid\"\n",
" elif p <= 359:\n",
" return \"Production\"\n",
" else:\n",
" return \"Monitoring\"\n",
"\n",
"for cluster in CLUSTERS:\n",
" print(\"\\\\taskslowdown{Cluster \" + cluster.upper() + \"}{\")\n",
" df[cluster] = pandas.read_csv(DIR + \"/\" + cluster + \"_slowdown_table.csv\")\n",
" df[cluster][\"tier\"] = df[cluster][\"priority\"].apply(tier)\n",
" df[cluster] = df[cluster][df[cluster][\"priority\"].ge(0)]\n",
" df[cluster] = df[cluster][df[cluster][\"s_last\"] > 0]\n",
" del df[cluster][\"c_zero_end\"]\n",
" del df[cluster][\"Unnamed: 0\"]\n",
" \n",
" df[cluster] = df[cluster].groupby(\"tier\").sum().reset_index()\n",
" del df[cluster][\"priority\"]\n",
" \n",
" df[cluster][\"m_slow_2\"] = (df[cluster][\"s_all\"] / df[cluster][\"s_last\"]).round(2)\n",
" del df[cluster][\"s_slow\"]\n",
" del df[cluster][\"m_slow\"]\n",
" df[cluster][\"m_all\"] = (df[cluster][\"m_all\"] / 1000000).round(0)\n",
" df[cluster][\"m_last\"] = (df[cluster][\"m_last\"] / 1000000).round(0)\n",
"\n",
" df2 = df[cluster].copy()\n",
" df[cluster][\"c\"] = df2[\"m_all\"] / df2[\"s_all\"]\n",
" del df2[\"s_all\"]\n",
" del df2[\"s_last\"]\n",
" del df2[\"n_fsh\"]\n",
" del df2[\"n_non\"]\n",
" #del df2[\"c\"]\n",
" df2[\"finished%\"] = (df2[\"finished%\"] * 100).round(2).astype(str) + \"%\"\n",
" print(df2.to_latex(index=False))\n",
" print(\"}\")\n",
" \n",
" if dftotal is None:\n",
" dftotal = df[cluster]\n",
" else:\n",
" dftotal = dftotal.append(df[cluster])\n",
" \n",
"dftotal = dftotal.groupby(\"tier\").sum().reset_index()\n",
"dftotal[\"m_slow_2\"] = (dftotal[\"s_all\"] / dftotal[\"s_last\"]) \n",
"dftotal[\"finished%\"] = (dftotal[\"n_fsh\"]) / (dftotal[\"n_fsh\"] + dftotal[\"n_non\"])\n",
"dftotal[\"m_last\"] = (dftotal[\"s_last\"] / (dftotal[\"n_fsh\"] * 1000000)).round(0)\n",
"dftotal[\"m_all\"] = (dftotal[\"s_all\"] * dftotal[\"c\"] / 1000000).round(0)\n",
"del dftotal[\"s_all\"]\n",
"del dftotal[\"s_last\"]\n",
"del dftotal[\"n_fsh\"]\n",
"del dftotal[\"n_non\"]\n",
"del dftotal[\"c\"]\n",
"\n",
"df2 = dftotal.copy()\n",
"df2[\"m_all\"] = df2[\"m_all\"].round(0)\n",
"df2[\"m_last\"] = df2[\"m_last\"].round(0)\n",
"#del df2[\"c\"]\n",
"df2[\"finished%\"] = (df2[\"finished%\"] * 100).round(2).astype(str) + \"%\"\n",
"print(df2.to_latex(index=False))\n",
"print(\"}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'wc' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-55b47c0da7a2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mwc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'wc' is not defined"
]
}
],
"source": [
"wc"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tc"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}