bachelorThesis/machine_time_waste/statuses_total_time.ipynb
2021-05-26 21:46:30 +02:00

501 lines
20 KiB
Text

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Statuses total time\n",
"Sums the times instances spend in one of each states in the diagram saved as\n",
"\"statuses.drawio\". Unknown times are summed as \"unknown\""
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import sys\n",
"import pandas\n",
"import seaborn as sns\n",
"import matplotlib as mpl\n",
"mpl.use(\"pgf\")\n",
"mpl.rcParams.update({\n",
" \"pgf.texsystem\": \"pdflatex\",\n",
" 'font.family': 'serif',\n",
" 'text.usetex': True,\n",
" 'pgf.rcfonts': False,\n",
"})\n",
"import matplotlib.pyplot as plt\n",
"pandas.options.display.float_format = '{:,.3f}'.format"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# QUEUE = set([\"0-2\", \"1-2\", \"assumptions:\", \"1-1\", \"1-0\"])\n",
"# RESUB = set([\"4-1\", \"4-0\", \"5-1\", \"6-1\", \"7-1\", \"8-1\", \"assumptions:\", \"5-0\", \"6-0\", \"7-0\", \\\n",
"# \"8-0\"])\n",
"# READY = set([\"0-3\", \"2-3\", \"0-9\", \"2-9\", \"9-3\", \"2-7\", \"2-8\", \"9-7\", \"9-8\", \\\n",
"# \"9-9\", \"0-7\", \"0-8\", \"assumptions:\", \"2-0\", \"2-4\", \"9-4\", \"9-1\"])\n",
"# RUN = set([\"3-1\", \"3-10\", \"3-4\", \"3-5\", \"3-6\", \"3-7\", \"3-8\", \"10-5\", \"10-6\", \\\n",
"# \"10-7\", \"10-8\", \"10-4\", \"10-10\", \"10-1\", \"assumptions:\", \"3-0\", \"10-0\", \"3-3\"])\n",
"\n",
"QUEUE = set([\"0-2\", \"1-2\"])\n",
"ENDED = set([\"5-1\", \"6-1\", \"7-1\", \"8-1\"])\n",
"READY = set([\"0-3\", \"0-9\", \"2-3\", \"2-9\", \"9-3\", \"9-9\"])\n",
"RUN = set([\"3-1\", \"3-4\", \"3-5\", \"3-6\", \"3-7\", \"3-8\", \"3-10\", \"10-1\", \"10-4\", \"10-5\", \"10-6\", \"10-7\", \"10-8\", \"10-10\"])\n",
"EVICT = set([\"4-1\", \"4-0\"])"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"DIR = \"/Users/maggicl/git/bachelorThesis\""
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"def to_name(et):\n",
" if et == 4: \n",
" return 'EVICT'\n",
" elif et == 5: \n",
" return 'FAIL'\n",
" elif et == 6:\n",
" return 'FINISH'\n",
" elif et == 7:\n",
" return 'KILL'\n",
" elif et == 8:\n",
" return 'LOST'\n",
" else:\n",
" return 'NO_TERM'\n",
"\n",
"def create_df(cluster):\n",
" obj = {}\n",
"\n",
" filename = DIR + \"/machine_time_waste/\" + cluster + \"_state_changes.json\"\n",
"\n",
" with open(filename, 'r') as f:\n",
" obj = json.loads(f.read())\n",
" \n",
" data = {'Last termination': [], 'time_type': [], 'time_ms': []}\n",
" totals = 0\n",
" \n",
" def add_record(et, tt, time):\n",
" data['Last termination'].append(to_name(et))\n",
" data['time_type'].append(tt)\n",
" data['time_ms'].append(time / 1000)\n",
"\n",
" for pair in obj[\"data\"]:\n",
" qt = et = rt = xt = vt = ut = 0\n",
"\n",
" pair[0] = 0 if pair[0] is None else pair[0]\n",
" \n",
" # Filter useless terms\n",
" if pair[0] not in [4,5,6,7]:\n",
" continue\n",
"\n",
" x = pair[1]\n",
" for k in x.keys():\n",
" if k in QUEUE:\n",
" qt += x[k]\n",
" elif k in ENDED:\n",
" et += x[k]\n",
" elif k in READY:\n",
" rt += x[k]\n",
" elif k in RUN:\n",
" xt += x[k]\n",
" elif k in EVICT:\n",
" vt += x[k]\n",
" else:\n",
" ut += x[k]\n",
"\n",
" add_record(pair[0], 'Queue', qt + rt)\n",
" add_record(pair[0], 'Resubmission', et + vt)\n",
" add_record(pair[0], 'Running', xt)\n",
" add_record(pair[0], 'Unknown', ut)\n",
" totals += (qt + et + rt + xt + vt + ut) / 1000\n",
" \n",
" return (pandas.DataFrame(data, columns=['Last termination', 'time_type', 'time_ms']), totals)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Graph 1: Absolute total time spent per status per \"last termination\" type"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"def graph_1(df, cluster):\n",
" #sns.set_theme(style=\"ticks\")\n",
" g = sns.histplot(df, x=\"Last termination\", weights=\"time_ms\", shrink=.5,\n",
" hue=\"time_type\", multiple=\"dodge\", discrete=True, legend=False)\n",
" g.set_yscale(\"log\")\n",
" g.set_ylabel(\"Total (milliseconds)\") \n",
" g.set_title(\"Cluster \" + cluster + \": Absolute total time spent per status per \\\"last termination\\\" type\")\n",
" #g.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Graph 2: Relative total time spent per status per \"last termination\" type\n",
"\n",
"Values are proportions on total for each \"last termination\" type"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"def graph_2(df, cluster, totals):\n",
" df = df.copy()\n",
" for i in [4,5,6,7]:\n",
" df.loc[df[\"Last termination\"] == to_name(i), \"time_ms\"] = 100 * df[\"time_ms\"][df[\"Last termination\"] == to_name(i)] / totals\n",
"\n",
" df.rename(columns = {'time_type': 'Execution phase'}, inplace = True)\n",
" \n",
" print(\"Cluster \"+cluster)\n",
" print(df)\n",
" \n",
" h = sns.histplot(df, x=\"Last termination\", \n",
" weights=\"time_ms\", shrink=.5, common_bins=True,\n",
" hue=\"Execution phase\", multiple=\"stack\", discrete=True, legend=True)\n",
" h.set_yticks([0,20,40,60,80,100])\n",
" h.set_ylabel(\"Time spent [%]\") \n",
" h.set_xlabel(\"Task termination\") \n",
" #ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
" if cluster == \"2011\":\n",
" h.set_title(\"2011 data\")\n",
" elif cluster == \"all\":\n",
" h.set_title(\"2019 data\")\n",
" else:\n",
" h.set_title((\"Cluster \" + cluster.upper()) if cluster != \"all\" else \"All clusters\" if cluster == \"all\" else \"2011 traces\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cluster a\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.051\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 1.564\n",
"3 EVICT Unknown 0.164\n",
"4 FAIL Queue 0.046\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.353\n",
"7 FAIL Unknown 0.136\n",
"8 FINISH Queue 1.612\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 1.841\n",
"11 FINISH Unknown 1.207\n",
"12 KILL Queue 3.637\n",
"13 KILL Resubmission 0.006\n",
"14 KILL Running 30.676\n",
"15 KILL Unknown 58.706\n",
"Cluster b\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.010\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 0.971\n",
"3 EVICT Unknown 0.483\n",
"4 FAIL Queue 0.003\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.282\n",
"7 FAIL Unknown 0.067\n",
"8 FINISH Queue 0.231\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 2.729\n",
"11 FINISH Unknown 3.404\n",
"12 KILL Queue 1.826\n",
"13 KILL Resubmission 0.001\n",
"14 KILL Running 19.337\n",
"15 KILL Unknown 70.655\n",
"Cluster c\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.010\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 1.401\n",
"3 EVICT Unknown 0.225\n",
"4 FAIL Queue 0.008\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.309\n",
"7 FAIL Unknown 0.161\n",
"8 FINISH Queue 0.388\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 3.824\n",
"11 FINISH Unknown 1.635\n",
"12 KILL Queue 2.376\n",
"13 KILL Resubmission 0.007\n",
"14 KILL Running 20.472\n",
"15 KILL Unknown 69.183\n",
"Cluster d\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.005\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 1.542\n",
"3 EVICT Unknown 0.265\n",
"4 FAIL Queue 0.001\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.425\n",
"7 FAIL Unknown 0.101\n",
"8 FINISH Queue 0.160\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 2.334\n",
"11 FINISH Unknown 2.546\n",
"12 KILL Queue 2.664\n",
"13 KILL Resubmission 0.020\n",
"14 KILL Running 16.846\n",
"15 KILL Unknown 73.091\n",
"Cluster e\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.000\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 0.472\n",
"3 EVICT Unknown 0.195\n",
"4 FAIL Queue 0.000\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.063\n",
"7 FAIL Unknown 0.011\n",
"8 FINISH Queue 0.021\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 1.182\n",
"11 FINISH Unknown 1.013\n",
"12 KILL Queue 3.858\n",
"13 KILL Resubmission 0.001\n",
"14 KILL Running 8.414\n",
"15 KILL Unknown 84.769\n",
"Cluster f\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.001\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 1.444\n",
"3 EVICT Unknown 0.457\n",
"4 FAIL Queue 0.000\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.111\n",
"7 FAIL Unknown 0.052\n",
"8 FINISH Queue 0.313\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 2.781\n",
"11 FINISH Unknown 2.736\n",
"12 KILL Queue 3.218\n",
"13 KILL Resubmission 0.004\n",
"14 KILL Running 12.836\n",
"15 KILL Unknown 76.047\n",
"Cluster g\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.002\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 0.196\n",
"3 EVICT Unknown 0.064\n",
"4 FAIL Queue 0.007\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.043\n",
"7 FAIL Unknown 0.015\n",
"8 FINISH Queue 0.019\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 0.808\n",
"11 FINISH Unknown 1.541\n",
"12 KILL Queue 2.650\n",
"13 KILL Resubmission 0.001\n",
"14 KILL Running 5.116\n",
"15 KILL Unknown 89.538\n",
"Cluster h\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.001\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 0.585\n",
"3 EVICT Unknown 0.200\n",
"4 FAIL Queue 0.000\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.074\n",
"7 FAIL Unknown 0.025\n",
"8 FINISH Queue 0.194\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 1.602\n",
"11 FINISH Unknown 1.255\n",
"12 KILL Queue 3.240\n",
"13 KILL Resubmission 0.000\n",
"14 KILL Running 8.055\n",
"15 KILL Unknown 84.770\n",
"Cluster all\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 0.007\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 0.925\n",
"3 EVICT Unknown 0.248\n",
"4 FAIL Queue 0.006\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.179\n",
"7 FAIL Unknown 0.061\n",
"8 FINISH Queue 0.288\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 2.036\n",
"11 FINISH Unknown 1.867\n",
"12 KILL Queue 2.945\n",
"13 KILL Resubmission 0.004\n",
"14 KILL Running 13.493\n",
"15 KILL Unknown 77.941\n",
"Cluster 2011\n",
" Last termination Execution phase time_ms\n",
"0 EVICT Queue 2.500\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 17.500\n",
"3 EVICT Unknown 0.000\n",
"4 FAIL Queue 0.000\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 5.000\n",
"7 FAIL Unknown 0.000\n",
"8 FINISH Queue 1.000\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 39.000\n",
"11 FINISH Unknown 0.000\n",
"12 KILL Queue 5.000\n",
"13 KILL Resubmission 1.000\n",
"14 KILL Running 30.000\n",
"15 KILL Unknown 0.000\n"
]
}
],
"source": [
"dft = None\n",
"tts = None\n",
"\n",
"for cluster in \"abcdefgh\":\n",
" df, totals = create_df(cluster)\n",
"\n",
" #plt.figure(figsize=(10,8))\n",
" #graph_1(df, cluster)\n",
" plt.figure(figsize=(4,3))\n",
" graph_2(df, cluster, totals)\n",
" plt.savefig('../report/figures/machine_time_waste/cluster_%s.pgf' % cluster, bbox_inches='tight')\n",
" \n",
" if dft is None:\n",
" dft = df\n",
" tts = totals\n",
" else:\n",
" dft.loc[:, \"time_ms\"] = dft[\"time_ms\"].add(df[\"time_ms\"], fill_value=0)\n",
" tts += totals\n",
"\n",
"#plt.figure(figsize=(10,8))\n",
"#graph_1(dft, \"all\")\n",
"plt.figure(figsize=(4,3))\n",
"graph_2(dft, \"all\", tts)\n",
"plt.savefig('../report/figures/machine_time_waste/cluster_all.pgf', bbox_inches='tight')\n",
"\n",
"d2011 = {'Last termination': [\"EVICT\"] * 4 + [\"FAIL\"] * 4 + [\"FINISH\"] * 4 + [\"KILL\"] * 4,\n",
" 'time_type': [\"Queue\", \"Resubmission\", \"Running\", \"Unknown\"] * 4,\n",
" 'time_ms': [2.5, 0., 17.5, 0, 0, 0, 5, 0, 1, 0, 39, 0, 5, 1, 30, 0]}\n",
"\n",
"d2011 = pandas.DataFrame(d2011, columns=['Last termination', 'time_type', 'time_ms'])\n",
"plt.figure(figsize=(4,3))\n",
"graph_2(d2011, \"2011\", 100)\n",
"plt.savefig('../report/figures/machine_time_waste/cluster_2011.pgf', bbox_inches='tight')\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Last termination time_type time_ms\n",
"0 EVICT Queue 0.000\n",
"1 EVICT Resubmission 0.000\n",
"2 EVICT Running 0.009\n",
"3 EVICT Unknown 0.002\n",
"4 FAIL Queue 0.000\n",
"5 FAIL Resubmission 0.000\n",
"6 FAIL Running 0.002\n",
"7 FAIL Unknown 0.001\n",
"8 FINISH Queue 0.003\n",
"9 FINISH Resubmission 0.000\n",
"10 FINISH Running 0.020\n",
"11 FINISH Unknown 0.019\n",
"12 KILL Queue 0.029\n",
"13 KILL Resubmission 0.000\n",
"14 KILL Running 0.135\n",
"15 KILL Unknown 0.779\n"
]
}
],
"source": [
"dft[\"time_ms\"] /= tts\n",
"print(dft)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}