bachelorThesis/machine_time_waste/statuses_total_time.ipynb

470 lines
18 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "776f8aa3",
"metadata": {},
"source": [
"# Statuses total time\n",
"Sums the time that instances spend in each of the states of the diagram saved as\n",
"\"statuses.drawio\". Time that cannot be attributed to one of those states is summed as \"unknown\"."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "7c77e2f1",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import sys\n",
"\n",
"import pandas\n",
"import seaborn as sns\n",
"import matplotlib as mpl\n",
"mpl.use(\"pgf\")\n",
"mpl.rcParams.update({\n",
" \"pgf.texsystem\": \"pdflatex\",\n",
" 'font.family': 'serif',\n",
" 'text.usetex': True,\n",
" 'pgf.rcfonts': False,\n",
"})\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cc6d3e6a",
"metadata": {},
"outputs": [],
"source": [
"# QUEUE = set([\"0-2\", \"1-2\", \"assumptions:\", \"1-1\", \"1-0\"])\n",
"# RESUB = set([\"4-1\", \"4-0\", \"5-1\", \"6-1\", \"7-1\", \"8-1\", \"assumptions:\", \"5-0\", \"6-0\", \"7-0\", \\\n",
"# \"8-0\"])\n",
"# READY = set([\"0-3\", \"2-3\", \"0-9\", \"2-9\", \"9-3\", \"2-7\", \"2-8\", \"9-7\", \"9-8\", \\\n",
"# \"9-9\", \"0-7\", \"0-8\", \"assumptions:\", \"2-0\", \"2-4\", \"9-4\", \"9-1\"])\n",
"# RUN = set([\"3-1\", \"3-10\", \"3-4\", \"3-5\", \"3-6\", \"3-7\", \"3-8\", \"10-5\", \"10-6\", \\\n",
"# \"10-7\", \"10-8\", \"10-4\", \"10-10\", \"10-1\", \"assumptions:\", \"3-0\", \"10-0\", \"3-3\"])\n",
"\n",
"QUEUE = set([\"0-2\", \"1-2\"])\n",
"ENDED = set([\"5-1\", \"6-1\", \"7-1\", \"8-1\"])\n",
"READY = set([\"0-3\", \"0-9\", \"2-3\", \"2-9\", \"9-3\", \"9-9\"])\n",
"RUN = set([\"3-1\", \"3-4\", \"3-5\", \"3-6\", \"3-7\", \"3-8\", \"3-10\", \"10-1\", \"10-4\", \"10-5\", \"10-6\", \"10-7\", \"10-8\", \"10-10\"])\n",
"EVICT = set([\"4-1\", \"4-0\"])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "44c77966",
"metadata": {},
"outputs": [],
"source": [
"DIR = \"/home/claudio/hdd/git/bachelorThesis\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8038bdb2",
"metadata": {},
"outputs": [],
"source": [
"def to_name(et):\n",
" if et == 4: \n",
" return 'EVICT'\n",
" elif et == 5: \n",
" return 'FAIL'\n",
" elif et == 6:\n",
" return 'FINISH'\n",
" elif et == 7:\n",
" return 'KILL'\n",
" elif et == 8:\n",
" return 'LOST'\n",
" else:\n",
" return 'NO_TERM'\n",
"\n",
"def create_df(cluster):\n",
"    \"\"\"Load one cluster's state-change times and tabulate them per termination type.\n",
"\n",
"    Reads DIR + \"/machine_time_waste/\" + cluster + \"_state_changes.json\". Its \"data\" entry is\n",
"    iterated as pairs of (last termination code, mapping of transition key -> time). A code of\n",
"    None is treated as 0, and only pairs whose code is 4, 5, 6 or 7 are kept.\n",
"\n",
"    For every kept pair the times are bucketed by transition key (QUEUE, ENDED, READY, RUN,\n",
"    EVICT, anything else is unknown) and four rows are emitted: 'Queue' (QUEUE + READY),\n",
"    'Resubmission' (ENDED + EVICT), 'Running' (RUN) and 'Unknown'. Every time is divided by\n",
"    1000 before it is stored in the 'time_ms' column (presumably microseconds ->\n",
"    milliseconds; confirm against the producer of the JSON file).\n",
"\n",
"    Returns a tuple (frame, total): a DataFrame with the columns 'Last termination',\n",
"    'time_type' and 'time_ms', and the sum of all bucketed times of the kept pairs, also\n",
"    divided by 1000.\n",
"    \"\"\"\n",
"    path = DIR + \"/machine_time_waste/\" + cluster + \"_state_changes.json\"\n",
"    with open(path, 'r') as handle:\n",
"        payload = json.load(handle)\n",
"\n",
"    terminations, phases, amounts = [], [], []\n",
"    grand_total = 0\n",
"\n",
"    # Checked in this order; a key that is in none of the groups lands in the extra last slot.\n",
"    groups = (QUEUE, ENDED, READY, RUN, EVICT)\n",
"\n",
"    for entry in payload[\"data\"]:\n",
"        code = 0 if entry[0] is None else entry[0]\n",
"\n",
"        # Only EVICT, FAIL, FINISH and KILL terminations are reported.\n",
"        if code not in [4, 5, 6, 7]:\n",
"            continue\n",
"\n",
"        sums = [0] * (len(groups) + 1)\n",
"        for key, spent in entry[1].items():\n",
"            slot = next((i for i, group in enumerate(groups) if key in group), len(groups))\n",
"            sums[slot] += spent\n",
"        queue, ended, ready, running, evicted, unknown = sums\n",
"\n",
"        label = to_name(code)\n",
"        rows = (\n",
"            ('Queue', queue + ready),\n",
"            ('Resubmission', ended + evicted),\n",
"            ('Running', running),\n",
"            ('Unknown', unknown),\n",
"        )\n",
"        for phase, amount in rows:\n",
"            terminations.append(label)\n",
"            phases.append(phase)\n",
"            amounts.append(amount / 1000)\n",
"        grand_total += (queue + ended + ready + running + evicted + unknown) / 1000\n",
"\n",
"    table = {'Last termination': terminations, 'time_type': phases, 'time_ms': amounts}\n",
"    return (pandas.DataFrame(table, columns=['Last termination', 'time_type', 'time_ms']), grand_total)"
]
},
{
"cell_type": "markdown",
"id": "31fdf065",
"metadata": {},
"source": [
"## Graph 1: Absolute total time spent per status per \"last termination\" type"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f44fef5d",
"metadata": {},
"outputs": [],
"source": [
"def graph_1(df, cluster):\n",
"    \"\"\"Draw the absolute time per status, grouped by last termination type.\n",
"\n",
"    df: frame with the columns 'Last termination', 'time_type' and 'time_ms'.\n",
"    cluster: string that is inserted into the plot title.\n",
"\n",
"    The bars are weighted by 'time_ms', dodged per 'time_type', drawn without a legend\n",
"    and shown on a logarithmic y axis. Nothing is returned.\n",
"    \"\"\"\n",
"    bar_options = dict(\n",
"        x=\"Last termination\",\n",
"        weights=\"time_ms\",\n",
"        shrink=.5,\n",
"        hue=\"time_type\",\n",
"        multiple=\"dodge\",\n",
"        discrete=True,\n",
"        legend=False,\n",
"    )\n",
"    axes = sns.histplot(df, **bar_options)\n",
"    axes.set_yscale(\"log\")\n",
"    axes.set_ylabel(\"Total (milliseconds)\")\n",
"    axes.set_title(\"Cluster \" + cluster + \": Absolute total time spent per status per \\\"last termination\\\" type\")"
]
},
{
"cell_type": "markdown",
"id": "41bcaff4",
"metadata": {},
"source": [
"## Graph 2: Relative total time spent per status per \"last termination\" type\n",
"\n",
"Values are percentages of the overall total time (all \"last termination\" types combined), not of the total of each individual \"last termination\" type."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3ba51241",
"metadata": {},
"outputs": [],
"source": [
"def graph_2(df, cluster, totals):\n",
"    \"\"\"Draw a stacked bar chart of the percentage of time spent per execution phase.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Frame with the columns 'Last termination', 'time_type' and 'time_ms'. It is copied,\n",
"        the caller's frame is not modified.\n",
"    cluster : str\n",
"        \"2011\" and \"all\" select the fixed titles \"2011 data\" and \"2019 data\"; any other value\n",
"        is upper-cased into the title \"Cluster <NAME>\".\n",
"    totals : number\n",
"        Denominator of the percentages: the 'time_ms' of every EVICT, FAIL, FINISH and KILL\n",
"        row becomes 100 * time_ms / totals. Rows with another termination name are left as is.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The Axes object returned by seaborn.histplot, so callers can tweak or save the plot.\n",
"    \"\"\"\n",
"    plot_df = df.copy()\n",
"\n",
"    # All four termination types share the same denominator, so one mask covers them.\n",
"    scaled_names = [to_name(code) for code in (4, 5, 6, 7)]\n",
"    is_scaled = plot_df[\"Last termination\"].isin(scaled_names)\n",
"    plot_df.loc[is_scaled, \"time_ms\"] = 100 * plot_df.loc[is_scaled, \"time_ms\"] / totals\n",
"\n",
"    # The hue column name doubles as the legend title.\n",
"    plot_df = plot_df.rename(columns={'time_type': 'Execution phase'})\n",
"\n",
"    h = sns.histplot(plot_df, x=\"Last termination\",\n",
"                     weights=\"time_ms\", shrink=.5, common_bins=True,\n",
"                     hue=\"Execution phase\", multiple=\"stack\", discrete=True, legend=True)\n",
"    h.set_yticks([0, 20, 40, 60, 80, 100])\n",
"    h.set_ylabel(\"Time spent [%]\")\n",
"    h.set_xlabel(\"Task termination\")\n",
"\n",
"    if cluster == \"2011\":\n",
"        title = \"2011 data\"\n",
"    elif cluster == \"all\":\n",
"        title = \"2019 data\"\n",
"    else:\n",
"        title = \"Cluster \" + cluster.upper()\n",
"    h.set_title(title)\n",
"    return h"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8fc1b568",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Last termination time_type time_ms\n",
"0 EVICT Queue 1.049774e+12\n",
"1 EVICT Resubmission 5.530617e+08\n",
"2 EVICT Running 3.218063e+13\n",
"3 EVICT Unknown 3.383291e+12\n",
"4 FAIL Queue 9.483261e+11\n",
"5 FAIL Resubmission 7.150500e+01\n",
"6 FAIL Running 7.265195e+12\n",
"7 FAIL Unknown 2.799674e+12\n",
"8 FINISH Queue 3.317009e+13\n",
"9 FINISH Resubmission 1.828825e+07\n",
"10 FINISH Running 3.788436e+13\n",
"11 FINISH Unknown 2.482661e+13\n",
"12 KILL Queue 7.482888e+13\n",
"13 KILL Resubmission 1.211419e+11\n",
"14 KILL Running 6.311166e+14\n",
"15 KILL Unknown 1.207792e+15\n",
" Last termination time_type time_ms\n",
"0 EVICT Queue 2.991028e+11\n",
"1 EVICT Resubmission 1.360657e+09\n",
"2 EVICT Running 2.871365e+13\n",
"3 EVICT Unknown 1.428912e+13\n",
"4 FAIL Queue 9.376134e+10\n",
"5 FAIL Resubmission 1.225520e+02\n",
"6 FAIL Running 8.338530e+12\n",
"7 FAIL Unknown 1.989378e+12\n",
"8 FINISH Queue 6.817208e+12\n",
"9 FINISH Resubmission 1.493729e+03\n",
"10 FINISH Running 8.069421e+13\n",
"11 FINISH Unknown 1.006353e+14\n",
"12 KILL Queue 5.397953e+13\n",
"13 KILL Resubmission 1.842002e+10\n",
"14 KILL Running 5.716892e+14\n",
"15 KILL Unknown 2.088855e+15\n",
" Last termination time_type time_ms\n",
"0 EVICT Queue 3.158380e+11\n",
"1 EVICT Resubmission 2.355575e+09\n",
"2 EVICT Running 4.229815e+13\n",
"3 EVICT Unknown 6.785277e+12\n",
"4 FAIL Queue 2.352869e+11\n",
"5 FAIL Resubmission 4.684500e+01\n",
"6 FAIL Running 9.316941e+12\n",
"7 FAIL Unknown 4.873943e+12\n",
"8 FINISH Queue 1.172189e+13\n",
"9 FINISH Resubmission 3.623451e+03\n",
"10 FINISH Running 1.154498e+14\n",
"11 FINISH Unknown 4.934279e+13\n",
"12 KILL Queue 7.171264e+13\n",
"13 KILL Resubmission 2.108520e+11\n",
"14 KILL Running 6.180005e+14\n",
"15 KILL Unknown 2.088457e+15\n",
" Last termination time_type time_ms\n",
"0 EVICT Queue 1.415993e+11\n",
"1 EVICT Resubmission 2.835890e+08\n",
"2 EVICT Running 4.303187e+13\n",
"3 EVICT Unknown 7.410999e+12\n",
"4 FAIL Queue 2.231462e+10\n",
"5 FAIL Resubmission 1.073960e+02\n",
"6 FAIL Running 1.186956e+13\n",
"7 FAIL Unknown 2.829927e+12\n",
"8 FINISH Queue 4.455665e+12\n",
"9 FINISH Resubmission 1.577302e+03\n",
"10 FINISH Running 6.516562e+13\n",
"11 FINISH Unknown 7.106965e+13\n",
"12 KILL Queue 7.435926e+13\n",
"13 KILL Resubmission 5.556059e+11\n",
"14 KILL Running 4.702722e+14\n",
"15 KILL Unknown 2.040366e+15\n",
" Last termination time_type time_ms\n",
"0 EVICT Queue 1.722618e+10\n",
"1 EVICT Resubmission 1.788932e+09\n",
"2 EVICT Running 1.710804e+13\n",
"3 EVICT Unknown 7.078678e+12\n",
"4 FAIL Queue 2.895755e+09\n",
"5 FAIL Resubmission 5.304400e+01\n",
"6 FAIL Running 2.281806e+12\n",
"7 FAIL Unknown 3.984907e+11\n",
"8 FINISH Queue 7.454410e+11\n",
"9 FINISH Resubmission 6.310360e+02\n",
"10 FINISH Running 4.284518e+13\n",
"11 FINISH Unknown 3.672368e+13\n",
"12 KILL Queue 1.398332e+14\n",
"13 KILL Resubmission 4.825723e+10\n",
"14 KILL Running 3.049664e+14\n",
"15 KILL Unknown 3.072445e+15\n",
" Last termination time_type time_ms\n",
"0 EVICT Queue 3.140594e+10\n",
"1 EVICT Resubmission 1.504263e+10\n",
"2 EVICT Running 5.070239e+13\n",
"3 EVICT Unknown 1.602834e+13\n",
"4 FAIL Queue 5.523972e+09\n",
"5 FAIL Resubmission 2.352700e+01\n",
"6 FAIL Running 3.889624e+12\n",
"7 FAIL Unknown 1.833895e+12\n",
"8 FINISH Queue 1.098116e+13\n",
"9 FINISH Resubmission 6.319590e+02\n",
"10 FINISH Running 9.761364e+13\n",
"11 FINISH Unknown 9.603417e+13\n",
"12 KILL Queue 1.129539e+14\n",
"13 KILL Resubmission 1.356476e+11\n",
"14 KILL Running 4.505937e+14\n",
"15 KILL Unknown 2.669451e+15\n",
" Last termination time_type time_ms\n",
"0 EVICT Queue 9.528645e+10\n",
"1 EVICT Resubmission 1.493116e+09\n",
"2 EVICT Running 8.513084e+12\n",
"3 EVICT Unknown 2.778074e+12\n",
"4 FAIL Queue 2.887122e+11\n",
"5 FAIL Resubmission 1.757300e+01\n",
"6 FAIL Running 1.867799e+12\n",
"7 FAIL Unknown 6.622832e+11\n",
"8 FINISH Queue 8.337090e+11\n",
"9 FINISH Resubmission 6.753141e+07\n",
"10 FINISH Running 3.514254e+13\n",
"11 FINISH Unknown 6.704536e+13\n",
"12 KILL Queue 1.152843e+14\n",
"13 KILL Resubmission 5.814544e+10\n",
"14 KILL Running 2.225128e+14\n",
"15 KILL Unknown 3.894626e+15\n",
" Last termination time_type time_ms\n",
"0 EVICT Queue 4.621613e+10\n",
"1 EVICT Resubmission 4.511340e+02\n",
"2 EVICT Running 2.786346e+13\n",
"3 EVICT Unknown 9.513981e+12\n",
"4 FAIL Queue 7.828423e+09\n",
"5 FAIL Resubmission 1.148130e+02\n",
"6 FAIL Running 3.509052e+12\n",
"7 FAIL Unknown 1.212378e+12\n",
"8 FINISH Queue 9.252380e+12\n",
"9 FINISH Resubmission 1.675400e+02\n",
"10 FINISH Running 7.635478e+13\n",
"11 FINISH Unknown 5.980213e+13\n",
"12 KILL Queue 1.543895e+14\n",
"13 KILL Resubmission 3.419664e+09\n",
"14 KILL Running 3.838571e+14\n",
"15 KILL Unknown 4.039843e+15\n"
]
}
],
"source": [
"# Per-cluster figures for clusters a-h, then the aggregate (\"all\") figure and the 2011 figure.\n",
"# `dft` (aggregated frame) and `tts` (grand total) are also used by the next cell.\n",
"frames = []\n",
"tts = 0\n",
"\n",
"for cluster in \"abcdefgh\":\n",
"    df, totals = create_df(cluster)\n",
"\n",
"    print(df)\n",
"\n",
"    # graph_1(df, cluster) would draw the absolute-time variant; it is currently not used.\n",
"    fig = plt.figure(figsize=(4,3))\n",
"    graph_2(df, cluster, totals)\n",
"    plt.savefig('../report/figures/machine_time_waste/cluster_%s.pgf' % cluster, bbox_inches='tight')\n",
"    plt.close(fig)  # already saved; do not keep one open figure per cluster\n",
"\n",
"    frames.append(df)\n",
"    tts += totals\n",
"\n",
"# Aggregate over all clusters by (termination, phase) label rather than by row position, so the\n",
"# sums stay correct even if a cluster lacks a termination type or lists its rows in another order.\n",
"# groupby sorts the labels, which yields the EVICT/FAIL/FINISH/KILL x Queue/.../Unknown order.\n",
"dft = (\n",
"    pandas.concat(frames, ignore_index=True)\n",
"    .groupby(['Last termination', 'time_type'], as_index=False)['time_ms']\n",
"    .sum()\n",
")\n",
"\n",
"fig = plt.figure(figsize=(4,3))\n",
"graph_2(dft, \"all\", tts)\n",
"plt.savefig('../report/figures/machine_time_waste/cluster_all.pgf', bbox_inches='tight')\n",
"plt.close(fig)\n",
"\n",
"# 2011 figure: the values are hard-coded here (their source is not recorded in this notebook).\n",
"# They are plotted as given, because totals=100 turns graph_2's rescaling (100 * x / totals)\n",
"# into a no-op.\n",
"d2011 = pandas.DataFrame(\n",
"    {'Last termination': [\"EVICT\"] * 4 + [\"FAIL\"] * 4 + [\"FINISH\"] * 4 + [\"KILL\"] * 4,\n",
"     'time_type': [\"Queue\", \"Resubmission\", \"Running\", \"Unknown\"] * 4,\n",
"     'time_ms': [2.5, 0., 17.5, 0, 0, 0, 5, 0, 1, 0, 39, 0, 5, 1, 30, 0]},\n",
"    columns=['Last termination', 'time_type', 'time_ms'])\n",
"\n",
"fig = plt.figure(figsize=(4,3))\n",
"graph_2(d2011, \"2011\", 100)\n",
"plt.savefig('../report/figures/machine_time_waste/cluster_2011.pgf', bbox_inches='tight')\n",
"plt.close(fig)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5d112e3c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Last termination time_type time_ms\n",
"0 EVICT Queue 7.373993e-05\n",
"1 EVICT Resubmission 8.449953e-07\n",
"2 EVICT Running 9.249078e-03\n",
"3 EVICT Unknown 2.484572e-03\n",
"4 FAIL Queue 5.926861e-05\n",
"5 FAIL Resubmission 2.058252e-14\n",
"6 FAIL Running 1.785410e-03\n",
"7 FAIL Unknown 6.131290e-04\n",
"8 FINISH Queue 2.880144e-03\n",
"9 FINISH Resubmission 3.170097e-09\n",
"10 FINISH Running 2.035703e-02\n",
"11 FINISH Unknown 1.867017e-02\n",
"12 KILL Queue 2.945024e-02\n",
"13 KILL Resubmission 4.253091e-05\n",
"14 KILL Running 1.349259e-01\n",
"15 KILL Unknown 7.794080e-01\n"
]
}
],
"source": [
"# Fraction of the grand total per (termination, phase) over all clusters.\n",
"# The result goes into a new frame: `dft` stays in absolute units and re-running this cell\n",
"# does not divide the values a second time.\n",
"dft_share = dft.assign(time_ms=dft[\"time_ms\"] / tts)\n",
"print(dft_share)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45819798",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ebdc37c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "81f69410",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}