328 lines
11 KiB
Text
328 lines
11 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Statuses total time\n",
|
|
"Sums the time that instances spend in each of the states in the diagram saved as\n",
|
|
"\"statuses.drawio\". Unknown times are summed as \"unknown\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import json\n",
|
|
"import sys\n",
|
|
"import pandas\n",
|
|
"import seaborn as sns\n",
|
|
"import matplotlib as mpl\n",
|
|
"mpl.use(\"pgf\")\n",
|
|
"mpl.rcParams.update({\n",
|
|
" \"pgf.texsystem\": \"pdflatex\",\n",
|
|
" 'font.family': 'serif',\n",
|
|
" 'text.usetex': True,\n",
|
|
" 'pgf.rcfonts': False,\n",
|
|
"})\n",
|
|
"import matplotlib.pyplot as plt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# QUEUE = set([\"0-2\", \"1-2\", \"assumptions:\", \"1-1\", \"1-0\"])\n",
|
|
"# RESUB = set([\"4-1\", \"4-0\", \"5-1\", \"6-1\", \"7-1\", \"8-1\", \"assumptions:\", \"5-0\", \"6-0\", \"7-0\", \\\n",
|
|
"# \"8-0\"])\n",
|
|
"# READY = set([\"0-3\", \"2-3\", \"0-9\", \"2-9\", \"9-3\", \"2-7\", \"2-8\", \"9-7\", \"9-8\", \\\n",
|
|
"# \"9-9\", \"0-7\", \"0-8\", \"assumptions:\", \"2-0\", \"2-4\", \"9-4\", \"9-1\"])\n",
|
|
"# RUN = set([\"3-1\", \"3-10\", \"3-4\", \"3-5\", \"3-6\", \"3-7\", \"3-8\", \"10-5\", \"10-6\", \\\n",
|
|
"# \"10-7\", \"10-8\", \"10-4\", \"10-10\", \"10-1\", \"assumptions:\", \"3-0\", \"10-0\", \"3-3\"])\n",
|
|
"\n",
|
|
"# Each key is a string of the form \"<a>-<b>\"; the set a key belongs to decides which\n",
"# time bucket create_df() adds its value to, and keys found in no set count as unknown.\n",
"# NOTE(review): presumably <a>-<b> is a transition between two states of\n",
"# statuses.drawio - confirm against the diagram.\n",
"QUEUE = {\"0-2\", \"1-2\"}\n",
"ENDED = {\"5-1\", \"6-1\", \"7-1\", \"8-1\"}\n",
"READY = {\"0-3\", \"0-9\", \"2-3\", \"2-9\", \"9-3\", \"9-9\"}\n",
"RUN = {\n",
"    \"3-1\", \"3-4\", \"3-5\", \"3-6\", \"3-7\", \"3-8\", \"3-10\",\n",
"    \"10-1\", \"10-4\", \"10-5\", \"10-6\", \"10-7\", \"10-8\", \"10-10\",\n",
"}\n",
"EVICT = {\"4-1\", \"4-0\"}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Root of the thesis checkout; create_df() reads its input from DIR + \"/machine_time_waste/\".\n",
"# The BACHELOR_THESIS_DIR environment variable, when set, overrides the original author's\n",
"# machine-specific path so the notebook can run elsewhere without editing this cell.\n",
"import os\n",
"\n",
"DIR = os.environ.get(\"BACHELOR_THESIS_DIR\", \"/Users/maggicl/git/bachelorThesis\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def to_name(et):\n",
"    \"\"\"Translate a numeric termination code into its label.\n",
"\n",
"    4 -> 'EVICT', 5 -> 'FAIL', 6 -> 'FINISH', 7 -> 'KILL', 8 -> 'LOST'.\n",
"    Every other value (None included) gives 'NO_TERM'.\n",
"    \"\"\"\n",
"    labelled_codes = (\n",
"        (4, 'EVICT'),\n",
"        (5, 'FAIL'),\n",
"        (6, 'FINISH'),\n",
"        (7, 'KILL'),\n",
"        (8, 'LOST'),\n",
"    )\n",
"    # Compare with == (not a dict lookup) so unhashable inputs still fall through to NO_TERM\n",
"    for code, label in labelled_codes:\n",
"        if et == code:\n",
"            return label\n",
"    return 'NO_TERM'\n",
|
|
"\n",
|
|
"def create_df(cluster):\n",
"    \"\"\"Load one cluster's state-change file and tabulate its times per phase.\n",
"\n",
"    Reads DIR + \"/machine_time_waste/\" + cluster + \"_state_changes.json\" and walks\n",
"    the pairs stored under its \"data\" key. Each pair holds a termination code and a\n",
"    mapping from key to time. A None code is treated as 0, and only pairs whose code\n",
"    is 4, 5, 6 or 7 are kept.\n",
"\n",
"    Returns a tuple (df, totals). df has the columns 'Last termination', 'time_type'\n",
"    and 'time_ms', with one 'Queue', 'Resubmission', 'Running' and 'Unknown' row per\n",
"    kept pair. totals is the sum of every kept time. All times are divided by 1000\n",
"    before being stored or summed.\n",
"    NOTE(review): the 'time_ms' column name suggests the raw values are microseconds - confirm.\n",
"    \"\"\"\n",
"    filename = DIR + \"/machine_time_waste/\" + cluster + \"_state_changes.json\"\n",
"    with open(filename, 'r') as f:\n",
"        obj = json.load(f)\n",
"\n",
"    terminations, phases, times = [], [], []\n",
"    totals = 0\n",
"\n",
"    for pair in obj[\"data\"]:\n",
"        last_term = pair[0] if pair[0] is not None else 0\n",
"\n",
"        # Only codes 4-7 (EVICT, FAIL, FINISH, KILL in to_name) are reported\n",
"        if last_term not in (4, 5, 6, 7):\n",
"            continue\n",
"\n",
"        queued = ended = ready = running = evicted = unknown = 0\n",
"        for key, spent in pair[1].items():\n",
"            if key in QUEUE:\n",
"                queued += spent\n",
"            elif key in ENDED:\n",
"                ended += spent\n",
"            elif key in READY:\n",
"                ready += spent\n",
"            elif key in RUN:\n",
"                running += spent\n",
"            elif key in EVICT:\n",
"                evicted += spent\n",
"            else:\n",
"                unknown += spent\n",
"\n",
"        label = to_name(last_term)\n",
"        per_phase = (\n",
"            ('Queue', queued + ready),\n",
"            ('Resubmission', ended + evicted),\n",
"            ('Running', running),\n",
"            ('Unknown', unknown),\n",
"        )\n",
"        for phase, amount in per_phase:\n",
"            terminations.append(label)\n",
"            phases.append(phase)\n",
"            times.append(amount / 1000)\n",
"\n",
"        totals += (queued + ended + ready + running + evicted + unknown) / 1000\n",
"\n",
"    data = {'Last termination': terminations, 'time_type': phases, 'time_ms': times}\n",
"    return (pandas.DataFrame(data, columns=['Last termination', 'time_type', 'time_ms']), totals)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Graph 1: Absolute total time spent per status per \"last termination\" type"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def graph_1(df, cluster):\n",
"    \"\"\"Draw the absolute time per time type, dodged by \"last termination\" type.\n",
"\n",
"    df needs the columns 'Last termination', 'time_type' and 'time_ms'; the\n",
"    'time_ms' values are used as bar weights. The y axis is logarithmic and no\n",
"    legend is drawn. cluster is only used in the title. Returns None.\n",
"    \"\"\"\n",
"    ax = sns.histplot(\n",
"        df,\n",
"        x=\"Last termination\",\n",
"        weights=\"time_ms\",\n",
"        hue=\"time_type\",\n",
"        multiple=\"dodge\",\n",
"        discrete=True,\n",
"        shrink=.5,\n",
"        legend=False,\n",
"    )\n",
"    ax.set_yscale(\"log\")\n",
"    ax.set_ylabel(\"Total (milliseconds)\")\n",
"    title = \"Cluster \" + cluster + ': Absolute total time spent per status per \"last termination\" type'\n",
"    ax.set_title(title)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Graph 2: Relative total time spent per status per \"last termination\" type\n",
|
|
"\n",
|
|
"Values are proportions of the overall total time, shown for each \"last termination\" type"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def graph_2(df, cluster, totals):\n",
"    \"\"\"Draw the stacked plot of time shares per \"last termination\" type.\n",
"\n",
"    df needs the columns 'Last termination', 'time_type' and 'time_ms'. It is\n",
"    copied first, so the caller's frame is left untouched. The 'time_ms' values of\n",
"    the EVICT, FAIL, FINISH and KILL rows are divided by totals before plotting,\n",
"    and 'time_type' is shown in the legend as 'Execution phase'.\n",
"\n",
"    cluster selects the title: \"all\" gives \"All clusters\", \"2011\" gives\n",
"    \"2011 traces\", anything else gives \"Cluster \" + cluster.upper().\n",
"    Returns None.\n",
"    \"\"\"\n",
"    df = df.copy()\n",
"    for i in [4,5,6,7]:\n",
"        rows = df[\"Last termination\"] == to_name(i)\n",
"        df.loc[rows, \"time_ms\"] = df.loc[rows, \"time_ms\"] / totals\n",
"\n",
"    # Plain reassignment instead of inplace=True; df is already a private copy\n",
"    df = df.rename(columns={'time_type': 'Execution phase'})\n",
"\n",
"    h = sns.histplot(df, x=\"Last termination\",\n",
"                     weights=\"time_ms\", shrink=.5, common_bins=True,\n",
"                     hue=\"Execution phase\", multiple=\"stack\", discrete=True, legend=True)\n",
"\n",
"    # The previous chained conditional only tested for \"all\", so its \"2011 traces\"\n",
"    # branch was unreachable and cluster=\"2011\" was titled \"Cluster 2011\".\n",
"    if cluster == \"all\":\n",
"        title = \"All clusters\"\n",
"    elif cluster == \"2011\":\n",
"        title = \"2011 traces\"\n",
"    else:\n",
"        title = \"Cluster \" + cluster.upper()\n",
"    h.set_title(title)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" Last termination time_type time_ms\n",
|
|
"0 EVICT Queue 1.049774e+12\n",
|
|
"1 EVICT Resubmission 5.530617e+08\n",
|
|
"2 EVICT Running 3.218063e+13\n",
|
|
"3 EVICT Unknown 3.383291e+12\n",
|
|
"4 FAIL Queue 9.483261e+11\n",
|
|
"5 FAIL Resubmission 7.150500e+01\n",
|
|
"6 FAIL Running 7.265195e+12\n",
|
|
"7 FAIL Unknown 2.799674e+12\n",
|
|
"8 FINISH Queue 3.317009e+13\n",
|
|
"9 FINISH Resubmission 1.828825e+07\n",
|
|
"10 FINISH Running 3.788436e+13\n",
|
|
"11 FINISH Unknown 2.482661e+13\n",
|
|
"12 KILL Queue 7.482888e+13\n",
|
|
"13 KILL Resubmission 1.211419e+11\n",
|
|
"14 KILL Running 6.311166e+14\n",
|
|
"15 KILL Unknown 1.207792e+15\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Clusters to process; create_df() must find a <cluster>_state_changes.json for each one.\n",
"# An explicit list replaces iterating over the characters of a string.\n",
"CLUSTERS = [\"a\"]\n",
"\n",
"dft = None  # per-row running sum of time_ms across all clusters\n",
"tts = None  # running sum of the per-cluster totals\n",
"\n",
"for cluster in CLUSTERS:\n",
"    df, totals = create_df(cluster)\n",
"\n",
"    print(df)\n",
"\n",
"    #plt.figure(figsize=(10,8))\n",
"    #graph_1(df, cluster)\n",
"    plt.figure(figsize=(4,3))\n",
"    graph_2(df, cluster, totals)\n",
"    plt.savefig('../report/figures/machine_time_waste/cluster_%s.pgf' % cluster)\n",
"\n",
"    if dft is None:\n",
"        # Copy so that accumulating later clusters never mutates this cluster's own frame\n",
"        dft = df.copy()\n",
"        tts = totals\n",
"    else:\n",
"        # NOTE(review): rows are matched by index, so this assumes every cluster\n",
"        # produces its rows in the same order - confirm before adding clusters.\n",
"        dft.loc[:, \"time_ms\"] = dft[\"time_ms\"].add(df[\"time_ms\"], fill_value=0)\n",
"        tts += totals\n",
"\n",
"#plt.figure(figsize=(10,8))\n",
"#graph_1(dft, \"all\")\n",
"plt.figure(figsize=(4,3))\n",
"graph_2(dft, \"all\", tts)\n",
"plt.savefig('../report/figures/machine_time_waste/cluster_all.pgf')\n",
"\n",
"# Hard-coded figures for the 2011 traces, normalised by 100 in the graph_2 call below\n",
"# (presumably percentages - TODO confirm their source).\n",
"d2011 = {'Last termination': [\"EVICT\"] * 4 + [\"FAIL\"] * 4 + [\"FINISH\"] * 4 + [\"KILL\"] * 4,\n",
"         'time_type': [\"Queue\", \"Resubmission\", \"Running\", \"Unknown\"] * 4,\n",
"         'time_ms': [2.5, 0., 17.5, 0, 0, 0, 5, 0, 1, 0, 39, 0, 5, 1, 30, 0]}\n",
"\n",
"d2011 = pandas.DataFrame(d2011, columns=['Last termination', 'time_type', 'time_ms'])\n",
"plt.figure(figsize=(4,3))\n",
"graph_2(d2011, \"2011\", 100)\n",
"plt.savefig('../report/figures/machine_time_waste/cluster_2011.pgf')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" Last termination time_type time_ms\n",
|
|
"0 EVICT Queue 5.102510e-04\n",
|
|
"1 EVICT Resubmission 2.688201e-07\n",
|
|
"2 EVICT Running 1.564165e-02\n",
|
|
"3 EVICT Unknown 1.644476e-03\n",
|
|
"4 FAIL Queue 4.609415e-04\n",
|
|
"5 FAIL Resubmission 3.475558e-14\n",
|
|
"6 FAIL Running 3.531306e-03\n",
|
|
"7 FAIL Unknown 1.360804e-03\n",
|
|
"8 FINISH Queue 1.612259e-02\n",
|
|
"9 FINISH Resubmission 8.889150e-09\n",
|
|
"10 FINISH Running 1.841400e-02\n",
|
|
"11 FINISH Unknown 1.206717e-02\n",
|
|
"12 KILL Queue 3.637118e-02\n",
|
|
"13 KILL Resubmission 5.888201e-05\n",
|
|
"14 KILL Running 3.067593e-01\n",
|
|
"15 KILL Unknown 5.870572e-01\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Build the normalised frame as a new object instead of dividing dft in place, so that\n",
"# running this cell more than once keeps printing the same proportions.\n",
"dft_share = dft.assign(time_ms=dft[\"time_ms\"] / tts)\n",
"print(dft_share)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|