report work
This commit is contained in:
parent
0bd6c475ec
commit
e200cea3ab
5 changed files with 390 additions and 308 deletions
|
@ -1 +0,0 @@
|
|||
,maggicl,Apple2gs.local,24.05.2021 14:04,file:///Users/maggicl/Library/Application%20Support/LibreOffice/4;
|
|
@ -11,7 +11,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -27,12 +27,13 @@
|
|||
" 'text.usetex': True,\n",
|
||||
" 'pgf.rcfonts': False,\n",
|
||||
"})\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"pandas.options.display.float_format = '{:,.3f}'.format"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -53,16 +54,16 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DIR = \"/home/claudio/hdd/git/bachelorThesis\""
|
||||
"DIR = \"/Users/maggicl/git/bachelorThesis\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -138,7 +139,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -163,7 +164,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -174,6 +175,9 @@
|
|||
"\n",
|
||||
" df.rename(columns = {'time_type': 'Execution phase'}, inplace = True)\n",
|
||||
" \n",
|
||||
" print(\"Cluster \"+cluster)\n",
|
||||
" print(df)\n",
|
||||
" \n",
|
||||
" h = sns.histplot(df, x=\"Last termination\", \n",
|
||||
" weights=\"time_ms\", shrink=.5, common_bins=True,\n",
|
||||
" hue=\"Execution phase\", multiple=\"stack\", discrete=True, legend=True)\n",
|
||||
|
@ -191,149 +195,193 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 1.049774e+12\n",
|
||||
"1 EVICT Resubmission 5.530617e+08\n",
|
||||
"2 EVICT Running 3.218063e+13\n",
|
||||
"3 EVICT Unknown 3.383291e+12\n",
|
||||
"4 FAIL Queue 9.483261e+11\n",
|
||||
"5 FAIL Resubmission 7.150500e+01\n",
|
||||
"6 FAIL Running 7.265195e+12\n",
|
||||
"7 FAIL Unknown 2.799674e+12\n",
|
||||
"8 FINISH Queue 3.317009e+13\n",
|
||||
"9 FINISH Resubmission 1.828825e+07\n",
|
||||
"10 FINISH Running 3.788436e+13\n",
|
||||
"11 FINISH Unknown 2.482661e+13\n",
|
||||
"12 KILL Queue 7.482888e+13\n",
|
||||
"13 KILL Resubmission 1.211419e+11\n",
|
||||
"14 KILL Running 6.311166e+14\n",
|
||||
"15 KILL Unknown 1.207792e+15\n",
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 2.991028e+11\n",
|
||||
"1 EVICT Resubmission 1.360657e+09\n",
|
||||
"2 EVICT Running 2.871365e+13\n",
|
||||
"3 EVICT Unknown 1.428912e+13\n",
|
||||
"4 FAIL Queue 9.376134e+10\n",
|
||||
"5 FAIL Resubmission 1.225520e+02\n",
|
||||
"6 FAIL Running 8.338530e+12\n",
|
||||
"7 FAIL Unknown 1.989378e+12\n",
|
||||
"8 FINISH Queue 6.817208e+12\n",
|
||||
"9 FINISH Resubmission 1.493729e+03\n",
|
||||
"10 FINISH Running 8.069421e+13\n",
|
||||
"11 FINISH Unknown 1.006353e+14\n",
|
||||
"12 KILL Queue 5.397953e+13\n",
|
||||
"13 KILL Resubmission 1.842002e+10\n",
|
||||
"14 KILL Running 5.716892e+14\n",
|
||||
"15 KILL Unknown 2.088855e+15\n",
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 3.158380e+11\n",
|
||||
"1 EVICT Resubmission 2.355575e+09\n",
|
||||
"2 EVICT Running 4.229815e+13\n",
|
||||
"3 EVICT Unknown 6.785277e+12\n",
|
||||
"4 FAIL Queue 2.352869e+11\n",
|
||||
"5 FAIL Resubmission 4.684500e+01\n",
|
||||
"6 FAIL Running 9.316941e+12\n",
|
||||
"7 FAIL Unknown 4.873943e+12\n",
|
||||
"8 FINISH Queue 1.172189e+13\n",
|
||||
"9 FINISH Resubmission 3.623451e+03\n",
|
||||
"10 FINISH Running 1.154498e+14\n",
|
||||
"11 FINISH Unknown 4.934279e+13\n",
|
||||
"12 KILL Queue 7.171264e+13\n",
|
||||
"13 KILL Resubmission 2.108520e+11\n",
|
||||
"14 KILL Running 6.180005e+14\n",
|
||||
"15 KILL Unknown 2.088457e+15\n",
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 1.415993e+11\n",
|
||||
"1 EVICT Resubmission 2.835890e+08\n",
|
||||
"2 EVICT Running 4.303187e+13\n",
|
||||
"3 EVICT Unknown 7.410999e+12\n",
|
||||
"4 FAIL Queue 2.231462e+10\n",
|
||||
"5 FAIL Resubmission 1.073960e+02\n",
|
||||
"6 FAIL Running 1.186956e+13\n",
|
||||
"7 FAIL Unknown 2.829927e+12\n",
|
||||
"8 FINISH Queue 4.455665e+12\n",
|
||||
"9 FINISH Resubmission 1.577302e+03\n",
|
||||
"10 FINISH Running 6.516562e+13\n",
|
||||
"11 FINISH Unknown 7.106965e+13\n",
|
||||
"12 KILL Queue 7.435926e+13\n",
|
||||
"13 KILL Resubmission 5.556059e+11\n",
|
||||
"14 KILL Running 4.702722e+14\n",
|
||||
"15 KILL Unknown 2.040366e+15\n",
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 1.722618e+10\n",
|
||||
"1 EVICT Resubmission 1.788932e+09\n",
|
||||
"2 EVICT Running 1.710804e+13\n",
|
||||
"3 EVICT Unknown 7.078678e+12\n",
|
||||
"4 FAIL Queue 2.895755e+09\n",
|
||||
"5 FAIL Resubmission 5.304400e+01\n",
|
||||
"6 FAIL Running 2.281806e+12\n",
|
||||
"7 FAIL Unknown 3.984907e+11\n",
|
||||
"8 FINISH Queue 7.454410e+11\n",
|
||||
"9 FINISH Resubmission 6.310360e+02\n",
|
||||
"10 FINISH Running 4.284518e+13\n",
|
||||
"11 FINISH Unknown 3.672368e+13\n",
|
||||
"12 KILL Queue 1.398332e+14\n",
|
||||
"13 KILL Resubmission 4.825723e+10\n",
|
||||
"14 KILL Running 3.049664e+14\n",
|
||||
"15 KILL Unknown 3.072445e+15\n",
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 3.140594e+10\n",
|
||||
"1 EVICT Resubmission 1.504263e+10\n",
|
||||
"2 EVICT Running 5.070239e+13\n",
|
||||
"3 EVICT Unknown 1.602834e+13\n",
|
||||
"4 FAIL Queue 5.523972e+09\n",
|
||||
"5 FAIL Resubmission 2.352700e+01\n",
|
||||
"6 FAIL Running 3.889624e+12\n",
|
||||
"7 FAIL Unknown 1.833895e+12\n",
|
||||
"8 FINISH Queue 1.098116e+13\n",
|
||||
"9 FINISH Resubmission 6.319590e+02\n",
|
||||
"10 FINISH Running 9.761364e+13\n",
|
||||
"11 FINISH Unknown 9.603417e+13\n",
|
||||
"12 KILL Queue 1.129539e+14\n",
|
||||
"13 KILL Resubmission 1.356476e+11\n",
|
||||
"14 KILL Running 4.505937e+14\n",
|
||||
"15 KILL Unknown 2.669451e+15\n",
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 9.528645e+10\n",
|
||||
"1 EVICT Resubmission 1.493116e+09\n",
|
||||
"2 EVICT Running 8.513084e+12\n",
|
||||
"3 EVICT Unknown 2.778074e+12\n",
|
||||
"4 FAIL Queue 2.887122e+11\n",
|
||||
"5 FAIL Resubmission 1.757300e+01\n",
|
||||
"6 FAIL Running 1.867799e+12\n",
|
||||
"7 FAIL Unknown 6.622832e+11\n",
|
||||
"8 FINISH Queue 8.337090e+11\n",
|
||||
"9 FINISH Resubmission 6.753141e+07\n",
|
||||
"10 FINISH Running 3.514254e+13\n",
|
||||
"11 FINISH Unknown 6.704536e+13\n",
|
||||
"12 KILL Queue 1.152843e+14\n",
|
||||
"13 KILL Resubmission 5.814544e+10\n",
|
||||
"14 KILL Running 2.225128e+14\n",
|
||||
"15 KILL Unknown 3.894626e+15\n",
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 4.621613e+10\n",
|
||||
"1 EVICT Resubmission 4.511340e+02\n",
|
||||
"2 EVICT Running 2.786346e+13\n",
|
||||
"3 EVICT Unknown 9.513981e+12\n",
|
||||
"4 FAIL Queue 7.828423e+09\n",
|
||||
"5 FAIL Resubmission 1.148130e+02\n",
|
||||
"6 FAIL Running 3.509052e+12\n",
|
||||
"7 FAIL Unknown 1.212378e+12\n",
|
||||
"8 FINISH Queue 9.252380e+12\n",
|
||||
"9 FINISH Resubmission 1.675400e+02\n",
|
||||
"10 FINISH Running 7.635478e+13\n",
|
||||
"11 FINISH Unknown 5.980213e+13\n",
|
||||
"12 KILL Queue 1.543895e+14\n",
|
||||
"13 KILL Resubmission 3.419664e+09\n",
|
||||
"14 KILL Running 3.838571e+14\n",
|
||||
"15 KILL Unknown 4.039843e+15\n"
|
||||
"Cluster a\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.051\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 1.564\n",
|
||||
"3 EVICT Unknown 0.164\n",
|
||||
"4 FAIL Queue 0.046\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.353\n",
|
||||
"7 FAIL Unknown 0.136\n",
|
||||
"8 FINISH Queue 1.612\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 1.841\n",
|
||||
"11 FINISH Unknown 1.207\n",
|
||||
"12 KILL Queue 3.637\n",
|
||||
"13 KILL Resubmission 0.006\n",
|
||||
"14 KILL Running 30.676\n",
|
||||
"15 KILL Unknown 58.706\n",
|
||||
"Cluster b\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.010\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 0.971\n",
|
||||
"3 EVICT Unknown 0.483\n",
|
||||
"4 FAIL Queue 0.003\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.282\n",
|
||||
"7 FAIL Unknown 0.067\n",
|
||||
"8 FINISH Queue 0.231\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 2.729\n",
|
||||
"11 FINISH Unknown 3.404\n",
|
||||
"12 KILL Queue 1.826\n",
|
||||
"13 KILL Resubmission 0.001\n",
|
||||
"14 KILL Running 19.337\n",
|
||||
"15 KILL Unknown 70.655\n",
|
||||
"Cluster c\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.010\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 1.401\n",
|
||||
"3 EVICT Unknown 0.225\n",
|
||||
"4 FAIL Queue 0.008\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.309\n",
|
||||
"7 FAIL Unknown 0.161\n",
|
||||
"8 FINISH Queue 0.388\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 3.824\n",
|
||||
"11 FINISH Unknown 1.635\n",
|
||||
"12 KILL Queue 2.376\n",
|
||||
"13 KILL Resubmission 0.007\n",
|
||||
"14 KILL Running 20.472\n",
|
||||
"15 KILL Unknown 69.183\n",
|
||||
"Cluster d\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.005\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 1.542\n",
|
||||
"3 EVICT Unknown 0.265\n",
|
||||
"4 FAIL Queue 0.001\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.425\n",
|
||||
"7 FAIL Unknown 0.101\n",
|
||||
"8 FINISH Queue 0.160\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 2.334\n",
|
||||
"11 FINISH Unknown 2.546\n",
|
||||
"12 KILL Queue 2.664\n",
|
||||
"13 KILL Resubmission 0.020\n",
|
||||
"14 KILL Running 16.846\n",
|
||||
"15 KILL Unknown 73.091\n",
|
||||
"Cluster e\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.000\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 0.472\n",
|
||||
"3 EVICT Unknown 0.195\n",
|
||||
"4 FAIL Queue 0.000\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.063\n",
|
||||
"7 FAIL Unknown 0.011\n",
|
||||
"8 FINISH Queue 0.021\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 1.182\n",
|
||||
"11 FINISH Unknown 1.013\n",
|
||||
"12 KILL Queue 3.858\n",
|
||||
"13 KILL Resubmission 0.001\n",
|
||||
"14 KILL Running 8.414\n",
|
||||
"15 KILL Unknown 84.769\n",
|
||||
"Cluster f\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.001\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 1.444\n",
|
||||
"3 EVICT Unknown 0.457\n",
|
||||
"4 FAIL Queue 0.000\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.111\n",
|
||||
"7 FAIL Unknown 0.052\n",
|
||||
"8 FINISH Queue 0.313\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 2.781\n",
|
||||
"11 FINISH Unknown 2.736\n",
|
||||
"12 KILL Queue 3.218\n",
|
||||
"13 KILL Resubmission 0.004\n",
|
||||
"14 KILL Running 12.836\n",
|
||||
"15 KILL Unknown 76.047\n",
|
||||
"Cluster g\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.002\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 0.196\n",
|
||||
"3 EVICT Unknown 0.064\n",
|
||||
"4 FAIL Queue 0.007\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.043\n",
|
||||
"7 FAIL Unknown 0.015\n",
|
||||
"8 FINISH Queue 0.019\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 0.808\n",
|
||||
"11 FINISH Unknown 1.541\n",
|
||||
"12 KILL Queue 2.650\n",
|
||||
"13 KILL Resubmission 0.001\n",
|
||||
"14 KILL Running 5.116\n",
|
||||
"15 KILL Unknown 89.538\n",
|
||||
"Cluster h\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.001\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 0.585\n",
|
||||
"3 EVICT Unknown 0.200\n",
|
||||
"4 FAIL Queue 0.000\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.074\n",
|
||||
"7 FAIL Unknown 0.025\n",
|
||||
"8 FINISH Queue 0.194\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 1.602\n",
|
||||
"11 FINISH Unknown 1.255\n",
|
||||
"12 KILL Queue 3.240\n",
|
||||
"13 KILL Resubmission 0.000\n",
|
||||
"14 KILL Running 8.055\n",
|
||||
"15 KILL Unknown 84.770\n",
|
||||
"Cluster all\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 0.007\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 0.925\n",
|
||||
"3 EVICT Unknown 0.248\n",
|
||||
"4 FAIL Queue 0.006\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.179\n",
|
||||
"7 FAIL Unknown 0.061\n",
|
||||
"8 FINISH Queue 0.288\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 2.036\n",
|
||||
"11 FINISH Unknown 1.867\n",
|
||||
"12 KILL Queue 2.945\n",
|
||||
"13 KILL Resubmission 0.004\n",
|
||||
"14 KILL Running 13.493\n",
|
||||
"15 KILL Unknown 77.941\n",
|
||||
"Cluster 2011\n",
|
||||
" Last termination Execution phase time_ms\n",
|
||||
"0 EVICT Queue 2.500\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 17.500\n",
|
||||
"3 EVICT Unknown 0.000\n",
|
||||
"4 FAIL Queue 0.000\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 5.000\n",
|
||||
"7 FAIL Unknown 0.000\n",
|
||||
"8 FINISH Queue 1.000\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 39.000\n",
|
||||
"11 FINISH Unknown 0.000\n",
|
||||
"12 KILL Queue 5.000\n",
|
||||
"13 KILL Resubmission 1.000\n",
|
||||
"14 KILL Running 30.000\n",
|
||||
"15 KILL Unknown 0.000\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -343,8 +391,6 @@
|
|||
"\n",
|
||||
"for cluster in \"abcdefgh\":\n",
|
||||
" df, totals = create_df(cluster)\n",
|
||||
" \n",
|
||||
" print(df)\n",
|
||||
"\n",
|
||||
" #plt.figure(figsize=(10,8))\n",
|
||||
" #graph_1(df, cluster)\n",
|
||||
|
@ -377,7 +423,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -385,22 +431,22 @@
|
|||
"output_type": "stream",
|
||||
"text": [
|
||||
" Last termination time_type time_ms\n",
|
||||
"0 EVICT Queue 7.373993e-05\n",
|
||||
"1 EVICT Resubmission 8.449953e-07\n",
|
||||
"2 EVICT Running 9.249078e-03\n",
|
||||
"3 EVICT Unknown 2.484572e-03\n",
|
||||
"4 FAIL Queue 5.926861e-05\n",
|
||||
"5 FAIL Resubmission 2.058252e-14\n",
|
||||
"6 FAIL Running 1.785410e-03\n",
|
||||
"7 FAIL Unknown 6.131290e-04\n",
|
||||
"8 FINISH Queue 2.880144e-03\n",
|
||||
"9 FINISH Resubmission 3.170097e-09\n",
|
||||
"10 FINISH Running 2.035703e-02\n",
|
||||
"11 FINISH Unknown 1.867017e-02\n",
|
||||
"12 KILL Queue 2.945024e-02\n",
|
||||
"13 KILL Resubmission 4.253091e-05\n",
|
||||
"14 KILL Running 1.349259e-01\n",
|
||||
"15 KILL Unknown 7.794080e-01\n"
|
||||
"0 EVICT Queue 0.000\n",
|
||||
"1 EVICT Resubmission 0.000\n",
|
||||
"2 EVICT Running 0.009\n",
|
||||
"3 EVICT Unknown 0.002\n",
|
||||
"4 FAIL Queue 0.000\n",
|
||||
"5 FAIL Resubmission 0.000\n",
|
||||
"6 FAIL Running 0.002\n",
|
||||
"7 FAIL Unknown 0.001\n",
|
||||
"8 FINISH Queue 0.003\n",
|
||||
"9 FINISH Resubmission 0.000\n",
|
||||
"10 FINISH Running 0.020\n",
|
||||
"11 FINISH Unknown 0.019\n",
|
||||
"12 KILL Queue 0.029\n",
|
||||
"13 KILL Resubmission 0.000\n",
|
||||
"14 KILL Running 0.135\n",
|
||||
"15 KILL Unknown 0.779\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
Binary file not shown.
|
@ -82,6 +82,15 @@ and stored in JSONL format)\cite{google-drive-marso}, requiring a considerable
|
|||
amount of computational power to analyze them and the implementation of special
|
||||
data engineering techniques for analysis of the data.
|
||||
|
||||
\input{figures/machine_configs}
|
||||
|
||||
An overview of the machine configurations in the cluster analyzed with the 2011
|
||||
traces and in the 8 clusters composing the 2019 traces can be found in
|
||||
figure~\ref{fig:machineconfigs}. Additionally, in
|
||||
figure~\ref{fig:machineconfigs-csts}, the same machine configuration data is
|
||||
provided for the 2019 traces as a cluster-by-cluster distribution of the
|
||||
machines.
|
||||
|
||||
This project aims to repeat the analysis performed in 2015 to highlight
|
||||
similarities and differences in workload this decade brought, and to expand the
|
||||
old analysis to understand even better the causes of failures and how to prevent
|
||||
|
@ -441,102 +450,133 @@ deltas. Finally, the mean of the computed slowdown values is computed resulting
|
|||
in the clear and concise tables found in figure~\ref{fig:taskslowdown}.
|
||||
|
||||
|
||||
\hypertarget{ad-hoc-presentation-of-some-analysis-scripts}{%
|
||||
\subsection{Ad-Hoc presentation of some analysis
|
||||
scripts}\label{ad-hoc-presentation-of-some-analysis-scripts}}
|
||||
|
||||
\textbf{TBD} (with diagrams)
|
||||
|
||||
\hypertarget{analysis-and-observations}{%
|
||||
\section{Analysis and observations}\label{analysis-and-observations}}
|
||||
|
||||
\hypertarget{overview-of-machine-configurations-in-each-cluster}{%
|
||||
\subsection{Overview of machine configurations in each
|
||||
cluster}\label{overview-of-machine-configurations-in-each-cluster}}
|
||||
|
||||
\input{figures/machine_configs}
|
||||
|
||||
Refer to figure \ref{fig:machineconfigs}.
|
||||
|
||||
\textbf{Observations}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
machine configurations are definitely more varied than the ones in the
|
||||
2011 traces
|
||||
\item
|
||||
some clusters have more machine variability
|
||||
\end{itemize}
|
||||
|
||||
\hypertarget{analysis-of-execution-time-per-each-execution-phase}{%
|
||||
\subsection{Analysis of execution time per each execution
|
||||
phase}\label{analysis-of-execution-time-per-each-execution-phase}}
|
||||
|
||||
\section{Analysis: Performance Input of Unsuccessful Executions}
|
||||
\input{figures/machine_time_waste}
|
||||
|
||||
Refer to figures \ref{fig:machinetimewaste-abs} and
|
||||
\ref{fig:machinetimewaste-rel}.
|
||||
Our first investigation focuses on replicating the methodologies used in the
|
||||
2015 DSN Ros\'a et al.\ paper\cite{vino-paper} regarding usage of machine time
|
||||
and resources.
|
||||
|
||||
\textbf{Observations}:
|
||||
In this section we perform several analyses focusing on how machine time and
|
||||
resources are wasted, by means of a temporal vs. spatial resource analysis from
|
||||
the perspective of single tasks as well as jobs. We then compare the results
|
||||
from the 2019 traces to the ones that were obtained in 2015 to understand the
|
||||
workload evolution inside Borg between 2011 and 2019.
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Across all cluster almost 50\% of time is spent in ``unknown''
|
||||
transitions, i.e. there are some time slices that are related to a
|
||||
state transition that Google says are not ``typical'' transitions.
|
||||
This is mostly due to the trace log being intermittent when recording
|
||||
all state transitions.
|
||||
\item
|
||||
80\% of the time spent in KILL and LOST is unknown. This is
|
||||
predictable, since both states indicate that the job execution is not
|
||||
stable (in particular LOST is used when the state logging itself is
|
||||
unstable)
|
||||
\item
|
||||
From the absolute graph we see that the time ``wasted'' on non-finish
|
||||
terminated jobs is very significant
|
||||
\item
|
||||
Execution is the most significant task phase, followed by queuing time
|
||||
and scheduling time (``ready'' state)
|
||||
\item
|
||||
In the absolute graph we see that a significant amount of time is
|
||||
spent to re-schedule evicted jobs (``evicted'' state)
|
||||
\item
|
||||
Cluster A has unusually high queuing times
|
||||
\end{itemize}
|
||||
\subsection{Temporal Impact: Machine Time Waste}
|
||||
|
||||
\hypertarget{task-slowdown}{%
|
||||
\subsection{Task slowdown}\label{task-slowdown}}
|
||||
This analysis explores how machine time is distributed over task events and
|
||||
submissions. By partitioning the collection of all terminating tasks by their
|
||||
termination event, the analysis aims to measure the total time spent by tasks in
|
||||
3 different execution phases:
|
||||
|
||||
\begin{description}
|
||||
\item[resubmission time:] the total of all time deltas between every task
|
||||
termination event and the immediately succeeding task submission event, i.e.
|
||||
the total time spent by tasks waiting to be resubmitted in Borg after a
|
||||
termination;
|
||||
\item[queue time:] the total of all time deltas between every task submission
|
||||
event and the following task scheduling event, i.e. the total time spent by
|
||||
tasks queuing before execution;
|
||||
\item[running time:] the total of all time deltas between every task scheduling
|
||||
event and the following task termination event, i.e. the total time spent by
|
||||
tasks ``executing'' (i.e. performing useful computations) in the clusters.
|
||||
\end{description}
|
||||
|
||||
In the 2019 traces, an additional ``Unknown'' measure is counted. This measure
|
||||
collects all the times in which the event transitions between the registered
|
||||
events do not allow us to safely assume in which execution phase a task may be.
|
||||
Unknown measures are mostly caused by faults and missed event writes in the task
|
||||
event log that was used to generate the traces.
|
||||
|
||||
The analysis results are depicted in figure~\ref{fig:machinetimewaste-rel} as a
|
||||
comparison between the 2011 and 2019 traces, aggregating the data from all
|
||||
clusters. Additionally, in figure~\ref{fig:machinetimewaste-rel-csts}
|
||||
a cluster-by-cluster breakdown is provided for the 2019 traces.
|
||||
|
||||
The striking difference between 2011 and 2019 data is in the machine time
|
||||
distribution per task termination type. In the 2019 traces, 94.38\% of global
|
||||
machine time is spent on tasks that are eventually \texttt{KILL}ed.
|
||||
\texttt{FINISH}, \texttt{EVICT} and \texttt{FAIL} tasks respectively register
|
||||
totals of 4.20\%, 1.18\% and 0.25\% machine time, maintaining an analogous
|
||||
distribution between them to their distribution in the 2011 traces.
|
||||
|
||||
Considering instead the distribution between execution phase times, the
|
||||
comparison shows very similar behaviour between the two traces, having the
|
||||
``Running'' time being dominant (at a total of 16.63\% across task terminations
|
||||
in 2019) over the queue and resubmission phases (with respective totals in 2019
|
||||
of 3.26\% and 0.004\%).
|
||||
|
||||
However, another noteworthy difference between 2011 and 2019 data lies in the new
|
||||
``Unknown'' trace dataset present only in the latter traces, registering a total
|
||||
80.12\% of global machine time across all terminations. This data can be
|
||||
interpreted as a strong indication of the ``poor quality'' of the 2019 traces
|
||||
w.r.t.\ the accuracy of task event logging.
|
||||
|
||||
Considering instead the behaviour of each single cluster in the 2019 traces, no
|
||||
significant difference between them can be observed. The only notable difference
|
||||
lies in the ``Running time''-``Unknown time'' ratio in \texttt{KILL}ed
|
||||
tasks, which is at its highest in cluster A (at 30.68\% by 58.71\% of global
|
||||
machine time) and at its lowest in cluster H (at 8.06\% by 84.77\% of global
|
||||
machine time).
|
||||
|
||||
\subsection{Average Slowdown per Task}
|
||||
\input{figures/task_slowdown}
|
||||
|
||||
Refer to figure \ref{fig:taskslowdown}
|
||||
This analysis aims to measure the figure of ``slowdown'', which is defined as
|
||||
the ratio between the response time (i.e.\ queue time and running time) of the
|
||||
last execution of a given task and the total response time across all
|
||||
executions of said task. This metric is especially useful to analyze the impact
|
||||
of unsuccessful executions on each task's total execution time w.r.t.\ the intrinsic
|
||||
workload (i.e.\ computational time) of tasks.
|
||||
|
||||
\textbf{Observations}:
|
||||
Refer to figure~\ref{fig:taskslowdown} for a comparison between the 2011 and
|
||||
2019 mean task slowdown measures broken down by task priority. Additionally, said
|
||||
means are computed on a cluster-by-cluster basis for 2019 data in
|
||||
figure~\ref{fig:taskslowdown-csts}.
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Priority values are different from 0-11 values in the 2011 traces. A
|
||||
conversion table is provided by Google;
|
||||
\item
|
||||
For some priorities (e.g.~101 for cluster D) the relative number of
|
||||
finishing task is very low and the mean slowdown is very high (315).
|
||||
This behaviour differs from the relatively homogeneous values from the
|
||||
2011 traces.
|
||||
\item
|
||||
Some slowdown values cannot be computed since either some tasks have a
|
||||
0ns execution time or for some priorities no tasks in the traces
|
||||
terminate successfully. More raw data on those exception is in
|
||||
Jupyter.
|
||||
\item
|
||||
In 2015 Ros\'a et al.\cite{vino-paper} measured mean task slowdown per each task
|
||||
priority value, which at the time were $[0,11]$ numeric values. However,
|
||||
in 2019 traces, task priorities are given as a $[0,500]$ numeric value.
|
||||
Therefore, to allow for an easier comparison, mean task slowdown values are
|
||||
computed by task priority tier over the 2019 data. Priority tiers are
|
||||
semantically relevant priority ranges defined in the Tirmazi et al.
|
||||
2020 paper\cite{google-marso-19} that introduced the 2019 traces. Equivalent priority
|
||||
tiers are also provided next to the 2011 priority values in the table covering
|
||||
the 2015 analysis.
|
||||
|
||||
In the given tables, the \textbf{\% finished} column corresponds to the
|
||||
percentage of \texttt{FINISH}ed tasks for that priority or tier. \textbf{Mean
|
||||
response [s] (last execution)} instead shows the mean response time of the last
|
||||
task execution of each task in that priority/tier.
|
||||
\textbf{Mean response [s] (all executions)} provides a very similar figure,
|
||||
though this column shows the mean response time across all executions.
|
||||
\textbf{Mean slowdown} instead provides the mean slowdown value for each task
|
||||
priority/tier.
|
||||
|
||||
Comparing the tables in figure~\ref{fig:taskslowdown} we observe that the
|
||||
maximum mean slowdown measure for 2019 data (i.e.\ 7.84, for the BEB tier) is almost
|
||||
double of the maximum measure in 2011 data (i.e.\ 3.39, for priority $3$
|
||||
corresponding to the BEB tier). The ``Best effort batch'' tier, as the name
|
||||
suggests, is a lower priority tier where failures are more tolerated. Therefore,
|
||||
due to the increased concurrency in the 2019 clusters compared to 2011 and the
|
||||
higher machine time spent for unsuccessful executions (as observed in the
|
||||
previous analysis), an increased slowdown rate for this class is not particularly
|
||||
surprising.
|
||||
|
||||
\textbf{TBD}
|
||||
The \% of finishing jobs is relatively low comparing with the 2011
|
||||
traces.
|
||||
\end{itemize}
|
||||
|
||||
\input{figures/spatial_resource_waste}
|
||||
\input{figures/table_iii} % has table III and table IV in it
|
||||
\input{figures/figure_5}
|
||||
|
||||
\hypertarget{reserved-and-actual-resource-usage-of-tasks}{%
|
||||
\subsection{Reserved and actual resource usage of
|
||||
tasks}\label{reserved-and-actual-resource-usage-of-tasks}}
|
||||
|
||||
\input{figures/spatial_resource_waste}
|
||||
|
||||
Refer to figures \ref{fig:spatialresourcewaste-actual} and
|
||||
\ref{fig:spatialresourcewaste-requested}.
|
||||
|
@ -553,6 +593,63 @@ Refer to figures \ref{fig:spatialresourcewaste-actual} and
|
|||
both CPU and RAM
|
||||
\end{itemize}
|
||||
|
||||
|
||||
Refer to figure \ref{fig:tableIII}.
|
||||
|
||||
\textbf{Observations}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
The mean number of events per task is an order of magnitude higher
|
||||
than in the 2011 traces
|
||||
\item
|
||||
Generally speaking, the event type with higher mean is the termination
|
||||
event for the task
|
||||
\item
|
||||
The \# evts mean is higher than the sum of all other event type means,
|
||||
since it appears there are a lot more non-termination events in the
|
||||
2019 traces.
|
||||
\end{itemize}
|
||||
|
||||
\hypertarget{mean-number-of-tasks-and-event-distribution-per-job-type}{%
|
||||
\subsection{Mean number of tasks and event distribution per job
|
||||
type}\label{mean-number-of-tasks-and-event-distribution-per-job-type}}
|
||||
|
||||
|
||||
\textbf{Observations}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Again the mean number of tasks is significantly higher than the 2011
|
||||
traces, indicating a higher complexity of workloads
|
||||
\item
|
||||
Cluster A has no evicted jobs
|
||||
\item
|
||||
The number of events is however lower than the event means in the 2011
|
||||
traces
|
||||
\end{itemize}
|
||||
|
||||
\hypertarget{probability-of-task-successful-termination-given-its-unsuccesful-events}{%
|
||||
\subsection{Probability of task successful termination given its
|
||||
unsuccessful
|
||||
events}\label{probability-of-task-successful-termination-given-its-unsuccesful-events}}
|
||||
|
||||
|
||||
Refer to figure \ref{fig:figureV}.
|
||||
|
||||
\textbf{Observations}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Behaviour is very different from cluster to cluster
|
||||
\item
|
||||
There is no easy conclusion, unlike in 2011, on the correlation
|
||||
between successful probability and \# of events of a specific type.
|
||||
\item
|
||||
Clusters B, C and D in particular have very unsmooth lines that vary a
|
||||
lot for small \# evts differences. This may be due to an uneven
|
||||
distribution of \# evts in the traces.
|
||||
\end{itemize}
|
||||
\hypertarget{correlation-between-task-events-metadata-and-task-termination}{%
|
||||
\subsection{Correlation between task events' metadata and task
|
||||
termination}\label{correlation-between-task-events-metadata-and-task-termination}}
|
||||
|
@ -620,66 +717,6 @@ Refer to figures \ref{fig:figureIX-a}, \ref{fig:figureIX-b}, and
|
|||
\subsection{Mean number of tasks and event distribution per task
|
||||
type}\label{mean-number-of-tasks-and-event-distribution-per-task-type}}
|
||||
|
||||
\input{figures/table_iii}
|
||||
|
||||
Refer to figure \ref{fig:tableIII}.
|
||||
|
||||
\textbf{Observations}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
The mean number of events per task is an order of magnitude higher
|
||||
than in the 2011 traces
|
||||
\item
|
||||
Generally speaking, the event type with higher mean is the termination
|
||||
event for the task
|
||||
\item
|
||||
The \# evts mean is higher than the sum of all other event type means,
|
||||
since it appears there are a lot more non-termination events in the
|
||||
2019 traces.
|
||||
\end{itemize}
|
||||
|
||||
\hypertarget{mean-number-of-tasks-and-event-distribution-per-job-type}{%
|
||||
\subsection{Mean number of tasks and event distribution per job
|
||||
type}\label{mean-number-of-tasks-and-event-distribution-per-job-type}}
|
||||
|
||||
Refer to figure \ref{fig:tableIV}.
|
||||
|
||||
\textbf{Observations}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Again the mean number of tasks is significantly higher than the 2011
|
||||
traces, indicating a higher complexity of workloads
|
||||
\item
|
||||
Cluster A has no evicted jobs
|
||||
\item
|
||||
The number of events is however lower than the event means in the 2011
|
||||
traces
|
||||
\end{itemize}
|
||||
|
||||
\hypertarget{probability-of-task-successful-termination-given-its-unsuccesful-events}{%
|
||||
\subsection{Probability of task successful termination given its
|
||||
unsuccesful
|
||||
events}\label{probability-of-task-successful-termination-given-its-unsuccesful-events}}
|
||||
|
||||
\input{figures/figure_5}
|
||||
|
||||
Refer to figure \ref{fig:figureV}.
|
||||
|
||||
\textbf{Observations}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Behaviour is very different from cluster to cluster
|
||||
\item
|
||||
There is no easy conclusion, unlike in 2011, on the correlation
|
||||
between succesful probability and \# of events of a specific type.
|
||||
\item
|
||||
Clusters B, C and D in particular have very unsmooth lines that vary a
|
||||
lot for small \# evts differences. This may be due to an uneven
|
||||
distribution of \# evts in the traces.
|
||||
\end{itemize}
|
||||
|
||||
\hypertarget{potential-causes-of-unsuccesful-executions}{%
|
||||
\subsection{Potential causes of unsuccessful
|
||||
|
|
|
@ -6,13 +6,13 @@
|
|||
%\hfill
|
||||
\end{subfigure}}
|
||||
|
||||
\begin{figure}
|
||||
\begin{figure}[p]
|
||||
\spatialresourcewaste[0.5\textwidth]{used-2011}
|
||||
\spatialresourcewaste[0.5\textwidth]{used-all}
|
||||
\caption{Percentages of CPU and RAM resources used by tasks w.r.t. task termination type in 2011 and 2019 traces (total of clusters A to D). The x axis is the type of resource, y-axis is the percentage of resource used and color represents task termination. Numeric values are displayed below the graph as a table.}\label{fig:spatialresourcewaste-requested}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}
|
||||
\begin{figure}[p]
|
||||
\spatialresourcewaste{used-a}
|
||||
\spatialresourcewaste{used-b}
|
||||
\spatialresourcewaste{used-c}
|
||||
|
@ -20,13 +20,13 @@
|
|||
\caption{Percentages of CPU and RAM resources used by tasks w.r.t. task termination type for clusters A to D in 2019 traces. Refer to figure~\ref{fig:spatialresourcewaste-requested} for plot explanation.}\label{fig:spatialresourcewaste-actual-csts}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}
|
||||
\begin{figure}[p]
|
||||
\spatialresourcewaste[0.5\textwidth]{requested-2011}
|
||||
\spatialresourcewaste[0.5\textwidth]{requested-all}
|
||||
\caption{Percentages of CPU and RAM resources requested by tasks w.r.t. task termination type in 2011 and 2019 traces. The x axis is the type of resource, y-axis is the percentage of resource used and color represents task termination. Numeric values are displayed below the graph as a table.}\label{fig:spatialresourcewaste-actual}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}
|
||||
\begin{figure}[p]
|
||||
\spatialresourcewaste{requested-a}
|
||||
\spatialresourcewaste{requested-b}
|
||||
\spatialresourcewaste{requested-c}
|
||||
|
|
Loading…
Reference in a new issue