diff --git a/machine_configs/machine_configs-Copy1.ipynb b/machine_configs/machine_configs-Copy1.ipynb new file mode 100644 index 00000000..5882ecc2 --- /dev/null +++ b/machine_configs/machine_configs-Copy1.ipynb @@ -0,0 +1,1585 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Machine configurations\n", + "\n", + "This query returns all the distinct NCU/NMU configurations in the borg clusters, including how many machine ids match for any specific configuration.\n", + "\n", + "Please note that for simplicity's sake we are technically counting the number of ADD or UPDATE events for each configuration, and not the actual count of machines. Therefore a machine whose configuration changes over time may be counted twice or more." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# For pretty printing\n", + "from IPython.display import display\n", + "\n", + "# Disables row ellipsis\n", + "pd.set_option('display.max_rows', 200)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Load all machine event rows in a single DataFrame, and add a \"cluster\" column to differentiate\n", + "# between clusters\n", + "df = None\n", + "for l in \"abcdefgh\":\n", + " dfl = pd.read_csv(\"~/google_2019/machine_events/\" + l + \"_machine_events.csv\")\n", + " dfl[\"cluster\"] = l\n", + " if df is None:\n", + " df = dfl\n", + " else:\n", + " df = pd.concat([df, dfl], axis=0)\n", + "\n", + "# Filter only ADD or UPDATE events\n", + "df = df[(df.type==1)|(df.type==3)]\n", + "\n", + "# P.S: ADD=1, REMOVE=2, UPDATE=3\n", + " \n", + "df = df[[\"capacity.cpus\", \"capacity.memory\", \"cluster\", \n", + " \"missing_data_reason\", \"machine_id\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
capacity.cpuscapacity.memoryclustermachine_id
missing_data_reason
NaN523781523781532510532510
\n", + "
" + ], + "text/plain": [ + " capacity.cpus capacity.memory cluster machine_id\n", + "missing_data_reason \n", + "NaN 523781 523781 532510 532510" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Checking if we need to deal with particular missing data\n", + "# No columns returned, so missing data can be safely ignored\n", + "df.groupby(by=[\"missing_data_reason\"], dropna=False).count()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def do_group_by(df):\n", + " # Exclude \"cluster\" column and perform group-by\n", + " dfg = df[df.columns.difference(['cluster'])]. \\\n", + " groupby(by=[\"capacity.cpus\",\"capacity.memory\"], \n", + " dropna=False).count()\n", + " \n", + " # Compute relative number of machines\n", + " total_machines = dfg['machine_id'].sum()\n", + " dfg[\"machine_id_perc\"] = dfg[\"machine_id\"] * 100 / total_machines\n", + " \n", + " # Sort descending\n", + " dfg = dfg.sort_values(\"machine_id_perc\", ascending=False)\n", + " \n", + " display(dfg)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster a:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334962948734.758469
1.0000000.5000001344015.842705
0.7089840.3334961249514.728764
0.3867190.333496905710.676144
0.16674852656.206238
0.7089840.66699246085.431784
1.0000001.00000044465.240823
0.5917970.16674824842.928071
NaNNaN13771.623170
0.9589840.50000011431.347337
1.0000006540.770917
1.0000000.2500003660.431431
0.4794920.25000060.007073
0.7089840.25000060.007073
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 29487 34.758469\n", + "1.000000 0.500000 13440 15.842705\n", + "0.708984 0.333496 12495 14.728764\n", + "0.386719 0.333496 9057 10.676144\n", + " 0.166748 5265 6.206238\n", + "0.708984 0.666992 4608 5.431784\n", + "1.000000 1.000000 4446 5.240823\n", + "0.591797 0.166748 2484 2.928071\n", + "NaN NaN 1377 1.623170\n", + "0.958984 0.500000 1143 1.347337\n", + " 1.000000 654 0.770917\n", + "1.000000 0.250000 366 0.431431\n", + "0.479492 0.250000 6 0.007073\n", + "0.708984 0.250000 6 0.007073" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster b:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334961618431.982926
1.0000000.500000979019.347061
0.7089840.333496844816.694992
0.9589840.500000550210.873088
0.7089840.66699238327.572823
1.0000001.00000022144.375321
0.5917970.16674821524.252796
0.3867190.3334968161.612584
0.9589841.0000006181.221296
0.5917970.6669925000.988103
0.3867190.1667484120.814197
NaNNaN1340.264812
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 16184 31.982926\n", + "1.000000 0.500000 9790 19.347061\n", + "0.708984 0.333496 8448 16.694992\n", + "0.958984 0.500000 5502 10.873088\n", + "0.708984 0.666992 3832 7.572823\n", + "1.000000 1.000000 2214 4.375321\n", + "0.591797 0.166748 2152 4.252796\n", + "0.386719 0.333496 816 1.612584\n", + "0.958984 1.000000 618 1.221296\n", + "0.591797 0.666992 500 0.988103\n", + "0.386719 0.166748 412 0.814197\n", + "NaN NaN 134 0.264812" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster c:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667481575424.439204
0.3867190.3334961110417.225652
0.5917970.3334961040416.139741
0.9589840.500000663410.291334
1.0000000.50000056548.771059
0.3867190.16674835805.553660
0.7089840.66699229004.498774
1.0000001.00000027364.244361
0.25000021323.307375
NaNNaN14662.274208
0.9589841.0000007661.188297
0.7089840.3334966200.961807
0.9589840.2500006000.930781
0.5917970.1667481120.173746
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 15754 24.439204\n", + "0.386719 0.333496 11104 17.225652\n", + "0.591797 0.333496 10404 16.139741\n", + "0.958984 0.500000 6634 10.291334\n", + "1.000000 0.500000 5654 8.771059\n", + "0.386719 0.166748 3580 5.553660\n", + "0.708984 0.666992 2900 4.498774\n", + "1.000000 1.000000 2736 4.244361\n", + " 0.250000 2132 3.307375\n", + "NaN NaN 1466 2.274208\n", + "0.958984 1.000000 766 1.188297\n", + "0.708984 0.333496 620 0.961807\n", + "0.958984 0.250000 600 0.930781\n", + "0.591797 0.166748 112 0.173746" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster d:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334962839445.288376
0.3867190.333496840213.401174
0.2592770.166748802012.791885
0.3867190.16674858069.260559
0.7089840.66699243806.986092
0.33349639246.258772
0.5917970.16674825484.064055
NaNNaN4980.794309
0.2592770.3334964260.679469
1.0000000.5000002920.465739
0.5917970.25000040.006380
0.7089840.50000020.003190
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 28394 45.288376\n", + "0.386719 0.333496 8402 13.401174\n", + "0.259277 0.166748 8020 12.791885\n", + "0.386719 0.166748 5806 9.260559\n", + "0.708984 0.666992 4380 6.986092\n", + " 0.333496 3924 6.258772\n", + "0.591797 0.166748 2548 4.064055\n", + "NaN NaN 498 0.794309\n", + "0.259277 0.333496 426 0.679469\n", + "1.000000 0.500000 292 0.465739\n", + "0.591797 0.250000 4 0.006380\n", + "0.708984 0.500000 2 0.003190" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster e:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667483845248.202377
0.7089840.3334961178614.774608
0.9589840.500000864610.838389
0.7089840.66699276069.534674
1.0000000.50000055867.002457
0.3867190.16674844705.603470
0.2592770.33349612681.589530
0.0833746340.794765
NaNNaN5360.671915
0.5917970.3334963240.406158
1.0000000.2500002680.335957
1.0000001380.172993
0.5000000.062500540.067693
0.25000040.005014
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 38452 48.202377\n", + "0.708984 0.333496 11786 14.774608\n", + "0.958984 0.500000 8646 10.838389\n", + "0.708984 0.666992 7606 9.534674\n", + "1.000000 0.500000 5586 7.002457\n", + "0.386719 0.166748 4470 5.603470\n", + "0.259277 0.333496 1268 1.589530\n", + " 0.083374 634 0.794765\n", + "NaN NaN 536 0.671915\n", + "0.591797 0.333496 324 0.406158\n", + "1.000000 0.250000 268 0.335957\n", + " 1.000000 138 0.172993\n", + "0.500000 0.062500 54 0.067693\n", + " 0.250000 4 0.005014" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster f:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.5000004134066.396839
0.7089840.333496687811.046866
0.5917970.33349655648.936430
0.9589840.50000021723.488484
0.3867190.16674815442.479843
NaNNaN14322.299958
0.7089840.66699212441.998008
1.0000000.2500007921.272044
0.9589841.0000005360.860878
0.3867190.3334963980.639234
1.0000001.0000003440.552504
0.5000000.250000180.028910
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 41340 66.396839\n", + "0.708984 0.333496 6878 11.046866\n", + "0.591797 0.333496 5564 8.936430\n", + "0.958984 0.500000 2172 3.488484\n", + "0.386719 0.166748 1544 2.479843\n", + "NaN NaN 1432 2.299958\n", + "0.708984 0.666992 1244 1.998008\n", + "1.000000 0.250000 792 1.272044\n", + "0.958984 1.000000 536 0.860878\n", + "0.386719 0.333496 398 0.639234\n", + "1.000000 1.000000 344 0.552504\n", + "0.500000 0.250000 18 0.028910" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster g:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667481585222.892958
1.0000000.5000001180817.052741
0.7089840.333496796811.507134
0.5917970.333496783011.307839
0.3867190.16674846906.773150
0.7089840.66699242586.149269
0.9589840.50000041966.059731
0.3867190.33349638645.580267
0.5917970.16674826063.763503
1.0000000.25000021003.032754
NaNNaN15662.261568
0.2592770.33349613301.920744
0.9589841.0000007781.123563
1.0000001.0000003780.545896
0.5000000.250000120.017330
0.4794920.25000060.008665
0.50000020.002888
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 15852 22.892958\n", + "1.000000 0.500000 11808 17.052741\n", + "0.708984 0.333496 7968 11.507134\n", + "0.591797 0.333496 7830 11.307839\n", + "0.386719 0.166748 4690 6.773150\n", + "0.708984 0.666992 4258 6.149269\n", + "0.958984 0.500000 4196 6.059731\n", + "0.386719 0.333496 3864 5.580267\n", + "0.591797 0.166748 2606 3.763503\n", + "1.000000 0.250000 2100 3.032754\n", + "NaN NaN 1566 2.261568\n", + "0.259277 0.333496 1330 1.920744\n", + "0.958984 1.000000 778 1.123563\n", + "1.000000 1.000000 378 0.545896\n", + "0.500000 0.250000 12 0.017330\n", + "0.479492 0.250000 6 0.008665\n", + " 0.500000 2 0.002888" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster h:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.5000003632461.946178
0.5917970.33349648268.230158
0.7089840.33349636826.279205
0.9589840.50000028584.873973
0.3867190.33349625964.427163
1.0000001.00000020303.461919
0.25000018923.226577
NaNNaN17202.933251
0.3867190.16674812442.121491
0.7089840.6669927661.306320
0.5917970.6669925000.852689
0.9589841.0000002000.341076
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 36324 61.946178\n", + "0.591797 0.333496 4826 8.230158\n", + "0.708984 0.333496 3682 6.279205\n", + "0.958984 0.500000 2858 4.873973\n", + "0.386719 0.333496 2596 4.427163\n", + "1.000000 1.000000 2030 3.461919\n", + " 0.250000 1892 3.226577\n", + "NaN NaN 1720 2.933251\n", + "0.386719 0.166748 1244 2.121491\n", + "0.708984 0.666992 766 1.306320\n", + "0.591797 0.666992 500 0.852689\n", + "0.958984 1.000000 200 0.341076" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " For all clusters:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.50000012423423.329891
0.5917970.33349610301319.344801
0.2592770.1667487807814.662260
0.7089840.3334965580110.478864
0.3867190.333496362376.804943
0.9589840.500000311515.849843
0.7089840.666992295945.557454
0.3867190.166748270115.072393
1.0000001.000000122862.307187
0.5917970.16674899021.859496
NaNNaN87291.639218
1.0000000.25000075501.417814
0.9589841.00000035520.667030
0.2592770.33349630240.567877
0.5917970.66699210000.187790
0.2592770.0833746340.119059
0.9589840.2500006000.112674
0.5000000.062500540.010141
0.250000340.006385
0.4794920.250000120.002253
0.7089840.25000060.001127
0.5917970.25000040.000751
0.7089840.50000020.000376
0.4794920.50000020.000376
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 124234 23.329891\n", + "0.591797 0.333496 103013 19.344801\n", + "0.259277 0.166748 78078 14.662260\n", + "0.708984 0.333496 55801 10.478864\n", + "0.386719 0.333496 36237 6.804943\n", + "0.958984 0.500000 31151 5.849843\n", + "0.708984 0.666992 29594 5.557454\n", + "0.386719 0.166748 27011 5.072393\n", + "1.000000 1.000000 12286 2.307187\n", + "0.591797 0.166748 9902 1.859496\n", + "NaN NaN 8729 1.639218\n", + "1.000000 0.250000 7550 1.417814\n", + "0.958984 1.000000 3552 0.667030\n", + "0.259277 0.333496 3024 0.567877\n", + "0.591797 0.666992 1000 0.187790\n", + "0.259277 0.083374 634 0.119059\n", + "0.958984 0.250000 600 0.112674\n", + "0.500000 0.062500 54 0.010141\n", + " 0.250000 34 0.006385\n", + "0.479492 0.250000 12 0.002253\n", + "0.708984 0.250000 6 0.001127\n", + "0.591797 0.250000 4 0.000751\n", + "0.708984 0.500000 2 0.000376\n", + "0.479492 0.500000 2 0.000376" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Generate machine configurations table per cluster + a global table\n", + "\n", + "df = df[df.columns.difference(['missing_data_reason'])]\n", + "\n", + "for l in \"abcdefgh\":\n", + " print(\"\\nFor cluster \" + l + \":\\n\")\n", + " do_group_by(df[df.cluster==l])\n", + "\n", + "print(\"\\n For all clusters:\")\n", + "do_group_by(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/machine_time_waste/statuses_total_time.ipynb b/machine_time_waste/statuses_total_time.ipynb index 6a4ea3bb..dddd11b4 100644 --- a/machine_time_waste/statuses_total_time.ipynb +++ b/machine_time_waste/statuses_total_time.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "776f8aa3", "metadata": {}, "source": [ "# Statuses total time\n", @@ -13,7 +12,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "7c77e2f1", "metadata": {}, "outputs": [], "source": [ @@ -35,7 +33,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "cc6d3e6a", "metadata": {}, "outputs": [], "source": [ @@ -57,7 +54,6 @@ { "cell_type": "code", "execution_count": 3, - "id": "44c77966", "metadata": {}, "outputs": [], "source": [ @@ -67,7 +63,6 @@ { "cell_type": "code", "execution_count": 4, - "id": "8038bdb2", "metadata": {}, "outputs": [], "source": [ @@ -136,7 +131,6 @@ }, { "cell_type": "markdown", - "id": "31fdf065", "metadata": {}, "source": [ "## Graph 1: Absolute total time spent per status per \"last termination\" type" @@ -145,7 +139,6 @@ { "cell_type": "code", "execution_count": 5, - "id": "f44fef5d", "metadata": {}, "outputs": [], "source": [ @@ -161,7 +154,6 @@ }, { "cell_type": "markdown", - "id": "41bcaff4", "metadata": {}, "source": [ "## Graph 2: Relative total time spent per status per \"last termination\" type\n", @@ -172,7 +164,6 @@ { "cell_type": "code", "execution_count": 9, - "id": "3ba51241", "metadata": {}, "outputs": [], "source": [ @@ -201,7 +192,6 @@ { "cell_type": "code", "execution_count": 10, - "id": "8fc1b568", "metadata": {}, "outputs": [ { @@ -388,7 +378,6 @@ { "cell_type": "code", "execution_count": 8, - "id": "5d112e3c", "metadata": {}, "outputs": [ { @@ -423,7 +412,6 @@ { "cell_type": "code", "execution_count": null, - "id": "45819798", "metadata": {}, "outputs": [], "source": [] @@ -431,7 +419,6 @@ { "cell_type": "code", "execution_count": null, - "id": "7ebdc37c", "metadata": {}, "outputs": [], "source": [] @@ -439,7 +426,6 @@ { 
"cell_type": "code", "execution_count": null, - "id": "81f69410", "metadata": {}, "outputs": [], "source": [] @@ -447,9 +433,9 @@ ], "metadata": { "kernelspec": { - "display_name": "venv", + "display_name": "Python 3", "language": "python", - "name": "venv" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -461,7 +447,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.8.3" } }, "nbformat": 4, diff --git a/report/Claudio_Maggioni_report.pdf b/report/Claudio_Maggioni_report.pdf index c52ee91c..a31e64f5 100644 Binary files a/report/Claudio_Maggioni_report.pdf and b/report/Claudio_Maggioni_report.pdf differ diff --git a/report/figures/machine_configs.tex b/report/figures/machine_configs.tex index e4e02ee7..f9d28352 100644 --- a/report/figures/machine_configs.tex +++ b/report/figures/machine_configs.tex @@ -1,6 +1,6 @@ -\newcommand{\machineconfigs}[3][0.9\textwidth]{ - \begin{subfigure}{0.32\textwidth} - \begin{minipage}[c][#1]{\textwidth}% +\newcommand{\machineconfigs}[4][0.32\textwidth]{ + \begin{subfigure}{#1} + \vspace{0.5cm} \resizebox{\textwidth}{!}{ \centering \begin{tabular}{llll} @@ -10,14 +10,61 @@ \midrule #3 \bottomrule + #4 \end{tabular}} - \end{minipage} \caption{#2} \end{subfigure}} -\begin{figure} -\centering -\machineconfigs[1.2\textwidth]{All clusters}{ +\newcommand{\sidesplit}[5][0.33]{% + \begin{subfigure}{\textwidth} + \centering + \begin{minipage}{#1\textwidth} + \resizebox{\textwidth}{!}{#2} + \end{minipage} + \begin{minipage}{#1\textwidth} + \resizebox{\textwidth}{!}{#3} + \end{minipage} + \begin{minipage}{#1\textwidth} + \resizebox{\textwidth}{!}{#4} + \end{minipage} + \caption{#5} + \end{subfigure}} + + +\begin{figure}[p] + \sidesplit[0.20]{\begin{tabular}{llll} +\toprule +\textbf{CPU} & \textbf{RAM} & \textbf{\% Machines} \\ +\midrule + 0.5 & 0.5 & 53.47\%\\ + 0.5 & 0.25 & 30.74\%\\ + 0.5 & 0.75 & 7.95\%\\ + 1 & 1 & 6.32\%\\ +\bottomrule 
+\end{tabular}}{\begin{tabular}{llll} +\toprule +\textbf{CPU} & \textbf{RAM} & \textbf{\% Machines} \\ +\midrule + 0.25 & 0.25 & 0.99\%\\ + 0.5 & 0.12 & 0.43\%\\ + 0.5 & 0.03 & 0.04\%\\ + &&\\ +\bottomrule +\end{tabular}}{\begin{tabular}{llll} +\toprule +\textbf{CPU} & \textbf{RAM} & \textbf{\% Machines} \\ +\midrule + 0.5 & 0.97 & 0.03\%\\ + 1 & 0.5 & 0.02\%\\ + 0.5 & 0.06 & 0.01\%\\ + &&\\ +\bottomrule +\end{tabular}}{2011 data}\vspace{0.5cm} +\sidesplit{\begin{tabular}{llll} +\toprule +\textbf{CPU (NCU)} & \textbf{RAM (NMU)} & \textbf{Machine count} & +\textbf{\% Machines} \\ +\midrule Unknown & Unknown & 8729 & 1.639218\% \\ 1.000000 & 0.500000 & 124234 & 23.329891\% \\ 0.591797 & 0.333496 & 103013 & 19.344801\% \\ @@ -26,6 +73,12 @@ Unknown & Unknown & 8729 & 1.639218\% \\ 0.386719 & 0.333496 & 36237 & 6.804943\% \\ 0.958984 & 0.500000 & 31151 & 5.849843\% \\ 0.708984 & 0.666992 & 29594 & 5.557454\% \\ +\bottomrule +\end{tabular}}{\begin{tabular}{llll} +\toprule +\textbf{CPU (NCU)} & \textbf{RAM (NMU)} & \textbf{Machine count} & +\textbf{\% Machines} \\ +\midrule 0.386719 & 0.166748 & 27011 & 5.072393\% \\ 1.000000 & 1.000000 & 12286 & 2.307187\% \\ 0.591797 & 0.166748 & 9902 & 1.859496\% \\ @@ -34,6 +87,12 @@ Unknown & Unknown & 8729 & 1.639218\% \\ 0.259277 & 0.333496 & 3024 & 0.567877\% \\ 0.591797 & 0.666992 & 1000 & 0.187790\% \\ 0.259277 & 0.083374 & 634 & 0.119059\% \\ +\bottomrule +\end{tabular}}{\begin{tabular}{llll} +\toprule +\textbf{CPU (NCU)} & \textbf{RAM (NMU)} & \textbf{Machine count} & +\textbf{\% Machines} \\ +\midrule 0.958984 & 0.250000 & 600 & 0.112674\% \\ 0.500000 & 0.062500 & 54 & 0.010141\% \\ 0.500000 & 0.250000 & 34 & 0.006385\% \\ @@ -42,8 +101,14 @@ Unknown & Unknown & 8729 & 1.639218\% \\ 0.591797 & 0.250000 & 4 & 0.000751\% \\ 0.708984 & 0.500000 & 2 & 0.000376\% \\ 0.479492 & 0.500000 & 2 & 0.000376\% \\ -} -\machineconfigs[1.2\textwidth]{A cluster}{ +\bottomrule +\end{tabular}}{2019 data} +\caption{Overview of machine 
configurations in terms of CPU and Memory power in 2011 and 2019 (all clusters aggregated) traces. In the 2019 traces NCU stands for ``Normalized Compute Unit'' and NMU stands for ``Normalized Memory Unit'': both are $[0,1]$ normalizations of resource values. While memory was measured in terms of capacity, CPU power was measured in ``Google Compute Units'' (GCUs), an opaque umbrella metric used by Google that factors in CPU clock, number of cores/processors, and CPU ISA architecture.}\label{fig:machineconfigs} +\end{figure} + +\begin{figure}[p] +\centering +\machineconfigs{Cluster A}{ Unknown & Unknown & 1377 & 1.623170\% \\ 0.591797 & 0.333496 & 29487 & 34.758469\% \\ 1.000000 & 0.500000 & 13440 & 15.842705\% \\ @@ -58,8 +123,8 @@ Unknown & Unknown & 1377 & 1.623170\% \\ 1.000000 & 0.250000 & 366 & 0.431431\% \\ 0.479492 & 0.250000 & 6 & 0.007073\% \\ 0.708984 & 0.250000 & 6 & 0.007073\% \\ -} -\machineconfigs[1.2\textwidth]{Cluster B}{ +}{} +\machineconfigs{Cluster B}{ Unknown & Unknown & 134 & 0.264812\% \\ 0.591797 & 0.333496 & 16184 & 31.982926\% \\ 1.000000 & 0.500000 & 9790 & 19.347061\% \\ @@ -72,7 +137,7 @@ Unknown & Unknown & 134 & 0.264812\% \\ 0.958984 & 1.000000 & 618 & 1.221296\% \\ 0.591797 & 0.666992 & 500 & 0.988103\% \\ 0.386719 & 0.166748 & 412 & 0.814197\% \\ -} +}{\\\\} \machineconfigs{Cluster C}{ Unknown & Unknown & 1466 & 2.274208\% \\ 0.259277 & 0.166748 & 15754 & 24.439204\% \\ @@ -88,7 +153,7 @@ Unknown & Unknown & 1466 & 2.274208\% \\ 0.708984 & 0.333496 & 620 & 0.961807\% \\ 0.958984 & 0.250000 & 600 & 0.930781\% \\ 0.591797 & 0.166748 & 112 & 0.173746\% \\ -} +}{} \machineconfigs{Cluster D}{ Unknown & Unknown & 498 & 0.794309\% \\ 0.591797 & 0.333496 & 28394 & 45.288376\% \\ @@ -102,7 +167,7 @@ Unknown & Unknown & 498 & 0.794309\% \\ 1.000000 & 0.500000 & 292 & 0.465739\% \\ 0.591797 & 0.250000 & 4 & 0.006380\% \\ 0.708984 & 0.500000 & 2 & 0.003190\% \\ -} +}{\\\\} \machineconfigs{Cluster E}{ Unknown & Unknown & 536 & 0.671915\% \\ 
0.259277 & 0.166748 & 38452 & 48.202377\% \\ @@ -118,7 +183,7 @@ Unknown & Unknown & 536 & 0.671915\% \\ 1.000000 & 1.000000 & 138 & 0.172993\% \\ 0.500000 & 0.062500 & 54 & 0.067693\% \\ 0.500000 & 0.250000 & 4 & 0.005014\% \\ -} +}{} \machineconfigs{Cluster F}{ Unknown & Unknown & 1432 & 2.299958\% \\ 1.000000 & 0.500000 & 41340 & 66.396839\% \\ @@ -132,7 +197,7 @@ Unknown & Unknown & 1432 & 2.299958\% \\ 0.386719 & 0.333496 & 398 & 0.639234\% \\ 1.000000 & 1.000000 & 344 & 0.552504\% \\ 0.500000 & 0.250000 & 18 & 0.028910\% \\ -} +}{\\\\} \machineconfigs{Cluster G}{ Unknown & Unknown & 1566 & 2.261568\% \\ 0.259277 & 0.166748 & 15852 & 22.892958\% \\ @@ -151,7 +216,7 @@ Unknown & Unknown & 1566 & 2.261568\% \\ 0.500000 & 0.250000 & 12 & 0.017330\% \\ 0.479492 & 0.250000 & 6 & 0.008665\% \\ 0.479492 & 0.500000 & 2 & 0.002888\% \\ -} +}{} \machineconfigs{Cluster H}{ Unknown & Unknown & 1720 & 2.933251\% \\ 1.000000 & 0.500000 & 36324 & 61.946178\% \\ @@ -165,6 +230,6 @@ Unknown & Unknown & 1720 & 2.933251\% \\ 0.708984 & 0.666992 & 766 & 1.306320\% \\ 0.591797 & 0.666992 & 500 & 0.852689\% \\ 0.958984 & 1.000000 & 200 & 0.341076\% \\ -} -\caption{Overview of machine configurations in terms of CPU and RAM resources for each cluster}\label{fig:machineconfigs} +}{\\\\\\\\\\} +\caption{Overview of machine configurations in terms of CPU and RAM resources for each cluster in the 2019 traces. Refer to figure~\ref{fig:machineconfigs} for a column legend.}\label{fig:machineconfigs-csts} \end{figure}