commit c28c2051e769532747f43d2889242afb97be7ed8 Author: Claudio Maggioni (maggicl) Date: Mon Feb 15 10:34:20 2021 +0000 Added machine configuration analysis diff --git a/machine_configs/.ipynb_checkpoints/machine_configs-checkpoint.ipynb b/machine_configs/.ipynb_checkpoints/machine_configs-checkpoint.ipynb new file mode 100644 index 00000000..bf43e41b --- /dev/null +++ b/machine_configs/.ipynb_checkpoints/machine_configs-checkpoint.ipynb @@ -0,0 +1,1592 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "built-symbol", + "metadata": {}, + "source": [ + "# Machine configurations\n", + "\n", + "This query returns all the distinct NCU/NMU configurations in the Borg clusters, including how many machine ids match each specific configuration.\n", + "\n", + "Please note that, for simplicity's sake, we are technically counting the number of ADD or UPDATE events for each configuration, and not the actual number of machines. Therefore a machine whose configuration changes over time may be counted twice or more." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "stuffed-lightning", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# For pretty printing\n", + "from IPython.display import display\n", + "\n", + "# Disables row ellipsis\n", + "pd.set_option('display.max_rows', 200)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "upper-lloyd", + "metadata": {}, + "outputs": [], + "source": [ + "# Load all machine event rows in a single DataFrame, and add a \"cluster\" column to differentiate\n", + "# between clusters\n", + "df = None\n", + "for l in \"abcdefgh\":\n", + " dfl = pd.read_csv(\"~/google_2019/machine_events/\" + l + \"_machine_events.csv\")\n", + " dfl[\"cluster\"] = l\n", + " if df is None:\n", + " df = dfl\n", + " else:\n", + " df = pd.concat([df, dfl], axis=0)\n", + "\n", + "# Filter only ADD or UPDATE events\n", + "df = df[(df.type==1)|(df.type==3)]\n", + "\n", + "# P.S: ADD=1, REMOVE=2, UPDATE=3\n", + " \n", + "df = df[[\"capacity.cpus\", \"capacity.memory\", \"cluster\", \n", + " \"missing_data_reason\", \"machine_id\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "presidential-farmer", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
capacity.cpuscapacity.memoryclustermachine_id
missing_data_reason
NaN523781523781532510532510
\n", + "
" + ], + "text/plain": [ + " capacity.cpus capacity.memory cluster machine_id\n", + "missing_data_reason \n", + "NaN 523781 523781 532510 532510" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Checking if we need to deal with particular missing data\n", + "# No columns returned, so missing data can be safely ignored\n", + "df.groupby(by=[\"missing_data_reason\"], dropna=False).count()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "informative-vietnam", + "metadata": {}, + "outputs": [], + "source": [ + "def do_group_by(df):\n", + " # Exclude \"cluster\" column and perform group-by\n", + " dfg = df[df.columns.difference(['cluster'])]. \\\n", + " groupby(by=[\"capacity.cpus\",\"capacity.memory\"], \n", + " dropna=False).count()\n", + " \n", + " # Compute relative number of machines\n", + " total_machines = dfg['machine_id'].sum()\n", + " dfg[\"machine_id_perc\"] = dfg[\"machine_id\"] * 100 / total_machines\n", + " \n", + " # Sort descending\n", + " dfg = dfg.sort_values(\"machine_id_perc\", ascending=False)\n", + " \n", + " display(dfg)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "pretty-taiwan", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster a:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334962948734.758469
1.0000000.5000001344015.842705
0.7089840.3334961249514.728764
0.3867190.333496905710.676144
0.16674852656.206238
0.7089840.66699246085.431784
1.0000001.00000044465.240823
0.5917970.16674824842.928071
NaNNaN13771.623170
0.9589840.50000011431.347337
1.0000006540.770917
1.0000000.2500003660.431431
0.4794920.25000060.007073
0.7089840.25000060.007073
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 29487 34.758469\n", + "1.000000 0.500000 13440 15.842705\n", + "0.708984 0.333496 12495 14.728764\n", + "0.386719 0.333496 9057 10.676144\n", + " 0.166748 5265 6.206238\n", + "0.708984 0.666992 4608 5.431784\n", + "1.000000 1.000000 4446 5.240823\n", + "0.591797 0.166748 2484 2.928071\n", + "NaN NaN 1377 1.623170\n", + "0.958984 0.500000 1143 1.347337\n", + " 1.000000 654 0.770917\n", + "1.000000 0.250000 366 0.431431\n", + "0.479492 0.250000 6 0.007073\n", + "0.708984 0.250000 6 0.007073" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster b:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334961618431.982926
1.0000000.500000979019.347061
0.7089840.333496844816.694992
0.9589840.500000550210.873088
0.7089840.66699238327.572823
1.0000001.00000022144.375321
0.5917970.16674821524.252796
0.3867190.3334968161.612584
0.9589841.0000006181.221296
0.5917970.6669925000.988103
0.3867190.1667484120.814197
NaNNaN1340.264812
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 16184 31.982926\n", + "1.000000 0.500000 9790 19.347061\n", + "0.708984 0.333496 8448 16.694992\n", + "0.958984 0.500000 5502 10.873088\n", + "0.708984 0.666992 3832 7.572823\n", + "1.000000 1.000000 2214 4.375321\n", + "0.591797 0.166748 2152 4.252796\n", + "0.386719 0.333496 816 1.612584\n", + "0.958984 1.000000 618 1.221296\n", + "0.591797 0.666992 500 0.988103\n", + "0.386719 0.166748 412 0.814197\n", + "NaN NaN 134 0.264812" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster c:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667481575424.439204
0.3867190.3334961110417.225652
0.5917970.3334961040416.139741
0.9589840.500000663410.291334
1.0000000.50000056548.771059
0.3867190.16674835805.553660
0.7089840.66699229004.498774
1.0000001.00000027364.244361
0.25000021323.307375
NaNNaN14662.274208
0.9589841.0000007661.188297
0.7089840.3334966200.961807
0.9589840.2500006000.930781
0.5917970.1667481120.173746
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 15754 24.439204\n", + "0.386719 0.333496 11104 17.225652\n", + "0.591797 0.333496 10404 16.139741\n", + "0.958984 0.500000 6634 10.291334\n", + "1.000000 0.500000 5654 8.771059\n", + "0.386719 0.166748 3580 5.553660\n", + "0.708984 0.666992 2900 4.498774\n", + "1.000000 1.000000 2736 4.244361\n", + " 0.250000 2132 3.307375\n", + "NaN NaN 1466 2.274208\n", + "0.958984 1.000000 766 1.188297\n", + "0.708984 0.333496 620 0.961807\n", + "0.958984 0.250000 600 0.930781\n", + "0.591797 0.166748 112 0.173746" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster d:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334962839445.288376
0.3867190.333496840213.401174
0.2592770.166748802012.791885
0.3867190.16674858069.260559
0.7089840.66699243806.986092
0.33349639246.258772
0.5917970.16674825484.064055
NaNNaN4980.794309
0.2592770.3334964260.679469
1.0000000.5000002920.465739
0.5917970.25000040.006380
0.7089840.50000020.003190
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 28394 45.288376\n", + "0.386719 0.333496 8402 13.401174\n", + "0.259277 0.166748 8020 12.791885\n", + "0.386719 0.166748 5806 9.260559\n", + "0.708984 0.666992 4380 6.986092\n", + " 0.333496 3924 6.258772\n", + "0.591797 0.166748 2548 4.064055\n", + "NaN NaN 498 0.794309\n", + "0.259277 0.333496 426 0.679469\n", + "1.000000 0.500000 292 0.465739\n", + "0.591797 0.250000 4 0.006380\n", + "0.708984 0.500000 2 0.003190" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster e:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667483845248.202377
0.7089840.3334961178614.774608
0.9589840.500000864610.838389
0.7089840.66699276069.534674
1.0000000.50000055867.002457
0.3867190.16674844705.603470
0.2592770.33349612681.589530
0.0833746340.794765
NaNNaN5360.671915
0.5917970.3334963240.406158
1.0000000.2500002680.335957
1.0000001380.172993
0.5000000.062500540.067693
0.25000040.005014
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 38452 48.202377\n", + "0.708984 0.333496 11786 14.774608\n", + "0.958984 0.500000 8646 10.838389\n", + "0.708984 0.666992 7606 9.534674\n", + "1.000000 0.500000 5586 7.002457\n", + "0.386719 0.166748 4470 5.603470\n", + "0.259277 0.333496 1268 1.589530\n", + " 0.083374 634 0.794765\n", + "NaN NaN 536 0.671915\n", + "0.591797 0.333496 324 0.406158\n", + "1.000000 0.250000 268 0.335957\n", + " 1.000000 138 0.172993\n", + "0.500000 0.062500 54 0.067693\n", + " 0.250000 4 0.005014" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster f:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.5000004134066.396839
0.7089840.333496687811.046866
0.5917970.33349655648.936430
0.9589840.50000021723.488484
0.3867190.16674815442.479843
NaNNaN14322.299958
0.7089840.66699212441.998008
1.0000000.2500007921.272044
0.9589841.0000005360.860878
0.3867190.3334963980.639234
1.0000001.0000003440.552504
0.5000000.250000180.028910
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 41340 66.396839\n", + "0.708984 0.333496 6878 11.046866\n", + "0.591797 0.333496 5564 8.936430\n", + "0.958984 0.500000 2172 3.488484\n", + "0.386719 0.166748 1544 2.479843\n", + "NaN NaN 1432 2.299958\n", + "0.708984 0.666992 1244 1.998008\n", + "1.000000 0.250000 792 1.272044\n", + "0.958984 1.000000 536 0.860878\n", + "0.386719 0.333496 398 0.639234\n", + "1.000000 1.000000 344 0.552504\n", + "0.500000 0.250000 18 0.028910" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster g:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667481585222.892958
1.0000000.5000001180817.052741
0.7089840.333496796811.507134
0.5917970.333496783011.307839
0.3867190.16674846906.773150
0.7089840.66699242586.149269
0.9589840.50000041966.059731
0.3867190.33349638645.580267
0.5917970.16674826063.763503
1.0000000.25000021003.032754
NaNNaN15662.261568
0.2592770.33349613301.920744
0.9589841.0000007781.123563
1.0000001.0000003780.545896
0.5000000.250000120.017330
0.4794920.25000060.008665
0.50000020.002888
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 15852 22.892958\n", + "1.000000 0.500000 11808 17.052741\n", + "0.708984 0.333496 7968 11.507134\n", + "0.591797 0.333496 7830 11.307839\n", + "0.386719 0.166748 4690 6.773150\n", + "0.708984 0.666992 4258 6.149269\n", + "0.958984 0.500000 4196 6.059731\n", + "0.386719 0.333496 3864 5.580267\n", + "0.591797 0.166748 2606 3.763503\n", + "1.000000 0.250000 2100 3.032754\n", + "NaN NaN 1566 2.261568\n", + "0.259277 0.333496 1330 1.920744\n", + "0.958984 1.000000 778 1.123563\n", + "1.000000 1.000000 378 0.545896\n", + "0.500000 0.250000 12 0.017330\n", + "0.479492 0.250000 6 0.008665\n", + " 0.500000 2 0.002888" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster h:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.5000003632461.946178
0.5917970.33349648268.230158
0.7089840.33349636826.279205
0.9589840.50000028584.873973
0.3867190.33349625964.427163
1.0000001.00000020303.461919
0.25000018923.226577
NaNNaN17202.933251
0.3867190.16674812442.121491
0.7089840.6669927661.306320
0.5917970.6669925000.852689
0.9589841.0000002000.341076
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 36324 61.946178\n", + "0.591797 0.333496 4826 8.230158\n", + "0.708984 0.333496 3682 6.279205\n", + "0.958984 0.500000 2858 4.873973\n", + "0.386719 0.333496 2596 4.427163\n", + "1.000000 1.000000 2030 3.461919\n", + " 0.250000 1892 3.226577\n", + "NaN NaN 1720 2.933251\n", + "0.386719 0.166748 1244 2.121491\n", + "0.708984 0.666992 766 1.306320\n", + "0.591797 0.666992 500 0.852689\n", + "0.958984 1.000000 200 0.341076" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " For all clusters:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.50000012423423.329891
0.5917970.33349610301319.344801
0.2592770.1667487807814.662260
0.7089840.3334965580110.478864
0.3867190.333496362376.804943
0.9589840.500000311515.849843
0.7089840.666992295945.557454
0.3867190.166748270115.072393
1.0000001.000000122862.307187
0.5917970.16674899021.859496
NaNNaN87291.639218
1.0000000.25000075501.417814
0.9589841.00000035520.667030
0.2592770.33349630240.567877
0.5917970.66699210000.187790
0.2592770.0833746340.119059
0.9589840.2500006000.112674
0.5000000.062500540.010141
0.250000340.006385
0.4794920.250000120.002253
0.7089840.25000060.001127
0.5917970.25000040.000751
0.7089840.50000020.000376
0.4794920.50000020.000376
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 124234 23.329891\n", + "0.591797 0.333496 103013 19.344801\n", + "0.259277 0.166748 78078 14.662260\n", + "0.708984 0.333496 55801 10.478864\n", + "0.386719 0.333496 36237 6.804943\n", + "0.958984 0.500000 31151 5.849843\n", + "0.708984 0.666992 29594 5.557454\n", + "0.386719 0.166748 27011 5.072393\n", + "1.000000 1.000000 12286 2.307187\n", + "0.591797 0.166748 9902 1.859496\n", + "NaN NaN 8729 1.639218\n", + "1.000000 0.250000 7550 1.417814\n", + "0.958984 1.000000 3552 0.667030\n", + "0.259277 0.333496 3024 0.567877\n", + "0.591797 0.666992 1000 0.187790\n", + "0.259277 0.083374 634 0.119059\n", + "0.958984 0.250000 600 0.112674\n", + "0.500000 0.062500 54 0.010141\n", + " 0.250000 34 0.006385\n", + "0.479492 0.250000 12 0.002253\n", + "0.708984 0.250000 6 0.001127\n", + "0.591797 0.250000 4 0.000751\n", + "0.708984 0.500000 2 0.000376\n", + "0.479492 0.500000 2 0.000376" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Generate machine configurations table per cluster + a global table\n", + "\n", + "df = df[df.columns.difference(['missing_data_reason'])]\n", + "\n", + "for l in \"abcdefgh\":\n", + " print(\"\\nFor cluster \" + l + \":\\n\")\n", + " do_group_by(df[df.cluster==l])\n", + "\n", + "print(\"\\n For all clusters:\")\n", + "do_group_by(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "supreme-hepatitis", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} 
diff --git a/machine_configs/machine_configs.ipynb b/machine_configs/machine_configs.ipynb new file mode 100644 index 00000000..bf43e41b --- /dev/null +++ b/machine_configs/machine_configs.ipynb @@ -0,0 +1,1592 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "built-symbol", + "metadata": {}, + "source": [ + "# Machine configurations\n", + "\n", + "This query returns all the distinct NCU/NMU configurations in the Borg clusters, including how many machine ids match each specific configuration.\n", + "\n", + "Please note that, for simplicity's sake, we are technically counting the number of ADD or UPDATE events for each configuration, and not the actual number of machines. Therefore a machine whose configuration changes over time may be counted twice or more." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "stuffed-lightning", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# For pretty printing\n", + "from IPython.display import display\n", + "\n", + "# Disables row ellipsis\n", + "pd.set_option('display.max_rows', 200)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "upper-lloyd", + "metadata": {}, + "outputs": [], + "source": [ + "# Load all machine event rows in a single DataFrame, and add a \"cluster\" column to differentiate\n", + "# between clusters\n", + "df = None\n", + "for l in \"abcdefgh\":\n", + " dfl = pd.read_csv(\"~/google_2019/machine_events/\" + l + \"_machine_events.csv\")\n", + " dfl[\"cluster\"] = l\n", + " if df is None:\n", + " df = dfl\n", + " else:\n", + " df = pd.concat([df, dfl], axis=0)\n", + "\n", + "# Filter only ADD or UPDATE events\n", + "df = df[(df.type==1)|(df.type==3)]\n", + "\n", + "# P.S: ADD=1, REMOVE=2, UPDATE=3\n", + " \n", + "df = df[[\"capacity.cpus\", \"capacity.memory\", \"cluster\", \n", + " \"missing_data_reason\", \"machine_id\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "presidential-farmer", + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
capacity.cpuscapacity.memoryclustermachine_id
missing_data_reason
NaN523781523781532510532510
\n", + "
" + ], + "text/plain": [ + " capacity.cpus capacity.memory cluster machine_id\n", + "missing_data_reason \n", + "NaN 523781 523781 532510 532510" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Checking if we need to deal with particular missing data\n", + "# No columns returned, so missing data can be safely ignored\n", + "df.groupby(by=[\"missing_data_reason\"], dropna=False).count()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "informative-vietnam", + "metadata": {}, + "outputs": [], + "source": [ + "def do_group_by(df):\n", + " # Exclude \"cluster\" column and perform group-by\n", + " dfg = df[df.columns.difference(['cluster'])]. \\\n", + " groupby(by=[\"capacity.cpus\",\"capacity.memory\"], \n", + " dropna=False).count()\n", + " \n", + " # Compute relative number of machines\n", + " total_machines = dfg['machine_id'].sum()\n", + " dfg[\"machine_id_perc\"] = dfg[\"machine_id\"] * 100 / total_machines\n", + " \n", + " # Sort descending\n", + " dfg = dfg.sort_values(\"machine_id_perc\", ascending=False)\n", + " \n", + " display(dfg)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "pretty-taiwan", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster a:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334962948734.758469
1.0000000.5000001344015.842705
0.7089840.3334961249514.728764
0.3867190.333496905710.676144
0.16674852656.206238
0.7089840.66699246085.431784
1.0000001.00000044465.240823
0.5917970.16674824842.928071
NaNNaN13771.623170
0.9589840.50000011431.347337
1.0000006540.770917
1.0000000.2500003660.431431
0.4794920.25000060.007073
0.7089840.25000060.007073
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 29487 34.758469\n", + "1.000000 0.500000 13440 15.842705\n", + "0.708984 0.333496 12495 14.728764\n", + "0.386719 0.333496 9057 10.676144\n", + " 0.166748 5265 6.206238\n", + "0.708984 0.666992 4608 5.431784\n", + "1.000000 1.000000 4446 5.240823\n", + "0.591797 0.166748 2484 2.928071\n", + "NaN NaN 1377 1.623170\n", + "0.958984 0.500000 1143 1.347337\n", + " 1.000000 654 0.770917\n", + "1.000000 0.250000 366 0.431431\n", + "0.479492 0.250000 6 0.007073\n", + "0.708984 0.250000 6 0.007073" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster b:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334961618431.982926
1.0000000.500000979019.347061
0.7089840.333496844816.694992
0.9589840.500000550210.873088
0.7089840.66699238327.572823
1.0000001.00000022144.375321
0.5917970.16674821524.252796
0.3867190.3334968161.612584
0.9589841.0000006181.221296
0.5917970.6669925000.988103
0.3867190.1667484120.814197
NaNNaN1340.264812
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 16184 31.982926\n", + "1.000000 0.500000 9790 19.347061\n", + "0.708984 0.333496 8448 16.694992\n", + "0.958984 0.500000 5502 10.873088\n", + "0.708984 0.666992 3832 7.572823\n", + "1.000000 1.000000 2214 4.375321\n", + "0.591797 0.166748 2152 4.252796\n", + "0.386719 0.333496 816 1.612584\n", + "0.958984 1.000000 618 1.221296\n", + "0.591797 0.666992 500 0.988103\n", + "0.386719 0.166748 412 0.814197\n", + "NaN NaN 134 0.264812" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster c:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667481575424.439204
0.3867190.3334961110417.225652
0.5917970.3334961040416.139741
0.9589840.500000663410.291334
1.0000000.50000056548.771059
0.3867190.16674835805.553660
0.7089840.66699229004.498774
1.0000001.00000027364.244361
0.25000021323.307375
NaNNaN14662.274208
0.9589841.0000007661.188297
0.7089840.3334966200.961807
0.9589840.2500006000.930781
0.5917970.1667481120.173746
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 15754 24.439204\n", + "0.386719 0.333496 11104 17.225652\n", + "0.591797 0.333496 10404 16.139741\n", + "0.958984 0.500000 6634 10.291334\n", + "1.000000 0.500000 5654 8.771059\n", + "0.386719 0.166748 3580 5.553660\n", + "0.708984 0.666992 2900 4.498774\n", + "1.000000 1.000000 2736 4.244361\n", + " 0.250000 2132 3.307375\n", + "NaN NaN 1466 2.274208\n", + "0.958984 1.000000 766 1.188297\n", + "0.708984 0.333496 620 0.961807\n", + "0.958984 0.250000 600 0.930781\n", + "0.591797 0.166748 112 0.173746" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster d:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.5917970.3334962839445.288376
0.3867190.333496840213.401174
0.2592770.166748802012.791885
0.3867190.16674858069.260559
0.7089840.66699243806.986092
0.33349639246.258772
0.5917970.16674825484.064055
NaNNaN4980.794309
0.2592770.3334964260.679469
1.0000000.5000002920.465739
0.5917970.25000040.006380
0.7089840.50000020.003190
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.591797 0.333496 28394 45.288376\n", + "0.386719 0.333496 8402 13.401174\n", + "0.259277 0.166748 8020 12.791885\n", + "0.386719 0.166748 5806 9.260559\n", + "0.708984 0.666992 4380 6.986092\n", + " 0.333496 3924 6.258772\n", + "0.591797 0.166748 2548 4.064055\n", + "NaN NaN 498 0.794309\n", + "0.259277 0.333496 426 0.679469\n", + "1.000000 0.500000 292 0.465739\n", + "0.591797 0.250000 4 0.006380\n", + "0.708984 0.500000 2 0.003190" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster e:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667483845248.202377
0.7089840.3334961178614.774608
0.9589840.500000864610.838389
0.7089840.66699276069.534674
1.0000000.50000055867.002457
0.3867190.16674844705.603470
0.2592770.33349612681.589530
0.0833746340.794765
NaNNaN5360.671915
0.5917970.3334963240.406158
1.0000000.2500002680.335957
1.0000001380.172993
0.5000000.062500540.067693
0.25000040.005014
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 38452 48.202377\n", + "0.708984 0.333496 11786 14.774608\n", + "0.958984 0.500000 8646 10.838389\n", + "0.708984 0.666992 7606 9.534674\n", + "1.000000 0.500000 5586 7.002457\n", + "0.386719 0.166748 4470 5.603470\n", + "0.259277 0.333496 1268 1.589530\n", + " 0.083374 634 0.794765\n", + "NaN NaN 536 0.671915\n", + "0.591797 0.333496 324 0.406158\n", + "1.000000 0.250000 268 0.335957\n", + " 1.000000 138 0.172993\n", + "0.500000 0.062500 54 0.067693\n", + " 0.250000 4 0.005014" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster f:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.5000004134066.396839
0.7089840.333496687811.046866
0.5917970.33349655648.936430
0.9589840.50000021723.488484
0.3867190.16674815442.479843
NaNNaN14322.299958
0.7089840.66699212441.998008
1.0000000.2500007921.272044
0.9589841.0000005360.860878
0.3867190.3334963980.639234
1.0000001.0000003440.552504
0.5000000.250000180.028910
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 41340 66.396839\n", + "0.708984 0.333496 6878 11.046866\n", + "0.591797 0.333496 5564 8.936430\n", + "0.958984 0.500000 2172 3.488484\n", + "0.386719 0.166748 1544 2.479843\n", + "NaN NaN 1432 2.299958\n", + "0.708984 0.666992 1244 1.998008\n", + "1.000000 0.250000 792 1.272044\n", + "0.958984 1.000000 536 0.860878\n", + "0.386719 0.333496 398 0.639234\n", + "1.000000 1.000000 344 0.552504\n", + "0.500000 0.250000 18 0.028910" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster g:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
0.2592770.1667481585222.892958
1.0000000.5000001180817.052741
0.7089840.333496796811.507134
0.5917970.333496783011.307839
0.3867190.16674846906.773150
0.7089840.66699242586.149269
0.9589840.50000041966.059731
0.3867190.33349638645.580267
0.5917970.16674826063.763503
1.0000000.25000021003.032754
NaNNaN15662.261568
0.2592770.33349613301.920744
0.9589841.0000007781.123563
1.0000001.0000003780.545896
0.5000000.250000120.017330
0.4794920.25000060.008665
0.50000020.002888
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "0.259277 0.166748 15852 22.892958\n", + "1.000000 0.500000 11808 17.052741\n", + "0.708984 0.333496 7968 11.507134\n", + "0.591797 0.333496 7830 11.307839\n", + "0.386719 0.166748 4690 6.773150\n", + "0.708984 0.666992 4258 6.149269\n", + "0.958984 0.500000 4196 6.059731\n", + "0.386719 0.333496 3864 5.580267\n", + "0.591797 0.166748 2606 3.763503\n", + "1.000000 0.250000 2100 3.032754\n", + "NaN NaN 1566 2.261568\n", + "0.259277 0.333496 1330 1.920744\n", + "0.958984 1.000000 778 1.123563\n", + "1.000000 1.000000 378 0.545896\n", + "0.500000 0.250000 12 0.017330\n", + "0.479492 0.250000 6 0.008665\n", + " 0.500000 2 0.002888" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "For cluster h:\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.5000003632461.946178
0.5917970.33349648268.230158
0.7089840.33349636826.279205
0.9589840.50000028584.873973
0.3867190.33349625964.427163
1.0000001.00000020303.461919
0.25000018923.226577
NaNNaN17202.933251
0.3867190.16674812442.121491
0.7089840.6669927661.306320
0.5917970.6669925000.852689
0.9589841.0000002000.341076
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 36324 61.946178\n", + "0.591797 0.333496 4826 8.230158\n", + "0.708984 0.333496 3682 6.279205\n", + "0.958984 0.500000 2858 4.873973\n", + "0.386719 0.333496 2596 4.427163\n", + "1.000000 1.000000 2030 3.461919\n", + " 0.250000 1892 3.226577\n", + "NaN NaN 1720 2.933251\n", + "0.386719 0.166748 1244 2.121491\n", + "0.708984 0.666992 766 1.306320\n", + "0.591797 0.666992 500 0.852689\n", + "0.958984 1.000000 200 0.341076" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " For all clusters:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
machine_idmachine_id_perc
capacity.cpuscapacity.memory
1.0000000.50000012423423.329891
0.5917970.33349610301319.344801
0.2592770.1667487807814.662260
0.7089840.3334965580110.478864
0.3867190.333496362376.804943
0.9589840.500000311515.849843
0.7089840.666992295945.557454
0.3867190.166748270115.072393
1.0000001.000000122862.307187
0.5917970.16674899021.859496
NaNNaN87291.639218
1.0000000.25000075501.417814
0.9589841.00000035520.667030
0.2592770.33349630240.567877
0.5917970.66699210000.187790
0.2592770.0833746340.119059
0.9589840.2500006000.112674
0.5000000.062500540.010141
0.250000340.006385
0.4794920.250000120.002253
0.7089840.25000060.001127
0.5917970.25000040.000751
0.7089840.50000020.000376
0.4794920.50000020.000376
\n", + "
" + ], + "text/plain": [ + " machine_id machine_id_perc\n", + "capacity.cpus capacity.memory \n", + "1.000000 0.500000 124234 23.329891\n", + "0.591797 0.333496 103013 19.344801\n", + "0.259277 0.166748 78078 14.662260\n", + "0.708984 0.333496 55801 10.478864\n", + "0.386719 0.333496 36237 6.804943\n", + "0.958984 0.500000 31151 5.849843\n", + "0.708984 0.666992 29594 5.557454\n", + "0.386719 0.166748 27011 5.072393\n", + "1.000000 1.000000 12286 2.307187\n", + "0.591797 0.166748 9902 1.859496\n", + "NaN NaN 8729 1.639218\n", + "1.000000 0.250000 7550 1.417814\n", + "0.958984 1.000000 3552 0.667030\n", + "0.259277 0.333496 3024 0.567877\n", + "0.591797 0.666992 1000 0.187790\n", + "0.259277 0.083374 634 0.119059\n", + "0.958984 0.250000 600 0.112674\n", + "0.500000 0.062500 54 0.010141\n", + " 0.250000 34 0.006385\n", + "0.479492 0.250000 12 0.002253\n", + "0.708984 0.250000 6 0.001127\n", + "0.591797 0.250000 4 0.000751\n", + "0.708984 0.500000 2 0.000376\n", + "0.479492 0.500000 2 0.000376" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Generate machine configurations table per cluster + a global table\n", + "\n", + "df = df[df.columns.difference(['missing_data_reason'])]\n", + "\n", + "for l in \"abcdefgh\":\n", + " print(\"\\nFor cluster \" + l + \":\\n\")\n", + " do_group_by(df[df.cluster==l])\n", + "\n", + "print(\"\\n For all clusters:\")\n", + "do_group_by(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "supreme-hepatitis", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}