bachelorThesis/machine_configs/machine_configs-Copy1.ipynb

1586 lines
50 KiB
Text
Raw Normal View History

2021-05-24 19:49:48 +00:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Machine configurations\n",
"\n",
"This query returns all the distinct NCU/NMU configurations in the Borg clusters, including how many machine IDs match each configuration.\n",
"\n",
"Please note that, for simplicity's sake, we are technically counting the number of ADD or UPDATE events for each configuration, and not the actual number of machines. Therefore, a machine whose configuration changes over time may be counted twice or more."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# For pretty printing\n",
"from IPython.display import display\n",
"\n",
"# Disables row ellipsis\n",
"pd.set_option('display.max_rows', 200)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load the machine event rows of every cluster and tag each row with a \"cluster\" column\n",
"# so that clusters can still be told apart after concatenation.\n",
"# The per-cluster frames are collected in a list and concatenated once: calling\n",
"# pd.concat inside the loop would re-copy all previously loaded rows on every iteration.\n",
"cluster_frames = []\n",
"for l in \"abcdefgh\":\n",
"    dfl = pd.read_csv(\"~/google_2019/machine_events/\" + l + \"_machine_events.csv\")\n",
"    dfl[\"cluster\"] = l\n",
"    cluster_frames.append(dfl)\n",
"df = pd.concat(cluster_frames, axis=0)\n",
"\n",
"# Keep only ADD or UPDATE events (event type codes: ADD=1, REMOVE=2, UPDATE=3)\n",
"df = df[df[\"type\"].isin([1, 3])]\n",
"\n",
"# Keep only the columns used by the following cells\n",
"df = df[[\"capacity.cpus\", \"capacity.memory\", \"cluster\",\n",
"         \"missing_data_reason\", \"machine_id\"]]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th>cluster</th>\n",
" <th>machine_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>missing_data_reason</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <td>523781</td>\n",
" <td>523781</td>\n",
" <td>532510</td>\n",
" <td>532510</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" capacity.cpus capacity.memory cluster machine_id\n",
"missing_data_reason \n",
"NaN 523781 523781 532510 532510"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Checking if we need to deal with particular missing data\n",
"# No columns returned, so missing data can be safely ignored\n",
"df.groupby(by=[\"missing_data_reason\"], dropna=False).count()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def do_group_by(df):\n",
"    \"\"\"Display how many rows of `df` fall into each (CPU, memory) capacity pair.\n",
"\n",
"    The \"cluster\" column is left out, the rows are grouped by \"capacity.cpus\" and\n",
"    \"capacity.memory\" (NaN capacities form their own group) and the non-null values\n",
"    of the remaining columns are counted per group. A \"machine_id_perc\" column with\n",
"    each group's share of the total \"machine_id\" count, as a percentage, is added and\n",
"    the table is displayed sorted by that share in descending order.\n",
"    Nothing is returned.\n",
"    \"\"\"\n",
"    capacity_keys = [\"capacity.cpus\", \"capacity.memory\"]\n",
"    without_cluster = df[df.columns.difference(['cluster'])]\n",
"    counts = without_cluster.groupby(by=capacity_keys, dropna=False).count()\n",
"\n",
"    # Share of each configuration relative to the sum over all configurations\n",
"    counts[\"machine_id_perc\"] = counts[\"machine_id\"] * 100 / counts['machine_id'].sum()\n",
"\n",
"    display(counts.sort_values(\"machine_id_perc\", ascending=False))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"For cluster a:\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>29487</td>\n",
" <td>34.758469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>13440</td>\n",
" <td>15.842705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.333496</th>\n",
" <td>12495</td>\n",
" <td>14.728764</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">0.386719</th>\n",
" <th>0.333496</th>\n",
" <td>9057</td>\n",
" <td>10.676144</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.166748</th>\n",
" <td>5265</td>\n",
" <td>6.206238</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>4608</td>\n",
" <td>5.431784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>1.000000</th>\n",
" <td>4446</td>\n",
" <td>5.240823</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.166748</th>\n",
" <td>2484</td>\n",
" <td>2.928071</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>1377</td>\n",
" <td>1.623170</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">0.958984</th>\n",
" <th>0.500000</th>\n",
" <td>1143</td>\n",
" <td>1.347337</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <td>654</td>\n",
" <td>0.770917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.250000</th>\n",
" <td>366</td>\n",
" <td>0.431431</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.479492</th>\n",
" <th>0.250000</th>\n",
" <td>6</td>\n",
" <td>0.007073</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.250000</th>\n",
" <td>6</td>\n",
" <td>0.007073</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"0.591797 0.333496 29487 34.758469\n",
"1.000000 0.500000 13440 15.842705\n",
"0.708984 0.333496 12495 14.728764\n",
"0.386719 0.333496 9057 10.676144\n",
" 0.166748 5265 6.206238\n",
"0.708984 0.666992 4608 5.431784\n",
"1.000000 1.000000 4446 5.240823\n",
"0.591797 0.166748 2484 2.928071\n",
"NaN NaN 1377 1.623170\n",
"0.958984 0.500000 1143 1.347337\n",
" 1.000000 654 0.770917\n",
"1.000000 0.250000 366 0.431431\n",
"0.479492 0.250000 6 0.007073\n",
"0.708984 0.250000 6 0.007073"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"For cluster b:\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>16184</td>\n",
" <td>31.982926</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>9790</td>\n",
" <td>19.347061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.333496</th>\n",
" <td>8448</td>\n",
" <td>16.694992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.500000</th>\n",
" <td>5502</td>\n",
" <td>10.873088</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>3832</td>\n",
" <td>7.572823</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>1.000000</th>\n",
" <td>2214</td>\n",
" <td>4.375321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.166748</th>\n",
" <td>2152</td>\n",
" <td>4.252796</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.333496</th>\n",
" <td>816</td>\n",
" <td>1.612584</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>1.000000</th>\n",
" <td>618</td>\n",
" <td>1.221296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.666992</th>\n",
" <td>500</td>\n",
" <td>0.988103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.166748</th>\n",
" <td>412</td>\n",
" <td>0.814197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>134</td>\n",
" <td>0.264812</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"0.591797 0.333496 16184 31.982926\n",
"1.000000 0.500000 9790 19.347061\n",
"0.708984 0.333496 8448 16.694992\n",
"0.958984 0.500000 5502 10.873088\n",
"0.708984 0.666992 3832 7.572823\n",
"1.000000 1.000000 2214 4.375321\n",
"0.591797 0.166748 2152 4.252796\n",
"0.386719 0.333496 816 1.612584\n",
"0.958984 1.000000 618 1.221296\n",
"0.591797 0.666992 500 0.988103\n",
"0.386719 0.166748 412 0.814197\n",
"NaN NaN 134 0.264812"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"For cluster c:\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.166748</th>\n",
" <td>15754</td>\n",
" <td>24.439204</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.333496</th>\n",
" <td>11104</td>\n",
" <td>17.225652</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>10404</td>\n",
" <td>16.139741</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.500000</th>\n",
" <td>6634</td>\n",
" <td>10.291334</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>5654</td>\n",
" <td>8.771059</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.166748</th>\n",
" <td>3580</td>\n",
" <td>5.553660</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>2900</td>\n",
" <td>4.498774</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">1.000000</th>\n",
" <th>1.000000</th>\n",
" <td>2736</td>\n",
" <td>4.244361</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.250000</th>\n",
" <td>2132</td>\n",
" <td>3.307375</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>1466</td>\n",
" <td>2.274208</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>1.000000</th>\n",
" <td>766</td>\n",
" <td>1.188297</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.333496</th>\n",
" <td>620</td>\n",
" <td>0.961807</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.250000</th>\n",
" <td>600</td>\n",
" <td>0.930781</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.166748</th>\n",
" <td>112</td>\n",
" <td>0.173746</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"0.259277 0.166748 15754 24.439204\n",
"0.386719 0.333496 11104 17.225652\n",
"0.591797 0.333496 10404 16.139741\n",
"0.958984 0.500000 6634 10.291334\n",
"1.000000 0.500000 5654 8.771059\n",
"0.386719 0.166748 3580 5.553660\n",
"0.708984 0.666992 2900 4.498774\n",
"1.000000 1.000000 2736 4.244361\n",
" 0.250000 2132 3.307375\n",
"NaN NaN 1466 2.274208\n",
"0.958984 1.000000 766 1.188297\n",
"0.708984 0.333496 620 0.961807\n",
"0.958984 0.250000 600 0.930781\n",
"0.591797 0.166748 112 0.173746"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"For cluster d:\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>28394</td>\n",
" <td>45.288376</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.333496</th>\n",
" <td>8402</td>\n",
" <td>13.401174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.166748</th>\n",
" <td>8020</td>\n",
" <td>12.791885</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.166748</th>\n",
" <td>5806</td>\n",
" <td>9.260559</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>4380</td>\n",
" <td>6.986092</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.333496</th>\n",
" <td>3924</td>\n",
" <td>6.258772</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.166748</th>\n",
" <td>2548</td>\n",
" <td>4.064055</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>498</td>\n",
" <td>0.794309</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.333496</th>\n",
" <td>426</td>\n",
" <td>0.679469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>292</td>\n",
" <td>0.465739</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.250000</th>\n",
" <td>4</td>\n",
" <td>0.006380</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.500000</th>\n",
" <td>2</td>\n",
" <td>0.003190</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"0.591797 0.333496 28394 45.288376\n",
"0.386719 0.333496 8402 13.401174\n",
"0.259277 0.166748 8020 12.791885\n",
"0.386719 0.166748 5806 9.260559\n",
"0.708984 0.666992 4380 6.986092\n",
" 0.333496 3924 6.258772\n",
"0.591797 0.166748 2548 4.064055\n",
"NaN NaN 498 0.794309\n",
"0.259277 0.333496 426 0.679469\n",
"1.000000 0.500000 292 0.465739\n",
"0.591797 0.250000 4 0.006380\n",
"0.708984 0.500000 2 0.003190"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"For cluster e:\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.166748</th>\n",
" <td>38452</td>\n",
" <td>48.202377</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.333496</th>\n",
" <td>11786</td>\n",
" <td>14.774608</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.500000</th>\n",
" <td>8646</td>\n",
" <td>10.838389</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>7606</td>\n",
" <td>9.534674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>5586</td>\n",
" <td>7.002457</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.166748</th>\n",
" <td>4470</td>\n",
" <td>5.603470</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">0.259277</th>\n",
" <th>0.333496</th>\n",
" <td>1268</td>\n",
" <td>1.589530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.083374</th>\n",
" <td>634</td>\n",
" <td>0.794765</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>536</td>\n",
" <td>0.671915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>324</td>\n",
" <td>0.406158</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">1.000000</th>\n",
" <th>0.250000</th>\n",
" <td>268</td>\n",
" <td>0.335957</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <td>138</td>\n",
" <td>0.172993</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">0.500000</th>\n",
" <th>0.062500</th>\n",
" <td>54</td>\n",
" <td>0.067693</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.250000</th>\n",
" <td>4</td>\n",
" <td>0.005014</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"0.259277 0.166748 38452 48.202377\n",
"0.708984 0.333496 11786 14.774608\n",
"0.958984 0.500000 8646 10.838389\n",
"0.708984 0.666992 7606 9.534674\n",
"1.000000 0.500000 5586 7.002457\n",
"0.386719 0.166748 4470 5.603470\n",
"0.259277 0.333496 1268 1.589530\n",
" 0.083374 634 0.794765\n",
"NaN NaN 536 0.671915\n",
"0.591797 0.333496 324 0.406158\n",
"1.000000 0.250000 268 0.335957\n",
" 1.000000 138 0.172993\n",
"0.500000 0.062500 54 0.067693\n",
" 0.250000 4 0.005014"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"For cluster f:\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>41340</td>\n",
" <td>66.396839</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.333496</th>\n",
" <td>6878</td>\n",
" <td>11.046866</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>5564</td>\n",
" <td>8.936430</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.500000</th>\n",
" <td>2172</td>\n",
" <td>3.488484</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.166748</th>\n",
" <td>1544</td>\n",
" <td>2.479843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>1432</td>\n",
" <td>2.299958</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>1244</td>\n",
" <td>1.998008</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.250000</th>\n",
" <td>792</td>\n",
" <td>1.272044</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>1.000000</th>\n",
" <td>536</td>\n",
" <td>0.860878</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.333496</th>\n",
" <td>398</td>\n",
" <td>0.639234</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>1.000000</th>\n",
" <td>344</td>\n",
" <td>0.552504</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.500000</th>\n",
" <th>0.250000</th>\n",
" <td>18</td>\n",
" <td>0.028910</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"1.000000 0.500000 41340 66.396839\n",
"0.708984 0.333496 6878 11.046866\n",
"0.591797 0.333496 5564 8.936430\n",
"0.958984 0.500000 2172 3.488484\n",
"0.386719 0.166748 1544 2.479843\n",
"NaN NaN 1432 2.299958\n",
"0.708984 0.666992 1244 1.998008\n",
"1.000000 0.250000 792 1.272044\n",
"0.958984 1.000000 536 0.860878\n",
"0.386719 0.333496 398 0.639234\n",
"1.000000 1.000000 344 0.552504\n",
"0.500000 0.250000 18 0.028910"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"For cluster g:\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.166748</th>\n",
" <td>15852</td>\n",
" <td>22.892958</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>11808</td>\n",
" <td>17.052741</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.333496</th>\n",
" <td>7968</td>\n",
" <td>11.507134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>7830</td>\n",
" <td>11.307839</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.166748</th>\n",
" <td>4690</td>\n",
" <td>6.773150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>4258</td>\n",
" <td>6.149269</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.500000</th>\n",
" <td>4196</td>\n",
" <td>6.059731</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.333496</th>\n",
" <td>3864</td>\n",
" <td>5.580267</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.166748</th>\n",
" <td>2606</td>\n",
" <td>3.763503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.250000</th>\n",
" <td>2100</td>\n",
" <td>3.032754</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>1566</td>\n",
" <td>2.261568</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.333496</th>\n",
" <td>1330</td>\n",
" <td>1.920744</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>1.000000</th>\n",
" <td>778</td>\n",
" <td>1.123563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>1.000000</th>\n",
" <td>378</td>\n",
" <td>0.545896</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.500000</th>\n",
" <th>0.250000</th>\n",
" <td>12</td>\n",
" <td>0.017330</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">0.479492</th>\n",
" <th>0.250000</th>\n",
" <td>6</td>\n",
" <td>0.008665</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.500000</th>\n",
" <td>2</td>\n",
" <td>0.002888</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"0.259277 0.166748 15852 22.892958\n",
"1.000000 0.500000 11808 17.052741\n",
"0.708984 0.333496 7968 11.507134\n",
"0.591797 0.333496 7830 11.307839\n",
"0.386719 0.166748 4690 6.773150\n",
"0.708984 0.666992 4258 6.149269\n",
"0.958984 0.500000 4196 6.059731\n",
"0.386719 0.333496 3864 5.580267\n",
"0.591797 0.166748 2606 3.763503\n",
"1.000000 0.250000 2100 3.032754\n",
"NaN NaN 1566 2.261568\n",
"0.259277 0.333496 1330 1.920744\n",
"0.958984 1.000000 778 1.123563\n",
"1.000000 1.000000 378 0.545896\n",
"0.500000 0.250000 12 0.017330\n",
"0.479492 0.250000 6 0.008665\n",
" 0.500000 2 0.002888"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"For cluster h:\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>36324</td>\n",
" <td>61.946178</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>4826</td>\n",
" <td>8.230158</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.333496</th>\n",
" <td>3682</td>\n",
" <td>6.279205</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.500000</th>\n",
" <td>2858</td>\n",
" <td>4.873973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.333496</th>\n",
" <td>2596</td>\n",
" <td>4.427163</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">1.000000</th>\n",
" <th>1.000000</th>\n",
" <td>2030</td>\n",
" <td>3.461919</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.250000</th>\n",
" <td>1892</td>\n",
" <td>3.226577</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>1720</td>\n",
" <td>2.933251</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.166748</th>\n",
" <td>1244</td>\n",
" <td>2.121491</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>766</td>\n",
" <td>1.306320</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.666992</th>\n",
" <td>500</td>\n",
" <td>0.852689</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>1.000000</th>\n",
" <td>200</td>\n",
" <td>0.341076</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"1.000000 0.500000 36324 61.946178\n",
"0.591797 0.333496 4826 8.230158\n",
"0.708984 0.333496 3682 6.279205\n",
"0.958984 0.500000 2858 4.873973\n",
"0.386719 0.333496 2596 4.427163\n",
"1.000000 1.000000 2030 3.461919\n",
" 0.250000 1892 3.226577\n",
"NaN NaN 1720 2.933251\n",
"0.386719 0.166748 1244 2.121491\n",
"0.708984 0.666992 766 1.306320\n",
"0.591797 0.666992 500 0.852689\n",
"0.958984 1.000000 200 0.341076"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" For all clusters:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>machine_id</th>\n",
" <th>machine_id_perc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>capacity.cpus</th>\n",
" <th>capacity.memory</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.500000</th>\n",
" <td>124234</td>\n",
" <td>23.329891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.333496</th>\n",
" <td>103013</td>\n",
" <td>19.344801</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.166748</th>\n",
" <td>78078</td>\n",
" <td>14.662260</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.333496</th>\n",
" <td>55801</td>\n",
" <td>10.478864</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.333496</th>\n",
" <td>36237</td>\n",
" <td>6.804943</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.500000</th>\n",
" <td>31151</td>\n",
" <td>5.849843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.666992</th>\n",
" <td>29594</td>\n",
" <td>5.557454</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.386719</th>\n",
" <th>0.166748</th>\n",
" <td>27011</td>\n",
" <td>5.072393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>1.000000</th>\n",
" <td>12286</td>\n",
" <td>2.307187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.166748</th>\n",
" <td>9902</td>\n",
" <td>1.859496</td>\n",
" </tr>\n",
" <tr>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td>8729</td>\n",
" <td>1.639218</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.000000</th>\n",
" <th>0.250000</th>\n",
" <td>7550</td>\n",
" <td>1.417814</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>1.000000</th>\n",
" <td>3552</td>\n",
" <td>0.667030</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.333496</th>\n",
" <td>3024</td>\n",
" <td>0.567877</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.666992</th>\n",
" <td>1000</td>\n",
" <td>0.187790</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.259277</th>\n",
" <th>0.083374</th>\n",
" <td>634</td>\n",
" <td>0.119059</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.958984</th>\n",
" <th>0.250000</th>\n",
" <td>600</td>\n",
" <td>0.112674</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">0.500000</th>\n",
" <th>0.062500</th>\n",
" <td>54</td>\n",
" <td>0.010141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.250000</th>\n",
" <td>34</td>\n",
" <td>0.006385</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.479492</th>\n",
" <th>0.250000</th>\n",
" <td>12</td>\n",
" <td>0.002253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.250000</th>\n",
" <td>6</td>\n",
" <td>0.001127</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.591797</th>\n",
" <th>0.250000</th>\n",
" <td>4</td>\n",
" <td>0.000751</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.708984</th>\n",
" <th>0.500000</th>\n",
" <td>2</td>\n",
" <td>0.000376</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.479492</th>\n",
" <th>0.500000</th>\n",
" <td>2</td>\n",
" <td>0.000376</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" machine_id machine_id_perc\n",
"capacity.cpus capacity.memory \n",
"1.000000 0.500000 124234 23.329891\n",
"0.591797 0.333496 103013 19.344801\n",
"0.259277 0.166748 78078 14.662260\n",
"0.708984 0.333496 55801 10.478864\n",
"0.386719 0.333496 36237 6.804943\n",
"0.958984 0.500000 31151 5.849843\n",
"0.708984 0.666992 29594 5.557454\n",
"0.386719 0.166748 27011 5.072393\n",
"1.000000 1.000000 12286 2.307187\n",
"0.591797 0.166748 9902 1.859496\n",
"NaN NaN 8729 1.639218\n",
"1.000000 0.250000 7550 1.417814\n",
"0.958984 1.000000 3552 0.667030\n",
"0.259277 0.333496 3024 0.567877\n",
"0.591797 0.666992 1000 0.187790\n",
"0.259277 0.083374 634 0.119059\n",
"0.958984 0.250000 600 0.112674\n",
"0.500000 0.062500 54 0.010141\n",
" 0.250000 34 0.006385\n",
"0.479492 0.250000 12 0.002253\n",
"0.708984 0.250000 6 0.001127\n",
"0.591797 0.250000 4 0.000751\n",
"0.708984 0.500000 2 0.000376\n",
"0.479492 0.500000 2 0.000376"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Generate machine configurations table per cluster + a global table\n",
"\n",
"# Work on a separate frame instead of overwriting df, so that cells which still read\n",
"# the missing_data_reason column keep working when they are re-run after this one.\n",
"config_df = df[df.columns.difference(['missing_data_reason'])]\n",
"\n",
"for l in \"abcdefgh\":\n",
"    print(\"\\nFor cluster \" + l + \":\\n\")\n",
"    do_group_by(config_df[config_df.cluster==l])\n",
"\n",
"print(\"\\n For all clusters:\")\n",
"do_group_by(config_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}