# Machine configurations

This query returns all the distinct NCU/NMU configurations in the Borg clusters, including how many machine IDs match each specific configuration.

Please note that, for simplicity's sake, we are technically counting the number of ADD or UPDATE events for each configuration, not the actual number of machines. Therefore, a machine whose configuration changes over time may be counted twice or more.

In [1]:
import pandas as pd

# For pretty printing
from IPython.display import display

# Allow up to 200 rows to be shown before pandas abbreviates a frame with an ellipsis
pd.options.display.max_rows = 200

In [3]:
# Load the machine event rows of every cluster into a single DataFrame, tagging
# each row with a "cluster" column to differentiate between clusters.
# All per-cluster frames are collected first and concatenated in one call:
# calling pd.concat inside the loop copies the already-loaded rows again on
# every iteration.
df = pd.concat(
    [
        pd.read_csv(
            "~/google_2019/machine_events/" + cluster + "_machine_events.csv"
        ).assign(cluster=cluster)
        for cluster in "abcdefgh"
    ],
    axis=0,
)

# Keep only ADD or UPDATE events (event codes: ADD=1, REMOVE=2, UPDATE=3).
# The column is selected with brackets rather than as an attribute, since
# "type" is an easy name to confuse with the Python builtin.
df = df[df["type"].isin([1, 3])]

# Retain only the columns used in the rest of the notebook
df = df[["capacity.cpus", "capacity.memory", "cluster",
         "missing_data_reason", "machine_id"]]

In [4]:
# Sanity check: count the rows for each distinct "missing_data_reason" value,
# keeping NaN as a group of its own (dropna=False).
# Only the NaN group appears in the result, i.e. no row reports a
# missing-data reason, so missing data can be safely ignored.
missing_reason_groups = df.groupby(by=["missing_data_reason"], dropna=False)
missing_reason_groups.count()

Unnamed: 0_level_0,capacity.cpus,capacity.memory,cluster,machine_id
missing_data_reason,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
,523781,523781,532510,532510


In [5]:
def do_group_by(df):
    """Display a table of the distinct CPU/memory capacity configurations in ``df``.

    The "cluster" column is left out, the remaining rows are grouped by the
    ("capacity.cpus", "capacity.memory") pair, and the non-null values of the
    other columns are counted per group. Rows whose capacities are NaN form a
    group of their own (``dropna=False``). A "machine_id_perc" column holding
    each group's share of the total "machine_id" count (in percent) is added,
    and the table is shown sorted by that share, largest first.

    Nothing is returned; the table is rendered through ``display``.
    """
    config_keys = ["capacity.cpus", "capacity.memory"]

    # Drop "cluster" so it does not show up as a counted column
    without_cluster = df[df.columns.difference(['cluster'])]
    counts = without_cluster.groupby(by=config_keys, dropna=False).count()

    # Share of each configuration relative to the overall count
    overall = counts['machine_id'].sum()
    counts["machine_id_perc"] = counts["machine_id"] * 100 / overall

    # Most frequent configurations first
    display(counts.sort_values("machine_id_perc", ascending=False))

In [6]:
# Print one machine-configuration table per cluster, followed by a table
# covering all clusters together

# "missing_data_reason" is no longer needed, so keep every other column
kept_columns = df.columns.difference(['missing_data_reason'])
df = df[kept_columns]

for cluster_name in "abcdefgh":
    print("\nFor cluster " + cluster_name + ":\n")
    cluster_rows = df[df["cluster"] == cluster_name]
    do_group_by(cluster_rows)

print("\n For all clusters:")
do_group_by(df)


For cluster a:



Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
0.591797,0.333496,29487,34.758469
1.0,0.5,13440,15.842705
0.708984,0.333496,12495,14.728764
0.386719,0.333496,9057,10.676144
0.386719,0.166748,5265,6.206238
0.708984,0.666992,4608,5.431784
1.0,1.0,4446,5.240823
0.591797,0.166748,2484,2.928071
,,1377,1.62317
0.958984,0.5,1143,1.347337



For cluster b:



Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
0.591797,0.333496,16184,31.982926
1.0,0.5,9790,19.347061
0.708984,0.333496,8448,16.694992
0.958984,0.5,5502,10.873088
0.708984,0.666992,3832,7.572823
1.0,1.0,2214,4.375321
0.591797,0.166748,2152,4.252796
0.386719,0.333496,816,1.612584
0.958984,1.0,618,1.221296
0.591797,0.666992,500,0.988103



For cluster c:



Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
0.259277,0.166748,15754,24.439204
0.386719,0.333496,11104,17.225652
0.591797,0.333496,10404,16.139741
0.958984,0.5,6634,10.291334
1.0,0.5,5654,8.771059
0.386719,0.166748,3580,5.55366
0.708984,0.666992,2900,4.498774
1.0,1.0,2736,4.244361
1.0,0.25,2132,3.307375
,,1466,2.274208



For cluster d:



Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
0.591797,0.333496,28394,45.288376
0.386719,0.333496,8402,13.401174
0.259277,0.166748,8020,12.791885
0.386719,0.166748,5806,9.260559
0.708984,0.666992,4380,6.986092
0.708984,0.333496,3924,6.258772
0.591797,0.166748,2548,4.064055
,,498,0.794309
0.259277,0.333496,426,0.679469
1.0,0.5,292,0.465739



For cluster e:



Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
0.259277,0.166748,38452,48.202377
0.708984,0.333496,11786,14.774608
0.958984,0.5,8646,10.838389
0.708984,0.666992,7606,9.534674
1.0,0.5,5586,7.002457
0.386719,0.166748,4470,5.60347
0.259277,0.333496,1268,1.58953
0.259277,0.083374,634,0.794765
,,536,0.671915
0.591797,0.333496,324,0.406158



For cluster f:



Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,0.5,41340,66.396839
0.708984,0.333496,6878,11.046866
0.591797,0.333496,5564,8.93643
0.958984,0.5,2172,3.488484
0.386719,0.166748,1544,2.479843
,,1432,2.299958
0.708984,0.666992,1244,1.998008
1.0,0.25,792,1.272044
0.958984,1.0,536,0.860878
0.386719,0.333496,398,0.639234



For cluster g:



Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
0.259277,0.166748,15852,22.892958
1.0,0.5,11808,17.052741
0.708984,0.333496,7968,11.507134
0.591797,0.333496,7830,11.307839
0.386719,0.166748,4690,6.77315
0.708984,0.666992,4258,6.149269
0.958984,0.5,4196,6.059731
0.386719,0.333496,3864,5.580267
0.591797,0.166748,2606,3.763503
1.0,0.25,2100,3.032754



For cluster h:



Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,0.5,36324,61.946178
0.591797,0.333496,4826,8.230158
0.708984,0.333496,3682,6.279205
0.958984,0.5,2858,4.873973
0.386719,0.333496,2596,4.427163
1.0,1.0,2030,3.461919
1.0,0.25,1892,3.226577
,,1720,2.933251
0.386719,0.166748,1244,2.121491
0.708984,0.666992,766,1.30632



 For all clusters:


Unnamed: 0_level_0,Unnamed: 1_level_0,machine_id,machine_id_perc
capacity.cpus,capacity.memory,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,0.5,124234,23.329891
0.591797,0.333496,103013,19.344801
0.259277,0.166748,78078,14.66226
0.708984,0.333496,55801,10.478864
0.386719,0.333496,36237,6.804943
0.958984,0.5,31151,5.849843
0.708984,0.666992,29594,5.557454
0.386719,0.166748,27011,5.072393
1.0,1.0,12286,2.307187
0.591797,0.166748,9902,1.859496
