# Task slowdown

In [3]:
import json
import sys
import gzip
import pandas
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display, HTML

### Legend for columns:
- *n_fsh*: number of jobs that terminated with status 6 (FINISH)
- *n_non*: number of jobs that did not terminate with status 6 or did not terminate at all
- *finished%*: `n_fsh` / (`n_fsh` + `n_non`)
- *c_zero_end*: count of number of **Finished** jobs that have a last execution time of `0` 
- *s_last*: sum of execution times for last events
- *m_last*: mean execution time for last event
- *s_all*: sum of all execution times for all events
- *m_all*: mean execution time for all events
- *s_slow*: sum of **slowdown** values computed for each job: `job_slowdown` = sum(`exec_time`) / last(`exec_time`)
- *m_slow*: mean job-wise **slowdown** value, i.e. `s_slow` / `n_fsh`
- *m_slow_2*: priority-wise mean **slowdown**, i.e. `s_all` / `s_last`

In [24]:
CLUSTERS = "abcdefgh"
DIR = "/Users/maggicl/Git/bachelorThesis/task_slowdown/"

df = {}
dftotal = None

def tier(p):
    if p <= 99:
        return "Free"
    elif p <= 115:
        return "Best effort batch"
    elif p <= 119:
        return "Mid"
    elif p <= 359:
        return "Production"
    else:
        return "Monitoring"

for cluster in CLUSTERS:
    print("\\taskslowdown{Cluster " + cluster.upper() + "}{")
    df[cluster] = pandas.read_csv(DIR + "/" + cluster + "_slowdown_table.csv")
    df[cluster]["tier"] = df[cluster]["priority"].apply(tier)
    df[cluster] = df[cluster][df[cluster]["priority"].ge(0)]
    df[cluster] = df[cluster][df[cluster]["s_last"] > 0]
    del df[cluster]["c_zero_end"]
    del df[cluster]["Unnamed: 0"]
    
    df[cluster] = df[cluster].groupby("tier").sum().reset_index()
    del df[cluster]["priority"]
    
    df[cluster]["m_slow_2"] = (df[cluster]["s_all"] / df[cluster]["s_last"]).round(2)
    del df[cluster]["s_slow"]
    del df[cluster]["m_slow"]
    df[cluster]["m_all"] = (df[cluster]["m_all"] / 1000000).round(0)
    df[cluster]["m_last"] = (df[cluster]["m_last"] / 1000000).round(0)

    df2 = df[cluster].copy()
    df[cluster]["c"] = df2["m_all"] / df2["s_all"]
    del df2["s_all"]
    del df2["s_last"]
    del df2["n_fsh"]
    del df2["n_non"]
    #del df2["c"]
    df2["finished%"] = (df2["finished%"] * 100).round(2).astype(str) + "%"
    print(df2.to_latex(index=False))
    print("}")
    
    if dftotal is None:
        dftotal = df[cluster]
    else:
        dftotal = dftotal.append(df[cluster])
        
dftotal = dftotal.groupby("tier").sum().reset_index()
dftotal["m_slow_2"] = (dftotal["s_all"] / dftotal["s_last"]) 
dftotal["finished%"] = (dftotal["n_fsh"]) / (dftotal["n_fsh"] + dftotal["n_non"])
dftotal["m_last"] = (dftotal["s_last"] / (dftotal["n_fsh"] * 1000000)).round(0)
dftotal["m_all"] = (dftotal["s_all"] * dftotal["c"] / 1000000).round(0)
del dftotal["s_all"]
del dftotal["s_last"]
del dftotal["n_fsh"]
del dftotal["n_non"]
del dftotal["c"]

df2 = dftotal.copy()
df2["m_all"] = df2["m_all"].round(0)
df2["m_last"] = df2["m_last"].round(0)
#del df2["c"]
df2["finished%"] = (df2["finished%"] * 100).round(2).astype(str) + "%"
print(df2.to_latex(index=False))
print("}")


\taskslowdown{Cluster A}{
\begin{tabular}{llrrr}
\toprule
              tier & finished\% &     m\_last &      m\_all &  m\_slow\_2 \\
\midrule
 Best effort batch &   212.62\% &    71108.0 &    14201.0 &      5.17 \\
              Free &     0.33\% &     5769.0 &     1203.0 &     82.97 \\
               Mid &    46.22\% &     8510.0 &     9135.0 &      1.16 \\
        Monitoring &     2.82\% &  1200998.0 &  1054458.0 &      2.86 \\
        Production &    27.21\% &     4546.0 &    16845.0 &      4.12 \\
\bottomrule
\end{tabular}

}
\taskslowdown{Cluster B}{
\begin{tabular}{llrrr}
\toprule
              tier & finished\% &     m\_last &      m\_all &  m\_slow\_2 \\
\midrule
 Best effort batch &    71.84\% &  1018454.0 &   550288.0 &      8.47 \\
              Free &    45.21\% &    12047.0 &     5588.0 &      1.18 \\
               Mid &     8.82\% &   225147.0 &   336262.0 &      1.11 \\
        Monitoring &     4.12\% &  2627612.0 &  2024679.0 &      1.51 \\
        Production &    30

In [5]:
wc

NameError: name 'wc' is not defined

In [None]:
tc