# Task slowdown

In [5]:
import json
import sys
import gzip
import pandas
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display, HTML

### Legend for columns:
- *n_fsh*: number of jobs that terminated with status 6 (FINISH)
- *n_non*: number of jobs that did not terminate with status 6 or did not terminate at all
- *finished%*: `n_fsh` / (`n_fsh` + `n_non`)
- *c_zero_end*: number of **Finished** jobs whose last execution time is `0`
- *s_last*: sum of execution times for last events
- *m_last*: mean execution time for last event
- *s_all*: sum of all execution times for all events
- *m_all*: mean execution time for all events
- *s_slow*: sum of **slowdown** values computed for each job: `job_slowdown` = sum(`exec_time`) / last(`exec_time`)
- *m_slow*: mean job-wise **slowdown** value, i.e. `s_slow` / `n_fsh`
- *m_slow_2*: priority-wise mean **slowdown**, i.e. `s_all` / `s_last`

In [38]:
# Build one LaTeX slowdown table per cluster, then a table aggregated over all
# clusters, grouped by priority.
CLUSTERS = "abcdefgh"
DIR = "/Users/maggicl/Git/bachelorThesis/task_slowdown/"

# Factor the mean execution times are divided by before being displayed.
# NOTE(review): presumably converts microseconds to seconds -- confirm the
# unit used in the CSV files.
TIME_SCALE = 1000000

# Columns that are only needed to aggregate across clusters and are therefore
# hidden from the printed tables.
_AGGREGATION_COLUMNS = ["s_all", "s_last", "n_fsh", "n_non", "c"]


def _latex_view(table):
    """Return a printable copy of `table`.

    The aggregation-only columns are dropped and `finished%` is turned from a
    fraction into a percentage string such as "10.62%". `table` itself is not
    modified.
    """
    view = table.drop(columns=_AGGREGATION_COLUMNS)
    view["finished%"] = (view["finished%"] * 100).round(2).astype(str) + "%"
    return view


df = {}

for cluster in CLUSTERS:
    print("\\taskslowdown{Cluster " + cluster.upper() + "}{")
    df[cluster] = pandas.read_csv(DIR + "/" + cluster + "_slowdown_table.csv")
    df[cluster]["m_slow_2"] = (df[cluster]["s_all"] / df[cluster]["s_last"]).round(2)

    # Number of events behind `m_all`, recovered as sum / mean. It must be
    # computed from the raw (unscaled, unrounded) mean; it is what allows the
    # overall mean to be rebuilt after summing over clusters. Rows without
    # events yield NaN here, which the group-wise sum below skips.
    df[cluster]["c"] = df[cluster]["s_all"] / df[cluster]["m_all"]

    df[cluster]["m_all"] = (df[cluster]["m_all"] / TIME_SCALE).round(0)
    df[cluster]["m_last"] = (df[cluster]["m_last"] / TIME_SCALE).round(0)
    df[cluster] = df[cluster].drop(
        columns=["s_slow", "m_slow", "c_zero_end", "Unnamed: 0"]
    )

    df2 = _latex_view(df[cluster])
    print(df2.to_latex(index=False))
    print("}")

# Concatenate once instead of growing the frame inside the loop:
# `DataFrame.append` no longer exists in pandas >= 2.0.
dftotal = pandas.concat([df[cluster] for cluster in CLUSTERS])
dftotal = dftotal.groupby("priority").sum().reset_index()

# Sums of ratios and means are meaningless, so every derived column is
# recomputed from the summed totals.
dftotal["m_slow_2"] = (dftotal["s_all"] / dftotal["s_last"]).round(2)
dftotal["finished%"] = dftotal["n_fsh"] / (dftotal["n_fsh"] + dftotal["n_non"])
dftotal["m_last"] = (dftotal["s_last"] / (dftotal["n_fsh"] * TIME_SCALE)).round(0)
# Overall mean = total execution time / total number of events.
dftotal["m_all"] = (dftotal["s_all"] / (dftotal["c"] * TIME_SCALE)).round(0)

df2 = _latex_view(dftotal)
dftotal = dftotal.drop(columns=_AGGREGATION_COLUMNS)

print(df2.to_latex(index=False))
# NOTE(review): this closing brace has no matching "\taskslowdown{...}{" line
# printed for the aggregated table -- confirm whether a header is missing.
print("}")


\taskslowdown{Cluster A}{
\begin{tabular}{rlrrr}
\toprule
 priority & finished\% &    m\_last &     m\_all &  m\_slow\_2 \\
\midrule
       -1 &    10.62\% &     783.0 &     593.0 &      1.10 \\
       24 &      0.0\% &       NaN &       NaN &       NaN \\
       25 &     0.33\% &    5769.0 &    1203.0 &     82.97 \\
      100 &      0.0\% &       NaN &       NaN &       NaN \\
      101 &    81.92\% &   63305.0 &    6346.0 &     30.80 \\
      102 &      0.0\% &       NaN &       NaN &       NaN \\
      103 &    14.99\% &    3074.0 &    3033.0 &      1.13 \\
      105 &    57.68\% &    1666.0 &    1750.0 &      1.08 \\
      107 &    53.93\% &    1022.0 &    1031.0 &      1.02 \\
      114 &      0.0\% &       NaN &       NaN &       NaN \\
      115 &     4.11\% &    2041.0 &    2042.0 &      1.00 \\
      116 &    13.05\% &    4443.0 &    4443.0 &      1.03 \\
      117 &      0.0\% &       NaN &       NaN &       NaN \\
      118 &    11.91\% &    1817.0 &    1814.0 &      1.00 \\

In [7]:
# NOTE(review): stray cell -- `wc` is not defined anywhere in the visible
# notebook, so this cell raises NameError; presumably leftover scratch input
# that can be deleted.
wc

NameError: name 'wc' is not defined

In [None]:
# NOTE(review): stray, never-executed cell -- `tc` is not defined anywhere in
# the visible notebook; presumably leftover scratch input that can be deleted.
tc