127 lines
3.4 KiB
Python
127 lines
3.4 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# # Task slowdown
|
|
|
|
import json
|
|
import sys
|
|
import gzip
|
|
import pandas
|
|
import seaborn as sns
|
|
import matplotlib as mpl
|
|
import matplotlib.pyplot as plt
|
|
from IPython.display import display, HTML
|
|
|
|
# Column order of every per-cluster table and of the CSV written from it.
columns = [
    "priority", "n_fsh", "n_non", "finished%", "c_zero_end",
    "s_last", "m_last", "s_all", "m_all", "s_slow", "m_slow",
]
# One-letter cluster identifiers; each is used as a file-name prefix.
CLUSTERS = "abcdefgh"
DIR = "/home/claudio/google_2019/thesis_queries/task_slowdown/"

# Event code that must close an execution slice for it to count as the
# finishing one.
# NOTE(review): presumably the FINISH event type of the Google 2019 cluster
# trace -- confirm against the trace documentation.
FINISH_EVENT = 6

# Maps cluster letter -> pandas.DataFrame holding that cluster's table.
df = {}


def _last_finished_slice(task):
    """Locate the last slice of ``task`` whose final event is FINISH_EVENT.

    Returns an ``(index, skipped)`` pair. ``index`` is the position of that
    slice inside ``task`` (``-1`` when no slice qualifies); ``skipped`` is the
    number of trailing slices stepped over while searching backwards.
    """
    idx = len(task) - 1
    skipped = 0
    while idx >= 0 and task[idx][1][-1] != FINISH_EVENT:
        idx -= 1
        skipped += 1
    return idx, skipped


def build_slowdown_table(data):
    """Compute the per-priority slowdown table of one cluster.

    Args:
        data: decoded JSON document. ``data["val"]`` maps a priority (a
            string holding an integer) to a list of tasks; each task is a
            list of slices, where ``slice[0]`` is a number that gets summed
            (presumably the slice duration -- confirm) and ``slice[1]`` is a
            sequence whose last element is compared with ``FINISH_EVENT``.
            ``data["non"]`` maps a priority to a count of tasks that did
            not finish.

    Returns:
        A ``(table, weird_count, trailing_counts)`` tuple. ``table`` maps
        every name in ``columns`` to a list with one entry per priority,
        ordered by numeric priority. ``weird_count`` is the number of tasks
        listed under ``"val"`` that contain no finishing slice.
        ``trailing_counts`` maps "number of slices after the finishing one"
        to the number of tasks showing that value.
    """
    table = {name: [] for name in columns}
    derived = ("c_zero_end", "s_last", "m_last", "s_all", "m_all",
               "s_slow", "m_slow")
    weird_count = 0
    trailing_counts = {}

    priorities = sorted(set(data["val"]) | set(data["non"]), key=int)
    for priority in priorities:
        print("Priority " + priority)

        tasks = data["val"].get(priority, [])
        n_not_finished = data["non"].get(priority, 0)
        n_finished = len(tasks)
        n_total = n_finished + n_not_finished

        # NOTE(review): these three columns are recorded before tasks without
        # a finishing slice are discounted below, exactly as the original
        # script did -- confirm that the unadjusted counts are the intent.
        table["priority"].append(priority)
        table["n_fsh"].append(n_finished)
        table["n_non"].append(n_not_finished)
        # A priority with no tasks at all has no meaningful ratio; emit None
        # rather than raising ZeroDivisionError.
        table["finished%"].append(n_finished / n_total if n_total else None)

        sum_last = 0      # sum of slice[0] over the finishing slices
        sum_all = 0       # sum of slice[0] over every slice of kept tasks
        n_slices = 0      # number of slices of kept tasks
        sum_slowdown = 0  # reset per priority so rows do not contaminate each other
        zero_end = 0      # kept tasks whose finishing slice has slice[0] <= 0

        for task in tasks:
            idx_last, skipped = _last_finished_slice(task)
            if idx_last < 0:
                # No slice ends with FINISH_EVENT: leave the task out of
                # every mean computed for this priority.
                weird_count += 1
                n_finished -= 1
                continue

            # Histogram keyed by the actual number of skipped slices (the
            # original always initialised key 1 by mistake).
            trailing_counts[skipped] = trailing_counts.get(skipped, 0) + 1

            last_value = task[idx_last][0]
            task_total = sum(piece[0] for piece in task)

            sum_last += last_value
            sum_all += task_total
            # Every slice counts towards the "all" mean.
            # NOTE(review): nesting reconstructed from a source whose
            # indentation was lost -- confirm the counter is per slice.
            n_slices += len(task)

            if last_value > 0:
                sum_slowdown += task_total / last_value
            else:
                zero_end += 1

        if n_finished == 0:
            # Nothing usable for this priority: keep the row, blank the stats.
            for name in derived:
                table[name].append(None)
            continue

        table["c_zero_end"].append(zero_end)
        table["s_last"].append(sum_last)
        table["m_last"].append(sum_last / n_finished)
        table["s_all"].append(sum_all)
        table["m_all"].append(sum_all / n_slices)
        table["s_slow"].append(sum_slowdown)
        table["m_slow"].append(sum_slowdown / n_finished)

    return table, weird_count, trailing_counts


def main():
    """Build the slowdown table of every cluster and write it as CSV.

    For each letter in ``CLUSTERS`` this reads
    ``DIR + <letter> + "_state_changes.json.gz"``, stores the resulting
    DataFrame in the module-level ``df`` dict and writes it to
    ``<letter>_slowdown_table.csv`` under ``DIR``.
    """
    for cluster in CLUSTERS:
        # Loading
        print("Loading cluster " + cluster + "...")
        with gzip.open(DIR + cluster + "_state_changes.json.gz", "r") as f:
            data = json.load(f)
        print("Done loading")

        # Computation
        table, _weird_count, _trailing_counts = build_slowdown_table(data)

        df[cluster] = pandas.DataFrame(table, columns=columns)
        df[cluster].to_csv(DIR + "/" + cluster + "_slowdown_table.csv")

        print("Done csv")


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|