bachelorThesis/task_slowdown/task_slowdown_table.py

127 lines
3.4 KiB
Python

#!/usr/bin/env python
# coding: utf-8
# # Task slowdown
import json
import sys
import gzip
import pandas
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display, HTML
# Where the per-cluster "<cluster>_state_changes.json.gz" inputs are read from
# and the "<cluster>_slowdown_table.csv" outputs are written to.
DIR = "/home/claudio/google_2019/thesis_queries/task_slowdown/"

# One letter per cluster; each letter is used as a file-name prefix.
CLUSTERS = "abcdefgh"

# Column order of every generated table (and of the CSV files).
# The "s_*" columns hold sums and the "m_*" columns the matching means, for
# the last slice ("last"), all slices ("all") and the slowdown ratio ("slow").
columns = [
    "priority",
    "n_fsh",       # finished task count
    "n_non",       # not-finished task count
    "finished%",
    "c_zero_end",
    "s_last",
    "m_last",
    "s_all",
    "m_all",
    "s_slow",
    "m_slow",
]

# cluster letter -> pandas.DataFrame holding that cluster's table
df = {}
# Value found at the end of a slice's state list (slice[1][-1]) when that
# slice is treated as the one in which the task finished.
# NOTE(review): presumably the FINISH event type of the trace -- confirm.
FINISHED_STATE = 6

# Build one slowdown table per cluster; each table is stored in `df` and
# written next to the input as "<cluster>_slowdown_table.csv".
#
# Shape of the input as used below: data["val"][priority] is a list of tasks,
# each task a list of slices with a number in slice[0] and an indexable state
# list in slice[1]; data["non"][priority] is a number that is added to the
# not-finished count.
# NOTE(review): slice[0] is presumably the slice duration -- confirm.
for cluster in CLUSTERS:
    # Cluster-wide counters, kept at loop level so they remain available
    # after the table has been built.
    wc = 0          # tasks under "val" with no slice ending in FINISHED_STATE
    tc = {}         # trailing slices skipped before the finishing one -> tasks
    s_slowdown = 0  # slowdown ratios summed over all priorities

    # Loading
    print("Loading cluster " + cluster + "...")
    with gzip.open(DIR + cluster + "_state_changes.json.gz", "r") as f:
        data = json.load(f)
    print("Done loading")

    # Computation
    # Priorities are string keys; order them numerically.
    priorities = sorted(set(data["val"]) | set(data["non"]), key=int)
    table = {col: [] for col in columns}

    for priority in priorities:
        print("Priority " + priority)
        tasks = data["val"].get(priority, [])
        n_finished = len(tasks)
        n_not_finished = data["non"].get(priority, 0)

        # Accumulators restart for every priority so that each row only
        # describes its own tasks.
        last_sum = 0        # sum of the finishing slice's value per task
        all_sum = 0         # sum of every slice value
        slice_count = 0     # number of slices that went into all_sum
        slow_sum = 0        # sum of (task total / finishing slice value)
        zero_end_count = 0  # tasks whose finishing slice value is not > 0

        for task in tasks:
            # Walk backwards to the last slice that ends in FINISHED_STATE.
            idx_last = len(task) - 1
            while idx_last >= 0 and task[idx_last][1][-1] != FINISHED_STATE:
                idx_last -= 1

            if idx_last < 0:
                # No finishing slice at all: count the task as not finished.
                wc += 1
                n_finished -= 1
                n_not_finished += 1
                continue

            skipped = len(task) - 1 - idx_last
            tc[skipped] = tc.get(skipped, 0) + 1

            last_value = task[idx_last][0]
            task_sum = sum(exec_slice[0] for exec_slice in task)

            last_sum += last_value
            all_sum += task_sum
            slice_count += len(task)

            if last_value > 0:
                slowdown = task_sum / last_value
                slow_sum += slowdown
                s_slowdown += slowdown
            else:
                # The ratio is undefined here; only count the occurrence.
                zero_end_count += 1

        # The row is assembled after the task loop so that the reported
        # counts match the denominators used for the means below.
        row = dict.fromkeys(columns)
        row["priority"] = priority
        row["n_fsh"] = n_finished
        row["n_non"] = n_not_finished

        n_total = n_finished + n_not_finished
        if n_total > 0:
            row["finished%"] = n_finished / n_total

        # Without any finished task the statistic columns stay None.
        if n_finished > 0:
            row["c_zero_end"] = zero_end_count
            row["s_last"] = last_sum
            row["m_last"] = last_sum / n_finished
            row["s_all"] = all_sum
            row["m_all"] = all_sum / slice_count
            row["s_slow"] = slow_sum
            row["m_slow"] = slow_sum / n_finished

        for col in columns:
            table[col].append(row[col])

    df[cluster] = pandas.DataFrame(table, columns=columns)
    df[cluster].to_csv(DIR + "/" + cluster + "_slowdown_table.csv")
    print("Done csv")