#!/usr/bin/env python
# coding: utf-8

# # Task slowdown
#
# For every cluster, read "<cluster>_state_changes.json.gz" from DIR, build a
# per-priority table of slowdown statistics and write it to
# "<cluster>_slowdown_table.csv" in the same directory.

import gzip
import json
import sys

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas
import seaborn as sns
from IPython.display import display, HTML

# Column order of the produced tables / CSV files.
columns = ["priority", "n_fsh", "n_non", "finished%", "c_zero_end", "s_last",
           "m_last", "s_all", "m_all", "s_slow", "m_slow"]

# One character per cluster; each character is used as a file-name prefix.
CLUSTERS = "abcdefgh"
DIR = "/home/claudio/google_2019/thesis_queries/task_slowdown/"

# A slice is treated as the task's final execution when the last entry of its
# event list equals this code.
# NOTE(review): presumably the FINISH event type of the Google 2019 trace --
# confirm against the query that generates the input files.
_END_EVENT = 6


def _find_last_slice(task):
    """Locate the last slice of *task* whose final event is ``_END_EVENT``.

    Scans the slices from the end towards the beginning.

    Returns:
        A pair ``(index, skipped)``: ``index`` is the position of the matching
        slice (``-1`` when no slice matches) and ``skipped`` is the number of
        trailing slices that were stepped over before the scan stopped.
    """
    skipped = 0
    idx = len(task) - 1
    while idx >= 0 and task[idx][1][-1] != _END_EVENT:
        idx -= 1
        skipped += 1
    return idx, skipped


def compute_slowdown_table(data):
    """Build the per-priority slowdown table for one cluster.

    Args:
        data: decoded JSON document with two mappings keyed by priority
            (keys must be convertible with ``int``):
            ``data["val"][priority]`` is a list of tasks, each task a list of
            slices where ``slice[0]`` is a number and ``slice[1]`` is a
            non-empty sequence of event codes; ``data["non"][priority]`` is
            the number of tasks counted as not finished.
            NOTE(review): ``slice[0]`` is presumably the execution time of the
            slice -- confirm against the producer of the file.

    Returns:
        A triple ``(table, wc, tc)``:
        ``table`` maps every name in ``columns`` to a list with one entry per
        priority (``None`` where a statistic cannot be computed);
        ``wc`` counts tasks listed under ``"val"`` that have no slice ending
        with ``_END_EVENT``;
        ``tc`` is a histogram mapping "number of trailing slices skipped" to
        the number of tasks for which that happened.
    """
    finished = data["val"]
    not_finished = data["non"]
    priorities = sorted(set(finished) | set(not_finished), key=int)

    table = {name: [] for name in columns}
    wc = 0
    tc = {}

    for priority in priorities:
        print("Priority " + priority)

        tasks = finished.get(priority, [])
        n_finished = len(tasks)
        n_not_finished = not_finished.get(priority, 0)
        n_total = n_finished + n_not_finished

        # Every statistic defaults to None, so a priority without usable
        # tasks yields an "empty" row.
        row = dict.fromkeys(columns)
        row["priority"] = priority
        # NOTE(review): the counts and the ratio are recorded *before* tasks
        # lacking an end slice are discounted below, whereas the means use the
        # discounted count. Kept as in the original; confirm it is intended.
        row["n_fsh"] = n_finished
        row["n_non"] = n_not_finished
        # Guard against a priority that has no tasks at all.
        row["finished%"] = n_finished / n_total if n_total else None

        # All accumulators are per priority. The slowdown sum used to be
        # initialised once per cluster, so it leaked from one priority into
        # the next and skewed "s_slow"/"m_slow".
        s_last = 0       # sum of slice[0] over the final slices
        s_all = 0        # sum of slice[0] over every slice of every task
        s_slowdown = 0   # sum of (task total / final slice) ratios
        n_slices = 0     # number of slices contributing to s_all
        n_zero_end = 0   # tasks whose final slice has a non-positive value
        n_valid = 0      # tasks that do have a final slice

        for task in tasks:
            idx_last, skipped = _find_last_slice(task)
            if idx_last < 0:
                # No slice ends with _END_EVENT: exclude the task from the
                # statistics and count it as an anomaly.
                wc += 1
                continue

            n_valid += 1
            # Key on the actual number of skipped slices (the original wrote
            # to key 1 whenever a new value was first seen).
            tc[skipped] = tc.get(skipped, 0) + 1

            last_value = task[idx_last][0]
            task_total = sum(exec_slice[0] for exec_slice in task)

            s_last += last_value
            s_all += task_total
            # NOTE(review): assumes the original counter was incremented once
            # per slice (making "m_all" a per-slice mean); the collapsed
            # source does not show its indentation -- confirm.
            n_slices += len(task)

            if last_value > 0:
                s_slowdown += task_total / last_value
            else:
                # The ratio is undefined for such a final slice.
                n_zero_end += 1

        if n_valid > 0:
            row["c_zero_end"] = n_zero_end
            row["s_last"] = s_last
            row["m_last"] = s_last / n_valid
            row["s_all"] = s_all
            row["m_all"] = s_all / n_slices
            row["s_slow"] = s_slowdown
            # Tasks counted in c_zero_end add nothing to the sum but are
            # still part of the divisor, as in the original.
            row["m_slow"] = s_slowdown / n_valid

        for name in columns:
            table[name].append(row[name])

    return table, wc, tc


# Resulting DataFrame for each cluster, keyed by cluster letter.
df = {}
for cluster in CLUSTERS:
    # Loading
    print("Loading cluster " + cluster + "...")
    with gzip.open(DIR + cluster + "_state_changes.json.gz", "r") as f:
        data = json.load(f)
    print("Done loading")

    # Computation
    table, wc, tc = compute_slowdown_table(data)

    df[cluster] = pandas.DataFrame(table, columns=columns)
    df[cluster].to_csv(DIR + "/" + cluster + "_slowdown_table.csv")
    print("Done csv")