From a23dbe28e514483d9565e2daf82eab58ab081fa9 Mon Sep 17 00:00:00 2001 From: "Claudio Maggioni (maggicl)" Date: Sun, 18 Apr 2021 19:58:27 +0000 Subject: [PATCH] Results for 7c --- figure_7/a_figure7c.csv | 475 ++++++++++++++++++++++++++++++++++++++++ figure_7/figure7c.py | 39 +++- 2 files changed, 507 insertions(+), 7 deletions(-) create mode 100644 figure_7/a_figure7c.csv diff --git a/figure_7/a_figure7c.csv b/figure_7/a_figure7c.csv new file mode 100644 index 00000000..be26c212 --- /dev/null +++ b/figure_7/a_figure7c.csv @@ -0,0 +1,475 @@ +term,n_exec,count +7,1,391323386 +7,2,104682127 +7,3,61873236 +7,4,17411163 +7,5,5172753 +7,6,1486512 +7,7,474469 +7,8,250632 +7,9,135549 +7,10,103986 +7,11,82490 +7,12,64904 +7,13,62738 +7,14,53403 +7,15,51435 +7,16,49406 +7,17,47969 +7,19,45116 +7,20,83878 +7,21,56441 +7,22,40996 +7,23,39294 +7,24,36682 +7,25,35147 +7,26,33427 +7,27,31685 +7,28,29824 +7,29,28196 +7,30,27186 +7,31,25940 +7,32,24411 +7,33,22205 +7,34,19263 +7,35,17215 +7,36,15877 +7,37,14576 +7,38,13677 +7,18,46872 +7,39,12635 +7,40,11790 +7,41,10665 +7,42,9662 +7,43,8892 +7,44,8156 +7,45,7622 +7,47,6283 +7,46,6918 +7,48,5845 +7,49,5252 +7,50,4921 +7,51,4631 +7,52,4200 +7,53,3826 +7,54,3513 +7,55,3128 +7,56,2903 +7,57,2648 +7,58,2454 +7,59,2280 +7,60,2073 +7,61,1844 +7,62,1781 +7,63,1580 +7,64,1495 +7,65,1320 +7,66,1260 +7,67,1166 +7,68,1058 +7,69,940 +7,70,820 +7,71,744 +7,72,710 +7,73,565 +7,74,517 +7,75,440 +7,76,376 +7,77,351 +7,78,287 +7,79,201 +7,80,197 +7,81,171 +7,82,145 +7,83,126 +7,84,125 +7,85,99 +7,86,88 +7,87,76 +7,88,65 +7,89,62 +7,90,52 +7,91,52 +7,92,35 +7,93,31 +7,94,22 +7,95,27 +7,96,23 +7,97,11 +7,98,9 +7,99,10 +7,100,10 +7,101,9 +7,102,2 +6,1,80534713 +6,2,16553975 +6,37,820 +6,29,983 +6,3,9294919 +6,38,729 +6,9,11609 +6,4,2325273 +6,12,3392 +6,15,1477 +6,5,646748 +6,6,167352 +6,7,46680 +6,11,6538 +6,25,1093 +6,28,962 +6,32,896 +6,35,963 +6,16,1404 +6,23,1165 +6,8,28636 +6,33,1634 +6,34,1137 +6,39,758 +6,10,6764 +6,24,1112 +6,14,1742 +6,18,1241 +6,20,10888 +6,36,803 +6,40,678 +6,41,703 +6,17,1220 +6,27,1012 +6,19,1184 +6,21,3945 +6,30,913 +6,26,1045 +6,31,945 +6,44,495 +6,45,492 +6,46,479 +6,42,552 +6,50,354 +6,52,341 +6,54,255 +6,47,453 +6,57,225 +6,13,3010 +6,22,1201 +6,48,385 +6,43,584 +6,62,160 +6,64,130 +6,66,117 +6,58,181 +6,59,173 +6,56,193 +6,61,175 +6,49,403 +6,53,323 +6,55,249 +6,60,182 +6,63,128 +6,69,97 +6,71,82 +6,72,60 +6,74,67 +6,68,87 +6,51,317 +6,70,84 +6,65,139 +6,67,126 +6,77,35 +6,83,18 +6,86,15 +6,90,5 +6,73,61 +6,78,32 +6,79,21 +6,81,20 +6,82,17 +6,85,7 +6,88,7 +6,76,36 +6,87,12 +6,89,9 +6,92,4 +6,80,20 +6,75,44 +6,84,16 +6,93,5 +6,96,4 +6,97,5 +6,98,4 +6,99,3 +6,101,3 +6,91,6 +6,95,4 +6,94,2 +6,100,2 +8,1,32144983 +8,2,6151070 +8,3,3334677 +8,4,784436 +8,5,203923 +8,6,47719 +8,7,11081 +8,15,207 +8,22,159 +8,9,2393 +8,11,1014 +8,30,79 +8,8,7509 +8,10,1387 +8,12,554 +8,13,620 +8,18,173 +8,19,162 +8,21,1203 +8,23,103 +8,24,99 +8,25,131 +8,36,63 +8,37,51 +8,42,42 +8,17,162 +8,27,75 +8,26,112 +8,28,79 +8,31,97 +8,34,190 +8,39,48 +8,35,104 +8,20,4128 +8,16,186 +8,53,19 +8,61,15 +8,29,100 +8,32,71 +8,46,24 +8,52,24 +8,14,303 +8,40,45 +8,41,32 +8,43,46 +8,44,27 +8,33,84 +8,45,21 +8,58,6 +8,59,5 +8,48,21 +8,57,11 +8,89,4 +8,38,62 +8,54,7 +8,51,13 +8,63,4 +8,73,2 +8,74,2 +8,50,14 +8,60,9 +8,68,4 +8,65,5 +8,49,16 +8,75,2 +8,62,4 +8,66,6 +8,47,12 +8,70,4 +8,55,1 +8,56,2 +8,81,1 +8,72,1 +8,71,1 +4,1,55548512 +4,2,12554434 +4,3,6644659 +4,21,4408 +4,30,1004 +4,35,753 +4,36,685 +4,25,1347 +4,14,2482 +4,10,7847 +4,4,1907654 +4,5,570836 +4,6,159595 +4,8,23612 +4,9,10337 +4,11,5563 +4,18,1929 +4,19,1915 +4,23,1743 +4,13,3677 +4,15,2377 +4,28,1142 +4,7,51196 +4,17,2067 +4,29,1093 +4,31,933 +4,12,3948 +4,26,1274 +4,38,556 +4,37,637 +4,20,5323 +4,22,1791 +4,24,1439 +4,32,872 +4,34,869 +4,27,1183 +4,39,535 +4,42,391 +4,46,289 +4,41,443 +4,55,117 +4,16,2280 +4,33,800 +4,50,227 +4,44,345 +4,47,270 +4,48,309 +4,49,241 +4,40,462 +4,43,398 +4,45,314 +4,52,175 +4,54,136 +4,62,65 +4,53,142 +4,63,79 +4,51,184 +4,56,114 +4,60,82 +4,64,60 +4,66,53 +4,69,36 +4,70,28 +4,61,79 +4,57,107 +4,59,107 +4,67,51 +4,58,87 +4,71,24 +4,65,52 +4,68,32 +4,76,117 +4,75,29 +4,89,3 +4,73,27 +4,74,20 +4,77,10 +4,78,18 +4,79,11 +4,80,10 +4,81,4 +4,82,8 +4,72,27 +4,91,1 +4,92,3 +4,102,1 +4,87,1 +4,85,6 +4,84,2 +4,94,1 +4,83,3 +4,90,2 +4,86,1 +4,93,1 +5,1,13139241 +5,18,1071 +5,29,732 +5,2,4461294 +5,16,1132 +5,20,1173 +5,22,1027 +5,39,352 +5,24,894 +5,23,974 +5,19,1020 +5,7,49050 +5,6,116558 +5,5,362914 +5,9,11812 +5,3,2723445 +5,10,7022 +5,26,851 +5,4,978358 +5,12,2382 +5,8,21628 +5,27,790 +5,13,1925 +5,21,1027 +5,34,562 +5,37,413 +5,15,1203 +5,33,702 +5,11,4845 +5,25,872 +5,31,652 +5,17,1020 +5,28,717 +5,36,433 +5,49,124 +5,50,137 +5,52,103 +5,54,99 +5,40,318 +5,56,77 +5,30,680 +5,32,741 +5,45,201 +5,14,1303 +5,35,470 +5,41,272 +5,44,224 +5,38,382 +5,43,216 +5,57,59 +5,68,24 +5,46,186 +5,53,96 +5,42,272 +5,47,161 +5,51,116 +5,55,84 +5,60,52 +5,71,18 +5,67,34 +5,61,50 +5,64,33 +5,66,29 +5,70,21 +5,48,318 +5,58,71 +5,69,30 +5,62,47 +5,65,31 +5,59,61 +5,72,17 +5,63,43 +5,73,17 +5,78,5 +5,76,51 +5,74,9 +5,77,10 +5,83,5 +5,93,1 +5,75,20 +5,85,2 +5,89,2 +5,92,1 +5,79,7 +5,80,2 +5,84,4 +5,86,2 +5,88,1 +5,81,4 +5,90,2 +5,82,3 +5,87,4 +5,94,2 +5,96,1 +5,95,1 +-1,1,15529 +-1,2,1267 +-1,5,41 +-1,3,483 +-1,4,149 +-1,6,12 +-1,9,1 +-1,7,1 diff --git a/figure_7/figure7c.py b/figure_7/figure7c.py index 79f7bc96..c381a4e8 100755 --- a/figure_7/figure7c.py +++ b/figure_7/figure7c.py @@ -17,12 +17,24 @@ from pyspark.sql.types import * from decimal import * import random +CHECKDIR = "/home/claudio/google_2019/thesis_queries/figure_7/" + if len(sys.argv) is not 4: print(sys.argv[0] + " {cluster} {tmpdir} {maxram}") sys.exit() cluster=sys.argv[1] +if os.path.exists(CHECKDIR + cluster + "_figure7c.csv"): + print("already computed") + sys.exit() + +if os.path.exists(CHECKDIR + cluster + "_figure7c_working"): + print("already in execution") + sys.exit() + +os.system("touch " + CHECKDIR + cluster + "_figure7c_working") + spark = pyspark.sql.SparkSession.builder \ .appName("task_slowdown") \ .config("spark.driver.maxResultSize", "128g") \ @@ -33,7 +45,7 @@ sc = spark.sparkContext # READING INSTANCE EVENTS DATA dfepath = "/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_instance_events*.json.gz" -#dfepath = "/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_test.json" +#dfepath = "/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_instance_events00000000000?.json.gz" df = spark.read.json(dfepath) def tabid(x): @@ -58,9 +70,11 @@ def tally_event(bucket, term, nexec): def for_each_joined(x): machine_id = x[0] + if x[0] is None: + return {} ts = x[1] - ts = sorted(ts, key=lambda x: x["time"]) + ts = sorted(ts, key=lambda x: x["time"] or -1) in_execution = set() chum = {} @@ -88,24 +102,34 @@ def fold_resobjs(ro1, ro2): return ro1 def mark_next(data): - ts = data[1] - ts = sorted(ts, key=lambda z: z[1]) + ts = list(data[1]) + ts = sorted(ts, key=lambda z: z[1] or -1) last_term = -1 for i in range(0, len(ts)): t = ts[i] ts[i] = {"id": t[0], "time": t[1], "type": t[2], "mid": t[3], "end": (i == len(ts) -1 or t[3] != ts[i+1][3])} - if ts[i]["type"] >= 4 or ts[i]["type"] <= 8: + if ts[i]["type"] >= 4 and ts[i]["type"] <= 8: last_term = ts[i]["type"] for t in ts: t["term"] = last_term return ts + +def to_csv(result): + out = "term,n_exec,count\n" + for key in result.keys(): + for key2 in result[key].keys(): + out += str(key) + "," + str(key2) + "," + str(result[key][key2]) + "\n" + return out + + result = df.rdd \ .filter(lambda x: x.time is not None and x.type is not None and x.instance_index is not None and x.collection_id is not None) \ .map(lambda x: [tabid(x), int(x.time), int(x.type), x.machine_id]) \ .groupBy(lambda x: x[0]) \ .flatMap(mark_next) \ + .filter(lambda x: x["mid"] is not None) \ .groupBy(lambda x: x["mid"]) \ .partitionBy(1000, lambda x: random.randint(0, 1000-1)) \ .map(for_each_joined) \ @@ -113,7 +137,8 @@ result = df.rdd \ d = os.path.dirname(os.path.realpath(__file__)) -with open(d + "/" + cluster + "_figure7c.json", "w") as f: - json.dump(result, f) +with open(d + "/" + cluster + "_figure7c.csv", "w") as f: + f.write(to_csv(result)) +os.system("rm " + CHECKDIR + cluster + "_figure7c_working") # vim: set ts=4 sw=4 et tw=120: