From a23dbe28e514483d9565e2daf82eab58ab081fa9 Mon Sep 17 00:00:00 2001
From: "Claudio Maggioni (maggicl)" <maggicl@usi.ch>
Date: Sun, 18 Apr 2021 19:58:27 +0000
Subject: [PATCH] Results for 7c

---
 figure_7/a_figure7c.csv | 475 ++++++++++++++++++++++++++++++++++++++++
 figure_7/figure7c.py    |  39 +++-
 2 files changed, 507 insertions(+), 7 deletions(-)
 create mode 100644 figure_7/a_figure7c.csv

diff --git a/figure_7/a_figure7c.csv b/figure_7/a_figure7c.csv
new file mode 100644
index 00000000..be26c212
--- /dev/null
+++ b/figure_7/a_figure7c.csv
@@ -0,0 +1,475 @@
+term,n_exec,count
+7,1,391323386
+7,2,104682127
+7,3,61873236
+7,4,17411163
+7,5,5172753
+7,6,1486512
+7,7,474469
+7,8,250632
+7,9,135549
+7,10,103986
+7,11,82490
+7,12,64904
+7,13,62738
+7,14,53403
+7,15,51435
+7,16,49406
+7,17,47969
+7,19,45116
+7,20,83878
+7,21,56441
+7,22,40996
+7,23,39294
+7,24,36682
+7,25,35147
+7,26,33427
+7,27,31685
+7,28,29824
+7,29,28196
+7,30,27186
+7,31,25940
+7,32,24411
+7,33,22205
+7,34,19263
+7,35,17215
+7,36,15877
+7,37,14576
+7,38,13677
+7,18,46872
+7,39,12635
+7,40,11790
+7,41,10665
+7,42,9662
+7,43,8892
+7,44,8156
+7,45,7622
+7,47,6283
+7,46,6918
+7,48,5845
+7,49,5252
+7,50,4921
+7,51,4631
+7,52,4200
+7,53,3826
+7,54,3513
+7,55,3128
+7,56,2903
+7,57,2648
+7,58,2454
+7,59,2280
+7,60,2073
+7,61,1844
+7,62,1781
+7,63,1580
+7,64,1495
+7,65,1320
+7,66,1260
+7,67,1166
+7,68,1058
+7,69,940
+7,70,820
+7,71,744
+7,72,710
+7,73,565
+7,74,517
+7,75,440
+7,76,376
+7,77,351
+7,78,287
+7,79,201
+7,80,197
+7,81,171
+7,82,145
+7,83,126
+7,84,125
+7,85,99
+7,86,88
+7,87,76
+7,88,65
+7,89,62
+7,90,52
+7,91,52
+7,92,35
+7,93,31
+7,94,22
+7,95,27
+7,96,23
+7,97,11
+7,98,9
+7,99,10
+7,100,10
+7,101,9
+7,102,2
+6,1,80534713
+6,2,16553975
+6,37,820
+6,29,983
+6,3,9294919
+6,38,729
+6,9,11609
+6,4,2325273
+6,12,3392
+6,15,1477
+6,5,646748
+6,6,167352
+6,7,46680
+6,11,6538
+6,25,1093
+6,28,962
+6,32,896
+6,35,963
+6,16,1404
+6,23,1165
+6,8,28636
+6,33,1634
+6,34,1137
+6,39,758
+6,10,6764
+6,24,1112
+6,14,1742
+6,18,1241
+6,20,10888
+6,36,803
+6,40,678
+6,41,703
+6,17,1220
+6,27,1012
+6,19,1184
+6,21,3945
+6,30,913
+6,26,1045
+6,31,945
+6,44,495
+6,45,492
+6,46,479
+6,42,552
+6,50,354
+6,52,341
+6,54,255
+6,47,453
+6,57,225
+6,13,3010
+6,22,1201
+6,48,385
+6,43,584
+6,62,160
+6,64,130
+6,66,117
+6,58,181
+6,59,173
+6,56,193
+6,61,175
+6,49,403
+6,53,323
+6,55,249
+6,60,182
+6,63,128
+6,69,97
+6,71,82
+6,72,60
+6,74,67
+6,68,87
+6,51,317
+6,70,84
+6,65,139
+6,67,126
+6,77,35
+6,83,18
+6,86,15
+6,90,5
+6,73,61
+6,78,32
+6,79,21
+6,81,20
+6,82,17
+6,85,7
+6,88,7
+6,76,36
+6,87,12
+6,89,9
+6,92,4
+6,80,20
+6,75,44
+6,84,16
+6,93,5
+6,96,4
+6,97,5
+6,98,4
+6,99,3
+6,101,3
+6,91,6
+6,95,4
+6,94,2
+6,100,2
+8,1,32144983
+8,2,6151070
+8,3,3334677
+8,4,784436
+8,5,203923
+8,6,47719
+8,7,11081
+8,15,207
+8,22,159
+8,9,2393
+8,11,1014
+8,30,79
+8,8,7509
+8,10,1387
+8,12,554
+8,13,620
+8,18,173
+8,19,162
+8,21,1203
+8,23,103
+8,24,99
+8,25,131
+8,36,63
+8,37,51
+8,42,42
+8,17,162
+8,27,75
+8,26,112
+8,28,79
+8,31,97
+8,34,190
+8,39,48
+8,35,104
+8,20,4128
+8,16,186
+8,53,19
+8,61,15
+8,29,100
+8,32,71
+8,46,24
+8,52,24
+8,14,303
+8,40,45
+8,41,32
+8,43,46
+8,44,27
+8,33,84
+8,45,21
+8,58,6
+8,59,5
+8,48,21
+8,57,11
+8,89,4
+8,38,62
+8,54,7
+8,51,13
+8,63,4
+8,73,2
+8,74,2
+8,50,14
+8,60,9
+8,68,4
+8,65,5
+8,49,16
+8,75,2
+8,62,4
+8,66,6
+8,47,12
+8,70,4
+8,55,1
+8,56,2
+8,81,1
+8,72,1
+8,71,1
+4,1,55548512
+4,2,12554434
+4,3,6644659
+4,21,4408
+4,30,1004
+4,35,753
+4,36,685
+4,25,1347
+4,14,2482
+4,10,7847
+4,4,1907654
+4,5,570836
+4,6,159595
+4,8,23612
+4,9,10337
+4,11,5563
+4,18,1929
+4,19,1915
+4,23,1743
+4,13,3677
+4,15,2377
+4,28,1142
+4,7,51196
+4,17,2067
+4,29,1093
+4,31,933
+4,12,3948
+4,26,1274
+4,38,556
+4,37,637
+4,20,5323
+4,22,1791
+4,24,1439
+4,32,872
+4,34,869
+4,27,1183
+4,39,535
+4,42,391
+4,46,289
+4,41,443
+4,55,117
+4,16,2280
+4,33,800
+4,50,227
+4,44,345
+4,47,270
+4,48,309
+4,49,241
+4,40,462
+4,43,398
+4,45,314
+4,52,175
+4,54,136
+4,62,65
+4,53,142
+4,63,79
+4,51,184
+4,56,114
+4,60,82
+4,64,60
+4,66,53
+4,69,36
+4,70,28
+4,61,79
+4,57,107
+4,59,107
+4,67,51
+4,58,87
+4,71,24
+4,65,52
+4,68,32
+4,76,117
+4,75,29
+4,89,3
+4,73,27
+4,74,20
+4,77,10
+4,78,18
+4,79,11
+4,80,10
+4,81,4
+4,82,8
+4,72,27
+4,91,1
+4,92,3
+4,102,1
+4,87,1
+4,85,6
+4,84,2
+4,94,1
+4,83,3
+4,90,2
+4,86,1
+4,93,1
+5,1,13139241
+5,18,1071
+5,29,732
+5,2,4461294
+5,16,1132
+5,20,1173
+5,22,1027
+5,39,352
+5,24,894
+5,23,974
+5,19,1020
+5,7,49050
+5,6,116558
+5,5,362914
+5,9,11812
+5,3,2723445
+5,10,7022
+5,26,851
+5,4,978358
+5,12,2382
+5,8,21628
+5,27,790
+5,13,1925
+5,21,1027
+5,34,562
+5,37,413
+5,15,1203
+5,33,702
+5,11,4845
+5,25,872
+5,31,652
+5,17,1020
+5,28,717
+5,36,433
+5,49,124
+5,50,137
+5,52,103
+5,54,99
+5,40,318
+5,56,77
+5,30,680
+5,32,741
+5,45,201
+5,14,1303
+5,35,470
+5,41,272
+5,44,224
+5,38,382
+5,43,216
+5,57,59
+5,68,24
+5,46,186
+5,53,96
+5,42,272
+5,47,161
+5,51,116
+5,55,84
+5,60,52
+5,71,18
+5,67,34
+5,61,50
+5,64,33
+5,66,29
+5,70,21
+5,48,318
+5,58,71
+5,69,30
+5,62,47
+5,65,31
+5,59,61
+5,72,17
+5,63,43
+5,73,17
+5,78,5
+5,76,51
+5,74,9
+5,77,10
+5,83,5
+5,93,1
+5,75,20
+5,85,2
+5,89,2
+5,92,1
+5,79,7
+5,80,2
+5,84,4
+5,86,2
+5,88,1
+5,81,4
+5,90,2
+5,82,3
+5,87,4
+5,94,2
+5,96,1
+5,95,1
+-1,1,15529
+-1,2,1267
+-1,5,41
+-1,3,483
+-1,4,149
+-1,6,12
+-1,9,1
+-1,7,1
diff --git a/figure_7/figure7c.py b/figure_7/figure7c.py
index 79f7bc96..c381a4e8 100755
--- a/figure_7/figure7c.py
+++ b/figure_7/figure7c.py
@@ -17,12 +17,24 @@ from pyspark.sql.types import *
 from decimal import *
 import random
 
+CHECKDIR = os.path.dirname(os.path.realpath(__file__)) + "/"  # same directory the result CSV is written to
+
 if len(sys.argv) is not 4:
     print(sys.argv[0] + " {cluster} {tmpdir} {maxram}")
     sys.exit()
 
 cluster=sys.argv[1]
 
+if os.path.exists(CHECKDIR + cluster + "_figure7c.csv"):
+    print("already computed")
+    sys.exit()
+
+try:  # O_EXCL makes "check and create the lock file" one atomic step, with no shell involved
+    os.close(os.open(CHECKDIR + cluster + "_figure7c_working", os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644))
+except FileExistsError:
+    print("already in execution")
+    sys.exit()
+
 spark = pyspark.sql.SparkSession.builder \
   .appName("task_slowdown") \
   .config("spark.driver.maxResultSize", "128g") \
@@ -33,7 +45,7 @@ sc = spark.sparkContext
 
 # READING INSTANCE EVENTS DATA
 dfepath = "/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_instance_events*.json.gz"
-#dfepath = "/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_test.json"
+#dfepath = "/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_instance_events00000000000?.json.gz"
 df = spark.read.json(dfepath)
 
 def tabid(x):
@@ -58,9 +70,11 @@ def tally_event(bucket, term, nexec):
 
 def for_each_joined(x):
     machine_id = x[0]
+    if x[0] is None:
+        return {}
     ts = x[1]
 
-    ts = sorted(ts, key=lambda x: x["time"])
+    ts = sorted(ts, key=lambda x: x["time"] or -1)
     in_execution = set()
     chum = {}
  
@@ -88,24 +102,34 @@ def fold_resobjs(ro1, ro2):
     return ro1
  
 def mark_next(data):
-    ts = data[1]
-    ts = sorted(ts, key=lambda z: z[1])
+    ts = list(data[1])  # data[1] holds the rows grouped under one key; materialize them as a list
+    ts = sorted(ts, key=lambda z: z[1] or -1)  # order rows by field 1 (the time); a falsy time sorts as -1
     last_term = -1
     for i in range(0, len(ts)):
         t = ts[i]
         ts[i] = {"id": t[0], "time": t[1], "type": t[2], "mid": t[3], "end": (i == len(ts) -1 or t[3] != ts[i+1][3])}
-        if ts[i]["type"] >= 4 or ts[i]["type"] <= 8:
+        if ts[i]["type"] >= 4 and ts[i]["type"] <= 8:  # only types in 4..8 update last_term; the latest one wins
             last_term = ts[i]["type"]
     for t in ts:
         t["term"] = last_term
     return ts
 
+
+def to_csv(result):
+    """Render the nested {term: {n_exec: count}} dict as CSV text, header row first."""
+    rows = ["term,n_exec,count\n"]
+    for term, tally in result.items():
+        rows.extend("%s,%s,%s\n" % (term, n_exec, count) for n_exec, count in tally.items())
+    return "".join(rows)  # one join instead of repeated string += in the loop
+
+
 result = df.rdd \
     .filter(lambda x: x.time is not None and x.type is not None and
             x.instance_index is not None and x.collection_id is not None) \
     .map(lambda x: [tabid(x), int(x.time), int(x.type), x.machine_id]) \
     .groupBy(lambda x: x[0]) \
     .flatMap(mark_next) \
+    .filter(lambda x: x["mid"] is not None) \
     .groupBy(lambda x: x["mid"]) \
     .partitionBy(1000, lambda x: random.randint(0, 1000-1)) \
     .map(for_each_joined) \
@@ -113,7 +137,8 @@ result = df.rdd \
 
 d = os.path.dirname(os.path.realpath(__file__))
 
-with open(d + "/" + cluster + "_figure7c.json", "w") as f:
-    json.dump(result, f)
+with open(d + "/" + cluster + "_figure7c.csv", "w") as f:
+    f.write(to_csv(result))
 
+os.remove(CHECKDIR + cluster + "_figure7c_working")  # drop the lock file without spawning a shell
 # vim: set ts=4 sw=4 et tw=120: