Completed task_slowdown analysis
This commit is contained in:
parent
c34009963d
commit
5373284e57
24 changed files with 4112 additions and 1219 deletions
BIN
machine_time_waste/a_state_changes_jobs.json
(Stored with Git LFS)
Normal file
BIN
machine_time_waste/a_state_changes_jobs.json
(Stored with Git LFS)
Normal file
Binary file not shown.
|
@ -1,895 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "proper-gnome",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Temporal impact: machine time waste"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "fantastic-harrison",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas\n",
|
||||
"from IPython import display\n",
|
||||
"import findspark\n",
|
||||
"findspark.init()\n",
|
||||
"import pyspark\n",
|
||||
"import pyspark.sql\n",
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"from pyspark.sql.functions import col, lag, when, concat_ws, last, first\n",
|
||||
"from pyspark.sql import Window\n",
|
||||
"from pyspark.sql.types import LongType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "failing-rebecca",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cluster=\"b\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "transsexual-baptist",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ERROR:root:Exception while sending command.\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1207, in send_command\n",
|
||||
" raise Py4JNetworkError(\"Answer from Java side is empty\")\n",
|
||||
"py4j.protocol.Py4JNetworkError: Answer from Java side is empty\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1033, in send_command\n",
|
||||
" response = connection.send_command(command)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1212, in send_command\n",
|
||||
" \"Error while receiving\", e, proto.ERROR_ON_RECEIVE)\n",
|
||||
"py4j.protocol.Py4JNetworkError: Error while receiving\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n",
|
||||
"ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"<ipython-input-3-047bb33e6a05>\", line 5, in <module>\n",
|
||||
" df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n",
|
||||
" return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n",
|
||||
" answer, self.gateway_client, self.target_id, self.name)\n",
|
||||
" File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n",
|
||||
" return f(*a, **kw)\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n",
|
||||
" format(target_id, \".\", name))\n",
|
||||
"py4j.protocol.Py4JError: An error occurred while calling o26.json\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n",
|
||||
" stb = value._render_traceback_()\n",
|
||||
"AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n",
|
||||
" connection = self.deque.pop()\n",
|
||||
"IndexError: pop from an empty deque\n",
|
||||
"\n",
|
||||
"During handling of the above exception, another exception occurred:\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n",
|
||||
" self.socket.connect((self.address, self.port))\n",
|
||||
"ConnectionRefusedError: [Errno 111] Connection refused\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "Py4JError",
|
||||
"evalue": "An error occurred while calling o26.json",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mPy4JError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-3-047bb33e6a05>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mgetOrCreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/home/claudio/google_2019/instance_events/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mcluster\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mcluster\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"_instance_events*.json.gz\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;32m/opt/spark/python/pyspark/sql/readwriter.py\u001b[0m in \u001b[0;36mjson\u001b[0;34m(self, path, schema, primitivesAsString, prefersDecimal, allowComments, allowUnquotedFieldNames, allowSingleQuotes, allowNumericLeadingZero, allowBackslashEscapingAnyCharacter, mode, columnNameOfCorruptRecord, dateFormat, timestampFormat, multiLine, allowUnquotedControlChars, lineSep, samplingRatio, dropFieldIfAllNull, encoding, locale, pathGlobFilter, recursiveFileLookup)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 300\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_spark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPythonUtils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoSeq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 301\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRDD\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1303\u001b[0m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1304\u001b[0m return_value = get_return_value(\n\u001b[0;32m-> 1305\u001b[0;31m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m 1306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/opt/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdeco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 129\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0mconverted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert_exception\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_exception\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 334\u001b[0m raise Py4JError(\n\u001b[1;32m 335\u001b[0m \u001b[0;34m\"An error occurred while calling {0}{1}{2}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 336\u001b[0;31m format(target_id, \".\", name))\n\u001b[0m\u001b[1;32m 337\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0manswer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mPy4JError\u001b[0m: An error occurred while calling o26.json"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"spark = pyspark.sql.SparkSession.builder \\\n",
|
||||
" .appName(\"machine_time_waste\") \\\n",
|
||||
" .getOrCreate()\n",
|
||||
"\n",
|
||||
"df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "juvenile-absolute",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.printSchema()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "lucky-western",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "normal-settlement",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# .filter(df.collection_type == 0) \\\n",
|
||||
"df2 = df \\\n",
|
||||
" .withColumn(\"time\", col(\"time\").cast(LongType())) \\\n",
|
||||
" .withColumn(\"type\", col(\"type\").cast(LongType())) \\\n",
|
||||
" .withColumn(\"type\", when(col(\"type\").isNull(), 0).otherwise(col(\"type\"))) \\\n",
|
||||
" .withColumn(\"id\", concat_ws(\"-\", \"collection_id\", \"instance_index\")) \\\n",
|
||||
" .where(col(\"time\").isNotNull()) \\\n",
|
||||
" .where(col(\"type\").isNotNull()) \\\n",
|
||||
" .where((col(\"instance_index\").isNotNull()) & (col(\"collection_id\").isNotNull())) \\\n",
|
||||
" .select(\"machine_id\", \"id\", \"time\", \"type\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "typical-homeless",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df2.show()\n",
|
||||
"print(\"Total: \" + str(df.count()))\n",
|
||||
"print(\"Filtered: \" + str(df2.count()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "collect-saying",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# my_window = Window.partitionBy(\"machine_id\", \"id\").orderBy(df2.time.asc())\n",
|
||||
"\n",
|
||||
"w2 = Window.partitionBy(\"id\").orderBy(df2.time.asc()).rowsBetween(Window.currentRow, Window.unboundedFollowing)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cooperative-appraisal",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# .withColumn(\"prev_time\", lag(df2.time).over(my_window)) \\\n",
|
||||
"# .withColumn(\"prev_type\", lag(df2.type).over(my_window)) \\\n",
|
||||
"\n",
|
||||
"df3 = df2 \\\n",
|
||||
" .withColumn(\"t3_time\", when((df2.type != 3), None).otherwise(df2.time)) \\\n",
|
||||
" .withColumn(\"t45678_time\", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.time)) \\\n",
|
||||
" .withColumn(\"t45678_type\", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.type)) \\\n",
|
||||
" .withColumn(\"t01_time\", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.time)) \\\n",
|
||||
" .withColumn(\"t01_type\", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.type)) \\\n",
|
||||
" .withColumn(\"next_time\", when(df2.type == 3, first(col(\"t45678_time\"), ignorenulls=True).over(w2)) \\\n",
|
||||
" .when((df2.type == 0) | (df2.type == 1), first(col(\"t3_time\"), ignorenulls=True).over(w2)) \\\n",
|
||||
" .when((df2.type >= 4) | (df2.type <= 8), first(col(\"t01_time\"), ignorenulls=True).over(w2)) \\\n",
|
||||
" .otherwise(None)) \\\n",
|
||||
" .withColumn(\"next_type\", when(df2.type == 3, first(col(\"t45678_type\"), ignorenulls=True).over(w2)) \\\n",
|
||||
" .when((df2.type == 0) | (df2.type == 1), 3) \\\n",
|
||||
" .when((df2.type >= 4) | (df2.type <= 8), first(col(\"t01_type\"), ignorenulls=True).over(w2)) \\\n",
|
||||
" .otherwise(None)) \\\n",
|
||||
" .withColumn(\"last_term_type\", last(col(\"t45678_type\"), ignorenulls=True).over(w2)) \\\n",
|
||||
" .withColumn(\"time_delta\", col(\"next_time\") - col(\"time\")) \\\n",
|
||||
" .select(\"machine_id\", \"id\", \"time\", \"type\", \"last_term_type\", \"time_delta\", \"t01_time\", \"t01_type\", \"t3_time\", \"t45678_time\", \"t45678_type\", \"next_time\", \"next_type\") \\"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ideal-angle",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df4 = df3.where(df3.next_type.isNotNull()).groupby(\"type\", \"next_type\", \"last_term_type\").sum(\"time_delta\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "working-difficulty",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# df3.orderBy(df3.machine_id, df3.time).show(n=100)\n",
|
||||
"# df3.printSchema()\n",
|
||||
"df4.show(n=1000000)\n",
|
||||
"df4.write.csv(\"/home/claudio/google_2019/thesis_queries/machine_time_waste/\" + cluster + \"_state_change.csv\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -1,81 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
|
||||
# # Temporal impact: machine time waste
|
||||
|
||||
# Inefficient version of the machine_time_waste.py script in the same dir. Please ignore, kept for future need
|
||||
|
||||
import pandas
|
||||
from IPython import display
|
||||
import findspark
|
||||
findspark.init()
|
||||
import pyspark
|
||||
import pyspark.sql
|
||||
import sys
|
||||
|
||||
from pyspark.sql.functions import col, lag, when, concat_ws, last, first
|
||||
from pyspark.sql import Window
|
||||
from pyspark.sql.types import LongType
|
||||
|
||||
cluster="b"
|
||||
|
||||
spark = pyspark.sql.SparkSession.builder \
|
||||
.appName("machine_time_waste") \
|
||||
.config("spark.local.dir", "/run/tmpfiles.d/spark") \
|
||||
.config("spark.driver.memory", "124g") \
|
||||
.getOrCreate()
|
||||
|
||||
df = spark.read.json("/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_instance_events*.json.gz")
|
||||
|
||||
df.printSchema()
|
||||
|
||||
df.show()
|
||||
|
||||
# .filter(df.collection_type == 0) \
|
||||
df2 = df \
|
||||
.withColumn("time", col("time").cast(LongType())) \
|
||||
.withColumn("type", col("type").cast(LongType())) \
|
||||
.withColumn("type", when(col("type").isNull(), 0).otherwise(col("type"))) \
|
||||
.withColumn("id", concat_ws("-", "collection_id", "instance_index")) \
|
||||
.where(col("time").isNotNull()) \
|
||||
.where(col("type").isNotNull()) \
|
||||
.where((col("instance_index").isNotNull()) & (col("collection_id").isNotNull())) \
|
||||
.select("time", "type", "id")
|
||||
|
||||
df2.show()
|
||||
print("Total: " + str(df.count()))
|
||||
print("Filtered: " + str(df2.count()))
|
||||
|
||||
# my_window = Window.partitionBy("machine_id", "id").orderBy(df2.time.asc())
|
||||
|
||||
w2 = Window.partitionBy("id").orderBy(df2.time.asc()).rowsBetween(Window.currentRow, Window.unboundedFollowing)
|
||||
|
||||
# .withColumn("prev_time", lag(df2.time).over(my_window)) \
|
||||
# .withColumn("prev_type", lag(df2.type).over(my_window)) \
|
||||
df3 = df2 \
|
||||
.withColumn("t3_time", when((df2.type != 3), None).otherwise(df2.time)) \
|
||||
.withColumn("t45678_time", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.time)) \
|
||||
.withColumn("t45678_type", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.type)) \
|
||||
.withColumn("t01_time", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.time)) \
|
||||
.withColumn("t01_type", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.type)) \
|
||||
.withColumn("next_time", when(df2.type == 3, first(col("t45678_time"), True).over(w2)) \
|
||||
.when((df2.type == 0) | (df2.type == 1), first(col("t3_time"), True).over(w2)) \
|
||||
.when((df2.type >= 4) | (df2.type <= 8), first(col("t01_time"), True).over(w2)) \
|
||||
.otherwise(None)) \
|
||||
.withColumn("next_type", when(df2.type == 3, first(col("t45678_type"), True).over(w2)) \
|
||||
.when((df2.type == 0) | (df2.type == 1), 3) \
|
||||
.when((df2.type >= 4) | (df2.type <= 8), first(col("t01_type"), True).over(w2)) \
|
||||
.otherwise(None)) \
|
||||
.withColumn("last_term_type", last(col("t45678_type"), True).over(w2)) \
|
||||
.withColumn("time_delta", col("next_time") - col("time")) \
|
||||
.select("id", "time", "type", "last_term_type", "time_delta", "t01_time", \
|
||||
"t01_type", "t3_time", "t45678_time", "t45678_type", "next_time", "next_type")
|
||||
|
||||
df4 = df3.where(df3.next_type.isNotNull()).groupby("type", "next_type", "last_term_type").sum("time_delta")
|
||||
|
||||
# df3.orderBy(df3.machine_id, df3.time).show(n=100)
|
||||
# df3.printSchema()
|
||||
df4.show(n=1000000)
|
||||
df4.write.csv("/home/claudio/google_2019/thesis_queries/machine_time_waste/" + cluster + "_state_change.csv")
|
||||
|
||||
# vim: set ts=2 sw=2 et tw=120:
|
1
task_slowdown/.gitignore
vendored
Normal file
1
task_slowdown/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
task_slowdown/?_state_changes.json.gz
|
26
task_slowdown/a_slowdown_table.csv
Normal file
26
task_slowdown/a_slowdown_table.csv
Normal file
|
@ -0,0 +1,26 @@
|
|||
,priority,n_fsh,n_non,finished%,c_zero_end,s_last,m_last,s_all,m_all,s_slow,m_slow
|
||||
0,-1,499210,4201399,0.10620113266174659,0.0,390649944354691.0,782600570.8581482,428760224746533.0,592714553.7453593,631784.7046286245,1.265672957712968
|
||||
1,24,0,4,0.0,,,,,,,
|
||||
2,25,171,51172,0.0033305416512474924,0.0,986562190939.0,5769369537.654971,81858305998487.0,1203020192.7941773,929847.5905649044,5437.705207981897
|
||||
3,100,0,9528,0.0,,,,,,,
|
||||
4,101,125339,27667,0.8191770257375528,0.0,7934233077571009.0,63305219515.77804,2.4435921623968336e+17,6345535958.256735,16491986771215.066,131585350.7952021
|
||||
5,102,0,8992,0.0,,,,,,,
|
||||
6,103,1406882,7978164,0.14990677722836948,0.0,4323773334171624.0,3074344398.783867,4888365760155938.0,3032797273.2650743,16505180966495.656,11735724.038591769
|
||||
7,105,24095559,17680282,0.5767821406635476,0.0,4.0141745605863256e+16,1666128633.975603,4.330222470396991e+16,1749707987.6882613,16507785342461.914,685174.3347848131
|
||||
8,107,5056370,4320033,0.5392654304641129,0.0,5167495901308944.0,1021994587.1937487,5251141006134509.0,1030654424.6556122,16507790953107.875,3264806.2664798116
|
||||
9,114,0,54127,0.0,,,,,,,
|
||||
10,115,5171,120690,0.041085006475397465,0.0,10550552314593.0,2040725786.1882012,10596168447023.0,2042044410.6808634,16507790958401.475,3192996316.9055076
|
||||
11,116,171154,1140843,0.13045304219445625,0.0,760319854963877.0,4443119013.597687,785219224506580.0,4443446611.511143,16507791187685.025,96467401.73842806
|
||||
12,117,0,89560,0.0,,,,,,,
|
||||
13,118,70767,523560,0.11907081455158523,0.0,128245552414526.0,1816612165.2009463,128693641215270.0,1814017270.1746447,16507791258726.13,233834654.353308
|
||||
14,119,158153,585586,0.21264583408964705,0.0,355813050795329.0,2250101502.5126414,535471126096282.0,2877438302.871586,17403987765646.334,110059872.54727907
|
||||
15,170,0,109,0.0,,,,,,,
|
||||
16,200,1812271,4847612,0.27211754320608933,0.0,8227216998822171.0,4545546812.495018,3.386947908315813e+16,16844687840.384369,165162147163583.97,91252275.42822097
|
||||
17,205,0,21334,0.0,,,,,,,
|
||||
18,210,0,1169,0.0,,,,,,,
|
||||
19,214,0,1390,0.0,,,,,,,
|
||||
20,215,0,1169,0.0,,,,,,,
|
||||
21,360,805,129798,0.006163717525631111,0.0,395919732038355.0,514181470179.6818,1157676429161870.0,400580079294.7647,165285702535957.12,214656756540.20407
|
||||
22,400,0,75,0.0,,,,,,,
|
||||
23,450,112,4971,0.02203423175290183,0.0,13736332570286.0,686816628514.3,15693068016609.0,653877834025.375,165285702535990.84,8264285126799.542
|
||||
24,500,0,75,0.0,,,,,,,
|
|
Binary file not shown.
24
task_slowdown/b_slowdown_table.csv
Normal file
24
task_slowdown/b_slowdown_table.csv
Normal file
|
@ -0,0 +1,24 @@
|
|||
,priority,n_fsh,n_non,finished%,c_zero_end,s_last,m_last,s_all,m_all,s_slow,m_slow
|
||||
0,0,28120567,34102646,0.4519304877425729,0.0,3.799639631137341e+16,1351418653.3085494,4.469884375416512e+16,1466778246.0122163,1456542764472.7542,51804.888156219255
|
||||
1,25,19,104988,0.00018094031826449667,0.0,203222731195.0,10695933220.789474,27126548952735.0,4121323146.87557,1456543059817.196,76660161043.01031
|
||||
2,80,0,1,0.0,,,,,,,
|
||||
3,100,0,87231,0.0,,,,,,,
|
||||
4,101,25076,12644,0.6647932131495228,0.0,152191120001561.0,6069194448.93767,6.596179176874172e+16,5401534266.648366,1758002110737.1948,70106959.27329697
|
||||
5,103,16512,15505605,0.0010637724222797701,0.0,320710989163979.0,19429964204.77275,527606225086647.0,14896556132.098001,1767936252845.3188,107108703.06829752
|
||||
6,105,10945,2351495,0.004632921894312659,0.0,1.0226301871488184e+16,934420858140.3677,2.462585154304219e+16,392431341519.6678,1768020014445.6218,161551536.40767744
|
||||
7,107,0,501380,0.0,,,,,,,
|
||||
8,114,1999,293319,0.00676897446142802,0.0,65864722282875.0,32948835559.21711,66090092132932.0,30470305271.061317,1768020016461.5698,884452234.3479589
|
||||
9,115,196,4564,0.041176470588235294,0.0,4886710126627.0,25584869772.91623,28913941115452.0,107088670797.97037,2151127269026.9988,11262446434.69633
|
||||
10,116,215967,2380902,0.0831643798743795,0.0,6325603475353640.0,29289947331.06587,7019216941178176.0,29017499012.708668,2285560716882.638,10583014.455502689
|
||||
11,117,0,1849,0.0,,,,,,,
|
||||
12,118,394,126176,0.0031129019514892944,12.0,1068751378088.0,2775977605.4233766,1068751378088.0,2775977605.4233766,2285560717255.638,5936521343.521138
|
||||
13,119,108,54995,0.001959965882075386,0.0,20852738589032.0,193080912861.4074,53282074942406.0,304468999670.8914,2285561567862.601,21162607109.838898
|
||||
14,170,0,14,0.0,,,,,,,
|
||||
15,199,0,14474,0.0,,,,,,,
|
||||
16,200,91396,204224,0.3091671740748258,68.0,1.6689308702771376e+16,182604366742.21384,1.6201186903133786e+17,466328562061.3029,5237700121655813.0,57307760970.45618
|
||||
17,205,0,35822,0.0,,,,,,,
|
||||
18,210,0,1869,0.0,,,,,,,
|
||||
19,214,0,400,0.0,,,,,,,
|
||||
20,215,0,1869,0.0,,,,,,,
|
||||
21,360,1203,33139,0.03502999242909557,0.0,1261039006121058.0,1048245225370.7881,2032979622033016.0,495124116423.04333,5237700140362286.0,4353865453335.2334
|
||||
22,450,174,28215,0.006129134523935327,0.0,274809884969137.0,1579367154995.0403,290615463635764.0,1529555071767.179,5237700140362653.0,30101724944612.95
|
|
Binary file not shown.
24
task_slowdown/c_slowdown_table.csv
Normal file
24
task_slowdown/c_slowdown_table.csv
Normal file
|
@ -0,0 +1,24 @@
|
|||
,priority,n_fsh,n_non,finished%,c_zero_end,s_last,m_last,s_all,m_all,s_slow,m_slow
|
||||
0,0,106013713,102314553,0.508878199946233,4.0,9.890266864180749e+16,933028622.4118965,1.0936524434036358e+17,1001823443.2076209,6123674401667.88,57769.55839058094
|
||||
1,3,0,3,0.0,,,,,,,
|
||||
2,10,0,3,0.0,,,,,,,
|
||||
3,25,903,3116,0.22468275690470266,0.0,154666893037724.0,171281166154.73312,1266916422470409.0,4551424874.873935,15646632159631.086,17327388881.09755
|
||||
4,100,0,827,0.0,,,,,,,
|
||||
5,101,25060,22557,0.5262826301530966,0.0,157151661002382.0,6271016001.691221,6.623793903991674e+16,2497980285.401496,16298829897442.27,650392254.4869221
|
||||
6,103,2437,45665340,5.3363666026485154e-05,219.0,5831404975264.0,3343695513.3394494,16294921810434.0,7444002654.378255,16709002329334.33,9580849959.480694
|
||||
7,105,8972,38135622,0.00023521026334688476,8.0,1.0778395208878152e+16,1202140888788.5513,1.4791091149243492e+16,863763790542.1333,16709168004960.305,1863614544.3854902
|
||||
8,107,27,11026707,2.448594479562126e-06,0.0,189897833827.0,7033253104.703704,2793068197304.0,93102273243.46666,17143029732233.47,634927027119.7582
|
||||
9,114,223,1003339,0.00022220849334669905,0.0,702101441201.0,3148436956.0582957,710011356178.0,3141643168.9292035,17143029732510.385,76874572791.52638
|
||||
10,115,353,124899,0.0028183182703669404,1.0,5184592226157.0,14728955187.946022,10269344080150.0,27167576931.613758,17154514855201.568,48734417202.27718
|
||||
11,116,536,3873404,0.00013836042891732966,0.0,1525621648629.0,2846309045.949627,1559366613321.0,2850761633.1279707,17154514855821.39,32004691895.189163
|
||||
12,117,259,19,0.9316546762589928,0.0,553079186614.0,2143717777.5736434,553079186614.0,2143717777.5736434,17154514856079.39,66490367659.22244
|
||||
13,118,98,2368621,4.1372573108080784e-05,0.0,109189718829.0,1114180804.377551,120109722380.0,1112127059.074074,17154514856185.732,175046069961.0789
|
||||
14,119,1523,67207,0.022159173577768077,0.0,873806372198378.0,573740231253.0387,1786103338848500.0,242446496382.31302,49098665689622.64,32238125863.179672
|
||||
15,170,0,30,0.0,,,,,,,
|
||||
16,200,40086,1071316,0.03606795740875039,96.0,1.4133034700908504e+16,352603031308.5301,5.850686106232538e+16,357993398166.3426,250440847510464.97,6248212352.439124
|
||||
17,205,0,2560,0.0,,,,,,,
|
||||
18,210,0,160,0.0,,,,,,,
|
||||
19,214,0,2560,0.0,,,,,,,
|
||||
20,215,0,160,0.0,,,,,,,
|
||||
21,360,1764,38626,0.04367417677642981,3.0,1357017724420526.0,769284424274.6746,2796928614584114.0,442062369935.8486,252051867201690.6,142886546032.7044
|
||||
22,450,472,30733,0.015125781124819741,0.0,656162472642778.0,1390174730175.3772,699478629122954.0,1319770998345.1963,252051867202432.4,534008193225.4924
|
|
Binary file not shown.
24
task_slowdown/d_slowdown_table.csv
Normal file
24
task_slowdown/d_slowdown_table.csv
Normal file
|
@ -0,0 +1,24 @@
|
|||
,priority,n_fsh,n_non,finished%,c_zero_end,s_last,m_last,s_all,m_all,s_slow,m_slow
|
||||
0,0,22214009,61540068,0.26522898700202974,0.0,3.1055625498764056e+16,1398282144.4098547,3.465814625449232e+16,1469082312.5360591,5049183094792.711,227339.8925932346
|
||||
1,5,0,1,0.0,,,,,,,
|
||||
2,25,785,4033,0.16293067662930677,0.0,16824467821138.0,21432443084.252228,1104970471364140.0,4037395212.59606,5082576480847.372,6474619720.824677
|
||||
3,100,0,389,0.0,,,,,,,
|
||||
4,101,25099,30289,0.45314869646854916,0.0,210602264772631.0,8390862774.318937,6.6540641585327544e+16,3317182700.392372,5082722548327.359,202506974.31480774
|
||||
5,103,1409,31032032,4.540263517667925e-05,149.0,8107930234531.0,6790561335.453099,8640791772999.0,6646762902.306923,5082722555518.349,4256886562.4106774
|
||||
6,105,8438,16308736,0.0005171238598056257,3.0,6967671696826425.0,825749193745.7247,2.018562059248311e+16,924080781563.9587,5082735553839.623,602362592.3014486
|
||||
7,107,21,6004332,3.497462590890309e-06,0.0,6311162556240.0,300531550297.1429,9790846227466.0,174836539776.17856,5082735553873.485,242035026374.9279
|
||||
8,114,0,1271161,0.0,,,,,,,
|
||||
9,115,969,17705,0.05189032879940023,1.0,12194814321934.0,12597948679.683884,26664716586105.0,26141879005.985294,7303039756501.865,7544462558.369696
|
||||
10,116,2205,1745653,0.0012615441300151385,0.0,20436333384618.0,9268178405.722448,26128064845391.0,10955163457.187002,7683329684843.638,3484503258.432489
|
||||
11,117,6,1,0.8571428571428571,0.0,65815182581.0,10969197096.833334,65815182581.0,10969197096.833334,7683329684849.638,1280554947474.9397
|
||||
12,118,684,1264690,0.0005405516471809915,0.0,16443832326929.0,24040690536.445908,33689279361478.0,30598800509.970936,7683329686398.121,11232938138.00895
|
||||
13,119,413,93059,0.004418435467305718,0.0,76007488187599.0,184484194630.09467,229579544718504.0,172746083309.6343,8050015119135.113,19538871648.386196
|
||||
14,197,0,9,0.0,,,,,,,
|
||||
15,199,0,2056,0.0,,,,,,,
|
||||
16,200,83851,1200482,0.06528758507334156,104.0,2.3440935680172868e+16,279564636964.1836,1.2926151685985552e+17,349363949014.5856,767029108363450.2,9147852165.387966
|
||||
17,205,0,1160,0.0,,,,,,,
|
||||
18,210,0,116,0.0,,,,,,,
|
||||
19,214,0,1160,0.0,,,,,,,
|
||||
20,215,0,116,0.0,,,,,,,
|
||||
21,360,630,38869,0.01594977088027545,1.0,409573341242607.0,650116414670.8048,1014392629034870.0,390151011167.2577,767029130560754.0,1217506556445.6414
|
||||
22,450,169,27484,0.006111452645282609,0.0,158644914383813.0,938727304046.2307,211037058203755.0,523665156833.139,767029130561366.4,4538633908647.139
|
|
Binary file not shown.
23
task_slowdown/e_slowdown_table.csv
Normal file
23
task_slowdown/e_slowdown_table.csv
Normal file
|
@ -0,0 +1,23 @@
|
|||
,priority,n_fsh,n_non,finished%,c_zero_end,s_last,m_last,s_all,m_all,s_slow,m_slow
|
||||
0,0,18722018,25015686,0.42805214466676167,1.0,1.500353270905454e+16,802192523.3827926,2.1598252363190628e+16,1126540642.2870827,555025694753.4033,29675.5085119268
|
||||
1,25,560,9918,0.0534453139912197,0.0,17929535963182.0,32247366840.255394,47981881861148.0,38946332679.50325,555025701244.8347,998247664.1094149
|
||||
2,100,0,2801,0.0,,,,,,,
|
||||
3,101,1000,6281160,0.0001591809186649178,0.0,30603262218491.0,30603262218.491,34352364938395.0,27725879692.00565,555025751516.257,555025751.5162569
|
||||
4,103,2901,13390514,0.0002165989779305726,63.0,218657914919168.0,76293759567.05095,691624969429776.0,48552121406.09168,555026057152.8004,193658777.79232395
|
||||
5,105,1819,447535,0.004048033399057314,10.0,194046126324950.0,106677364664.62341,2862241027748842.0,64190200218.63292,558184748019.9266,306863522.82568806
|
||||
6,107,0,17636,0.0,,,,,,,
|
||||
7,114,0,26682,0.0,,,,,,,
|
||||
8,115,35,128050,0.000273256040910333,0.0,2353296659913.0,67237047426.08572,2353296659913.0,65369351664.25,558184748054.9266,15948135658.712189
|
||||
9,116,0,22414,0.0,,,,,,,
|
||||
10,117,0,1927223,0.0,,,,,,,
|
||||
11,118,0,231,0.0,,,,,,,
|
||||
12,119,32,6951,0.004582557639982815,0.0,1987923999531.0,62122624985.34375,20497271238102.0,83322240805.29268,558184748263.1056,17443273383.22205
|
||||
13,170,0,25,0.0,,,,,,,
|
||||
14,200,52049,2604518,0.019592579445577695,55.0,1.20565574443766e+16,231638599096.55518,1.0291142081922765e+17,414148798615.74414,20960917278040.887,402715081.52012306
|
||||
15,201,0,52,0.0,,,,,,,
|
||||
16,205,0,48240,0.0,,,,,,,
|
||||
17,210,0,2691,0.0,,,,,,,
|
||||
18,215,0,2691,0.0,,,,,,,
|
||||
19,220,0,360,0.0,,,,,,,
|
||||
20,360,3670,6207,0.3715703148729371,0.0,2244218737366886.0,611503743151.7401,6448185707562959.0,439279631280.26154,20971123506308.477,5714202590.274789
|
||||
21,450,82,14869,0.005484582971038727,0.0,65910927328054.0,803791796683.5853,73377545680348.0,824466805397.1686,20971123506484.12,255745408615.66
|
|
Binary file not shown.
18
task_slowdown/f_slowdown_table.csv
Normal file
18
task_slowdown/f_slowdown_table.csv
Normal file
|
@ -0,0 +1,18 @@
|
|||
,priority,n_fsh,n_non,finished%,c_zero_end,s_last,m_last,s_all,m_all,s_slow,m_slow
|
||||
0,0,12923639,15663283,0.4520822143776095,0.0,3.78338056722563e+16,2928982128.368198,4.116929832921998e+16,2972515519.4539537,1201620091124.0388,93025.8986494546
|
||||
1,25,41,6291,0.0064750473783954515,0.0,7565244176289.0,184518150641.19513,16877753924299.0,34096472574.341415,1201620091466.3787,29307807108.936066
|
||||
2,100,0,1,0.0,,,,,,,
|
||||
3,101,24996,37034,0.4029663066258262,0.0,203958763846899.0,8159656098.851776,6.6053822975088104e+16,10082616647.838951,1202133084785.5625,48093018.2743464
|
||||
4,103,7762,13279173,0.0005841828834114113,0.0,358824272376313.0,46443731863.35918,418872350566253.0,47234139666.92072,1202133235297.3857,155595810.93675715
|
||||
5,105,12351,5541856,0.002223719785740791,0.0,1.372628292784802e+16,1111529915608.391,2.128195689989448e+16,1173594182193.365,1202168661283.2175,97349474.55528525
|
||||
6,107,135,221684,0.0006086043125250768,0.0,10820342510697.0,80150685264.42223,10958054196969.0,78834922280.35252,1202168661421.0103,8904953047.56304
|
||||
7,114,46,661054,6.958100136136742e-05,0.0,31149185194.0,677156199.8695652,31149185194.0,677156199.8695652,1202168661467.0103,26134101336.239353
|
||||
8,115,284,7503,0.036471041479388726,0.0,31792318431347.0,121344726837.20229,161956898216762.0,252662867732.85803,1202171165553.6995,4588439563.182059
|
||||
9,116,0,3286187,0.0,,,,,,,
|
||||
10,117,2,2330310,8.582541736900466e-07,0.0,31750539045.0,15875269522.5,31750539045.0,15875269522.5,1202171165555.6995,601085582777.8497
|
||||
11,118,28,1344923,2.081860231339283e-05,0.0,841251531517.0,30044697554.17857,841251531517.0,25492470652.030304,1202171165583.6995,42934684485.132126
|
||||
12,119,9624,21070,0.31354662148954193,0.0,1483986210888747.0,154196405952.69608,1.1291352785335876e+16,68833343200.92097,1242061274616.297,129058735.9326992
|
||||
13,200,91296,2407549,0.036535279299036154,4.0,2.7130266058773024e+16,297168178877.2019,1.612418735061216e+17,492371666990.7219,313966009267630.25,3438989761.5189085
|
||||
14,201,0,2710,0.0,,,,,,,
|
||||
15,360,3433,42804,0.07424789670610117,0.0,3307182681726832.0,963350620951.5968,7181626757621634.0,569428065146.0223,314099638070996.44,91494214410.42715
|
||||
16,450,257,25634,0.009926229191610984,0.0,286756266724416.0,1115783139005.5098,292793287192594.0,1113282460808.3423,314099638071341.06,1222177580044.1287
|
|
Binary file not shown.
18
task_slowdown/g_slowdown_table.csv
Normal file
18
task_slowdown/g_slowdown_table.csv
Normal file
|
@ -0,0 +1,18 @@
|
|||
,priority,n_fsh,n_non,finished%,c_zero_end,s_last,m_last,s_all,m_all,s_slow,m_slow
|
||||
0,0,5492965,10849211,0.3361220072528897,0.0,1.6527216940835638e+16,3009921472.2279587,1.8824305818044464e+16,3316599522.012279,11162954215411.947,2032986.9031638177
|
||||
1,25,16,6841,0.002333381945457197,0.0,987330663809.0,61708166488.0625,8582428925651.0,12156414908.854107,11162954217604.191,697684638600.262
|
||||
2,50,0,114,0.0,,,,,,,
|
||||
3,100,0,352,0.0,,,,,,,
|
||||
4,101,99978,3658,0.9647033849241576,0.0,1.3392322932850584e+16,133952698922.26875,2.5952343687590653e+17,7448011529.683315,20715452279317.676,207200106.81667644
|
||||
5,103,1955,6006297,0.0003253858193697601,0.0,230231524187399.0,118310135759.19785,292689255555383.0,112746246361.85786,20715452288533.508,10645145060.91136
|
||||
6,105,260,132200,0.0019628567114600634,8.0,2150573680378.0,8271437232.223077,2152161004117.0,8214354977.545801,20715452288785.64,79674816495.32939
|
||||
7,107,0,23092,0.0,,,,,,,
|
||||
8,114,0,120,0.0,,,,,,,
|
||||
9,115,130,1573,0.07633587786259542,0.0,4406301766057.0,34424232547.32031,7940457046678.0,56315298203.39007,21102853071855.844,164866039623.87378
|
||||
10,117,0,1839528,0.0,,,,,,,
|
||||
11,118,1425,1485,0.4896907216494845,0.0,867149560067822.0,608526007065.1383,3362027382573537.0,474728520555.4274,21121295768754.938,14821961942.985922
|
||||
12,119,122,141831,0.0008594393919114073,0.0,15128747139790.0,124006124096.63934,47898774339924.0,231395045120.4058,21121296641155.156,173125382304.55048
|
||||
13,170,0,260,0.0,,,,,,,
|
||||
14,200,51527,141118,0.2674712554179968,43.0,5974724112527930.0,115953269402.99124,8.707510143782531e+16,399049986883.15314,191818188297781.62,3722673322.680956
|
||||
15,360,1713,104101,0.016188784092842155,1.0,1347435910813181.0,786594226977.9224,2855922335929930.0,488025006139.7693,191818197237788.03,111977931837.58788
|
||||
16,450,643,22848,0.027372185092162955,0.0,774652144131685.0,1204746724932.636,803257942748745.0,1188251394598.735,191818197238602.72,298317569577.9202
|
|
Binary file not shown.
18
task_slowdown/h_slowdown_table.csv
Normal file
18
task_slowdown/h_slowdown_table.csv
Normal file
|
@ -0,0 +1,18 @@
|
|||
,priority,n_fsh,n_non,finished%,c_zero_end,s_last,m_last,s_all,m_all,s_slow,m_slow
|
||||
0,0,6626426,17257424,0.2774437957029541,0.0,3.7506706117482584e+16,5663231755.760943,4.2099702663376344e+16,6211184824.091121,955621700576.83,144291.72063921622
|
||||
1,19,0,10,0.0,,,,,,,
|
||||
2,25,79,7497,0.01042766631467793,0.0,24084767122845.0,304870469909.43036,73800261137123.0,283847158219.70386,955623663255.364,12096502066.523596
|
||||
3,101,25000,0,1.0,0.0,851579182155103.0,34063167286.20412,6.5093086443320904e+16,12249648080.349836,1012710576185.66,40508423.0474264
|
||||
4,103,24112,4986108,0.004812563120980716,0.0,6553604470468504.0,272635180566.95667,8271087007558414.0,92893899318.92467,1012712234179.6411,42129637.83091942
|
||||
5,105,10000,690645,0.014272563138251183,0.0,6117631322603891.0,611763132260.3892,2.572798470086729e+16,393761531411.06055,1012750158700.6492,101275015.87006491
|
||||
6,107,0,146547,0.0,,,,,,,
|
||||
7,115,23,426,0.051224944320712694,0.0,664857822015.0,28906861826.739132,664857822015.0,28906861826.739132,1012750158723.6492,44032615596.6804
|
||||
8,116,134,12809,0.010353086610523062,0.0,84843310887.0,633159036.4701493,6231571054261.0,42105209826.08784,1012750568167.0226,7557840060.947929
|
||||
9,117,1,2013415,4.966683487168077e-07,0.0,655786059.0,655786059.0,655786059.0,655786059.0,1012750568168.0226,1012750568168.0226
|
||||
10,118,506,49926,0.010033312182741116,0.0,100029587052190.0,197686930933.18182,194769739643864.0,139121242602.76,1012750618649.7072,2001483436.0666149
|
||||
11,119,3970,2729917,0.0014521448765073318,0.0,555431711526397.0,139907232122.51813,4055258837608037.0,15558074672.488085,1246603528452.2136,314005926.56227046
|
||||
12,200,57852,2082619,0.027027696240687214,21.0,1.7285546693220572e+16,298799424256.18964,1.0022405625008838e+17,470783023233.28876,17132799512734.742,296159023.5563482
|
||||
13,201,0,363,0.0,,,,,,,
|
||||
14,220,0,360,0.0,,,,,,,
|
||||
15,360,1862,40210,0.044257463396082906,1.0,1561694630087215.0,838718920562.4141,3152187766522115.0,397301205762.80756,17132832150876.488,9201306203.478243
|
||||
16,450,91,16906,0.005353885979878802,0.0,133822527771016.0,1470577228252.923,141139695870169.0,1411396958701.69,17132832151004.443,188272880780.26862
|
|
Binary file not shown.
File diff suppressed because one or more lines are too long
|
@ -9,39 +9,54 @@ findspark.init()
|
|||
import pyspark
|
||||
import pyspark.sql
|
||||
import sys
|
||||
import gzip
|
||||
|
||||
from pyspark.sql.functions import col, lag, when, concat_ws, last, first
|
||||
from pyspark import AccumulatorParam
|
||||
from pyspark.sql.functions import lit
|
||||
from pyspark.sql import Window
|
||||
from pyspark.sql.types import LongType
|
||||
from pyspark.sql.types import ByteType
|
||||
|
||||
cluster=sys.argv[1]
|
||||
|
||||
spark = pyspark.sql.SparkSession.builder \
|
||||
.appName("task_slowdown") \
|
||||
.config("spark.driver.maxResultSize", "32g") \
|
||||
.config("spark.local.dir", "/run/tmpfiles.d/spark") \
|
||||
.config("spark.driver.memory", "124g") \
|
||||
.config("spark.driver.memory", "75g") \
|
||||
.getOrCreate()
|
||||
sc = spark.sparkContext
|
||||
|
||||
df = spark.read.json("/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_instance_events*.json.gz")
|
||||
# df = spark.read.json("/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_test.json")
|
||||
#df = spark.read.json("/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_test.json")
|
||||
|
||||
df.printSchema()
|
||||
try:
|
||||
df["collection_type"] = df["collection_type"].cast(ByteType())
|
||||
except:
|
||||
df = df.withColumn("collection_type", lit(None).cast(ByteType()))
|
||||
|
||||
df.show()
|
||||
|
||||
non = sc.accumulator(0)
|
||||
tot = sc.accumulator(0)
|
||||
class NonPriorityAcc(pyspark.AccumulatorParam):
|
||||
|
||||
def zero(self, value):
|
||||
return {}
|
||||
|
||||
def addInPlace(self, v1, v2):
|
||||
for key in v2:
|
||||
if key in v1:
|
||||
v1[key] += v2[key]
|
||||
else:
|
||||
v1[key] = v2[key]
|
||||
return v1
|
||||
|
||||
non = sc.accumulator({}, NonPriorityAcc())
|
||||
|
||||
def for_each_task(ts):
|
||||
global none
|
||||
global tot
|
||||
global non
|
||||
|
||||
ts = sorted(ts, key=lambda x: x["time"])
|
||||
|
||||
last_term = None
|
||||
priority = None
|
||||
priority = -1
|
||||
responding = False
|
||||
|
||||
resp_burst_start = None
|
||||
|
@ -51,13 +66,14 @@ def for_each_task(ts):
|
|||
resp_time_last = 0
|
||||
|
||||
for i,t in enumerate(ts):
|
||||
if t["priority"] is not None and priority is None:
|
||||
if t["priority"] is not -1 and priority is -1:
|
||||
priority = t["priority"]
|
||||
if responding:
|
||||
resp_burst_type.append(t["type"])
|
||||
#if responding:
|
||||
# resp_burst_type.append(t["type"])
|
||||
if t["type"] >= 4 and t["type"] <= 8:
|
||||
last_term = t["type"]
|
||||
if responding:
|
||||
resp_burst_type.append(t["type"])
|
||||
# This response time interval has ended, so record the time delta and term
|
||||
resp_time.append((t["time"] - resp_burst_start, resp_burst_type))
|
||||
responding = False
|
||||
|
@ -65,12 +81,10 @@ def for_each_task(ts):
|
|||
resp_burst_start = t["time"]
|
||||
resp_burst_type = [t["type"]]
|
||||
responding = True
|
||||
|
||||
|
||||
tot.add(1)
|
||||
|
||||
if last_term != 6:
|
||||
non.add(1)
|
||||
return (priority, resp_time) if last_term == 5 else None
|
||||
non.add({priority: 1})
|
||||
return (priority, resp_time) if last_term == 6 else None
|
||||
|
||||
|
||||
def cleanup(x):
|
||||
|
@ -78,14 +92,13 @@ def cleanup(x):
|
|||
"time": int(x.time),
|
||||
"type": 0 if x.type is None else int(x.type),
|
||||
"id": x.collection_id + "-" + x.instance_index,
|
||||
"priority": 0 if x.priority is None else int(x.priority)
|
||||
"priority": -1 if x.priority is None else int(x.priority)
|
||||
}
|
||||
|
||||
|
||||
df2 = df.rdd \
|
||||
.filter(lambda x: x.collection_type is None or x.collection_type == 0) \
|
||||
.filter(lambda x: x.type is not None and x.time is not None
|
||||
and x.instance_index is not None and x.collection_id is not None) \
|
||||
.filter(lambda x: x.time is not None and x.instance_index is not None and x.collection_id is not None) \
|
||||
.map(cleanup) \
|
||||
.groupBy(lambda x: x["id"]) \
|
||||
.mapValues(for_each_task) \
|
||||
|
@ -94,6 +107,12 @@ df2 = df.rdd \
|
|||
.groupBy(lambda x: x[0]) \
|
||||
.mapValues(lambda x: [e[1] for e in x])
|
||||
|
||||
a = {"val": df2.collect(), "tot": tot.value, "non": non.value}
|
||||
with open(cluster + "_state_changes.json", "w") as out:
|
||||
json.dump(a, out)
|
||||
val = df2.collect()
|
||||
val2 = {}
|
||||
for e in val:
|
||||
val2[e[0]] = e[1]
|
||||
|
||||
a = {"val": val2, "non": non.value}
|
||||
|
||||
with gzip.open(cluster + "_state_changes.json.gz", "wt") as out:
|
||||
json.dump(a, out, separators=(',', ':'))
|
||||
|
|
3762
task_slowdown/task_slowdown_table.ipynb
Normal file
3762
task_slowdown/task_slowdown_table.ipynb
Normal file
File diff suppressed because it is too large
Load diff
127
task_slowdown/task_slowdown_table.py
Normal file
127
task_slowdown/task_slowdown_table.py
Normal file
|
@ -0,0 +1,127 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# # Task slowdown
|
||||
|
||||
import json
|
||||
import sys
|
||||
import gzip
|
||||
import pandas
|
||||
import seaborn as sns
|
||||
import matplotlib as mpl
|
||||
import matplotlib.pyplot as plt
|
||||
from IPython.display import display, HTML
|
||||
|
||||
columns = ["priority", "n_fsh", "n_non", "finished%", "c_zero_end", "s_last", "m_last", "s_all", "m_all", "s_slow", "m_slow"]
|
||||
CLUSTERS = "abcdefgh"
|
||||
DIR = "/home/claudio/google_2019/thesis_queries/task_slowdown/"
|
||||
|
||||
df = {}
|
||||
|
||||
for cluster in CLUSTERS:
|
||||
wc = 0
|
||||
tc = {}
|
||||
s_slowdown = 0
|
||||
|
||||
# Loading
|
||||
print("Loading cluster " + cluster + "...")
|
||||
data = None
|
||||
|
||||
with gzip.open(DIR + cluster + "_state_changes.json.gz", "r") as f:
|
||||
data = json.load(f)
|
||||
|
||||
print("Done loading")
|
||||
|
||||
# Computation
|
||||
priorities = sorted(set(data["val"].keys()).union(set(data["non"].keys())), key=lambda x: int(x))
|
||||
table = {}
|
||||
for c in columns:
|
||||
table[c] = []
|
||||
|
||||
def add(col, val):
|
||||
table[col].append(val)
|
||||
|
||||
def empty_row():
|
||||
add("m_last", None)
|
||||
add("m_all", None)
|
||||
add("m_slow", None)
|
||||
add("s_last", None)
|
||||
add("s_all", None)
|
||||
add("s_slow", None)
|
||||
add("c_zero_end", None)
|
||||
|
||||
for priority in priorities:
|
||||
print("Priority " + priority)
|
||||
add("priority", priority)
|
||||
|
||||
n_not_finished = 0 if priority not in data["non"] else data["non"][priority]
|
||||
n_finished = 0 if priority not in data["val"] else len(data["val"][priority])
|
||||
|
||||
add("n_fsh", n_finished)
|
||||
add("n_non", n_not_finished)
|
||||
add("finished%", n_finished / (n_not_finished + n_finished))
|
||||
|
||||
if n_finished == 0:
|
||||
empty_row()
|
||||
else:
|
||||
s = 0
|
||||
c = 0
|
||||
ss = 0
|
||||
sum_zero_end = 0
|
||||
for task in data["val"][priority]:
|
||||
|
||||
idx_last = len(task) - 1
|
||||
t = 0
|
||||
|
||||
while idx_last >= 0 and task[idx_last][1][-1] != 6:
|
||||
idx_last -= 1
|
||||
t += 1
|
||||
|
||||
if idx_last < 0:
|
||||
#print("weird")
|
||||
wc += 1
|
||||
n_finished -= 1
|
||||
n_not_finished += 1
|
||||
continue
|
||||
|
||||
if t in tc:
|
||||
tc[t] += 1
|
||||
else:
|
||||
tc[1] = 1
|
||||
|
||||
last_slice = task[idx_last]
|
||||
|
||||
s += last_slice[0]
|
||||
|
||||
local_sum = 0
|
||||
for exec_slice in task:
|
||||
local_sum += exec_slice[0]
|
||||
c += 1
|
||||
|
||||
if last_slice[0] > 0:
|
||||
s_slowdown += (local_sum / last_slice[0])
|
||||
else:
|
||||
sum_zero_end += 1
|
||||
|
||||
ss += local_sum
|
||||
|
||||
if n_finished == 0:
|
||||
empty_row()
|
||||
continue
|
||||
|
||||
add("c_zero_end", sum_zero_end)
|
||||
add("s_last", s)
|
||||
add("m_last", s / n_finished)
|
||||
add("s_all", ss)
|
||||
add("m_all", ss / c)
|
||||
add("s_slow", s_slowdown)
|
||||
add("m_slow", s_slowdown / n_finished)
|
||||
|
||||
df[cluster] = pandas.DataFrame(table, columns=columns)
|
||||
df[cluster].to_csv(DIR + "/" + cluster + "_slowdown_table.csv")
|
||||
|
||||
print("Done csv")
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in a new issue