From 9f26534e80721af08244009ba749abb57f79c231 Mon Sep 17 00:00:00 2001 From: "Claudio Maggioni (maggicl)" Date: Fri, 19 Feb 2021 18:47:56 +0000 Subject: [PATCH] machine time waste --- .../machine_time_waste-checkpoint.ipynb | 895 ++++++++++++++++++ machine_time_waste/b_state_changes.json | 1 + machine_time_waste/d_state_changes.json | 1 + machine_time_waste/g_state_changes.json | 1 + machine_time_waste/h_state_changes.json | 1 + machine_time_waste/machine_time_waste.ipynb | 895 ++++++++++++++++++ machine_time_waste/machine_time_waste.py | 79 ++ machine_time_waste/machine_time_waste_rdd.py | 103 ++ 8 files changed, 1976 insertions(+) create mode 100644 machine_time_waste/.ipynb_checkpoints/machine_time_waste-checkpoint.ipynb create mode 100644 machine_time_waste/b_state_changes.json create mode 100644 machine_time_waste/d_state_changes.json create mode 100644 machine_time_waste/g_state_changes.json create mode 100644 machine_time_waste/h_state_changes.json create mode 100644 machine_time_waste/machine_time_waste.ipynb create mode 100755 machine_time_waste/machine_time_waste.py create mode 100755 machine_time_waste/machine_time_waste_rdd.py diff --git a/machine_time_waste/.ipynb_checkpoints/machine_time_waste-checkpoint.ipynb b/machine_time_waste/.ipynb_checkpoints/machine_time_waste-checkpoint.ipynb new file mode 100644 index 00000000..40fa4661 --- /dev/null +++ b/machine_time_waste/.ipynb_checkpoints/machine_time_waste-checkpoint.ipynb @@ -0,0 +1,895 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "proper-gnome", + "metadata": {}, + "source": [ + "# Temporal impact: machine time waste" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fantastic-harrison", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "import pandas\n", + "from IPython import display\n", + "import findspark\n", + "findspark.init()\n", + "import pyspark\n", + "import pyspark.sql\n", + "import 
sys\n", + "\n", + "from pyspark.sql.functions import col, lag, when, concat_ws, last, first\n", + "from pyspark.sql import Window\n", + "from pyspark.sql.types import LongType" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "failing-rebecca", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "cluster=\"b\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "transsexual-baptist", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR:root:Exception while sending command.\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1207, in send_command\n", + " raise Py4JNetworkError(\"Answer from Java side is empty\")\n", + "py4j.protocol.Py4JNetworkError: Answer from Java side is empty\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1033, in send_command\n", + " response = connection.send_command(command)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1212, in send_command\n", + " \"Error while receiving\", e, proto.ERROR_ON_RECEIVE)\n", + "py4j.protocol.Py4JNetworkError: Error while receiving\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + 
\"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File 
\"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, 
self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + 
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = 
value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + 
\"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File 
\"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, 
self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + 
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = 
value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + 
\"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n" + ] + }, + { + "ename": "Py4JError", + "evalue": "An error occurred while calling o26.json", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPy4JError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mgetOrCreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/home/claudio/google_2019/instance_events/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mcluster\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mcluster\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"_instance_events*.json.gz\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/opt/spark/python/pyspark/sql/readwriter.py\u001b[0m in \u001b[0;36mjson\u001b[0;34m(self, path, schema, primitivesAsString, prefersDecimal, allowComments, allowUnquotedFieldNames, allowSingleQuotes, allowNumericLeadingZero, allowBackslashEscapingAnyCharacter, mode, columnNameOfCorruptRecord, dateFormat, timestampFormat, multiLine, allowUnquotedControlChars, lineSep, samplingRatio, dropFieldIfAllNull, encoding, locale, pathGlobFilter, recursiveFileLookup)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
300\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_spark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPythonUtils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoSeq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 301\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRDD\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1303\u001b[0m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1304\u001b[0m return_value = get_return_value(\n\u001b[0;32m-> 1305\u001b[0;31m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m 1306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdeco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 129\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0mconverted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert_exception\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_exception\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 334\u001b[0m raise Py4JError(\n\u001b[1;32m 335\u001b[0m \u001b[0;34m\"An error occurred while calling {0}{1}{2}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 336\u001b[0;31m format(target_id, \".\", name))\n\u001b[0m\u001b[1;32m 
337\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0manswer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mPy4JError\u001b[0m: An error occurred while calling o26.json" + ] + } + ], + "source": [ + "spark = pyspark.sql.SparkSession.builder \\\n", + " .appName(\"machine_time_waste\") \\\n", + " .getOrCreate()\n", + "\n", + "df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "juvenile-absolute", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "df.printSchema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "lucky-western", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "normal-settlement", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# .filter(df.collection_type == 0) \\\n", + "df2 = df \\\n", + " .withColumn(\"time\", col(\"time\").cast(LongType())) \\\n", + " .withColumn(\"type\", col(\"type\").cast(LongType())) \\\n", + " .withColumn(\"type\", when(col(\"type\").isNull(), 0).otherwise(col(\"type\"))) \\\n", + " .withColumn(\"id\", concat_ws(\"-\", \"collection_id\", \"instance_index\")) \\\n", + " .where(col(\"time\").isNotNull()) \\\n", + " .where(col(\"type\").isNotNull()) \\\n", + " .where((col(\"instance_index\").isNotNull()) & (col(\"collection_id\").isNotNull())) \\\n", + " .select(\"machine_id\", \"id\", \"time\", \"type\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "typical-homeless", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "df2.show()\n", + "print(\"Total: \" + str(df.count()))\n", + "print(\"Filtered: \" + str(df2.count()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "collect-saying", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# my_window = Window.partitionBy(\"machine_id\", \"id\").orderBy(df2.time.asc())\n", + "\n", + "w2 = Window.partitionBy(\"id\").orderBy(df2.time.asc()).rowsBetween(Window.currentRow, Window.unboundedFollowing)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cooperative-appraisal", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# .withColumn(\"prev_time\", lag(df2.time).over(my_window)) \\\n", + "# .withColumn(\"prev_type\", lag(df2.type).over(my_window)) \\\n", + "\n", + "df3 = df2 \\\n", + " .withColumn(\"t3_time\", when((df2.type != 3), None).otherwise(df2.time)) \\\n", + " .withColumn(\"t45678_time\", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.time)) \\\n", + " .withColumn(\"t45678_type\", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.type)) \\\n", + " .withColumn(\"t01_time\", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.time)) \\\n", + " .withColumn(\"t01_type\", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.type)) \\\n", + " .withColumn(\"next_time\", when(df2.type == 3, first(col(\"t45678_time\"), ignorenulls=True).over(w2)) \\\n", + " .when((df2.type == 0) | (df2.type == 1), first(col(\"t3_time\"), ignorenulls=True).over(w2)) \\\n", + " .when((df2.type >= 4) | (df2.type <= 8), first(col(\"t01_time\"), ignorenulls=True).over(w2)) \\\n", + " .otherwise(None)) \\\n", + " .withColumn(\"next_type\", 
when(df2.type == 3, first(col(\"t45678_type\"), ignorenulls=True).over(w2)) \\\n", + " .when((df2.type == 0) | (df2.type == 1), 3) \\\n", + " .when((df2.type >= 4) | (df2.type <= 8), first(col(\"t01_type\"), ignorenulls=True).over(w2)) \\\n", + " .otherwise(None)) \\\n", + " .withColumn(\"last_term_type\", last(col(\"t45678_type\"), ignorenulls=True).over(w2)) \\\n", + " .withColumn(\"time_delta\", col(\"next_time\") - col(\"time\")) \\\n", + " .select(\"machine_id\", \"id\", \"time\", \"type\", \"last_term_type\", \"time_delta\", \"t01_time\", \"t01_type\", \"t3_time\", \"t45678_time\", \"t45678_type\", \"next_time\", \"next_type\") \\" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ideal-angle", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "df4 = df3.where(df3.next_type.isNotNull()).groupby(\"type\", \"next_type\", \"last_term_type\").sum(\"time_delta\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "working-difficulty", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# df3.orderBy(df3.machine_id, df3.time).show(n=100)\n", + "# df3.printSchema()\n", + "df4.show(n=1000000)\n", + "df4.write.csv(\"/home/claudio/google_2019/thesis_queries/machine_time_waste/\" + cluster + \"_state_change.csv\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/machine_time_waste/b_state_changes.json b/machine_time_waste/b_state_changes.json new file mode 100644 index 00000000..77860740 --- /dev/null +++ 
b/machine_time_waste/b_state_changes.json @@ -0,0 +1 @@ +{"filtered": 2825779668, "total": 2853934132, "data": [[null, {"0-2": 225896859946, "2-3": 7635911895140, "0-9": 474085053477, "9-1": 916972777804, "1-2": 1414164372648749, "3-3": 0, "0-1": 70355910236, "9-9": 1421261885361018, "9-2": 904098862822, "3-10": 592321370313147, "10-10": 4340494867869402, "1-1": 0, "2-9": 289768624002, "9-3": 833368656197, "2-0": 0, "3-0": 0, "1-9": 189323058, "2-10": 2220164949520}], [4, {"0-2": 68162861605468, "2-3": 19758375196601, "3-4": 15478516770658839, "4-0": 1360657109463, "3-7": 8827861924501422, "7-0": 114757513268972, "2-7": 326216199490451, "3-3": 0, "1-7": 64373914649, "0-1": 43098843428, "1-2": 192381556025327, "2-9": 427545587184, "9-3": 1143245866201, "3-10": 518497292772096, "10-4": 1127790492785484, "4-4": 120002369512, "0-9": 2602404086039, "9-2": 18881997548207, "2-4": 409267648953969, "0-0": 0, "3-0": 12092838348994662, "3-8": 169316764089, "8-3": 61414007, "7-3": 214793420685668, "10-10": 911526884634899, "7-4": 90440989359743, "10-0": 548613469728417, "10-7": 574605406649835, "3-5": 1168016622406400, "5-0": 54176, "9-4": 22899893629, "2-0": 439444457500891, "10-5": 106665874554993, "9-1": 104752308950, "9-7": 125334569538, "2-2": 0, "9-9": 14626819973994, "8-0": 64049681192, "8-7": 231667003945, "4-7": 4780483829, "1-9": 13656403350, "2-5": 27571759525793, "8-4": 383518034669, "2-8": 2645764798, "0-7": 595450354117, "2-10": 2591840914895, "7-5": 168590527204, "7-8": 0, "8-5": 1, "0-8": 0, "0-4": 697641242342, "10-8": 309983824, "7-10": 1057667389940, "4-3": 280483139, "7-2": 5346818664, "5-7": 105868737}], [5, {"0-1": 428137301, "1-2": 62930339851407, "2-3": 959136825460, "3-10": 140313976572632, "10-5": 615616764239438, "5-0": 535549, "0-2": 27049260344385, "3-5": 2286111686908689, "5-5": 265291812705, "3-7": 3319147429378778, "7-0": 5175009255922, "3-3": 1103458917880, "0-9": 104850122684, "9-2": 4123085456427, "2-9": 926246008255, "9-3": 1791509627530, 
"3-4": 1639443562257655, "4-0": 122552, "3-0": 1140942022439354, "2-7": 307214668051189, "9-1": 646360619071, "10-4": 102446885484293, "10-7": 169441151275490, "10-10": 65953000608444, "0-0": 0, "2-4": 259100082283638, "3-8": 54705870407, "8-3": 5897724541, "7-5": 2735541978770, "7-3": 37577024282023, "10-0": 182195425024130, "1-9": 28797699078, "8-0": 1, "7-4": 2666067114811, "9-7": 13797144816, "1-7": 38891478, "2-0": 36666533411727, "8-7": 71, "7-8": 0, "2-8": 964412256, "9-4": 2511713906, "2-10": 1413085478266, "7-10": 616004207413, "3-6": 409060919, "6-0": 1446260283, "10-6": 141965794, "2-5": 6526408472297, "0-5": 129905752964, "5-2": 4713451910, "8-4": 4, "4-7": 257579396, "0-7": 374953512, "0-10": 91411716163, "4-5": 0, "0-8": 0, "1-0": 0, "0-4": 130736967771, "2-2": 0, "5-7": 755, "7-2": 645990022}], [6, {"0-9": 494309455724821, "9-2": 13045628623484845, "2-3": 264735059274316, "3-10": 8046910684379451, "10-6": 38195436431095370, "6-6": 64768866437863, "0-2": 99895104610787, "2-9": 33454771897741, "9-3": 75583337179785, "3-0": 71594210644346503, "3-8": 1604945552768, "8-3": 56439753129, "3-6": 29685031822076998, "6-0": 2087603530928879, "6-3": 1422313872340493, "3-3": 0, "0-1": 82837696718, "1-2": 1567606646529729, "9-9": 4281612865974478, "2-8": 570064024178, "10-0": 9696550665528690, "2-6": 685079858154654, "0-0": 0, "10-10": 2069217222600707, "9-1": 27123037096973, "3-7": 26696093406969, "7-0": 2440565289140, "10-7": 421871126779730, "9-6": 25413120897562, "3-5": 35975631105696, "5-0": 870607, "10-5": 145739145098908, "10-4": 1821858544033683, "4-0": 1493729, "3-4": 243859978825172, "2-4": 418927588324, "2-7": 58621943891, "7-3": 75273041647637, "0-6": 3282331564780, "2-0": 1737572448583918, "0-10": 1649342715838, "0-3": 11213701035, "3-2": 0, "8-6": 6647471862957, "8-0": 47716849717591, "7-6": 14263069317710, "1-9": 1144445209073, "1-6": 61116928120, "7-4": 3110399580690, "4-7": 534770159, "9-7": 90425348685, "6-8": 0, "6-10": 36126148879580, "8-7": 
138, "9-4": 40396576178, "2-10": 52640935265983, "7-5": 47601469188, "2-2": 0, "0-4": 11711384236, "6-2": 638942723531, "1-0": 0, "8-4": 11, "10-8": 7109100750, "0-8": 0, "4-3": 45905571, "6-7": 2105, "5-6": 0, "7-10": 11184161416, "8-2": 0, "8-10": 2538801591482, "3-1": 0, "7-8": 0, "9-5": 290231447, "2-5": 193759570, "1-3": 0, "6-9": 97820859484, "1-10": 611202897}], [7, {"0-1": 10360338179859, "1-2": 9335863895407038, "2-9": 259540278835846, "9-3": 577610363314144, "3-5": 7641820911973245, "5-0": 3010979538453, "0-2": 37638179735517085, "2-3": 1016137116691462, "3-7": 140253331128549246, "7-7": 63593894684285, "3-10": 49562179675905095, "10-7": 299745753538135666, "1-7": 287963789300203974, "7-0": 25162824913612697, "10-5": 8558694642143517, "10-4": 26139575124411235, "4-0": 16908744215549, "1-9": 24100505157444, "9-2": 9358959415084227, "0-9": 2455016761777851, "2-4": 1152913661015238, "0-7": 1647867230537041466, "3-4": 35033307392945522, "2-7": 2141860131027723, "0-0": 0, "9-1": 46312318100604, "3-0": 73731187255925981, "2-10": 97170197078464, "3-8": 5673969131922, "8-3": 77076822482, "7-4": 195409486976397, "10-0": 18282560176348287, "7-3": 2659016170219374, "9-7": 17916077709303321, "9-4": 3500620623065, "10-10": 4698494299635380, "3-3": 869357405, "7-1": 1511277153536, "10-6": 40002838501315, "6-0": 14226651713995, "2-8": 539859523276, "8-7": 8112887103476, "9-9": 2696264794133017, "8-4": 838364468924, "3-6": 10298722392104, "7-5": 35378139642309, "8-0": 8558349260038, "7-9": 5782572671303, "1-1": 0, "0-5": 3904228114937, "2-0": 1949818942256528, "7-2": 965898448478, "10-8": 24076433453, "2-5": 56906590673793, "4-7": 1403451520794, "4-5": 4904913190477, "4-3": 1273633864505, "0-10": 13504787578619, "5-7": 1810741544139, "7-10": 59461539748847, "7-6": 89282926736, "0-8": 0, "8-5": 67271271356, "7-8": 0, "0-3": 915915479836, "3-1": 54160611165, "3-2": 6471365, "5-4": 41581624593, "0-4": 7138860275641, "2-6": 1838564277, "6-7": 25024, "8-6": 31, "8-10": 
370382416788, "5-3": 800075235, "1-10": 210146173706, "1-0": 0, "9-8": 28458749, "9-5": 539535851, "2-2": 0, "1-3": 0, "2-1": 0, "5-2": 9}], [8, {"0-9": 54079239150, "9-1": 111002160755, "1-7": 12494938306409, "7-1": 39720429047652, "1-8": 933450000000000, "8-8": 0, "7-0": 2495219266, "0-1": 860945070, "9-2": 47019072184, "2-3": 346064823, "3-10": 57996651038, "10-4": 53777973111, "4-0": 184, "0-2": 499, "10-7": 286650877743, "3-7": 19316801603, "3-4": 29198739963, "3-5": 38887716034, "5-0": 57, "3-0": 403741430044, "3-8": 531373117, "8-3": 27526193937, "2-9": 14829517146, "9-3": 10595226901, "3-3": 0, "7-3": 58048682890, "2-8": 350726164, "8-0": 72, "10-6": 39712759919, "6-0": 44, "3-6": 2375041578, "10-0": 191856738324, "6-3": 34303339580, "8-6": 9, "2-6": 25841409777, "9-6": 16425279250, "8-4": 5, "2-7": 30417881849, "7-4": 31838394104, "2-0": 0, "8-7": 5, "1-2": 16706395, "10-5": 8210236128, "6-8": 0}]]} \ No newline at end of file diff --git a/machine_time_waste/d_state_changes.json b/machine_time_waste/d_state_changes.json new file mode 100644 index 00000000..124742d1 --- /dev/null +++ b/machine_time_waste/d_state_changes.json @@ -0,0 +1 @@ +{"filtered": 4202246219, "total": 4244575882, "data": [[null, {"0-9": 32590149422737702, "9-9": 56359901751366520, "9-2": 1265609521726, "2-3": 23427918960909, "3-10": 495603263214110, "10-10": 2964461331561335, "3-3": 0, "0-2": 57580648583, "0-1": 13827272, "1-2": 9544270075088, "9-1": 181400970401, "2-9": 16960782562, "9-3": 95390550232, "1-1": 0, "2-0": 0, "3-0": 0, "1-9": 2242129700, "1-0": 0, "0-3": 4933925145454, "2-2": 0, "2-10": 430250408671}], [4, {"1-7": 424301670529, "7-0": 26776079350402, "0-1": 19681271109, "1-2": 30993641596772, "2-3": 15643826766000, "3-10": 1481543207069574, "10-7": 1654106496545777, "0-2": 73923645908735, "3-4": 18720044202961948, "4-4": 3512477157, "4-0": 255478885170, "2-4": 290044901342868, "3-7": 13931782597669522, "2-7": 306216387028716, "3-3": 4748859672, "2-9": 570030662154, "9-3": 
3344636985570, "10-4": 4064356752349671, "0-9": 9072091114772, "9-2": 28926502372081, "10-5": 253073807695717, "5-0": 77909, "0-0": 0, "3-5": 1309971852188246, "3-0": 4789226404161049, "3-8": 175337432787, "8-3": 1765498639, "7-3": 186150849509338, "10-0": 1675847171390269, "9-4": 64616977636, "9-7": 41094129806, "9-1": 490398173814, "7-4": 41123722988156, "10-10": 1616536253385216, "8-0": 23, "0-7": 1499541178994, "4-7": 12290919887, "7-5": 1751614479666, "8-7": 726, "9-9": 8051382241747, "1-9": 36663108524, "2-8": 12920227598, "2-0": 38815355626660, "2-2": 0, "2-5": 13313365215042, "8-4": 546, "7-10": 3737347489518, "10-6": 204958383512, "6-0": 168268158526, "4-1": 8374142633, "1-4": 85412791037, "7-1": 19736019472, "2-10": 4373529992144, "0-4": 1154317347668, "10-8": 1616726719, "0-3": 0, "3-2": 0, "7-2": 8916968857, "5-7": 119831085, "5-4": 70647647, "3-6": 68753491510, "4-3": 101365820293, "7-8": 0, "0-10": 221075873097, "0-5": 344659528050, "8-2": 1}], [5, {"0-9": 257847515577, "9-2": 40587170085057, "2-3": 805920254409, "3-5": 4308621093939302, "0-2": 1912341272332, "3-7": 4441338500154059, "7-0": 11650222521483, "5-0": 474786, "3-3": 402656603445, "0-1": 8540192615, "1-2": 19176031107930, "9-1": 430908485393, "5-5": 20721388545, "3-6": 1125958858997, "6-0": 6360476231566, "3-10": 141721256329414, "10-5": 450755233883099, "3-0": 1928874592224663, "3-8": 122448047839, "8-3": 844190146, "7-5": 16610611875641, "7-3": 116218757455682, "3-4": 1889064458062999, "4-0": 107396, "7-4": 1731042707612, "2-4": 61351885334609, "2-9": 52417367132, "9-3": 110061779012, "10-6": 268568343438, "10-7": 215364157611881, "10-0": 426284549576805, "2-7": 146716807056670, "8-7": 40296915839, "10-10": 342210658247735, "10-4": 78971190042187, "2-0": 22919957898591, "2-5": 39062554698824, "8-0": 210593003905, "1-9": 739060361769, "2-8": 9620241901, "7-10": 3613146193443, "0-0": 0, "0-5": 1739010978887, "0-7": 2418582704776, "7-8": 0, "4-5": 0, "5-7": 14365969, "1-0": 0, "4-7": 
72534230, "5-2": 36522089581, "7-2": 1101012393, "9-7": 5742271806, "8-4": 9, "4-3": 37445461529, "0-3": 0, "3-2": 1500558263, "1-7": 1309955, "8-5": 5, "10-8": 3666385, "2-10": 1741444210299, "9-4": 1780591329, "7-6": 80907501, "0-10": 98225067802, "5-4": 13623469, "5-3": 749227668}], [6, {"0-9": 225469131256944, "9-2": 8281043096213150, "2-3": 238180886839397, "3-6": 31673284193316229, "6-6": 14891999358072, "3-10": 6817723320993746, "10-6": 23233802030236726, "0-2": 88886401778938, "6-0": 1218554217361137, "10-10": 2574280710738969, "10-4": 305499111833752, "4-0": 1577302, "0-1": 3007048206, "1-2": 2130113696751439, "2-9": 21251198783900, "9-3": 34797239586712, "3-0": 54164363658777374, "3-8": 1604230747427, "8-3": 43903777036, "6-3": 1416353697770830, "3-3": 0, "9-1": 82497614655206, "3-4": 224822289899324, "2-6": 213492224793322, "10-0": 3809816846084771, "9-6": 44921739310914, "8-6": 346946880, "9-9": 1716966486480888, "8-0": 29714792614073, "0-0": 0, "3-7": 89877390559613, "7-0": 390220368655, "1-9": 5743584080385, "10-7": 36448649080607, "3-5": 175471587034288, "5-0": 280389, "7-6": 9669539444442, "1-0": 0, "10-5": 32791604260092, "2-4": 543400471810, "2-7": 339835139077, "7-4": 1057294398377, "7-3": 12112470895830, "9-4": 90516968125, "0-6": 1761432432410, "2-0": 1725308103343985, "7-5": 364670169700, "2-8": 151068857108, "6-10": 11941691814928, "9-7": 66993680498, "2-10": 22473378560772, "1-7": 985061, "10-8": 19692919917, "0-3": 0, "3-2": 0, "8-4": 44, "5-6": 0, "2-2": 0, "8-7": 122, "0-7": 3505485885, "4-7": 3294592400, "6-8": 0, "0-10": 276581307657, "5-7": 30712309, "6-2": 163004673910, "4-6": 0, "8-2": 4841496319, "4-5": 0, "0-5": 545904904, "6-9": 5707801894, "4-3": 508970094, "7-10": 52658909965, "0-8": 0, "8-5": 2, "8-10": 1425281712581, "2-5": 4099011}], [7, {"0-9": 1831904695103770, "9-2": 8326746677774456, "2-3": 1827431214746945, "3-10": 38076569318216319, "10-7": 187152440379309913, "7-7": 90189197906816, "1-7": 5946786266341670, "7-0": 
23876756663769052, "0-1": 58370634236034, "1-9": 117128486558470, "2-9": 495100707513169, "9-3": 611676470011117, "3-4": 33582214912482723, "4-0": 33719432684394, "0-2": 51539660986659955, "1-2": 13182632205182605, "3-7": 154127880491366977, "9-1": 77274957783295, "2-7": 2145268932549102, "0-7": 1846616379548500981, "10-6": 26161663200398, "6-0": 19010861322829, "7-1": 521886008084419, "2-4": 608543671565929, "3-5": 10259485650539681, "5-0": 477451306018, "9-7": 36416966000387360, "0-0": 0, "10-4": 13705444507942719, "3-6": 21680038514165, "3-0": 70432592658802717, "3-8": 5934980882663, "8-3": 58798263006, "7-4": 162598869769307, "10-5": 11508725740965884, "7-3": 2578869281392157, "10-0": 41070308205481867, "3-3": 168352861067, "10-10": 21805513962165091, "0-4": 9550272568061, "8-4": 875883894, "9-9": 4870819450119518, "7-5": 51941669521129, "9-4": 5574699427297, "4-7": 453315979415, "8-7": 1879605334854, "8-0": 2338137913687, "5-7": 8891952928529, "2-10": 123469556707805, "2-8": 1018819509907, "10-8": 100860546786, "7-10": 303236773905027, "1-0": 0, "1-1": 0, "2-0": 1226060047835538, "2-5": 73090205607890, "4-5": 21201982, "7-2": 998830166561, "7-8": 0, "8-10": 955591792991, "4-3": 1323108357474, "0-10": 9190121353830, "7-9": 367539793035, "2-6": 6528332541, "6-7": 45679, "5-2": 172549254466, "8-6": 44, "3-1": 4634204139, "0-3": 39117068772, "3-2": 22925083970, "8-5": 91753345553, "7-6": 27815318686, "0-5": 1179232600922, "2-2": 0, "5-4": 29922304071, "0-8": 0, "4-1": 479827070, "1-4": 40529670110, "9-8": 403213838, "10-1": 43840857571, "5-3": 49724555545, "2-1": 0, "1-3": 0, "8-2": 29867610986, "5-8": 0, "5-10": 1453207818, "8-9": 745790804}], [8, {"0-1": 475762317, "1-7": 407710084463217, "7-1": 660572413481375, "1-8": 1423689799085488, "8-8": 0, "7-0": 9847733857, "0-2": 5367, "2-3": 12435431, "3-0": 23533291597735, "3-8": 736961178, "8-3": 831283505, "3-7": 3876679813121, "7-3": 203721568748, "3-3": 0, "1-2": 162479428, "2-9": 660534921, "9-3": 1324586513, 
"3-10": 208497633631, "10-0": 1850076061897, "2-8": 2141611513, "8-0": 96540366, "3-6": 1569566230, "6-8": 0, "10-8": 105914394, "6-3": 302901173382, "10-6": 7294961755, "6-0": 35209981175, "3-5": 2310600339025, "5-0": 45, "8-7": 1, "0-9": 5048538458, "9-2": 20413617352, "10-5": 323090303711, "10-4": 164330906051, "4-0": 39, "3-4": 187690948397, "10-7": 491645438966, "2-6": 4080179109, "9-1": 130655510, "2-7": 2348369238, "9-6": 811626039, "7-4": 10061833223, "2-5": 197611009875}]]} \ No newline at end of file diff --git a/machine_time_waste/g_state_changes.json b/machine_time_waste/g_state_changes.json new file mode 100644 index 00000000..2a9c2d4f --- /dev/null +++ b/machine_time_waste/g_state_changes.json @@ -0,0 +1 @@ +{"filtered": 3045860218, "total": 3058287288, "data": [[null, {"0-2": 3928817853, "0-1": 525907, "1-2": 203083890868, "2-9": 11022003839, "9-3": 68685537735, "3-10": 111423178103584, "10-10": 1729554268203665, "0-9": 5715386, "9-2": 609601825127, "2-3": 49312298286697, "3-3": 0, "3-0": 0, "2-0": 0, "2-2": 0, "0-3": 13881349412889, "9-9": 773721078, "2-10": 1949290862129}], [4, {"0-2": 64489003701097, "2-3": 23982089270499, "3-4": 2187921658839158, "4-0": 1493115519497, "2-4": 475389258054358, "3-7": 4490299848189010, "7-0": 11824970094317, "0-0": 0, "3-10": 273235642268267, "10-4": 399618592960286, "3-5": 216842491388005, "5-0": 717844843, "3-0": 1373339746921876, "3-8": 101003902657, "8-3": 37512610, "7-3": 57259029048939, "2-9": 1108094196743, "9-3": 740077854911, "10-7": 795603533778986, "10-5": 55585815016335, "3-3": 0, "2-7": 405464217976705, "8-4": 690, "8-7": 739, "7-4": 14007559795697, "10-10": 93875455758563, "4-4": 1351897898, "10-0": 352702829709343, "0-9": 3574815721918, "9-2": 7374714564603, "2-2": 0, "9-4": 7737108359, "2-0": 41022832856688, "2-10": 27486729641494, "1-7": 9130337, "0-1": 650111, "1-2": 5766729864, "8-0": 1, "4-7": 8352998722, "2-8": 1271602175, "9-7": 3818118321, "7-5": 351551174624, "2-5": 10746780065219, "7-9": 
55250319, "9-9": 533336190080, "1-9": 178469832, "4-3": 4985120077, "0-7": 1693906688, "7-10": 511957676270, "10-8": 235176772, "7-2": 9616346758, "7-8": 0, "0-3": 853268325276, "0-4": 551526385083}], [5, {"0-1": 308238446, "1-2": 28229086541982, "2-3": 961584971406, "3-10": 83586807314651, "10-5": 245432332328491, "5-0": 614846028, "0-2": 255389284836446, "3-5": 1099009787109853, "5-5": 411306302867, "3-3": 44889884160, "2-9": 1176305629513, "9-3": 1955579069324, "3-7": 188084133091615, "7-0": 6548168992554, "0-9": 1000405637297, "9-2": 3245965826254, "10-7": 50925478942177, "3-0": 430827796112856, "3-8": 37987983848, "8-3": 294328200, "7-4": 427089562277, "4-0": 17573, "3-4": 136331480261697, "7-3": 21168532668606, "7-5": 4204713531135, "3-2": 252335314700, "10-0": 107786188213269, "10-4": 8478014236205, "10-10": 55912598441367, "2-10": 1426236211845, "5-2": 64738751720, "0-0": 0, "2-5": 8477806926721, "0-5": 333639074540, "0-7": 22035655813, "2-0": 61422182748730, "2-4": 4334658546777, "2-7": 10968850548446, "2-8": 617045315, "8-0": 3, "8-7": 61, "5-7": 52500862, "7-2": 3404833937, "8-4": 19, "0-3": 0, "3-1": 0, "5-3": 633615786, "7-10": 240862342044, "0-10": 53768425027, "1-5": 10506823596, "1-0": 0, "5-8": 0, "8-5": 3, "9-7": 72829766, "4-7": 15992500, "4-5": 0, "2-2": 0, "1-10": 4942780105, "7-8": 0, "7-1": 0, "9-9": 0}], [6, {"3-0": 37370031575464831, "0-2": 91040495861267, "2-0": 6799216756021671, "2-3": 83441252814195, "3-8": 1875946300597, "8-3": 8226137243, "3-6": 13275390418683022, "6-3": 1293534324750896, "3-3": 0, "3-10": 4324117196671663, "10-0": 5320759023340283, "2-8": 316010080988, "6-0": 986199907679934, "10-6": 15230450516943898, "2-9": 16724385510198, "9-3": 39752122225196, "0-9": 417029463702576, "9-2": 629498876230291, "2-6": 954603713788582, "0-0": 0, "10-1": 159824677924571, "1-3": 2272318260838, "3-1": 12073426728417, "1-6": 3467776358582, "6-2": 28787450256467, "0-1": 49686853074583, "1-2": 183586059116655, "10-10": 1895728817358295, 
"6-6": 27876716501246, "9-6": 12820939423858214, "9-1": 91047974122901, "3-4": 27806058256788, "4-0": 205118, "3-7": 602493589825, "7-3": 48491078034302, "1-1": 0, "6-8": 0, "8-10": 67372182124, "8-6": 1042085280, "1-9": 381323813287173, "1-0": 0, "2-4": 67382118841, "8-0": 10758265847, "9-9": 0, "2-10": 145612605218616, "6-10": 49958209400474, "7-6": 7933111714223, "10-7": 1437914416062, "7-0": 776562343617, "10-4": 132833340767042, "0-6": 30235343407877, "10-8": 50678131539, "3-5": 11610834712653, "5-0": 21286, "10-5": 68735918811488, "6-1": 67531203340, "2-2": 0, "0-3": 2135172250871, "3-2": 0, "0-10": 2119215955239, "7-5": 83350700663, "8-1": 0, "4-7": 461723185, "0-7": 8264389140, "2-7": 198873975, "9-7": 367121785, "7-4": 119819445458, "8-7": 17, "2-1": 0, "1-10": 44176953678, "7-10": 99603862156, "0-8": 0, "4-3": 588011, "1-8": 0, "8-4": 4, "7-1": 0, "2-5": 160402834354, "9-4": 616054193, "1-4": 1827315100, "6-7": 0, "0-4": 1832419721}], [7, {"0-7": 3744747332563720776, "7-7": 44978822023688, "0-1": 5220041070164, "1-2": 291555517115352, "2-3": 179672482897641, "3-10": 11317016309790697, "10-7": 50091377470667363, "0-2": 99074333719362968, "3-7": 101568270438804683, "7-0": 4311330088854912, "3-4": 39359807692801770, "4-0": 23396764639817, "0-0": 0, "1-7": 181895693230549, "2-9": 44165899865868, "9-3": 53458266415726, "2-7": 3846972930123630, "0-9": 13345488643867680, "9-1": 806117049540, "7-1": 34748670919651, "10-0": 13569735120343469, "7-3": 2820608321343442, "9-2": 3673623432387626, "1-9": 5821235670307, "10-4": 1686186135631280, "10-5": 2006394544880513, "5-0": 112453695043, "3-5": 11950891778847821, "3-0": 55581563825063297, "3-8": 5245507299485, "8-3": 7923298077, "7-4": 477246383660765, "3-3": 5379453685, "2-0": 3171906104378798, "2-4": 3258113551593669, "10-10": 4527332360985680, "8-7": 1100330707426, "9-7": 58134846545268397, "8-0": 44547431171, "9-9": 2273001573868974, "8-4": 15014645, "2-8": 188632277880, "2-10": 503444651357293, "7-5": 
44889646255397, "4-7": 528460121950, "10-6": 125256467484, "6-0": 199075200505, "2-5": 152248143295257, "10-8": 31594014890, "7-2": 4105624628529, "7-10": 81699998006415, "0-10": 2519008891088, "9-4": 47332783292, "0-5": 715995232730, "4-3": 109528005576, "8-6": 20, "6-7": 75896, "3-6": 83951812874, "8-5": 25, "7-9": 152167859348, "2-2": 0, "7-8": 0, "5-7": 15806792296, "0-3": 22601754627939, "3-2": 3988228159, "0-6": 698216877, "3-1": 0, "1-0": 0, "0-4": 1695141982516, "5-2": 738464830, "7-6": 399083903, "5-3": 10913538148, "5-4": 19605798, "2-6": 55808838, "10-1": 4457454340, "10-3": 356936615, "0-8": 0}], [8, {"0-9": 367991772280, "9-2": 437971622577, "2-3": 1382179062, "3-10": 125067619150, "10-7": 194946728524, "7-0": 2019613616, "0-2": 5900, "3-7": 1568086296627, "3-4": 417550671418, "4-0": 344, "3-5": 171053788900, "5-0": 84, "10-4": 156803713602, "3-0": 4670775410813, "3-8": 901346783, "8-3": 79, "3-3": 0, "0-1": 152919292, "1-7": 10947232314, "7-1": 2970641874, "1-8": 11201400000000, "8-8": 0, "2-4": 261383094, "8-4": 3, "8-7": 5, "2-7": 1934777094, "10-5": 64828891457, "10-0": 677002741473, "7-3": 89391620808, "2-9": 15122062156, "9-3": 22397837993, "7-4": 15016400917, "10-6": 796722336053, "6-0": 151, "3-6": 2572881979468, "6-3": 160952538274, "2-6": 1644356367, "0-3": 0, "3-2": 0, "2-10": 4852334440, "0-7": 251318175, "9-6": 789054974, "8-6": 3, "7-5": 3703200935, "2-0": 460309662620, "10-1": 6196626073, "1-6": 360245487, "6-2": 1709897670, "1-2": 1555668223, "1-3": 244132401, "2-8": 28422206, "8-0": 1, "9-1": 2632147415, "3-1": 732902529, "1-9": 966426564, "6-8": 0}]]} \ No newline at end of file diff --git a/machine_time_waste/h_state_changes.json b/machine_time_waste/h_state_changes.json new file mode 100644 index 00000000..9297cd3b --- /dev/null +++ b/machine_time_waste/h_state_changes.json @@ -0,0 +1 @@ +{"filtered": 2396413341, "total": 2419205700, "data": [[null, {"0-1": 62727698, "1-2": 557147535671, "2-3": 13121936038661, "3-10": 
342880932708398, "10-10": 2019936053649937, "0-2": 52291811, "0-9": 2026351956541194, "9-2": 733461379178, "3-3": 0, "2-0": 0, "3-2": 0, "1-9": 497199817996, "2-9": 13671790674, "9-3": 12246984747, "0-3": 17488915068441, "9-9": 158564424420, "3-0": 0, "10-0": 302041119267, "2-2": 0}], [4, {"0-2": 35374159096539, "2-3": 8145869017357, "3-4": 11281543949613373, "4-0": 451134, "3-3": 0, "3-0": 983472712246660, "3-8": 66118667090, "8-3": 9020, "3-7": 10645305035665303, "7-4": 4884965527112, "2-7": 1274394640914289, "7-0": 23895638484083, "0-0": 0, "0-9": 355434119429, "9-2": 3188497469825, "3-5": 481511644894098, "5-0": 22111, "0-3": 1990628727765, "3-2": 0, "2-4": 2260870742059736, "2-0": 110169679433057, "0-7": 1259636729228721, "3-10": 732027424258824, "10-7": 1538861796530491, "2-9": 3333272732, "9-3": 13913448350, "10-0": 1119192160354917, "7-3": 58109928153166, "10-4": 2479089752098986, "0-4": 2304296654184684, "4-4": 2101804209, "0-1": 13117159, "1-2": 27354548234, "10-10": 616263345704717, "0-8": 0, "7-2": 39518, "7-5": 242845957987, "2-10": 27635000398632, "0-10": 27538333299652, "2-2": 0, "1-7": 1786459827, "10-5": 88786275553387, "0-5": 27691702946836, "8-7": 1383137512299, "1-9": 8762056086, "2-5": 25670291863892, "8-4": 492692553076, "8-0": 1196590569491, "2-8": 66759454, "8-2": 1, "4-7": 919528479, "7-8": 0, "9-9": 305441119528, "5-2": 0, "5-3": 0, "4-5": 0, "5-7": 0, "5-4": 0, "4-2": 0, "4-3": 9866504, "8-10": 1266806659, "10-8": 100248233, "7-10": 2632949473}], [5, {"0-9": 644997409360, "9-2": 1991494702754, "2-3": 18010659845, "3-5": 1190653140605529, "5-0": 210854, "0-2": 2655233077718, "3-4": 1347134884341485, "4-0": 114813, "3-3": 0, "3-10": 74337913686335, "10-5": 144971824147636, "0-3": 5515688, "3-2": 0, "2-5": 87305576065833, "3-7": 405794667400694, "7-0": 614679552242, "3-0": 614302283114549, "3-8": 108516598080, "8-3": 14992809550, "7-3": 65691846382313, "2-0": 36410697910361, "2-9": 73093687149, "9-3": 100479176611, "10-7": 200678476746601, 
"10-0": 181114872876010, "10-10": 68859208448646, "0-1": 459131371, "1-2": 4336603766601, "5-5": 95714884639, "5-3": 0, "5-2": 643001714, "0-5": 87227930667138, "0-7": 34563051982402, "7-5": 10859774025655, "9-1": 475249121820, "0-0": 0, "8-7": 812961280496, "2-7": 36550429949457, "5-7": 0, "7-2": 1001243718, "2-4": 19134370404091, "10-4": 76513148903363, "7-4": 620624861310, "2-2": 0, "0-10": 4097710011034, "2-10": 4650609570462, "0-8": 0, "0-4": 23792368703632, "5-4": 0, "4-7": 311540435, "1-9": 1580303577, "8-4": 10, "4-5": 0, "4-3": 0, "8-2": 1, "7-8": 0, "8-0": 1542886200170, "4-2": 0, "2-8": 1380058, "8-5": 344832940815, "1-10": 39721656221, "8-10": 49302740793, "7-10": 70253755025, "3-1": 0, "1-8": 0, "1-7": 57038}], [6, {"0-9": 1663556472076272, "9-2": 12256029102293603, "2-3": 1664897690457109, "3-6": 33584037052220977, "6-0": 1096489762947879, "0-2": 3583679807472113, "3-10": 4763575479555242, "10-6": 35067269843709850, "2-9": 3591091746991, "9-3": 7338442438017, "9-9": 447584164843305, "0-1": 10297875103, "1-2": 181522813771768, "6-6": 32296346893636, "10-0": 4874755596353808, "6-3": 823692197963364, "0-0": 0, "3-0": 22158726416550009, "2-0": 5858422924260469, "2-7": 78655142489, "7-0": 2015275450119, "3-4": 70903713566446, "4-0": 167540, "2-4": 127203446498408, "9-1": 7075035712659, "3-3": 0, "2-1": 0, "1-6": 1385445474320, "3-5": 261066141288745, "5-0": 137063, "10-5": 249720166537960, "2-6": 5703794852912365, "3-7": 11357653018369, "7-3": 95025107737190, "3-8": 712822006795, "8-3": 96632906671, "1-9": 5676252243270, "10-10": 2120821302493930, "0-3": 1700209747891908, "3-2": 21181694362, "10-4": 173467739018720, "0-4": 140290961045312, "2-10": 372639198057273, "10-7": 51849611259538, "0-10": 390521152935285, "6-8": 0, "0-6": 5765390258751631, "7-6": 18214343440683, "1-3": 0, "9-4": 125222820, "1-7": 0, "7-2": 92375773, "8-0": 50198699069926, "7-4": 773702551052, "3-1": 0, "1-0": 0, "8-2": 1, "8-6": 9735889534253, "2-8": 3894277669, "7-5": 
1087144954779, "0-8": 0, "1-10": 4357269619900, "6-2": 64606825286, "8-4": 56, "2-2": 0, "7-8": 0, "8-7": 99, "7-1": 0, "4-7": 1055077473, "0-5": 394945109506, "0-7": 82020939901, "9-6": 317182224, "6-10": 3118622389594, "4-8": 0, "2-5": 320532115090, "4-3": 38534021, "8-10": 2144113634259, "10-8": 1653880931, "9-7": 147274970, "1-8": 0, "8-1": 0}], [7, {"0-2": 111441075350151432, "2-3": 95522511583827, "3-4": 38413776276409027, "4-0": 3418708099971, "3-7": 196552026658903541, "7-0": 5379679980894207, "0-7": 3735720024707141803, "7-7": 28936799819911, "0-3": 3100547867357, "3-2": 938822913107, "2-7": 10577921353631597, "3-0": 53995795934708606, "3-8": 4347227072433, "8-3": 126889711158, "7-3": 3717057353679258, "3-3": 5975839270, "3-10": 17506121923575960, "10-7": 97666809888347721, "0-9": 36142874359688505, "9-7": 171189167404529923, "10-5": 2602967291672415, "5-0": 1005609659608, "3-5": 18490751199024072, "0-0": 0, "1-7": 31038865875207, "0-1": 249960340489, "1-2": 182168896173268, "2-4": 4171577204547049, "9-2": 18463548344044979, "9-9": 6479028352729619, "2-0": 5653657200027014, "0-10": 1000777577265343, "10-0": 23197529552797464, "7-5": 25691016606895, "2-9": 12253806911294, "9-3": 33492723850012, "7-4": 93405645806228, "1-9": 11069340802431, "2-5": 589901339220187, "10-4": 3741666007182503, "0-4": 4234707684169560, "10-10": 8861825525358968, "2-10": 984521905363456, "8-2": 151045208168, "7-2": 4213342210583, "8-4": 11949699389485, "2-8": 12843277236, "7-10": 8892013957183, "10-6": 12805806632249, "6-0": 2772901883052, "8-7": 71333843433629, "0-5": 600945765976543, "2-1": 0, "1-3": 0, "8-0": 58336810055294, "0-8": 0, "8-10": 9626094658327, "7-8": 0, "4-7": 206731019477, "5-2": 4013591246, "5-3": 86403, "3-1": 0, "3-6": 3956661819993, "10-8": 6280594541, "9-4": 6471319272, "1-0": 134389973029, "4-3": 12609932061, "9-1": 34444193596, "1-10": 4061617342950, "8-6": 8, "6-7": 1129, "7-6": 13165329665, "5-7": 1693350702, "2-6": 4318220, "8-5": 1865670975854, "7-1": 
955967250, "4-2": 14, "4-8": 0, "8-1": 0, "4-10": 18219648626, "3-9": 76512093721, "4-5": 239900548128, "2-2": 0, "1-4": 29936277363, "7-9": 149487104173, "1-8": 0}], [8, {"1-8": 140668292572, "8-8": 0, "0-3": 0, "3-6": 504671664, "6-2": 0, "2-8": 0, "8-3": 44, "3-2": 0, "2-3": 18350955, "3-0": 11545235217001, "0-2": 6143, "3-8": 319053307, "6-3": 69151092826, "3-3": 0, "2-7": 805194188175, "7-0": 180, "3-7": 1399856426329, "7-3": 147765106078, "2-9": 300087, "9-3": 3670920, "3-4": 648511939984, "4-0": 58, "3-10": 2375173008387, "10-7": 1659831440741, "10-0": 3770682646239, "10-10": 0, "0-7": 117342220748, "8-2": 0, "2-2": 0, "0-9": 9436, "9-2": 12218924, "0-1": 7119, "1-2": 65823130, "2-0": 1478521186473, "3-5": 577142741376, "5-0": 21, "0-5": 24647518271, "7-5": 10151019743, "0-8": 0, "2-10": 133380380739, "1-7": 22566695, "7-1": 4720393044, "0-10": 3477028031, "10-5": 129603925296, "7-2": 0, "2-5": 124779767537, "7-8": 0, "2-6": 0, "6-0": 2}]]} \ No newline at end of file diff --git a/machine_time_waste/machine_time_waste.ipynb b/machine_time_waste/machine_time_waste.ipynb new file mode 100644 index 00000000..40fa4661 --- /dev/null +++ b/machine_time_waste/machine_time_waste.ipynb @@ -0,0 +1,895 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "proper-gnome", + "metadata": {}, + "source": [ + "# Temporal impact: machine time waste" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fantastic-harrison", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "import pandas\n", + "from IPython import display\n", + "import findspark\n", + "findspark.init()\n", + "import pyspark\n", + "import pyspark.sql\n", + "import sys\n", + "\n", + "from pyspark.sql.functions import col, lag, when, concat_ws, last, first\n", + "from pyspark.sql import Window\n", + "from pyspark.sql.types import LongType" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": 
"failing-rebecca", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "cluster=\"b\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "transsexual-baptist", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR:root:Exception while sending command.\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1207, in send_command\n", + " raise Py4JNetworkError(\"Answer from Java side is empty\")\n", + "py4j.protocol.Py4JNetworkError: Answer from Java side is empty\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1033, in send_command\n", + " response = connection.send_command(command)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1212, in send_command\n", + " \"Error while receiving\", e, proto.ERROR_ON_RECEIVE)\n", + "py4j.protocol.Py4JNetworkError: Error while receiving\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + 
\"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File 
\"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, 
self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + 
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = 
value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + 
\"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File 
\"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, 
self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + 
" File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = 
value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above 
exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n", + "ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:36135)\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 3343, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"\", line 5, in \n", + " df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")\n", + " File \"/opt/spark/python/pyspark/sql/readwriter.py\", line 300, in json\n", + " return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))\n", + " File 
\"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n", + " answer, self.gateway_client, self.target_id, self.name)\n", + " File \"/opt/spark/python/pyspark/sql/utils.py\", line 128, in deco\n", + " return f(*a, **kw)\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 336, in get_return_value\n", + " format(target_id, \".\", name))\n", + "py4j.protocol.Py4JError: An error occurred while calling o26.json\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/claudio/python-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2044, in showtraceback\n", + " stb = value._render_traceback_()\n", + "AttributeError: 'Py4JError' object has no attribute '_render_traceback_'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 977, in _get_connection\n", + " connection = self.deque.pop()\n", + "IndexError: pop from an empty deque\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1115, in start\n", + " self.socket.connect((self.address, self.port))\n", + "ConnectionRefusedError: [Errno 111] Connection refused\n" + ] + }, + { + "ename": "Py4JError", + "evalue": "An error occurred while calling o26.json", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPy4JError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m 
\u001b[0;34m.\u001b[0m\u001b[0mgetOrCreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mspark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/home/claudio/google_2019/instance_events/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mcluster\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mcluster\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"_instance_events*.json.gz\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/opt/spark/python/pyspark/sql/readwriter.py\u001b[0m in \u001b[0;36mjson\u001b[0;34m(self, path, schema, primitivesAsString, prefersDecimal, allowComments, allowUnquotedFieldNames, allowSingleQuotes, allowNumericLeadingZero, allowBackslashEscapingAnyCharacter, mode, columnNameOfCorruptRecord, dateFormat, timestampFormat, multiLine, allowUnquotedControlChars, lineSep, samplingRatio, dropFieldIfAllNull, encoding, locale, pathGlobFilter, recursiveFileLookup)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 300\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_spark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPythonUtils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoSeq\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 301\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRDD\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1303\u001b[0m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1304\u001b[0m return_value = get_return_value(\n\u001b[0;32m-> 1305\u001b[0;31m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m 1306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdeco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 129\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0mconverted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert_exception\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_exception\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 334\u001b[0m raise Py4JError(\n\u001b[1;32m 335\u001b[0m \u001b[0;34m\"An error occurred while calling {0}{1}{2}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 336\u001b[0;31m format(target_id, \".\", name))\n\u001b[0m\u001b[1;32m 
337\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0manswer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mPy4JError\u001b[0m: An error occurred while calling o26.json" + ] + } + ], + "source": [ + "spark = pyspark.sql.SparkSession.builder \\\n", + " .appName(\"machine_time_waste\") \\\n", + " .getOrCreate()\n", + "\n", + "df = spark.read.json(\"/home/claudio/google_2019/instance_events/\" + cluster + \"/\" + cluster + \"_instance_events*.json.gz\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "juvenile-absolute", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "df.printSchema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "lucky-western", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "normal-settlement", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# .filter(df.collection_type == 0) \\\n", + "df2 = df \\\n", + " .withColumn(\"time\", col(\"time\").cast(LongType())) \\\n", + " .withColumn(\"type\", col(\"type\").cast(LongType())) \\\n", + " .withColumn(\"type\", when(col(\"type\").isNull(), 0).otherwise(col(\"type\"))) \\\n", + " .withColumn(\"id\", concat_ws(\"-\", \"collection_id\", \"instance_index\")) \\\n", + " .where(col(\"time\").isNotNull()) \\\n", + " .where(col(\"type\").isNotNull()) \\\n", + " .where((col(\"instance_index\").isNotNull()) & (col(\"collection_id\").isNotNull())) \\\n", + " .select(\"machine_id\", \"id\", \"time\", \"type\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "typical-homeless", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "df2.show()\n", + "print(\"Total: \" + str(df.count()))\n", + "print(\"Filtered: \" + str(df2.count()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "collect-saying", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# my_window = Window.partitionBy(\"machine_id\", \"id\").orderBy(df2.time.asc())\n", + "\n", + "w2 = Window.partitionBy(\"id\").orderBy(df2.time.asc()).rowsBetween(Window.currentRow, Window.unboundedFollowing)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cooperative-appraisal", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# .withColumn(\"prev_time\", lag(df2.time).over(my_window)) \\\n", + "# .withColumn(\"prev_type\", lag(df2.type).over(my_window)) \\\n", + "\n", + "df3 = df2 \\\n", + " .withColumn(\"t3_time\", when((df2.type != 3), None).otherwise(df2.time)) \\\n", + " .withColumn(\"t45678_time\", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.time)) \\\n", + " .withColumn(\"t45678_type\", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.type)) \\\n", + " .withColumn(\"t01_time\", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.time)) \\\n", + " .withColumn(\"t01_type\", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.type)) \\\n", + " .withColumn(\"next_time\", when(df2.type == 3, first(col(\"t45678_time\"), ignorenulls=True).over(w2)) \\\n", + " .when((df2.type == 0) | (df2.type == 1), first(col(\"t3_time\"), ignorenulls=True).over(w2)) \\\n", + " .when((df2.type >= 4) | (df2.type <= 8), first(col(\"t01_time\"), ignorenulls=True).over(w2)) \\\n", + " .otherwise(None)) \\\n", + " .withColumn(\"next_type\", 
when(df2.type == 3, first(col(\"t45678_type\"), ignorenulls=True).over(w2)) \\\n", + " .when((df2.type == 0) | (df2.type == 1), 3) \\\n", + " .when((df2.type >= 4) | (df2.type <= 8), first(col(\"t01_type\"), ignorenulls=True).over(w2)) \\\n", + " .otherwise(None)) \\\n", + " .withColumn(\"last_term_type\", last(col(\"t45678_type\"), ignorenulls=True).over(w2)) \\\n", + " .withColumn(\"time_delta\", col(\"next_time\") - col(\"time\")) \\\n", + " .select(\"machine_id\", \"id\", \"time\", \"type\", \"last_term_type\", \"time_delta\", \"t01_time\", \"t01_type\", \"t3_time\", \"t45678_time\", \"t45678_type\", \"next_time\", \"next_type\") \\" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ideal-angle", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "df4 = df3.where(df3.next_type.isNotNull()).groupby(\"type\", \"next_type\", \"last_term_type\").sum(\"time_delta\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "working-difficulty", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# df3.orderBy(df3.machine_id, df3.time).show(n=100)\n", + "# df3.printSchema()\n", + "df4.show(n=1000000)\n", + "df4.write.csv(\"/home/claudio/google_2019/thesis_queries/machine_time_waste/\" + cluster + \"_state_change.csv\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/machine_time_waste/machine_time_waste.py b/machine_time_waste/machine_time_waste.py new file mode 100755 index 00000000..b8bf92e4 --- /dev/null 
+++ b/machine_time_waste/machine_time_waste.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+# # Temporal impact: machine time waste
+#
+# For each instance of one cluster, pairs every event with the next event of the "opposite" kind (see df3 below)
+# and sums the elapsed time per (type, next_type, last_term_type) group. The result is printed and written as CSV.
+
+import pandas
+from IPython import display
+import findspark
+findspark.init()
+import pyspark
+import pyspark.sql
+import sys
+
+from pyspark.sql.functions import col, lag, when, concat_ws, last, first
+from pyspark.sql import Window
+from pyspark.sql.types import LongType
+
+# Name of the cluster whose instance_events files are read.
+cluster="b"
+
+spark = pyspark.sql.SparkSession.builder \
+  .appName("machine_time_waste") \
+  .config("spark.local.dir", "/run/tmpfiles.d/spark") \
+  .config("spark.driver.memory", "124g") \
+  .getOrCreate()
+
+df = spark.read.json("/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_instance_events*.json.gz")
+
+df.printSchema()
+
+df.show()
+
+# Cast time and type to integers, replace a null type with 0, build a per-instance id from collection_id and
+# instance_index, and drop rows where time, collection_id or instance_index is null.
+# .filter(df.collection_type == 0) \
+df2 = df \
+  .withColumn("time", col("time").cast(LongType())) \
+  .withColumn("type", col("type").cast(LongType())) \
+  .withColumn("type", when(col("type").isNull(), 0).otherwise(col("type"))) \
+  .withColumn("id", concat_ws("-", "collection_id", "instance_index")) \
+  .where(col("time").isNotNull()) \
+  .where(col("type").isNotNull()) \
+  .where((col("instance_index").isNotNull()) & (col("collection_id").isNotNull())) \
+  .select("time", "type", "id")
+
+df2.show()
+print("Total: " + str(df.count()))
+print("Filtered: " + str(df2.count()))
+
+# my_window = Window.partitionBy("machine_id", "id").orderBy(df2.time.asc())
+
+# Per-instance frame, ordered by time, spanning the current event and every later one: first(..., True) over it is
+# the first non-null value at or after the current event, last(..., True) the final non-null value.
+w2 = Window.partitionBy("id").orderBy(df2.time.asc()).rowsBetween(Window.currentRow, Window.unboundedFollowing)
+
+# For each event, look ahead (within the same instance) for its successor event:
+#   type 3        -> next event with type in 4..8
+#   type 0 or 1   -> next event with type 3
+#   type in 4..8  -> next event with type 0 or 1
+# Any other type gets no successor (next_type stays null) and is dropped when df4 is built; the 4..8 test must
+# use & because "(type >= 4) | (type <= 8)" is true for every type.
+# last_term_type is the last 4..8 type found from the current event onwards, time_delta the time to the successor.
+# .withColumn("prev_time", lag(df2.time).over(my_window)) \
+# .withColumn("prev_type", lag(df2.type).over(my_window)) \
+df3 = df2 \
+  .withColumn("t3_time", when((df2.type != 3), None).otherwise(df2.time)) \
+  .withColumn("t45678_time", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.time)) \
+  .withColumn("t45678_type", when((df2.type < 4) | (df2.type > 8), None).otherwise(df2.type)) \
+  .withColumn("t01_time", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.time)) \
+  .withColumn("t01_type", when((df2.type != 0) & (df2.type != 1), None).otherwise(df2.type)) \
+  .withColumn("next_time", when(df2.type == 3, first(col("t45678_time"), True).over(w2)) \
+    .when((df2.type == 0) | (df2.type == 1), first(col("t3_time"), True).over(w2)) \
+    .when((df2.type >= 4) & (df2.type <= 8), first(col("t01_time"), True).over(w2)) \
+    .otherwise(None)) \
+  .withColumn("next_type", when(df2.type == 3, first(col("t45678_type"), True).over(w2)) \
+    .when((df2.type == 0) | (df2.type == 1), 3) \
+    .when((df2.type >= 4) & (df2.type <= 8), first(col("t01_type"), True).over(w2)) \
+    .otherwise(None)) \
+  .withColumn("last_term_type", last(col("t45678_type"), True).over(w2)) \
+  .withColumn("time_delta", col("next_time") - col("time")) \
+  .select("id", "time", "type", "last_term_type", "time_delta", "t01_time", \
+    "t01_type", "t3_time", "t45678_time", "t45678_type", "next_time", "next_type")
+
+# Total time_delta per (event type, successor type, last 4..8 type) over all events that have a successor.
+df4 = df3.where(df3.next_type.isNotNull()).groupby("type", "next_type", "last_term_type").sum("time_delta")
+
+# df3.orderBy(df3.machine_id, df3.time).show(n=100)
+# df3.printSchema()
+df4.show(n=1000000)
+df4.write.csv("/home/claudio/google_2019/thesis_queries/machine_time_waste/" + cluster + "_state_change.csv")
+
+# vim: set ts=2 sw=2 et tw=120:
diff --git a/machine_time_waste/machine_time_waste_rdd.py b/machine_time_waste/machine_time_waste_rdd.py
new file mode 100755
index 00000000..33f66829
--- /dev/null
+++ b/machine_time_waste/machine_time_waste_rdd.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+# # Temporal impact: machine time waste
+#
+# RDD variant: for each instance of the cluster given on the command line, computes the time spent in every
+# event-type transition, then sums those times per final 4..8 event type and dumps the result as JSON.
+
+import json
+import pandas
+from IPython import display
+import findspark
+findspark.init()
+import pyspark
+import pyspark.sql
+import sys
+
+from pyspark.sql.functions import col, lag, when, concat_ws, last, first
+from pyspark.sql import Window
+from pyspark.sql.types import LongType
+
+# Exactly one argument is expected: the one-character cluster name.
+if len(sys.argv) != 2 or len(sys.argv[1]) != 1:
+  print("usage: " + sys.argv[0] + " {cluster}", file=sys.stderr)
+  sys.exit(1)
+
+cluster=sys.argv[1]
+
+spark = pyspark.sql.SparkSession.builder \
+  .appName("machine_time_waste") \
+  .config("spark.local.dir", "/tmp/ramdisk/spark") \
+  .config("spark.driver.memory", "124g") \
+  .getOrCreate()
+
+df = spark.read.json("/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_instance_events*.json.gz")
+# df = spark.read.json("/home/claudio/google_2019/instance_events/" + cluster + "/" + cluster + "_test.json")
+
+df.printSchema()
+
+df.show()
+
+# Cast time and type to integers, replace a null type with 0, build a per-instance id from collection_id and
+# instance_index, and drop rows where time, collection_id or instance_index is null.
+# .filter(df.collection_type == 0) \
+df2 = df \
+  .withColumn("time", col("time").cast(LongType())) \
+  .withColumn("type", col("type").cast(LongType())) \
+  .withColumn("type", when(col("type").isNull(), 0).otherwise(col("type"))) \
+  .withColumn("id", concat_ws("-", "collection_id", "instance_index")) \
+  .where(col("time").isNotNull()) \
+  .where(col("type").isNotNull()) \
+  .where((col("instance_index").isNotNull()) & (col("collection_id").isNotNull())) \
+  .select("time", "type", "id")
+
+df2.show()
+
+total = df.count()
+filtered = df2.count()
+
+print("Total: " + str(total))
+print("Filtered: " + str(filtered))
+
+r = df2.rdd
+
+def for_each_task(ts):
+  """Compute the time spent in each event-type transition of one instance.
+
+  ts: iterable of rows exposing `time` and `type`; it is sorted by time here.
+  Consecutive events of the same type are collapsed into the first one, except that a repeated type on the very
+  last event is kept as a "loop" transition (e.g. "3-3").
+
+  Returns {"last_term": <last type in 4..8 seen, or None>, "tr": {"<prev type>-<type>": <total time>}}.
+  """
+  ts = sorted(ts, key=lambda x: x.time)
+  last_index = len(ts) - 1
+  last_term = None
+  prev = None
+  tr = {}
+
+  for i, t in enumerate(ts):
+    # remove useless transitions, but keep "loops" if last
+    if prev is not None and t.type == prev.type and i != last_index:
+      continue
+    if 4 <= t.type <= 8:
+      last_term = t.type
+    if prev is not None:
+      key = str(prev.type) + "-" + str(t.type)
+      # A transition can occur several times within one instance: add up the times instead of keeping only the last.
+      tr[key] = tr.get(key, 0) + (t.time - prev.time)
+    prev = t
+  return {"last_term": last_term, 'tr': tr}
+
+def sum_values(ds):
+  """Sum the transition times of a group of `for_each_task` results.
+
+  ds: iterable of dicts whose "tr" entry maps a transition key to a time.
+  Returns one dict mapping every transition key to its total over all of `ds`.
+  """
+  dsum = {}
+  for dt in ds:
+    for key, delta in dt["tr"].items():
+      dsum[key] = dsum.get(key, 0) + delta
+  return dsum
+
+# Group events by instance, reduce each instance to its transitions, then group those by last_term and sum them.
+r2 = r \
+  .groupBy(lambda x: x.id) \
+  .mapValues(for_each_task) \
+  .map(lambda x: x[1]) \
+  .groupBy(lambda x: x["last_term"]) \
+  .mapValues(sum_values) \
+  .collect()
+
+with open(cluster + "_state_changes.json", "w") as out:
+  json.dump({"filtered": filtered, "total": total, "data": r2}, out)
+
+# .withColumn("prev_time", lag(df2.time).over(my_window)) \
+# .withColumn("prev_type", lag(df2.type).over(my_window)) \
+
+# vim: set ts=2 sw=2 et tw=120: