From e3513f36331b80fa38c77b383ed336e2cc2d704a Mon Sep 17 00:00:00 2001 From: Claudio Maggioni Date: Wed, 31 May 2023 18:26:51 +0200 Subject: [PATCH] hw3: submitted --- Assignment3/MaggioniClaudio_Assignment3.ipynb | 520 ++++++++++++++---- 1 file changed, 402 insertions(+), 118 deletions(-) diff --git a/Assignment3/MaggioniClaudio_Assignment3.ipynb b/Assignment3/MaggioniClaudio_Assignment3.ipynb index 6e05ba5..0200a71 100644 --- a/Assignment3/MaggioniClaudio_Assignment3.ipynb +++ b/Assignment3/MaggioniClaudio_Assignment3.ipynb @@ -24,10 +24,318 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 28, "id": "9f434eb8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " Loading BokehJS ...\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " const force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + "const JS_MIME_TYPE = 'application/javascript';\n", + " const HTML_MIME_TYPE = 'text/html';\n", + " const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " const CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " const script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " const cell = handle.cell;\n", + "\n", + " const id = cell.output_area._bokeh_element_id;\n", + " const server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd_clean, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " const id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd_destroy);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " const output_area = handle.output_area;\n", + " const output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " const bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " const script_attrs = bk_div.children[0].attributes;\n", + " for (let i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " const toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " const events = require('base/js/events');\n", + " const OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " const NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " const el = document.getElementById(\"db708107-1acc-4d1f-9761-d8a8b49e0317\");\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error(url) {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (let i = 0; i < css_urls.length; i++) {\n", + " const url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error.bind(null, url);\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (let i = 0; i < js_urls.length; i++) {\n", + " const url = js_urls[i];\n", + " const element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error.bind(null, url);\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.1.1.min.js\"];\n", + " const css_urls = [];\n", + "\n", + " const inline_js = [ function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + "function(Bokeh) {\n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " if (root.Bokeh !== undefined || force === true) {\n", + " for (let i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + "if (force === true) {\n", + " display_loaded();\n", + " }} else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " const cell = $(document.getElementById(\"db708107-1acc-4d1f-9761-d8a8b49e0317\")).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(\"db708107-1acc-4d1f-9761-d8a8b49e0317\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.1.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.1.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\nif (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"db708107-1acc-4d1f-9761-d8a8b49e0317\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Import the basic spark library\n", "from pyspark.sql import SparkSession\n", @@ -44,7 +352,14 @@ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", - "import matplotlib as mpl" + "import matplotlib as mpl\n", + "from bokeh.io import output_notebook\n", + "import sys\n", + "\n", + "output_notebook()\n", + "\n", + "# required libraries and versions, uncomment to install\n", + "#!{sys.executable} -m pip install jupyterlab==4.0.1 pyspark==3.4.0 shapely==2.0.1 bokeh==3.1.1 seaborn==0.12.2 shrek==0.0.2" ] }, { @@ -59,7 +374,7 @@ "text": [ "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", - "23/05/31 16:53:08 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" + "23/05/31 17:51:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" ] } ], @@ -423,7 +738,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[Stage 20:=================================================> (184 + 1) / 200]\r" + "[Stage 20:==================================================> (187 + 1) / 200]\r" ] }, { @@ -530,10 +845,58 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 25, "id": "2cba45e6-7ad1-4044-b9f0-81943c1cf547", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " const docs_json = {\"053aed4f-f39a-49e7-bb80-2614ad894c1b\":{\"version\":\"3.1.1\",\"title\":\"Bokeh Application\",\"defs\":[],\"roots\":[{\"type\":\"object\",\"name\":\"Figure\",\"id\":\"p1198\",\"attributes\":{\"width\":900,\"height\":900,\"x_range\":{\"type\":\"object\",\"name\":\"FactorRange\",\"id\":\"p1208\",\"attributes\":{\"factors\":[\"Bronx\",\"Brooklyn\",\"Manhattan\",\"Queens\",\"Staten Island\",\"Unknown\"]}},\"y_range\":{\"type\":\"object\",\"name\":\"FactorRange\",\"id\":\"p1210\",\"attributes\":{\"factors\":[\"Unknown\",\"Staten Island\",\"Queens\",\"Manhattan\",\"Brooklyn\",\"Bronx\"]}},\"x_scale\":{\"type\":\"object\",\"name\":\"CategoricalScale\",\"id\":\"p1212\"},\"y_scale\":{\"type\":\"object\",\"name\":\"CategoricalScale\",\"id\":\"p1214\"},\"title\":{\"type\":\"object\",\"name\":\"Title\",\"id\":\"p1201\",\"attributes\":{\"text\":\"Mean NYC Taxi fares on 2013-01-01\"}},\"renderers\":[{\"type\":\"object\",\"name\":\"GlyphRenderer\",\"id\":\"p1253\",\"attributes\":{\"data_source\":{\"type\":\"object\",\"name\":\"ColumnDataSource\",\"id\":\"p1244\",\"attributes\":{\"selected\":{\"type\":\"object\",\"name\":\"Selection\",\"id\":\"p1245\",\"attributes\":{\"indices\":[],\"line_indices\":[]}},\"selection_policy\":{\"type\":\"object\",\"name\":\"UnionRenderers\",\"id\":\"p1246\"},\"data\":{\"type\":\"map\",\"entries\":[[\"index\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"AAAAAAEAAAACAAAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAoAAAALAAAADAAAAA0AAAAOAAAADwAAABAAAAARAAAAEgAAABMAAAAUAAAAFQAAABYAAAAXAAAAGAAAABkAAAAaAAAAGwAAABwAAAAdAAAAHgAAAB8AAAAgAAAA\"},\"shape\":[33],\"dtype\":\"int32\",\"order\":\"little\"}],[\"pickup_borough\",{\"type\":\"ndarray\",\"array\":[\"Brooklyn\",\"Manhattan\",\"Brooklyn\",\"Queens\",\"Queens\",\"Unknown\",\"Bronx\",\"Bronx\",\"Unknown\",\"Brooklyn\",\"Queens\",\"Manhattan\",\"Manhattan\",\"Manhattan\",\"Queens\",\"Bronx\",\"Bronx\",\"Unknown\",\"Queens\",\"Manhattan\",\"Unknown\",\"Brooklyn\",\"Brooklyn\",\"Manhattan\",\"Bronx\",\"Unknown\",\"Staten Island\",\"Queens\",\"Brooklyn\",\"Staten Island\",\"Staten Island\",\"Staten Island\",\"Unknown\"],\"shape\":[33],\"dtype\":\"object\",\"order\":\"little\"}],[\"dropoff_borough\",{\"type\":\"ndarray\",\"array\":[\"Manhattan\",\"Manhattan\",\"Brooklyn\",\"Queens\",\"Bronx\",\"Bronx\",\"Queens\",\"Brooklyn\",\"Manhattan\",\"Queens\",\"Unknown\",\"Brooklyn\",\"Queens\",\"Bronx\",\"Manhattan\",\"Manhattan\",\"Bronx\",\"Unknown\",\"Brooklyn\",\"Unknown\",\"Queens\",\"Unknown\",\"Bronx\",\"Staten Island\",\"Unknown\",\"Brooklyn\",\"Manhattan\",\"Staten Island\",\"Staten Island\",\"Staten Island\",\"Brooklyn\",\"Unknown\",\"Staten Island\"],\"shape\":[33],\"dtype\":\"object\",\"order\":\"little\"}],[\"mean_fare\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"pqw35aYRM0CTbkAAW/giQCumpMHRNiZAERFty5GdL0AOosd34XBEQBQ7sRM7MTpA09LS0tJSPkAAAAAAAOBDQLgUn+7lWC5AqRLxC5AZOUBhNyH0SMhKQEHDSd2XJzZArFqn/qRQPUAN76fzcfE6QMAKNE3KD0FA/HapJ+P3MUAcUtGT1i0mQHsHWZIeATJA8oIPz2bZQUDjtpksKOZIQImIiIiIiDpAAAAAAAAsQUCH8hrKayhHQBdddNFF50pAXXTRRRedPUCidiVqV6I9QAAAAAAAcEJA6k1vetMbUkAAAAAAAEBBQG/kRm7kxjlAAAAAAAAANEAAAAAAAAAEQAAAAAAAABZA\"},\"shape\":[33],\"dtype\":\"float64\",\"order\":\"little\"}]]}}},\"view\":{\"type\":\"object\",\"name\":\"CDSView\",\"id\":\"p1254\",\"attributes\":{\"filter\":{\"type\":\"object\",\"name\":\"AllIndices\",\"id\":\"p1255\"}}},\"glyph\":{\"type\":\"object\",\"name\":\"Rect\",\"id\":\"p1250\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"pickup_borough\"},\"y\":{\"type\":\"field\",\"field\":\"dropoff_borough\"},\"width\":{\"type\":\"value\",\"value\":1},\"height\":{\"type\":\"value\",\"value\":1},\"line_color\":{\"type\":\"value\",\"value\":null},\"fill_color\":{\"type\":\"field\",\"field\":\"mean_fare\",\"transform\":{\"type\":\"object\",\"name\":\"LinearColorMapper\",\"id\":\"p1243\",\"attributes\":{\"palette\":[\"#75968f\",\"#a5bab7\",\"#c9d9d3\",\"#e2e2e2\",\"#dfccce\",\"#ddb7b1\",\"#cc7878\",\"#933b41\",\"#550b1d\"],\"low\":2.5,\"high\":72.43478260869566}}}}},\"nonselection_glyph\":{\"type\":\"object\",\"name\":\"Rect\",\"id\":\"p1251\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"pickup_borough\"},\"y\":{\"type\":\"field\",\"field\":\"dropoff_borough\"},\"width\":{\"type\":\"value\",\"value\":1},\"height\":{\"type\":\"value\",\"value\":1},\"line_color\":{\"type\":\"value\",\"value\":null},\"line_alpha\":{\"type\":\"value\",\"value\":0.1},\"fill_color\":{\"type\":\"field\",\"field\":\"mean_fare\",\"transform\":{\"id\":\"p1243\"}},\"fill_alpha\":{\"type\":\"value\",\"value\":0.1},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.1}}},\"muted_glyph\":{\"type\":\"object\",\"name\":\"Rect\",\"id\":\"p1252\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"pickup_borough\"},\"y\":{\"type\":\"field\",\"field\":\"dropoff_borough\"},\"width\":{\"type\":\"value\",\"value\":1},\"height\":{\"type\":\"value\",\"value\":1},\"line_color\":{\"type\":\"value\",\"value\":null},\"line_alpha\":{\"type\":\"value\",\"value\":0.2},\"fill_color\":{\"type\":\"field\",\"field\":\"mean_fare\",\"transform\":{\"id\":\"p1243\"}},\"fill_alpha\":{\"type\":\"value\",\"value\":0.2},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.2}}}}}],\"toolbar\":{\"type\":\"object\",\"name\":\"Toolbar\",\"id\":\"p1204\",\"attributes\":{\"tools\":[{\"type\":\"object\",\"name\":\"HoverTool\",\"id\":\"p1228\",\"attributes\":{\"renderers\":\"auto\",\"tooltips\":[[\"Pickup Borough\",\"@pickup_borough\"],[\"Dropoff Borough\",\"@dropoff_borough\"],[\"Average Fare Amount\",\"$@mean_fare\"]]}},{\"type\":\"object\",\"name\":\"SaveTool\",\"id\":\"p1229\"},{\"type\":\"object\",\"name\":\"PanTool\",\"id\":\"p1230\"},{\"type\":\"object\",\"name\":\"BoxZoomTool\",\"id\":\"p1231\",\"attributes\":{\"overlay\":{\"type\":\"object\",\"name\":\"BoxAnnotation\",\"id\":\"p1232\",\"attributes\":{\"syncable\":false,\"level\":\"overlay\",\"visible\":false,\"left_units\":\"canvas\",\"right_units\":\"canvas\",\"bottom_units\":\"canvas\",\"top_units\":\"canvas\",\"line_color\":\"black\",\"line_alpha\":1.0,\"line_width\":2,\"line_dash\":[4,4],\"fill_color\":\"lightgrey\",\"fill_alpha\":0.5}}}},{\"type\":\"object\",\"name\":\"ResetTool\",\"id\":\"p1233\"},{\"type\":\"object\",\"name\":\"WheelZoomTool\",\"id\":\"p1234\"}]}},\"toolbar_location\":\"below\",\"left\":[{\"type\":\"object\",\"name\":\"CategoricalAxis\",\"id\":\"p1222\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"CategoricalTicker\",\"id\":\"p1224\"},\"formatter\":{\"type\":\"object\",\"name\":\"CategoricalTickFormatter\",\"id\":\"p1223\"},\"major_label_standoff\":0,\"major_label_policy\":{\"type\":\"object\",\"name\":\"AllLabels\",\"id\":\"p1225\"},\"major_label_text_font_size\":\"14px\",\"axis_line_color\":null,\"major_tick_line_color\":null}}],\"right\":[{\"type\":\"object\",\"name\":\"ColorBar\",\"id\":\"p1258\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"BasicTicker\",\"id\":\"p1256\",\"attributes\":{\"desired_num_ticks\":9,\"mantissas\":[1,2,5]}},\"formatter\":{\"type\":\"object\",\"name\":\"PrintfTickFormatter\",\"id\":\"p1257\",\"attributes\":{\"format\":\"$%d\"}},\"major_label_policy\":{\"type\":\"object\",\"name\":\"NoOverlap\",\"id\":\"p1259\"},\"padding\":5,\"major_label_text_font_size\":\"14px\",\"label_standoff\":6,\"color_mapper\":{\"id\":\"p1243\"}}}],\"above\":[{\"type\":\"object\",\"name\":\"CategoricalAxis\",\"id\":\"p1216\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"CategoricalTicker\",\"id\":\"p1218\"},\"formatter\":{\"type\":\"object\",\"name\":\"CategoricalTickFormatter\",\"id\":\"p1217\"},\"major_label_standoff\":0,\"major_label_orientation\":1.0471975511965976,\"major_label_policy\":{\"type\":\"object\",\"name\":\"AllLabels\",\"id\":\"p1219\"},\"major_label_text_font_size\":\"14px\",\"axis_line_color\":null,\"major_tick_line_color\":null}}],\"center\":[{\"type\":\"object\",\"name\":\"Grid\",\"id\":\"p1221\",\"attributes\":{\"axis\":{\"id\":\"p1216\"},\"grid_line_color\":null}},{\"type\":\"object\",\"name\":\"Grid\",\"id\":\"p1227\",\"attributes\":{\"dimension\":1,\"axis\":{\"id\":\"p1222\"},\"grid_line_color\":null}}]}}],\"callbacks\":{\"type\":\"map\"}}};\n", + " const render_items = [{\"docid\":\"053aed4f-f39a-49e7-bb80-2614ad894c1b\",\"roots\":{\"p1198\":\"dac2c983-4bfc-4924-b86e-51a0856585b4\"},\"root_ids\":[\"p1198\"]}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " let attempts = 0;\n", + " const timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "p1198" + } + }, + "output_type": "display_data" + } + ], "source": [ "pickup = list(sorted(df_ex4['pickup_borough'].unique()))\n", "dropoff = list(reversed(sorted(df_ex4['dropoff_borough'].unique())))\n", @@ -750,7 +1113,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[Stage 50:===================================================> (189 + 1) / 200]\r" + "[Stage 50:=================================================> (183 + 1) / 200]\r" ] }, { @@ -808,7 +1171,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 19, "id": "ca83556d", "metadata": {}, "outputs": [ @@ -816,7 +1179,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "[Stage 100:==================================================> (192 + 1) / 200]\r" + "23/05/31 18:21:27 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n", + "[Stage 61:> (0 + 1) / 1]\r" ] }, { @@ -889,7 +1253,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 20, "id": "edde38bb", "metadata": {}, "outputs": [ @@ -897,118 +1261,38 @@ "name": "stderr", "output_type": "stream", "text": [ - "[Stage 265:> (0 + 1) / 1]\r" + "[Stage 84:> (0 + 1) / 1]\r" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "+--------------------+-----+--------+---------+------+-------------+\n", - "| medallion|Bronx|Brooklyn|Manhattan|Queens|Staten Island|\n", - "+--------------------+-----+--------+---------+------+-------------+\n", - "|35E11D9D2AE5C8A80...| 0| 1| 15| 5| 0|\n", - "|DA350783B6954CC67...| 0| 0| 27| 0| 0|\n", - "|35B2F21FAF5E53F1E...| 0| 3| 8| 3| 0|\n", - "|6695FB6E06F7D99F5...| 0| 1| 21| 0| 0|\n", - "|36372627462019376...| 0| 0| 5| 2| 0|\n", - "|EF882BDAF03D41517...| 0| 0| 17| 1| 0|\n", - "|846DFE2D59F6E76EC...| 0| 1| 25| 0| 0|\n", - "|9B69C5971F62F151B...| 0| 0| 13| 1| 0|\n", - "|0F621E366CFE63044...| 0| 1| 11| 1| 0|\n", - "|87EB479F55B88D47C...| 0| 1| 19| 0| 0|\n", - "|4EE5F2532F57F2124...| 0| 0| 14| 1| 0|\n", - "|4F4CA97166A04A455...| 0| 0| 13| 1| 0|\n", - "|DB1964B903773868E...| 0| 0| 6| 0| 0|\n", - "|B01A3E26873C4B514...| 0| 3| 20| 1| 0|\n", - "|F49F752E7E9CAAE41...| 1| 0| 8| 2| 0|\n", - "|D72C164FE66ADFFFE...| 0| 1| 20| 1| 0|\n", - "|E1BD31C1BF8DDCFCB...| 0| 2| 3| 1| 0|\n", - "|80F732B990A7E3763...| 0| 0| 13| 2| 0|\n", - "|F9B3A00E6DDCA4F8B...| 0| 0| 15| 0| 0|\n", - "|1E8EDF1C2EF489B7A...| 0| 0| 2| 1| 0|\n", - "|6AFD7E44A278CFD00...| 0| 0| 4| 1| 0|\n", - "|27E7626D5A223B479...| 0| 0| 21| 0| 0|\n", - "|DDCBE3295F4678F61...| 0| 0| 4| 0| 0|\n", - "|EB6F0753E865DA0AB...| 0| 3| 13| 1| 0|\n", - "|963BEE5F306952D20...| 0| 0| 15| 1| 0|\n", - "|ADFCF211DDD6D7885...| 0| 0| 12| 2| 0|\n", - "|BF46B95E44ED3BE1B...| 0| 0| 17| 0| 0|\n", - "|DCE32B5E6CAD1AFEB...| 0| 2| 12| 2| 0|\n", - "|7D4F34EF0A251F3A6...| 0| 4| 6| 1| 0|\n", - "|764CA5AE502C0FEC9...| 0| 1| 13| 0| 0|\n", - "|4D0A5B1BD7C0B459D...| 0| 1| 12| 1| 0|\n", - "|ED9B774735449ABBE...| 0| 2| 9| 0| 0|\n", - "|198109D0AF980C5BC...| 0| 0| 16| 0| 0|\n", - "|F0BC746C7DD8C0BC9...| 0| 1| 6| 0| 0|\n", - "|223670562219093D6...| 1| 0| 9| 0| 0|\n", - "|59DF6039EC312EE6D...| 0| 2| 7| 0| 0|\n", - "|A02946A94C960AF04...| 0| 0| 7| 0| 0|\n", - "|15162141EA7436635...| 0| 0| 9| 1| 0|\n", - "|5803D6EAD49AEAA82...| 0| 0| 16| 1| 0|\n", - "|618BB39CEEAE5E9A6...| 0| 1| 12| 1| 0|\n", - "|B9E10026AAC457AA6...| 0| 1| 8| 0| 0|\n", - "|E7C49B0A85D992BF1...| 0| 0| 28| 0| 0|\n", - "|4E8142153D6520C41...| 0| 0| 14| 0| 0|\n", - "|72EAFBA3FB9F0507C...| 0| 1| 11| 0| 0|\n", - "|7550D0BD520A691EC...| 0| 0| 7| 0| 0|\n", - "|A5A2F3BDEA888D6A7...| 0| 0| 7| 2| 0|\n", - "|7F82F9083BCBA1011...| 0| 1| 6| 0| 0|\n", - "|586D9BD604B923DA3...| 0| 0| 28| 0| 0|\n", - "|06EAD4C8D98202F1E...| 0| 1| 17| 0| 0|\n", - "|D563F5CC514A87541...| 1| 0| 8| 2| 0|\n", - "|496036713FC662D71...| 0| 0| 13| 0| 0|\n", - "|595917A7813CC80DA...| 0| 1| 22| 0| 0|\n", - "|DAF60A90A00F8FE30...| 0| 0| 20| 0| 0|\n", - "|C251B99766928BB4A...| 0| 0| 7| 2| 0|\n", - "|B59C6B4E3CFAB9EDF...| 0| 0| 33| 0| 0|\n", - "|1109955CCAABCBCE1...| 0| 0| 3| 0| 0|\n", - "|56CF5E3DD6328847A...| 0| 0| 5| 1| 0|\n", - "|5CCB4924B158F945B...| 0| 0| 24| 0| 0|\n", - "|73039762E0F4B253E...| 0| 0| 22| 0| 0|\n", - "|C0D5941A4A93777E9...| 0| 0| 15| 1| 0|\n", - "|57E8E649531AB8807...| 0| 0| 5| 0| 0|\n", - "|911B6F71706854496...| 0| 0| 9| 0| 0|\n", - "|B2B089B939CB4A0A6...| 0| 0| 15| 0| 0|\n", - "|753BC0484097BB236...| 0| 0| 11| 0| 0|\n", - "|47D63452A91E1705F...| 0| 0| 6| 1| 0|\n", - "|BEA5A07E7B365D7F6...| 0| 0| 20| 0| 0|\n", - "|34CE2E3B6B1E89A38...| 0| 1| 9| 0| 0|\n", - "|5B9AB2A961429F558...| 0| 1| 24| 1| 0|\n", - "|72AAE2B8FF50AF611...| 0| 1| 24| 0| 0|\n", - "|4A9DED62DD8EA1E19...| 1| 0| 20| 0| 0|\n", - "|9771700E1AE5E87B2...| 0| 0| 7| 1| 0|\n", - "|C50532B1D6B517BCB...| 0| 2| 8| 1| 0|\n", - "|4F9B5CF4F0FC8835D...| 0| 2| 16| 0| 0|\n", - "|2B8C6434EB5875E58...| 0| 0| 18| 0| 0|\n", - "|BA57B240D0EEE2F43...| 0| 2| 36| 2| 0|\n", - "|4A17962CB3E106E57...| 0| 1| 12| 1| 0|\n", - "|286EFDDA8BBA68C50...| 0| 0| 22| 0| 0|\n", - "|98EDCE7D6FB0741BD...| 0| 7| 5| 0| 0|\n", - "|F0C30DB1889710471...| 0| 3| 7| 0| 0|\n", - "|B6585890F68EE0270...| 0| 0| 26| 1| 0|\n", - "|DC8694A18613057F7...| 0| 2| 9| 2| 0|\n", - "|41EB945E62B7F03D9...| 0| 0| 15| 1| 0|\n", - "|4CD65097EFB67A8D6...| 0| 0| 11| 0| 0|\n", - "|08E9F5633328D780C...| 0| 5| 18| 1| 0|\n", - "|8D708B5B292FB555F...| 1| 0| 21| 1| 0|\n", - "|F4BB93A9C7E2E0A47...| 0| 0| 8| 0| 0|\n", - "|9586875D692663562...| 0| 4| 4| 1| 0|\n", - "|552CCF061B871F717...| 0| 2| 10| 0| 0|\n", - "|167C661512D5AA2C5...| 0| 0| 13| 0| 0|\n", - "|1C8CB1A88201C4E83...| 0| 0| 16| 0| 0|\n", - "|D50F5974294A3AC41...| 0| 0| 4| 0| 0|\n", - "|5205D3FE7D57F5494...| 0| 0| 6| 0| 0|\n", - "|B50F660464E5D0649...| 0| 0| 5| 3| 0|\n", - "|2EBD87EE737D1AB90...| 0| 1| 12| 0| 0|\n", - "|5EE2C4D3BF57BDB45...| 0| 0| 13| 0| 0|\n", - "|C12F3B53D695B3195...| 0| 1| 17| 0| 0|\n", - "|3C698F44315B54EF2...| 0| 0| 19| 0| 0|\n", - "|652979D8BB6F2409F...| 0| 0| 6| 1| 0|\n", - "|667F9BBD97EADE903...| 0| 0| 16| 0| 0|\n", - "|725D9245A61E2C54D...| 0| 0| 31| 0| 0|\n", - "+--------------------+-----+--------+---------+------+-------------+\n", - "only showing top 100 rows\n", + "+--------------------------------+-----+--------+---------+------+-------------+\n", + "|medallion |Bronx|Brooklyn|Manhattan|Queens|Staten Island|\n", + "+--------------------------------+-----+--------+---------+------+-------------+\n", + "|35E11D9D2AE5C8A80261CF6A309BD9FD|0 |1 |15 |5 |0 |\n", + "|DA350783B6954CC672B3830F3A40C0F7|0 |0 |27 |0 |0 |\n", + "|35B2F21FAF5E53F1EB17848E7DC82055|0 |3 |8 |3 |0 |\n", + "|6695FB6E06F7D99F56B579A27759B6F2|0 |1 |21 |0 |0 |\n", + "|36372627462019376C639E270076E599|0 |0 |5 |2 |0 |\n", + "|EF882BDAF03D4151746F1A5A235FC454|0 |0 |17 |1 |0 |\n", + "|846DFE2D59F6E76ECE92959C7827FC12|0 |1 |25 |0 |0 |\n", + "|9B69C5971F62F151BB1C412B35090015|0 |0 |13 |1 |0 |\n", + "|0F621E366CFE63044BFED29EA126CDB9|0 |1 |11 |1 |0 |\n", + "|87EB479F55B88D47C643E19A11B4BEBF|0 |1 |19 |0 |0 |\n", + "|4EE5F2532F57F21244FCC00EEFC37BBC|0 |0 |14 |1 |0 |\n", + "|4F4CA97166A04A4551611769E2C01016|0 |0 |13 |1 |0 |\n", + "|DB1964B903773868E191176E8EF47946|0 |0 |6 |0 |0 |\n", + "|B01A3E26873C4B5145DED29355E1CEFD|0 |3 |20 |1 |0 |\n", + "|F49F752E7E9CAAE41B953FB96E25D059|1 |0 |8 |2 |0 |\n", + "|D72C164FE66ADFFFE94472B10DA5F9E3|0 |1 |20 |1 |0 |\n", + "|E1BD31C1BF8DDCFCB288ACD0A5B8015C|0 |2 |3 |1 |0 |\n", + "|80F732B990A7E37633782074F64AEF8B|0 |0 |13 |2 |0 |\n", + "|F9B3A00E6DDCA4F8BF2560DFF36B9E91|0 |0 |15 |0 |0 |\n", + "|1E8EDF1C2EF489B7AB3712977C7C08B5|0 |0 |2 |1 |0 |\n", + "+--------------------------------+-----+--------+---------+------+-------------+\n", + "only showing top 20 rows\n", "\n" ] }, @@ -1050,7 +1334,7 @@ " .groupBy(\"medallion\").pivot(\"borough\").sum(\"maxTrips\") \\\n", " .fillna(value=0)\n", "\n", - "df_ex8.show(100)" + "df_ex8.show(truncate=False)" ] }, {