hw1: ex1, ex2, ex3 (code only), ex4.1, ex4.2 (no continents) done
This commit is contained in:
parent
7485e14887
commit
a7d3b2fce0
1 changed files with 251 additions and 84 deletions
|
@ -22,7 +22,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 1,
|
||||
"id": "fcf3beb9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -52,7 +52,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "a0af6847",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -62,7 +62,7 @@
|
|||
"('Ü', 'sloppy-windows-1252')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -74,7 +74,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "22ce9426",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -271,7 +271,7 @@
|
|||
"4 2016-03-31 00:00:00 0 60437 2016-04-06 10:17:21 "
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -284,7 +284,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "a332b6a5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -313,7 +313,7 @@
|
|||
" 'lastSeen': ['str']}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -331,7 +331,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "11bfa9a2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -372,7 +372,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "f1c539c4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -413,7 +413,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "86074e70",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -610,7 +610,7 @@
|
|||
"4 2016-03-31 00:00:00 0 60437 2016-04-06 10:17:21 "
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -653,7 +653,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"id": "8b6f9ce3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -683,7 +683,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"id": "98f8d101",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -708,7 +708,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"id": "f300f49d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -727,7 +727,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"id": "923c5354",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -738,7 +738,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 12,
|
||||
"id": "4b847b1f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -749,7 +749,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"id": "bf1f417d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -779,7 +779,7 @@
|
|||
"dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -791,7 +791,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 14,
|
||||
"id": "919e692f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -836,7 +836,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 15,
|
||||
"id": "7cc5c90f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -898,7 +898,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 16,
|
||||
"id": "ca97e7c8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -964,7 +964,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 130,
|
||||
"execution_count": 17,
|
||||
"id": "eb956ed4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -985,7 +985,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 131,
|
||||
"execution_count": 18,
|
||||
"id": "4a29684b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -995,7 +995,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 136,
|
||||
"execution_count": 19,
|
||||
"id": "d3d58d25",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -1044,7 +1044,10 @@
|
|||
"\n",
|
||||
"You'll need to work with the *'airports'* and *‘airports-delays’* datasets. Examine the datasets and perform cleansing if needed, before performing the exercise.\n",
|
||||
"\n",
|
||||
"1. Create a dataframe that provides, for each country, the mean of flights delayed. Display these information by binning the flights delayed in 6 bins. The resulting dataframe should have the countries as rows and the 6 bins as columns. For this exercise you cannot use pivot_table but only groupby. \n",
|
||||
"1. Create a dataframe that provides, for each country, <del>the mean of flights delayed</del>. Display this information by binning the flights delayed in 6 bins. The resulting dataframe should have the countries as rows and the 6 bins as columns. For this exercise you cannot use pivot_table but only groupby. \n",
|
||||
"\n",
|
||||
"<span style=\"color: red\">According to the professor's answer to a question about this exercise:</span>\n",
|
||||
"> Bin by delay_duration value, compute delay mean per-bin per-country \n",
|
||||
"\n",
|
||||
"2. Create a dataframe from *‘airports-delays’* which shows for each continent and country:\n",
|
||||
" 1. max, min and mean of ‘**delay_duration**’;\n",
|
||||
|
@ -1057,9 +1060,180 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 53,
|
||||
"id": "b4fde7e4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_air = pd.read_csv(\"./datasets/airports.csv\", index_col='ID', na_values=['\\\\N'])\n",
|
||||
"df_del = pd.read_csv(\"./datasets/airports-delays.csv\", index_col='ID', sep=\";\", na_values=['\\\\N'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"id": "f8906707",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th>delay_duration_bin</th>\n",
|
||||
" <th>(15.999, 30.0]</th>\n",
|
||||
" <th>(30.0, 35.0]</th>\n",
|
||||
" <th>(35.0, 41.0]</th>\n",
|
||||
" <th>(41.0, 47.0]</th>\n",
|
||||
" <th>(47.0, 59.0]</th>\n",
|
||||
" <th>(59.0, 850.0]</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>country</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>Afghanistan</th>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>44.0</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>60.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>Albania</th>\n",
|
||||
" <td>18.5</td>\n",
|
||||
" <td>31.000000</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>56.000000</td>\n",
|
||||
" <td>63.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>Algeria</th>\n",
|
||||
" <td>26.5</td>\n",
|
||||
" <td>33.857143</td>\n",
|
||||
" <td>38.75</td>\n",
|
||||
" <td>43.0</td>\n",
|
||||
" <td>51.200000</td>\n",
|
||||
" <td>73.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>American Samoa</th>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>43.0</td>\n",
|
||||
" <td>48.000000</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>Angola</th>\n",
|
||||
" <td>28.0</td>\n",
|
||||
" <td>34.500000</td>\n",
|
||||
" <td>36.00</td>\n",
|
||||
" <td>45.0</td>\n",
|
||||
" <td>51.666667</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"delay_duration_bin (15.999, 30.0] (30.0, 35.0] (35.0, 41.0] (41.0, 47.0] \\\n",
|
||||
"country \n",
|
||||
"Afghanistan 0.0 0.000000 0.00 44.0 \n",
|
||||
"Albania 18.5 31.000000 0.00 0.0 \n",
|
||||
"Algeria 26.5 33.857143 38.75 43.0 \n",
|
||||
"American Samoa 0.0 0.000000 0.00 43.0 \n",
|
||||
"Angola 28.0 34.500000 36.00 45.0 \n",
|
||||
"\n",
|
||||
"delay_duration_bin (47.0, 59.0] (59.0, 850.0] \n",
|
||||
"country \n",
|
||||
"Afghanistan 0.000000 60.0 \n",
|
||||
"Albania 56.000000 63.0 \n",
|
||||
"Algeria 51.200000 73.0 \n",
|
||||
"American Samoa 48.000000 0.0 \n",
|
||||
"Angola 51.666667 0.0 "
|
||||
]
|
||||
},
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_4_1 = df_del.copy()\n",
|
||||
"\n",
|
||||
"# The following statement bins the data by the value of delay_duration.\n",
|
||||
"# The bins are chosen as equally-spaced percentile values of the data. This is done to \n",
|
||||
"# better distribute the data between bins, as it is quite skewed towards low values\n",
|
||||
"df_4_1[\"delay_duration_bin\"] = pd.qcut(df_del.delay_duration, 6)\n",
|
||||
"\n",
|
||||
"# The dataframe will contain countries as row indices, the 6 bins as columns and values\n",
|
||||
"# corresponding to the mean delay_duration per country, per bin. When no delay_duration \n",
|
||||
"# falls in a particular bin for some country, that bin has a value of 0\n",
|
||||
"df_4_1 = df_4_1.loc[:, ['country', 'delay_duration', 'delay_duration_bin']] \\\n",
|
||||
" .groupby(['country', 'delay_duration_bin']) \\\n",
|
||||
" .mean() \\\n",
|
||||
" .fillna(0) \\\n",
|
||||
" .reset_index() \\\n",
|
||||
" .pivot(index='country', columns='delay_duration_bin', values='delay_duration') \n",
|
||||
"\n",
|
||||
"df_4_1.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"id": "a677ce07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 4.2\n",
|
||||
"# TODO: continents\n",
|
||||
"df_4_2 = df_del.loc[:, ['country', 'delay_duration', 'flights_cancelled', 'flights_delayed', 'flights_planned']] \\\n",
|
||||
" .groupby('country') \\\n",
|
||||
" .agg(dur_min=('delay_duration', 'min'), \\\n",
|
||||
" dur_mean=('delay_duration', 'mean'), \\\n",
|
||||
" dur_max=('delay_duration', 'max'), \\\n",
|
||||
" cancelled_sum=('flights_cancelled', 'sum'), \\\n",
|
||||
" cancelled_mean=('flights_cancelled', 'mean'), \\\n",
|
||||
" delayed_sum=('flights_delayed', 'sum'), \\\n",
|
||||
" delayed_mean=('flights_delayed', 'mean'), \\\n",
|
||||
" planned_sum=('flights_planned', 'sum'), \\\n",
|
||||
" planned_mean=('flights_planned', 'mean'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"id": "a29b8c2f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
|
@ -1132,7 +1306,7 @@
|
|||
" <td>31.328199</td>\n",
|
||||
" <td>35.388599</td>\n",
|
||||
" <td>-1266</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>E</td>\n",
|
||||
" <td>Asia/Jerusalem</td>\n",
|
||||
" <td>airport</td>\n",
|
||||
|
@ -1152,7 +1326,7 @@
|
|||
" <td>30.621700</td>\n",
|
||||
" <td>35.203300</td>\n",
|
||||
" <td>-164</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>E</td>\n",
|
||||
" <td>Asia/Jerusalem</td>\n",
|
||||
" <td>airport</td>\n",
|
||||
|
@ -1172,7 +1346,7 @@
|
|||
" <td>33.626701</td>\n",
|
||||
" <td>-116.160004</td>\n",
|
||||
" <td>-115</td>\n",
|
||||
" <td>-8</td>\n",
|
||||
" <td>-8.0</td>\n",
|
||||
" <td>A</td>\n",
|
||||
" <td>America/Los_Angeles</td>\n",
|
||||
" <td>airport</td>\n",
|
||||
|
@ -1192,7 +1366,7 @@
|
|||
" <td>47.121899</td>\n",
|
||||
" <td>51.821400</td>\n",
|
||||
" <td>-72</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>U</td>\n",
|
||||
" <td>Asia/Oral</td>\n",
|
||||
" <td>airport</td>\n",
|
||||
|
@ -1272,7 +1446,7 @@
|
|||
" <td>47.092444</td>\n",
|
||||
" <td>8.305184</td>\n",
|
||||
" <td>1400</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>E</td>\n",
|
||||
" <td>Europe/Zurich</td>\n",
|
||||
" <td>airport</td>\n",
|
||||
|
@ -1292,7 +1466,7 @@
|
|||
" <td>3.421000</td>\n",
|
||||
" <td>115.153999</td>\n",
|
||||
" <td>1400</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>8.0</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" <td>Asia/Kuala_Lumpur</td>\n",
|
||||
" <td>airport</td>\n",
|
||||
|
@ -1312,7 +1486,7 @@
|
|||
" <td>-13.549100</td>\n",
|
||||
" <td>-48.195301</td>\n",
|
||||
" <td>1401</td>\n",
|
||||
" <td>-3</td>\n",
|
||||
" <td>-3.0</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>America/Sao_Paulo</td>\n",
|
||||
" <td>airport</td>\n",
|
||||
|
@ -1327,12 +1501,12 @@
|
|||
" <td>Bubovice Airport</td>\n",
|
||||
" <td>Bubovice</td>\n",
|
||||
" <td>Czech Republic</td>\n",
|
||||
" <td>\\N</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LKBU</td>\n",
|
||||
" <td>49.974400</td>\n",
|
||||
" <td>14.178100</td>\n",
|
||||
" <td>1401</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>E</td>\n",
|
||||
" <td>Europe/Prague</td>\n",
|
||||
" <td>airport</td>\n",
|
||||
|
@ -1360,67 +1534,60 @@
|
|||
"1670 Emmen Air Base Emmen Switzerland EML \n",
|
||||
"6215 Long Lellang Airport Long Datih Malaysia LGL \n",
|
||||
"7375 Minaçu Airport Minacu Brazil MQH \n",
|
||||
"9253 Bubovice Airport Bubovice Czech Republic \\N \n",
|
||||
"9253 Bubovice Airport Bubovice Czech Republic NaN \n",
|
||||
"\n",
|
||||
" ICAO latitude longitude altitude timezone DST tz_database_timezone \\\n",
|
||||
" ICAO latitude longitude altitude timezone DST \\\n",
|
||||
"ID \n",
|
||||
"1600 LLMZ 31.328199 35.388599 -1266 2 E Asia/Jerusalem \n",
|
||||
"1595 LLEY 30.621700 35.203300 -164 2 E Asia/Jerusalem \n",
|
||||
"7646 KTRM 33.626701 -116.160004 -115 -8 A America/Los_Angeles \n",
|
||||
"4357 UATG 47.121899 51.821400 -72 5 U Asia/Oral \n",
|
||||
"2151 OINR 36.909901 50.679600 -70 3.5 E Asia/Tehran \n",
|
||||
"... ... ... ... ... ... .. ... \n",
|
||||
"3039 VELP 23.840599 92.619698 1398 5.5 N Asia/Calcutta \n",
|
||||
"1670 LSME 47.092444 8.305184 1400 1 E Europe/Zurich \n",
|
||||
"6215 WBGF 3.421000 115.153999 1400 8 N Asia/Kuala_Lumpur \n",
|
||||
"7375 SBMC -13.549100 -48.195301 1401 -3 S America/Sao_Paulo \n",
|
||||
"9253 LKBU 49.974400 14.178100 1401 1 E Europe/Prague \n",
|
||||
"1600 LLMZ 31.328199 35.388599 -1266 2.0 E \n",
|
||||
"1595 LLEY 30.621700 35.203300 -164 2.0 E \n",
|
||||
"7646 KTRM 33.626701 -116.160004 -115 -8.0 A \n",
|
||||
"4357 UATG 47.121899 51.821400 -72 5.0 U \n",
|
||||
"2151 OINR 36.909901 50.679600 -70 3.5 E \n",
|
||||
"... ... ... ... ... ... .. \n",
|
||||
"3039 VELP 23.840599 92.619698 1398 5.5 N \n",
|
||||
"1670 LSME 47.092444 8.305184 1400 1.0 E \n",
|
||||
"6215 WBGF 3.421000 115.153999 1400 8.0 N \n",
|
||||
"7375 SBMC -13.549100 -48.195301 1401 -3.0 S \n",
|
||||
"9253 LKBU 49.974400 14.178100 1401 1.0 E \n",
|
||||
"\n",
|
||||
" type source flights_planned flights_cancelled \\\n",
|
||||
" tz_database_timezone type source flights_planned \\\n",
|
||||
"ID \n",
|
||||
"1600 airport OurAirports 62 0 \n",
|
||||
"1595 airport OurAirports 56 0 \n",
|
||||
"7646 airport OurAirports 60 0 \n",
|
||||
"4357 airport OurAirports 71 0 \n",
|
||||
"2151 airport OurAirports 62 1 \n",
|
||||
"1600 Asia/Jerusalem airport OurAirports 62 \n",
|
||||
"1595 Asia/Jerusalem airport OurAirports 56 \n",
|
||||
"7646 America/Los_Angeles airport OurAirports 60 \n",
|
||||
"4357 Asia/Oral airport OurAirports 71 \n",
|
||||
"2151 Asia/Tehran airport OurAirports 62 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"3039 airport OurAirports 118 0 \n",
|
||||
"1670 airport OurAirports 124 0 \n",
|
||||
"6215 airport OurAirports 126 0 \n",
|
||||
"7375 airport OurAirports 119 1 \n",
|
||||
"9253 airport OurAirports 128 0 \n",
|
||||
"3039 Asia/Calcutta airport OurAirports 118 \n",
|
||||
"1670 Europe/Zurich airport OurAirports 124 \n",
|
||||
"6215 Asia/Kuala_Lumpur airport OurAirports 126 \n",
|
||||
"7375 America/Sao_Paulo airport OurAirports 119 \n",
|
||||
"9253 Europe/Prague airport OurAirports 128 \n",
|
||||
"\n",
|
||||
" flights_delayed delay_duration \n",
|
||||
" flights_cancelled flights_delayed delay_duration \n",
|
||||
"ID \n",
|
||||
"1600 9 32.0 \n",
|
||||
"1595 7 24.0 \n",
|
||||
"7646 7 28.0 \n",
|
||||
"4357 9 35.0 \n",
|
||||
"2151 6 47.0 \n",
|
||||
"... ... ... \n",
|
||||
"3039 23 38.0 \n",
|
||||
"1670 19 38.0 \n",
|
||||
"6215 18 32.0 \n",
|
||||
"7375 25 48.0 \n",
|
||||
"9253 15 32.0 \n",
|
||||
"1600 0 9 32.0 \n",
|
||||
"1595 0 7 24.0 \n",
|
||||
"7646 0 7 28.0 \n",
|
||||
"4357 0 9 35.0 \n",
|
||||
"2151 1 6 47.0 \n",
|
||||
"... ... ... ... \n",
|
||||
"3039 0 23 38.0 \n",
|
||||
"1670 0 19 38.0 \n",
|
||||
"6215 0 18 32.0 \n",
|
||||
"7375 1 25 48.0 \n",
|
||||
"9253 0 15 32.0 \n",
|
||||
"\n",
|
||||
"[6029 rows x 17 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 62,
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_air = pd.read_csv(\"./datasets/airports.csv\", index_col='ID')\n",
|
||||
"df_del = pd.read_csv(\"./datasets/airports-delays.csv\", index_col='ID', sep=\";\")\n",
|
||||
"\n",
|
||||
"df_del\n",
|
||||
"#pd.cut(df_del.flights_delayed, range(0, df_del.flights_delayed.max(), 25))\n",
|
||||
"\n",
|
||||
"#df_bycountry = df_del.loc[:, ['country', 'flights_delayed']].groupby('country').sum().sort_values('flights_delayed', ascending=False)\n",
|
||||
"#df_bycountry\n"
|
||||
"df_del"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Reference in a new issue