diff --git a/Assignment1/Assignment1.ipynb b/Assignment1/Assignment1.ipynb index 4df9a60..ca49c32 100644 --- a/Assignment1/Assignment1.ipynb +++ b/Assignment1/Assignment1.ipynb @@ -1508,7 +1508,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 194, "id": "5d1fad2a", "metadata": {}, "outputs": [ @@ -1541,8 +1541,8 @@ " Bosnia and Herzegovina\n", " Bulgaria\n", " Croatia\n", - " Cyprus\n", " Czech Republic\n", + " Denmark\n", " ...\n", " San Marino\n", " Serbia\n", @@ -1583,174 +1583,166 @@ " \n", " \n", " Albania\n", - " 0\n", - " 0\n", - " 1\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 1\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 1\n", - " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", " \n", " \n", " Andorra\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", " \n", " \n", " Austria\n", - " 1\n", - " 0\n", - " 15\n", - " 2\n", - " 2\n", - " 1\n", - " 3\n", - " 6\n", - " 3\n", - " 1\n", + " 1.0\n", + " 0.0\n", + " 15.0\n", + " 2.0\n", + " 2.0\n", + " 1.0\n", + " 3.0\n", + " 6.0\n", + " 1.0\n", + " 5.0\n", " ...\n", - " 0\n", - " 3\n", - " 1\n", - " 2\n", - " 40\n", - " 4\n", - " 11\n", - " 10\n", - " 11\n", - " 0\n", + " 0.0\n", + " 3.0\n", + " 1.0\n", + " 2.0\n", + " 40.0\n", + " 4.0\n", + " 11.0\n", + " 10.0\n", + " 11.0\n", + " 0.0\n", " \n", " \n", " Belarus\n", - " 0\n", - " 0\n", - " 2\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 1\n", - " 2\n", + " 0.0\n", + " 0.0\n", + " 2.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 2.0\n", + " 0.0\n", " ...\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 1\n", - " 1\n", - " 1\n", - " 2\n", - " 1\n", - " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 1.0\n", + " 1.0\n", + " 2.0\n", + " 1.0\n", + " 0.0\n", " \n", " \n", " Belgium\n", - " 0\n", - " 0\n", - " 2\n", - " 0\n", - " 1\n", - " 0\n", - " 4\n", - " 5\n", - " 2\n", - " 4\n", + " 0.0\n", + " 0.0\n", + " 2.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 4.0\n", + " 5.0\n", + " 4.0\n", + " 5.0\n", " ...\n", - " 0\n", - " 2\n", - " 1\n", - " 3\n", - " 60\n", - " 6\n", - " 6\n", - " 2\n", - " 17\n", - " 0\n", + " 0.0\n", + " 2.0\n", + " 1.0\n", + " 3.0\n", + " 60.0\n", + " 6.0\n", + " 6.0\n", + " 2.0\n", + " 17.0\n", + " 0.0\n", " \n", " \n", "\n", - "

5 rows × 47 columns

\n", + "

5 rows × 46 columns

\n", "" ], "text/plain": [ "country Albania Andorra Austria Belarus Belgium \\\n", "country_dest \n", - "Albania 0 0 1 0 0 \n", - "Andorra 0 0 0 0 0 \n", - "Austria 1 0 15 2 2 \n", - "Belarus 0 0 2 0 0 \n", - "Belgium 0 0 2 0 1 \n", + "Albania 0.0 0.0 1.0 0.0 0.0 \n", + "Andorra 0.0 0.0 0.0 0.0 0.0 \n", + "Austria 1.0 0.0 15.0 2.0 2.0 \n", + "Belarus 0.0 0.0 2.0 0.0 0.0 \n", + "Belgium 0.0 0.0 2.0 0.0 1.0 \n", "\n", - "country Bosnia and Herzegovina Bulgaria Croatia Cyprus \\\n", - "country_dest \n", - "Albania 0 0 0 0 \n", - "Andorra 0 0 0 0 \n", - "Austria 1 3 6 3 \n", - "Belarus 0 0 0 1 \n", - "Belgium 0 4 5 2 \n", + "country Bosnia and Herzegovina Bulgaria Croatia Czech Republic \\\n", + "country_dest \n", + "Albania 0.0 0.0 0.0 0.0 \n", + "Andorra 0.0 0.0 0.0 0.0 \n", + "Austria 1.0 3.0 6.0 1.0 \n", + "Belarus 0.0 0.0 0.0 2.0 \n", + "Belgium 0.0 4.0 5.0 4.0 \n", "\n", - "country Czech Republic ... San Marino Serbia Slovakia Slovenia \\\n", - "country_dest ... \n", - "Albania 0 ... 0 0 0 1 \n", - "Andorra 0 ... 0 0 0 0 \n", - "Austria 1 ... 0 3 1 2 \n", - "Belarus 2 ... 0 0 0 0 \n", - "Belgium 4 ... 0 2 1 3 \n", + "country Denmark ... San Marino Serbia Slovakia Slovenia Spain \\\n", + "country_dest ... \n", + "Albania 0.0 ... 0.0 0.0 0.0 1.0 0.0 \n", + "Andorra 0.0 ... 0.0 0.0 0.0 0.0 0.0 \n", + "Austria 5.0 ... 0.0 3.0 1.0 2.0 40.0 \n", + "Belarus 0.0 ... 0.0 0.0 0.0 0.0 1.0 \n", + "Belgium 5.0 ... 0.0 2.0 1.0 3.0 60.0 \n", "\n", - "country Spain Sweden Switzerland Ukraine United Kingdom \\\n", - "country_dest \n", - "Albania 0 0 0 0 1 \n", - "Andorra 0 0 0 0 0 \n", - "Austria 40 4 11 10 11 \n", - "Belarus 1 1 1 2 1 \n", - "Belgium 60 6 6 2 17 \n", + "country Sweden Switzerland Ukraine United Kingdom Vatican City \n", + "country_dest \n", + "Albania 0.0 0.0 0.0 1.0 0.0 \n", + "Andorra 0.0 0.0 0.0 0.0 0.0 \n", + "Austria 4.0 11.0 10.0 11.0 0.0 \n", + "Belarus 1.0 1.0 2.0 1.0 0.0 \n", + "Belgium 6.0 6.0 2.0 17.0 0.0 \n", "\n", - "country Vatican City \n", - "country_dest \n", - "Albania 0 \n", - "Andorra 0 \n", - "Austria 0 \n", - "Belarus 0 \n", - "Belgium 0 \n", - "\n", - "[5 rows x 47 columns]" + "[5 rows x 46 columns]" ] }, - "execution_count": 122, + "execution_count": 194, "metadata": {}, "output_type": "execute_result" } @@ -1762,7 +1754,9 @@ "df_routes = pd.read_csv(\"./datasets/routes.csv\", na_values=['\\\\N'], sep=\";\") \\\n", " .rename(lambda x: x.strip(), axis=1)\n", "\n", - "df_countries = pd.read_csv(\"./datasets/countries.csv\") \\\n", + "# Note that I consider a country to be 'European' if the 'continent' country in countries.csv is equal to 'eu' \n", + "df_countries = pd.read_csv(\"./datasets/countries.csv\") \n", + "df_countries = df_countries.loc[df_countries.continent == 'eu', :] \\\n", " .rename(columns={'name': 'country'}).drop(columns=['continent'])\n", "\n", "df_countries.loc[df_countries.country == 'Faroe Is.', 'country'] = 'Faroe Islands'\n", @@ -1779,7 +1773,7 @@ " .join(df_id_country, how='right', on='source_airport') \\\n", " .join(df_id_country, how='right', on='destination_airport', rsuffix='_dest')\n", "\n", - "# Count only a pair of notna source and destination airport as a valid route\n", + "# Count only a pair of notna source and destination airport as a valid flight\n", "# When this is not a case the row is an artifact of the right join. We assign 0\n", "# as a value so that in the final sum the value will still appear to include \n", "# no-flight countries, albeit with a total number of routes to 0\n", @@ -1795,16 +1789,16 @@ " .fillna(0) \\\n", " .sort_values('country_dest')\n", "\n", - "# Change type of cells and remove column level for geopandas compatibility\n", - "df_routes_count = df_routes_count[df_routes_count.columns].astype(int)\n", + "# Change type of cells to float and remove column level for geopandas compatibility\n", + "df_routes_count = df_routes_count[df_routes_count.columns].astype(float)\n", "df_routes_count.columns = df_routes_count.columns.droplevel(0)\n", "df_routes_count.head()" ] }, { "cell_type": "code", - "execution_count": 128, - "id": "75225ed4", + "execution_count": 195, + "id": "87bd101d", "metadata": {}, "outputs": [ { @@ -1854,30 +1848,6 @@ " \n", " \n", " 0\n", - " Azerbaijan\n", - " MULTIPOLYGON (((45.08332 39.76804, 45.26639 39...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 1\n", " Albania\n", " POLYGON ((19.43621 41.02107, 19.45055 41.06000...\n", " 0.0\n", @@ -1901,31 +1871,7 @@ " 0.0\n", " \n", " \n", - " 2\n", - " Armenia\n", - " MULTIPOLYGON (((45.57305 40.63249, 45.52888 40...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 3\n", + " 1\n", " Bosnia and Herzegovina\n", " POLYGON ((17.64984 42.88908, 17.57853 42.94382...\n", " 0.0\n", @@ -1949,7 +1895,7 @@ " 0.0\n", " \n", " \n", - " 4\n", + " 2\n", " Bulgaria\n", " POLYGON ((27.87917 42.84110, 27.89500 42.80250...\n", " 0.0\n", @@ -1972,63 +1918,111 @@ " 10.0\n", " 0.0\n", " \n", + " \n", + " 3\n", + " Denmark\n", + " MULTIPOLYGON (((11.51389 54.82972, 11.56444 54...\n", + " 0.0\n", + " 0.0\n", + " 5.0\n", + " 0.0\n", + " 5.0\n", + " 1.0\n", + " 1.0\n", + " 6.0\n", + " ...\n", + " 0.0\n", + " 2.0\n", + " 0.0\n", + " 2.0\n", + " 31.0\n", + " 9.0\n", + " 7.0\n", + " 0.0\n", + " 22.0\n", + " 0.0\n", + " \n", + " \n", + " 4\n", + " Ireland\n", + " MULTIPOLYGON (((-9.65639 53.22222, -9.66333 53...\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 3.0\n", + " 0.0\n", + " 1.0\n", + " 3.0\n", + " ...\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 53.0\n", + " 2.0\n", + " 3.0\n", + " 0.0\n", + " 63.0\n", + " 0.0\n", + " \n", " \n", "\n", - "

5 rows × 49 columns

\n", + "

5 rows × 48 columns

\n", "" ], "text/plain": [ " NAME geometry \\\n", - "0 Azerbaijan MULTIPOLYGON (((45.08332 39.76804, 45.26639 39... \n", - "1 Albania POLYGON ((19.43621 41.02107, 19.45055 41.06000... \n", - "2 Armenia MULTIPOLYGON (((45.57305 40.63249, 45.52888 40... \n", - "3 Bosnia and Herzegovina POLYGON ((17.64984 42.88908, 17.57853 42.94382... \n", - "4 Bulgaria POLYGON ((27.87917 42.84110, 27.89500 42.80250... \n", + "0 Albania POLYGON ((19.43621 41.02107, 19.45055 41.06000... \n", + "1 Bosnia and Herzegovina POLYGON ((17.64984 42.88908, 17.57853 42.94382... \n", + "2 Bulgaria POLYGON ((27.87917 42.84110, 27.89500 42.80250... \n", + "3 Denmark MULTIPOLYGON (((11.51389 54.82972, 11.56444 54... \n", + "4 Ireland MULTIPOLYGON (((-9.65639 53.22222, -9.66333 53... \n", "\n", " Albania Andorra Austria Belarus Belgium Bosnia and Herzegovina \\\n", - "0 NaN NaN NaN NaN NaN NaN \n", - "1 0.0 0.0 1.0 0.0 0.0 0.0 \n", - "2 NaN NaN NaN NaN NaN NaN \n", - "3 0.0 0.0 1.0 0.0 0.0 2.0 \n", - "4 0.0 0.0 3.0 0.0 4.0 0.0 \n", + "0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 1.0 0.0 0.0 2.0 \n", + "2 0.0 0.0 3.0 0.0 4.0 0.0 \n", + "3 0.0 0.0 5.0 0.0 5.0 1.0 \n", + "4 0.0 0.0 1.0 0.0 3.0 0.0 \n", "\n", " Bulgaria Croatia ... San Marino Serbia Slovakia Slovenia Spain \\\n", - "0 NaN NaN ... NaN NaN NaN NaN NaN \n", - "1 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 \n", - "2 NaN NaN ... NaN NaN NaN NaN NaN \n", - "3 0.0 1.0 ... 0.0 3.0 0.0 1.0 0.0 \n", - "4 6.0 0.0 ... 0.0 2.0 0.0 0.0 8.0 \n", + "0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 \n", + "1 0.0 1.0 ... 0.0 3.0 0.0 1.0 0.0 \n", + "2 6.0 0.0 ... 0.0 2.0 0.0 0.0 8.0 \n", + "3 1.0 6.0 ... 0.0 2.0 0.0 2.0 31.0 \n", + "4 1.0 3.0 ... 0.0 0.0 1.0 0.0 53.0 \n", "\n", " Sweden Switzerland Ukraine United Kingdom Vatican City \n", - "0 NaN NaN NaN NaN NaN \n", - "1 0.0 0.0 0.0 1.0 0.0 \n", - "2 NaN NaN NaN NaN NaN \n", - "3 3.0 1.0 0.0 0.0 0.0 \n", - "4 0.0 1.0 0.0 10.0 0.0 \n", + "0 0.0 0.0 0.0 1.0 0.0 \n", + "1 3.0 1.0 0.0 0.0 0.0 \n", + "2 0.0 1.0 0.0 10.0 0.0 \n", + "3 9.0 7.0 0.0 22.0 0.0 \n", + "4 2.0 3.0 0.0 63.0 0.0 \n", "\n", - "[5 rows x 49 columns]" + "[5 rows x 48 columns]" ] }, - "execution_count": 128, + "execution_count": 195, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Some countries have column values = 'NaN'. These countries are not in Europe according to the dataset,\n", - "# but I choose to include them in the map as 'no data' (i.e. in grey)\n", + "# Note the inner join to drop countries that we do not consider part of 'Europe'\n", + "# (according to the countries.csv file)\n", "yurop = gpd.read_file(\"./datasets/europe.geojson\") \\\n", " .loc[:, ['NAME', 'geometry']] \\\n", " .set_index('NAME') \\\n", - " .join(df_routes_count, how='left') \\\n", - " .reset_index()\n", + " .join(df_routes_count, how='inner') \\\n", + " .reset_index(names='NAME')\n", "\n", "yurop.head()" ] }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 218, "id": "11612845", "metadata": {}, "outputs": [ @@ -2036,11 +2030,11 @@ "data": { "application/vnd.bokehjs_exec.v0+json": "", "text/html": [ - "