diff --git a/Assignment1/Assignment1.ipynb b/Assignment1/Assignment1.ipynb index 4df9a60..ca49c32 100644 --- a/Assignment1/Assignment1.ipynb +++ b/Assignment1/Assignment1.ipynb @@ -1508,7 +1508,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 194, "id": "5d1fad2a", "metadata": {}, "outputs": [ @@ -1541,8 +1541,8 @@ "
5 rows × 47 columns
\n", + "5 rows × 46 columns
\n", "" ], "text/plain": [ "country Albania Andorra Austria Belarus Belgium \\\n", "country_dest \n", - "Albania 0 0 1 0 0 \n", - "Andorra 0 0 0 0 0 \n", - "Austria 1 0 15 2 2 \n", - "Belarus 0 0 2 0 0 \n", - "Belgium 0 0 2 0 1 \n", + "Albania 0.0 0.0 1.0 0.0 0.0 \n", + "Andorra 0.0 0.0 0.0 0.0 0.0 \n", + "Austria 1.0 0.0 15.0 2.0 2.0 \n", + "Belarus 0.0 0.0 2.0 0.0 0.0 \n", + "Belgium 0.0 0.0 2.0 0.0 1.0 \n", "\n", - "country Bosnia and Herzegovina Bulgaria Croatia Cyprus \\\n", - "country_dest \n", - "Albania 0 0 0 0 \n", - "Andorra 0 0 0 0 \n", - "Austria 1 3 6 3 \n", - "Belarus 0 0 0 1 \n", - "Belgium 0 4 5 2 \n", + "country Bosnia and Herzegovina Bulgaria Croatia Czech Republic \\\n", + "country_dest \n", + "Albania 0.0 0.0 0.0 0.0 \n", + "Andorra 0.0 0.0 0.0 0.0 \n", + "Austria 1.0 3.0 6.0 1.0 \n", + "Belarus 0.0 0.0 0.0 2.0 \n", + "Belgium 0.0 4.0 5.0 4.0 \n", "\n", - "country Czech Republic ... San Marino Serbia Slovakia Slovenia \\\n", - "country_dest ... \n", - "Albania 0 ... 0 0 0 1 \n", - "Andorra 0 ... 0 0 0 0 \n", - "Austria 1 ... 0 3 1 2 \n", - "Belarus 2 ... 0 0 0 0 \n", - "Belgium 4 ... 0 2 1 3 \n", + "country Denmark ... San Marino Serbia Slovakia Slovenia Spain \\\n", + "country_dest ... \n", + "Albania 0.0 ... 0.0 0.0 0.0 1.0 0.0 \n", + "Andorra 0.0 ... 0.0 0.0 0.0 0.0 0.0 \n", + "Austria 5.0 ... 0.0 3.0 1.0 2.0 40.0 \n", + "Belarus 0.0 ... 0.0 0.0 0.0 0.0 1.0 \n", + "Belgium 5.0 ... 
0.0 2.0 1.0 3.0 60.0 \n", "\n", - "country Spain Sweden Switzerland Ukraine United Kingdom \\\n", - "country_dest \n", - "Albania 0 0 0 0 1 \n", - "Andorra 0 0 0 0 0 \n", - "Austria 40 4 11 10 11 \n", - "Belarus 1 1 1 2 1 \n", - "Belgium 60 6 6 2 17 \n", + "country Sweden Switzerland Ukraine United Kingdom Vatican City \n", + "country_dest \n", + "Albania 0.0 0.0 0.0 1.0 0.0 \n", + "Andorra 0.0 0.0 0.0 0.0 0.0 \n", + "Austria 4.0 11.0 10.0 11.0 0.0 \n", + "Belarus 1.0 1.0 2.0 1.0 0.0 \n", + "Belgium 6.0 6.0 2.0 17.0 0.0 \n", "\n", - "country Vatican City \n", - "country_dest \n", - "Albania 0 \n", - "Andorra 0 \n", - "Austria 0 \n", - "Belarus 0 \n", - "Belgium 0 \n", - "\n", - "[5 rows x 47 columns]" + "[5 rows x 46 columns]" ] }, - "execution_count": 122, + "execution_count": 194, "metadata": {}, "output_type": "execute_result" } @@ -1762,7 +1754,9 @@ "df_routes = pd.read_csv(\"./datasets/routes.csv\", na_values=['\\\\N'], sep=\";\") \\\n", " .rename(lambda x: x.strip(), axis=1)\n", "\n", - "df_countries = pd.read_csv(\"./datasets/countries.csv\") \\\n", + "# Note that I consider a country to be 'European' if the 'continent' column in countries.csv is equal to 'eu' \n", + "df_countries = pd.read_csv(\"./datasets/countries.csv\") \n", + "df_countries = df_countries.loc[df_countries.continent == 'eu', :] \\\n", " .rename(columns={'name': 'country'}).drop(columns=['continent'])\n", "\n", "df_countries.loc[df_countries.country == 'Faroe Is.', 'country'] = 'Faroe Islands'\n", @@ -1779,7 +1773,7 @@ " .join(df_id_country, how='right', on='source_airport') \\\n", " .join(df_id_country, how='right', on='destination_airport', rsuffix='_dest')\n", "\n", - "# Count only a pair of notna source and destination airport as a valid route\n", + "# Count only a pair of notna source and destination airport as a valid flight\n", "# When this is not a case the row is an artifact of the right join. 
We assign 0\n", "# as a value so that in the final sum the value will still appear to include \n", "# no-flight countries, albeit with a total number of routes to 0\n", @@ -1795,16 +1789,16 @@ " .fillna(0) \\\n", " .sort_values('country_dest')\n", "\n", - "# Change type of cells and remove column level for geopandas compatibility\n", - "df_routes_count = df_routes_count[df_routes_count.columns].astype(int)\n", + "# Change type of cells to float and remove column level for geopandas compatibility\n", + "df_routes_count = df_routes_count[df_routes_count.columns].astype(float)\n", "df_routes_count.columns = df_routes_count.columns.droplevel(0)\n", "df_routes_count.head()" ] }, { "cell_type": "code", - "execution_count": 128, - "id": "75225ed4", + "execution_count": 195, + "id": "87bd101d", "metadata": {}, "outputs": [ { @@ -1854,30 +1848,6 @@ " \n", "5 rows × 49 columns
\n", + "5 rows × 48 columns
\n", "" ], "text/plain": [ " NAME geometry \\\n", - "0 Azerbaijan MULTIPOLYGON (((45.08332 39.76804, 45.26639 39... \n", - "1 Albania POLYGON ((19.43621 41.02107, 19.45055 41.06000... \n", - "2 Armenia MULTIPOLYGON (((45.57305 40.63249, 45.52888 40... \n", - "3 Bosnia and Herzegovina POLYGON ((17.64984 42.88908, 17.57853 42.94382... \n", - "4 Bulgaria POLYGON ((27.87917 42.84110, 27.89500 42.80250... \n", + "0 Albania POLYGON ((19.43621 41.02107, 19.45055 41.06000... \n", + "1 Bosnia and Herzegovina POLYGON ((17.64984 42.88908, 17.57853 42.94382... \n", + "2 Bulgaria POLYGON ((27.87917 42.84110, 27.89500 42.80250... \n", + "3 Denmark MULTIPOLYGON (((11.51389 54.82972, 11.56444 54... \n", + "4 Ireland MULTIPOLYGON (((-9.65639 53.22222, -9.66333 53... \n", "\n", " Albania Andorra Austria Belarus Belgium Bosnia and Herzegovina \\\n", - "0 NaN NaN NaN NaN NaN NaN \n", - "1 0.0 0.0 1.0 0.0 0.0 0.0 \n", - "2 NaN NaN NaN NaN NaN NaN \n", - "3 0.0 0.0 1.0 0.0 0.0 2.0 \n", - "4 0.0 0.0 3.0 0.0 4.0 0.0 \n", + "0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 1.0 0.0 0.0 2.0 \n", + "2 0.0 0.0 3.0 0.0 4.0 0.0 \n", + "3 0.0 0.0 5.0 0.0 5.0 1.0 \n", + "4 0.0 0.0 1.0 0.0 3.0 0.0 \n", "\n", " Bulgaria Croatia ... San Marino Serbia Slovakia Slovenia Spain \\\n", - "0 NaN NaN ... NaN NaN NaN NaN NaN \n", - "1 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 \n", - "2 NaN NaN ... NaN NaN NaN NaN NaN \n", - "3 0.0 1.0 ... 0.0 3.0 0.0 1.0 0.0 \n", - "4 6.0 0.0 ... 0.0 2.0 0.0 0.0 8.0 \n", + "0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 \n", + "1 0.0 1.0 ... 0.0 3.0 0.0 1.0 0.0 \n", + "2 6.0 0.0 ... 0.0 2.0 0.0 0.0 8.0 \n", + "3 1.0 6.0 ... 0.0 2.0 0.0 2.0 31.0 \n", + "4 1.0 3.0 ... 
0.0 0.0 1.0 0.0 53.0 \n", "\n", " Sweden Switzerland Ukraine United Kingdom Vatican City \n", - "0 NaN NaN NaN NaN NaN \n", - "1 0.0 0.0 0.0 1.0 0.0 \n", - "2 NaN NaN NaN NaN NaN \n", - "3 3.0 1.0 0.0 0.0 0.0 \n", - "4 0.0 1.0 0.0 10.0 0.0 \n", + "0 0.0 0.0 0.0 1.0 0.0 \n", + "1 3.0 1.0 0.0 0.0 0.0 \n", + "2 0.0 1.0 0.0 10.0 0.0 \n", + "3 9.0 7.0 0.0 22.0 0.0 \n", + "4 2.0 3.0 0.0 63.0 0.0 \n", "\n", - "[5 rows x 49 columns]" + "[5 rows x 48 columns]" ] }, - "execution_count": 128, + "execution_count": 195, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Some countries have column values = 'NaN'. These countries are not in Europe according to the dataset,\n", - "# but I choose to include them in the map as 'no data' (i.e. in grey)\n", + "# Note the inner join to drop countries that we do not consider part of 'Europe'\n", + "# (according to the countries.csv file)\n", "yurop = gpd.read_file(\"./datasets/europe.geojson\") \\\n", " .loc[:, ['NAME', 'geometry']] \\\n", " .set_index('NAME') \\\n", - " .join(df_routes_count, how='left') \\\n", - " .reset_index()\n", + " .join(df_routes_count, how='inner') \\\n", + " .reset_index(names='NAME')\n", "\n", "yurop.head()" ] }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 218, "id": "11612845", "metadata": {}, "outputs": [ @@ -2036,11 +2030,11 @@ "data": { "application/vnd.bokehjs_exec.v0+json": "", "text/html": [ - "