maps_europe.py

import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import font_manager as fm, rcParams
import os
import matplotlib.font_manager as font_manager
import matplotlib.patheffects as patheffects

path_to_data = r"Data_for_Maps.dta"
data = pd.read_stata(path_to_data)


# Region codes don't match the NUTS shapefile. Manual changes:
data['GEO'] = np.where(data['c']=='EE', 'EST', data['GEO'])
reg_codes_correction = {'LUX': 'LU00',
                        'ISL': 'IS00',
                        'NLD': 'NL',
                        'CYP': 'CY0',
                        'LVA': 'LV0',
                        'EST': 'EE0',
                        'LTU': 'LT0'}
data['GEO'] = data['GEO'].map(reg_codes_correction).fillna(data['GEO'])


###############################################################################
# Employment rate of natives for the year 2010 for the three groups of countries (3 maps)
legend_title1 = "Employment rate of natives, 2010"

df = data[data['t']==2010]
df = df[['c', 'GEO', 'Native_Employment_rate', 'group_1', 'group_2', 'group_3']]
df.rename(columns={'Native_Employment_rate': 'VALUE',
                   'c': 'iso2'
                   },
          inplace=True)

labels = ['Lower than 65%',
          'Between 65% and 70%',
          'Between 70% and 75%',
          'Between 75% and 80%',
          'Higher than 80%']
df['CATEGORY'] = pd.cut(df['VALUE'],
                        bins=[-np.inf, 65, 70, 75, 80, np.inf],
                        labels=labels)
df['CATEGORY'] = np.where(df['VALUE'].isnull(), 'No data available', df['CATEGORY'])

colors1 = {
        'Lower than 65%': '#{:02x}{:02x}{:02x}'.format(208, 224, 255),
        'Between 65% and 70%': '#{:02x}{:02x}{:02x}'.format(160, 192, 224),
        'Between 70% and 75%': '#{:02x}{:02x}{:02x}'.format(128, 144, 192),
        'Between 75% and 80%': '#{:02x}{:02x}{:02x}'.format(80, 112, 176),
        #'Between 80% and 50%': '#{:02x}{:02x}{:02x}'.format(48, 80, 144),
        'Higher than 80%': '#{:02x}{:02x}{:02x}'.format(16, 48, 112),
        'No data available': '#{:02x}{:02x}{:02x}'.format(255, 255, 255)
        }

df1 = df.copy()


###############################################################################
# Employment rate of natives for the year 2019 for the three groups of countries (3 maps)
legend_title2 = "Employment rate of natives, 2019"

df = data[data['t']==2019]
df = df[['c', 'GEO', 'Native_Employment_rate', 'group_1', 'group_2', 'group_3']]
df.rename(columns={'Native_Employment_rate': 'VALUE',
                   'c': 'iso2'
                   },
          inplace=True)

labels = ['Lower than 65%',
          'Between 65% and 70%',
          'Between 70% and 75%',
          'Between 75% and 80%',
          'Higher than 80%']
df['CATEGORY'] = pd.cut(df['VALUE'],
                        bins=[-np.inf, 65, 70, 75, 80, np.inf],
                        labels=labels)
df['CATEGORY'] = np.where(df['VALUE'].isnull(), 'No data available', df['CATEGORY'])

colors2 = {
        'Lower than 65%': '#{:02x}{:02x}{:02x}'.format(208, 224, 255),
        'Between 65% and 70%': '#{:02x}{:02x}{:02x}'.format(160, 192, 224),
        'Between 70% and 75%': '#{:02x}{:02x}{:02x}'.format(128, 144, 192),
        'Between 75% and 80%': '#{:02x}{:02x}{:02x}'.format(80, 112, 176),
        #'Between 80% and 50%': '#{:02x}{:02x}{:02x}'.format(48, 80, 144),
        'Higher than 80%': '#{:02x}{:02x}{:02x}'.format(16, 48, 112),
        'No data available': '#{:02x}{:02x}{:02x}'.format(255, 255, 255)
        }

df2 = df.copy()


###############################################################################
# Share of immigrants for the year 2010 for the three groups of countries (3 maps)
legend_title3 = "Share of immigrants, 2010"

df = data[data['t']==2010]
df = df[['c', 'GEO', 'share_of_immigrants', 'group_1', 'group_2', 'group_3']]
df.rename(columns={'share_of_immigrants': 'VALUE',
                   'c': 'iso2'
                   },
          inplace=True)

labels = ['Lower than 3%',
          'Between 3% and 6%',
          'Between 6% and 9%',
          'Between 9% and 12%',
          'Between 12% and 15%',
          'Higher than 15%']
df['CATEGORY'] = pd.cut(df['VALUE'],
                        bins=[-np.inf, 3, 6, 9, 12, 15, np.inf],
                        labels=labels)
df['CATEGORY'] = np.where(df['VALUE'].isnull(), 'No data available', df['CATEGORY'])

colors3 = {
        'Lower than 3%': '#{:02x}{:02x}{:02x}'.format(208, 224, 255),
        'Between 3% and 6%': '#{:02x}{:02x}{:02x}'.format(160, 192, 224),
        'Between 6% and 9%': '#{:02x}{:02x}{:02x}'.format(128, 144, 192),
        'Between 9% and 12%': '#{:02x}{:02x}{:02x}'.format(80, 112, 176),
        'Between 12% and 15%': '#{:02x}{:02x}{:02x}'.format(48, 80, 144),
        'Higher than 15%': '#{:02x}{:02x}{:02x}'.format(16, 48, 112),
        'No data available': '#{:02x}{:02x}{:02x}'.format(255, 255, 255)
        }

df3 = df.copy()


###############################################################################
# Share of immigrants for the year 2019 for the three groups of countries (3 maps)
legend_title4 = "Share of immigrants, 2019"

df = data[data['t']==2019]
df = df[['c', 'GEO', 'share_of_immigrants', 'group_1', 'group_2', 'group_3']]
df.rename(columns={'share_of_immigrants': 'VALUE',
                   'c': 'iso2'
                   },
          inplace=True)

labels = ['Lower than 3%',
          'Between 3% and 6%',
          'Between 6% and 9%',
          'Between 9% and 12%',
          'Between 12% and 15%',
          'Higher than 15%']
df['CATEGORY'] = pd.cut(df['VALUE'],
                        bins=[-np.inf, 3, 6, 9, 12, 15, np.inf],
                        labels=labels)
df['CATEGORY'] = np.where(df['VALUE'].isnull(), 'No data available', df['CATEGORY'])

colors4 = {
        'Lower than 3%': '#{:02x}{:02x}{:02x}'.format(208, 224, 255),
        'Between 3% and 6%': '#{:02x}{:02x}{:02x}'.format(160, 192, 224),
        'Between 6% and 9%': '#{:02x}{:02x}{:02x}'.format(128, 144, 192),
        'Between 9% and 12%': '#{:02x}{:02x}{:02x}'.format(80, 112, 176),
        'Between 12% and 15%': '#{:02x}{:02x}{:02x}'.format(48, 80, 144),
        'Higher than 15%': '#{:02x}{:02x}{:02x}'.format(16, 48, 112),
        'No data available': '#{:02x}{:02x}{:02x}'.format(255, 255, 255)
        }

df4 = df.copy()


# This dict needs to be filled with the different parameters of each map.
# The key is the name of the file (the PNG file name).
# The value is a list of 3 elements:
# Index 0 is the pandas dataframe containing the data. It must contain the iso3, group_1, group_2, group_3, GEO and CATEGORY fields.
# Index 1 is the colors dict. The keys must be consitent with the CATEGORY field. Colors are given in RGB.
# Index 2 is the title of the legend. Use '\n' in the string to split the text into several lines.
# Index 3 is the group of country. Each group of country has a specific zoom level, and placement of legend.
# The three groups of countries are (1) the 13 countries used in our baseline IV (2) EU18 countries and (3) EU28 countries.

MAP_LIST = {
    'Native_Employment_rate_2010_group1': [df1,
                                       colors1,
                                       legend_title1,
                                       'group_1'],
    'Native_Employment_rate_2010_group2': [df1,
                                       colors1,
                                       legend_title1,
                                       'group_2'],
    'Native_Employment_rate_2010_group3': [df1,
                                       colors1,
                                       legend_title1,
                                       'group_3'],
    'Native_Employment_rate_2019_group1': [df2,
                                       colors2,
                                       legend_title2,
                                       'group_1'],
    'Native_Employment_rate_2019_group2': [df2,
                                       colors2,
                                       legend_title2,
                                       'group_2'],
    'Native_Employment_rate_2019_group3': [df2,
                                       colors2,
                                       legend_title2,
                                       'group_3'],
    'share_of_immigrants_2010_group1': [df3,
                                       colors3,
                                       legend_title3,
                                       'group_1'],
    'share_of_immigrants_2010_group2': [df3,
                                       colors3,
                                       legend_title3,
                                       'group_2'],
    'share_of_immigrants_2010_group3': [df3,
                                       colors3,
                                       legend_title3,
                                       'group_3'],
    'share_of_immigrants_2019_group1': [df4,
                                       colors4,
                                       legend_title4,
                                       'group_1'],
    'share_of_immigrants_2019_group2': [df4,
                                       colors4,
                                       legend_title4,
                                       'group_2'],
    'share_of_immigrants_2019_group3': [df4,
                                       colors4,
                                       legend_title4,
                                       'group_3'],

    }


##############################################################################
# No changes are required below this line, unless you want to change the style of the map.

# Set the font
font_path = r"\\main.oecd.org\Homedir2\Hoffmann_C\Desktop\RESSOURCES\arial-narrow.ttf"
font_bold_path = r"\\main.oecd.org\Homedir2\Hoffmann_C\Desktop\RESSOURCES\arial-narrow-bold.ttf"
font_name = fm.FontProperties(fname=font_path).get_name()
prop_legend_title = fm.FontProperties(fname=font_bold_path, size=20)
prop_legend = fm.FontProperties(fname=font_path, size=20)

regions2016 = gpd.read_file(r"NUTS_RG_20M_2016_4326.geojson") # Downloaded from https://ec.europa.eu/eurostat/fr/web/gisco/geodata/reference-data/administrative-units-statistical-units/nuts
regions2021 = gpd.read_file(r"NUTS_RG_20M_2021_4326.geojson")
regions2013 = gpd.read_file(r"NUTS_RG_20M_2013_4326.geojson")
regions = regions2016.append(regions2021)
regions = regions.append(regions2013)
regions = regions.drop_duplicates(subset=['NUTS_ID'])

# Connect to the oecd_tl PostGIS database
connection = psycopg2.connect(host = "gis.main.oecd.org",
                              database = 'oecd_tl',
                              user = "Hoffmann_c")
# Get the geographic files
background = gpd.GeoDataFrame.from_postgis("SELECT iso3, iso2, geom FROM public.tl1",
                                           connection,
                                           geom_col='geom')

# Reproject layers to an appropriate projection for Europe
regions = regions.to_crs("EPSG:3035")
background = background.to_crs("EPSG:3035")


# Here starts the loop over the dataframes we prepared.
for file_name, map_elements in MAP_LIST.items():
    df = map_elements[0]
    colors_dict = map_elements[1]
    legend_title = map_elements[2]
    country_group = map_elements[3]
    df = df[df[country_group]==1]

    # Get the country borders
    borders = gpd.GeoDataFrame.from_postgis("select geom, iso2_left, iso2_right from public.country_boundaries",
                                            connection,
                                            geom_col='geom')
    borders['iso2_left'] = np.where(borders['iso2_left']=='GB', 'UK', borders['iso2_left'])
    borders['iso2_right'] = np.where(borders['iso2_right']=='GB', 'UK', borders['iso2_right'])
    borders = borders[(borders['iso2_left'].isin(df['iso2'].unique()))&(borders['iso2_right'].isin(df['iso2'].unique()))]
    borders = borders.to_crs("EPSG:3035")

    # Join the data
    gdf = regions.merge(df, left_on='NUTS_ID', right_on='GEO', how='left')

    fig, ax = plt.subplots(figsize=(15, 12),
                           facecolor='#{:02x}{:02x}{:02x}'.format(233, 248, 255))

    # Plot the grey background
    background.plot(color='#{:02x}{:02x}{:02x}'.format(225, 225, 225),
                    ax=ax,
                    linewidth=0,
                    edgecolor='white')
    # Plot the countries in the data in white (for missing data)
    missing_regions = background[background['iso2'].isin(df['iso2'].unique())]
    missing_regions.plot(color='white',
                    ax=ax,
                    linewidth=0,
                    edgecolor='white')

    for class_label in colors_dict.keys():
        color =  colors_dict[class_label]
        gdf_part = gdf[gdf['CATEGORY']==class_label]
        gdf_part.plot(color=color,
                      ax=ax,
                      linewidth=0.3,
                      edgecolor='white')

    borders.plot(color='black',
                    ax=ax,
                    linewidth=1)

    # Legend
    patches_list = []

    for class_label, color in colors_dict.items():
        element_patch = mpatches.Patch(color=color, label=class_label, ec='grey')
        patches_list.append(element_patch)

    plt.rcParams['legend.handlelength'] = 1.5
    plt.rcParams['legend.handleheight'] = 1

    # The legend location depends on the group of countries
    if country_group == 'group_1':
        legend_bbox_to_anchor = (0, 1)
        legend_loc = 'upper left'
    elif country_group == 'group_2':
        legend_bbox_to_anchor = (1, 1)
        legend_loc = 'upper right'
    elif country_group == 'group_3':
        legend_bbox_to_anchor = (1, 1)
        legend_loc = 'upper right'

    ax.legend(handles = patches_list,
            bbox_to_anchor = legend_bbox_to_anchor,
            loc = legend_loc,
            ncol = 1,
            frameon = True,
            edgecolor = 'grey',
            prop = prop_legend)

    ax.get_legend()._legend_box.align='left'
    ax.get_legend().set_title(legend_title,
                                        prop=prop_legend_title)

    # Drop axis
    plt.axis('off')

    # Limit the frame to Europe, but different zoom for each group
    if country_group == 'group_1':
        ax.set_xlim((0.23*1e7, 0.6*1e7))
        ax.set_ylim((0.12*1e7, 0.55*1e7))

    if country_group == 'group_2':
        ax.set_xlim((0.23*1e7, 0.68*1e7))
        ax.set_ylim((0.12*1e7, 0.55*1e7))

    elif country_group == 'group_3':
        ax.set_xlim((0.23*1e7, 0.68*1e7))
        ax.set_ylim((0.12*1e7, 0.55*1e7))

    # Add disclaimer
    textstr = """
    This map is for illustrative purposes and is without prejudice to the status\n
    of or sovereignty over any territory covered by this map. Source of \n
    administrative boundaries: National Statistical Offices and FAO Global \n
    Administrative Unit Layers (GAUL).
    """
    props = dict(boxstyle='Square',
                 facecolor='white',
                 alpha=0.9,
                 edgecolor='grey')

    if country_group == 'group_1':
        x_disclaimer = 0.805
        y_disclaimer = 0.04

    elif country_group == 'group_3' or country_group == 'group_2':
        x_disclaimer = 0.84
        y_disclaimer = 0.04

    ax.text(
        x_disclaimer,
        y_disclaimer,
        textstr,
        transform=ax.transAxes,
        fontsize=10,
        linespacing=0.6,
        bbox=props,
        horizontalalignment='center',
        verticalalignment='center',
        fontproperties=prop_legend)


    # Export map as png
    plt.tight_layout()
    plt.savefig(f"{file_name}.png",
                facecolor=fig.get_facecolor(),
                dpi=300,
                bbox_inches='tight',
                pad_inches=0.05)

# Close connection to oecd_tl
connection.close()