Skip to content
Snippets Groups Projects

Python-maps-europe

  • Clone with SSH
  • Clone with HTTPS
  • Embed
  • Share
    The snippet can be accessed without any authentication.
    Authored by Claire.HOFFMANN
    maps_europe.py 15.74 KiB
    import pandas as pd
    import numpy as np
    import geopandas as gpd
    import psycopg2
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
    from matplotlib import font_manager as fm, rcParams
    import os
    import matplotlib.font_manager as font_manager
    import matplotlib.patheffects as patheffects
    
    path_to_data = r"Data_for_Maps.dta"
    data = pd.read_stata(path_to_data)
    
    
    # Region codes don't match the NUTS shapefile. Manual changes:
    data['GEO'] = np.where(data['c']=='EE', 'EST', data['GEO'])
    reg_codes_correction = {'LUX': 'LU00',
                            'ISL': 'IS00',
                            'NLD': 'NL',
                            'CYP': 'CY0',
                            'LVA': 'LV0',
                            'EST': 'EE0',
                            'LTU': 'LT0'}
    data['GEO'] = data['GEO'].map(reg_codes_correction).fillna(data['GEO']) 
    
    
    
    
    ###############################################################################
    # Employment rate of natives for the year 2010 for the three groups of countries (3 maps)
    legend_title1 = "Employment rate of natives, 2010"
    
    df = data[data['t']==2010]
    df = df[['c', 'GEO', 'Native_Employment_rate', 'group_1', 'group_2', 'group_3']]
    df.rename(columns={'Native_Employment_rate': 'VALUE',
                       'c': 'iso2'
                       },
              inplace=True)
    
    labels = ['Lower than 65%', 
              'Between 65% and 70%',
              'Between 70% and 75%',
              'Between 75% and 80%',
              'Higher than 80%']
    df['CATEGORY'] = pd.cut(df['VALUE'],
                            bins=[-np.inf, 65, 70, 75, 80, np.inf],
                            labels=labels)
    df['CATEGORY'] = np.where(df['VALUE'].isnull(), 'No data available', df['CATEGORY'])
    
    colors1 = {
            'Lower than 65%': '#{:02x}{:02x}{:02x}'.format(208, 224, 255), 
            'Between 65% and 70%': '#{:02x}{:02x}{:02x}'.format(160, 192, 224),
            'Between 70% and 75%': '#{:02x}{:02x}{:02x}'.format(128, 144, 192),
            'Between 75% and 80%': '#{:02x}{:02x}{:02x}'.format(80, 112, 176),
            #'Between 80% and 50%': '#{:02x}{:02x}{:02x}'.format(48, 80, 144),
            'Higher than 80%': '#{:02x}{:02x}{:02x}'.format(16, 48, 112),
            'No data available': '#{:02x}{:02x}{:02x}'.format(255, 255, 255)
            }
    
    df1 = df.copy()
    
    
    ###############################################################################
    # Employment rate of natives for the year 2019 for the three groups of countries (3 maps) 
    legend_title2 = "Employment rate of natives, 2019"
    
    df = data[data['t']==2019]
    df = df[['c', 'GEO', 'Native_Employment_rate', 'group_1', 'group_2', 'group_3']]
    df.rename(columns={'Native_Employment_rate': 'VALUE',
                       'c': 'iso2'
                       },
              inplace=True)
    
    labels = ['Lower than 65%', 
              'Between 65% and 70%',
              'Between 70% and 75%',
              'Between 75% and 80%',
              'Higher than 80%']
    df['CATEGORY'] = pd.cut(df['VALUE'],
                            bins=[-np.inf, 65, 70, 75, 80, np.inf],
                            labels=labels)
    df['CATEGORY'] = np.where(df['VALUE'].isnull(), 'No data available', df['CATEGORY'])
    
    colors2 = {
            'Lower than 65%': '#{:02x}{:02x}{:02x}'.format(208, 224, 255), 
            'Between 65% and 70%': '#{:02x}{:02x}{:02x}'.format(160, 192, 224),
            'Between 70% and 75%': '#{:02x}{:02x}{:02x}'.format(128, 144, 192),
            'Between 75% and 80%': '#{:02x}{:02x}{:02x}'.format(80, 112, 176),
            #'Between 80% and 50%': '#{:02x}{:02x}{:02x}'.format(48, 80, 144),
            'Higher than 80%': '#{:02x}{:02x}{:02x}'.format(16, 48, 112),
            'No data available': '#{:02x}{:02x}{:02x}'.format(255, 255, 255)
            }
    
    df2 = df.copy()
    
    
    ###############################################################################
    # Share of immigrants for the year 2010 for the three groups of countries (3 maps)
    legend_title3 = "Share of immigrants, 2010"
    
    df = data[data['t']==2010]
    df = df[['c', 'GEO', 'share_of_immigrants', 'group_1', 'group_2', 'group_3']]
    df.rename(columns={'share_of_immigrants': 'VALUE',
                       'c': 'iso2'
                       },
              inplace=True)
    
    labels = ['Lower than 3%', 
              'Between 3% and 6%',
              'Between 6% and 9%',
              'Between 9% and 12%',
              'Between 12% and 15%',
              'Higher than 15%']
    df['CATEGORY'] = pd.cut(df['VALUE'],
                            bins=[-np.inf, 3, 6, 9, 12, 15, np.inf],
                            labels=labels)
    df['CATEGORY'] = np.where(df['VALUE'].isnull(), 'No data available', df['CATEGORY'])
    
    colors3 = {
            'Lower than 3%': '#{:02x}{:02x}{:02x}'.format(208, 224, 255), 
            'Between 3% and 6%': '#{:02x}{:02x}{:02x}'.format(160, 192, 224),
            'Between 6% and 9%': '#{:02x}{:02x}{:02x}'.format(128, 144, 192),
            'Between 9% and 12%': '#{:02x}{:02x}{:02x}'.format(80, 112, 176),
            'Between 12% and 15%': '#{:02x}{:02x}{:02x}'.format(48, 80, 144),
            'Higher than 15%': '#{:02x}{:02x}{:02x}'.format(16, 48, 112),
            'No data available': '#{:02x}{:02x}{:02x}'.format(255, 255, 255)
            }
    
    df3 = df.copy()
    
    
    ###############################################################################
    # Share of immigrants for the year 2019 for the three groups of countries (3 maps)
    legend_title4 = "Share of immigrants, 2019"
    
    df = data[data['t']==2019]
    df = df[['c', 'GEO', 'share_of_immigrants', 'group_1', 'group_2', 'group_3']]
    df.rename(columns={'share_of_immigrants': 'VALUE',
                       'c': 'iso2'
                       },
              inplace=True)
    
    labels = ['Lower than 3%', 
              'Between 3% and 6%',
              'Between 6% and 9%',
              'Between 9% and 12%',
              'Between 12% and 15%',
              'Higher than 15%']
    df['CATEGORY'] = pd.cut(df['VALUE'],
                            bins=[-np.inf, 3, 6, 9, 12, 15, np.inf],
                            labels=labels)
    df['CATEGORY'] = np.where(df['VALUE'].isnull(), 'No data available', df['CATEGORY'])
    
    colors4 = {
            'Lower than 3%': '#{:02x}{:02x}{:02x}'.format(208, 224, 255), 
            'Between 3% and 6%': '#{:02x}{:02x}{:02x}'.format(160, 192, 224),
            'Between 6% and 9%': '#{:02x}{:02x}{:02x}'.format(128, 144, 192),
            'Between 9% and 12%': '#{:02x}{:02x}{:02x}'.format(80, 112, 176),
            'Between 12% and 15%': '#{:02x}{:02x}{:02x}'.format(48, 80, 144),
            'Higher than 15%': '#{:02x}{:02x}{:02x}'.format(16, 48, 112),
            'No data available': '#{:02x}{:02x}{:02x}'.format(255, 255, 255)
            }
    
    df4 = df.copy()
    
    
    # This dict needs to be filled with the different parameters of each map.
    # The key is the name of the file (the PNG file name).
    # The value is a list of 3 elements:
    # Index 0 is the pandas dataframe containing the data. It must contain the iso3, group_1, group_2, group_3, GEO and CATEGORY fields.
    # Index 1 is the colors dict. The keys must be consitent with the CATEGORY field. Colors are given in RGB.
    # Index 2 is the title of the legend. Use '\n' in the string to split the text into several lines. 
    # Index 3 is the group of country. Each group of country has a specific zoom level, and placement of legend.
    # The three groups of countries are (1) the 13 countries used in our baseline IV (2) EU18 countries and (3) EU28 countries.
    
    MAP_LIST = {
        'Native_Employment_rate_2010_group1': [df1, 
                                           colors1, 
                                           legend_title1,
                                           'group_1'],
        'Native_Employment_rate_2010_group2': [df1, 
                                           colors1, 
                                           legend_title1,
                                           'group_2'],
        'Native_Employment_rate_2010_group3': [df1, 
                                           colors1, 
                                           legend_title1,
                                           'group_3'],
        'Native_Employment_rate_2019_group1': [df2, 
                                           colors2, 
                                           legend_title2,
                                           'group_1'],
        'Native_Employment_rate_2019_group2': [df2, 
                                           colors2, 
                                           legend_title2,
                                           'group_2'],
        'Native_Employment_rate_2019_group3': [df2, 
                                           colors2, 
                                           legend_title2,
                                           'group_3'],
        'share_of_immigrants_2010_group1': [df3, 
                                           colors3, 
                                           legend_title3,
                                           'group_1'],
        'share_of_immigrants_2010_group2': [df3, 
                                           colors3, 
                                           legend_title3,
                                           'group_2'],
        'share_of_immigrants_2010_group3': [df3, 
                                           colors3, 
                                           legend_title3,
                                           'group_3'],
        'share_of_immigrants_2019_group1': [df4, 
                                           colors4, 
                                           legend_title4,
                                           'group_1'],
        'share_of_immigrants_2019_group2': [df4, 
                                           colors4, 
                                           legend_title4,
                                           'group_2'],
        'share_of_immigrants_2019_group3': [df4, 
                                           colors4, 
                                           legend_title4,
                                           'group_3'],
        
        }
    
    
    
    ##############################################################################
    # No changes are required below this line, unless you want to change the style of the map.
    
    # Set the font
    font_path = r"\\main.oecd.org\Homedir2\Hoffmann_C\Desktop\RESSOURCES\arial-narrow.ttf"
    font_bold_path = r"\\main.oecd.org\Homedir2\Hoffmann_C\Desktop\RESSOURCES\arial-narrow-bold.ttf"
    font_name = fm.FontProperties(fname=font_path).get_name()
    prop_legend_title = fm.FontProperties(fname=font_bold_path, size=20)
    prop_legend = fm.FontProperties(fname=font_path, size=20)
            
    regions2016 = gpd.read_file(r"NUTS_RG_20M_2016_4326.geojson") # Downloaded from https://ec.europa.eu/eurostat/fr/web/gisco/geodata/reference-data/administrative-units-statistical-units/nuts
    regions2021 = gpd.read_file(r"NUTS_RG_20M_2021_4326.geojson")
    regions2013 = gpd.read_file(r"NUTS_RG_20M_2013_4326.geojson")
    regions = regions2016.append(regions2021)
    regions = regions.append(regions2013)
    regions = regions.drop_duplicates(subset=['NUTS_ID'])
    
    # Connect to the oecd_tl PostGIS database
    connection = psycopg2.connect(host = "gis.main.oecd.org", 
                                  database = 'oecd_tl', 
                                  user = "Hoffmann_c")
    # Get the geographic files
    background = gpd.GeoDataFrame.from_postgis("SELECT iso3, iso2, geom FROM public.tl1",
                                               connection,
                                               geom_col='geom')
    
    # Reproject layers to an appropriate projection for Europe
    regions = regions.to_crs("EPSG:3035")
    background = background.to_crs("EPSG:3035")
        
    
    
    # Here starts the loop over the dataframes we prepared.
    for file_name, map_elements in MAP_LIST.items(): 
        df = map_elements[0]
        colors_dict = map_elements[1]
        legend_title = map_elements[2]
        country_group = map_elements[3]
        df = df[df[country_group]==1]
        
        # Get the country borders
        borders = gpd.GeoDataFrame.from_postgis("select geom, iso2_left, iso2_right from public.country_boundaries", 
                                                connection, 
                                                geom_col='geom')
        borders['iso2_left'] = np.where(borders['iso2_left']=='GB', 'UK', borders['iso2_left'])
        borders['iso2_right'] = np.where(borders['iso2_right']=='GB', 'UK', borders['iso2_right'])
        borders = borders[(borders['iso2_left'].isin(df['iso2'].unique()))&(borders['iso2_right'].isin(df['iso2'].unique()))]
        borders = borders.to_crs("EPSG:3035")
        
        # Join the data
        gdf = regions.merge(df, left_on='NUTS_ID', right_on='GEO', how='left')
        
        fig, ax = plt.subplots(figsize=(15, 12),
                               facecolor='#{:02x}{:02x}{:02x}'.format(233, 248, 255))
        
        # Plot the grey background
        background.plot(color='#{:02x}{:02x}{:02x}'.format(225, 225, 225),
                        ax=ax,
                        linewidth=0, 
                        edgecolor='white')
        # Plot the countries in the data in white (for missing data)
        missing_regions = background[background['iso2'].isin(df['iso2'].unique())]
        missing_regions.plot(color='white',
                        ax=ax,
                        linewidth=0, 
                        edgecolor='white')
      
        for class_label in colors_dict.keys():
            color =  colors_dict[class_label]
            gdf_part = gdf[gdf['CATEGORY']==class_label]
            gdf_part.plot(color=color,
                          ax=ax,
                          linewidth=0.3, 
                          edgecolor='white')
    
        borders.plot(color='black',
                        ax=ax,
                        linewidth=1)
    
        # Legend
        patches_list = []
        
        for class_label, color in colors_dict.items():
            element_patch = mpatches.Patch(color=color, label=class_label, ec='grey')
            patches_list.append(element_patch)
            
        plt.rcParams['legend.handlelength'] = 1.5
        plt.rcParams['legend.handleheight'] = 1
        
        # The legend location depends on the group of countries
        if country_group == 'group_1':
            legend_bbox_to_anchor = (0, 1)
            legend_loc = 'upper left'
        elif country_group == 'group_2':
            legend_bbox_to_anchor = (1, 1)
            legend_loc = 'upper right'
        elif country_group == 'group_3':
            legend_bbox_to_anchor = (1, 1)
            legend_loc = 'upper right'
        
        ax.legend(handles = patches_list, 
                bbox_to_anchor = legend_bbox_to_anchor,
                loc = legend_loc,
                ncol = 1,
                frameon = True,
                edgecolor = 'grey',
                prop = prop_legend)
                                            
        ax.get_legend()._legend_box.align='left'
        ax.get_legend().set_title(legend_title,
                                            prop=prop_legend_title)
    
        # Drop axis
        plt.axis('off')
        
        # Limit the frame to Europe, but different zoom for each group
        if country_group == 'group_1':
            ax.set_xlim((0.23*1e7, 0.6*1e7))
            ax.set_ylim((0.12*1e7, 0.55*1e7))
            
        if country_group == 'group_2':
            ax.set_xlim((0.23*1e7, 0.68*1e7))
            ax.set_ylim((0.12*1e7, 0.55*1e7))
    
        elif country_group == 'group_3':
            ax.set_xlim((0.23*1e7, 0.68*1e7))
            ax.set_ylim((0.12*1e7, 0.55*1e7))
        
        # Add disclaimer
        textstr = """ 
        This map is for illustrative purposes and is without prejudice to the status\n
        of or sovereignty over any territory covered by this map. Source of \n
        administrative boundaries: National Statistical Offices and FAO Global \n
        Administrative Unit Layers (GAUL).
        """
        props = dict(boxstyle='Square', 
                     facecolor='white', 
                     alpha=0.9,
                     edgecolor='grey')
        
        if country_group == 'group_1':
            x_disclaimer = 0.805
            y_disclaimer = 0.04
    
        elif country_group == 'group_3' or country_group == 'group_2':
            x_disclaimer = 0.84
            y_disclaimer = 0.04
            
        ax.text(
            x_disclaimer,
            y_disclaimer,
            textstr,
            transform=ax.transAxes, 
            fontsize=10,
            linespacing=0.6,
            bbox=props, 
            horizontalalignment='center', 
            verticalalignment='center',
            fontproperties=prop_legend)
        
        
        # Export map as png
        plt.tight_layout()
        plt.savefig(f"{file_name}.png", 
                    facecolor=fig.get_facecolor(),
                    dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.05)
            
    # Close connection to oecd_tl
    connection.close()
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Finish editing this message first!
    Please register or to comment