diff --git a/analysis/bootstrapping.py b/analysis/bootstrapping.py
index 0544e2d1..f042052c 100644
--- a/analysis/bootstrapping.py
+++ b/analysis/bootstrapping.py
@@ -2,10 +2,12 @@
 import copy
 import analysis.statistics
 
+
 def get_seeds(number_of_seeds):
     return np.arange(1, number_of_seeds + 1) * 1000
 
-def configure(context, stage, sample_size, parameters = {}, alias = None, ephemeral = True):
+
+def configure(context, stage, sample_size, parameters={}, alias=None, ephemeral=True):
     if alias is None:
         alias = stage
 
@@ -15,11 +17,18 @@ def configure(context, stage, sample_size, parameters = {}, alias = None, epheme
         sample_parameters = copy.copy(parameters)
         sample_parameters["random_seed"] = int(random_seed)
 
-        context.stage(stage, sample_parameters, alias = "bootstrap_%s_%d" % (alias, index), ephemeral = ephemeral)
+        context.stage(
+            stage,
+            sample_parameters,
+            alias="bootstrap_%s_%d" % (alias, index),
+            ephemeral=ephemeral,
+        )
+
 
 def get_stage(context, alias, index):
     return context.stage("bootstrap_%s_%d" % (alias, index))
 
+
 def get_stages(context, alias, sample_size):
     for index in range(sample_size):
         yield get_stage(context, alias, index)
diff --git a/analysis/chains.py b/analysis/chains.py
index 886bd765..66a809d6 100644
--- a/analysis/chains.py
+++ b/analysis/chains.py
@@ -9,22 +9,30 @@
     ("chain", "sex"),
     ("chain_length_class", "age_class"),
     ("chain_length_class", "sex"),
-    ("chain",), ("chain_length_class",),
+    ("chain",),
+    ("chain_length_class",),
     ("age_range", "sex", "chain"),
-    ("age_range", "sex", "chain_length_class")
+    ("age_range", "sex", "chain_length_class"),
 ]
 
 PURPOSE_MAPPING = {
-    "home": "h", "work": "w", "education": "e",
-    "shop": "s", "leisure": "l", "other": "o"
+    "home": "h",
+    "work": "w",
+    "education": "e",
+    "shop": "s",
+    "leisure": "l",
+    "other": "o",
 }
 
+
 def aggregate_chains(df_chains):
     current_person_id = None
     current_chain = None
 
     records = []
-    for person_id, purpose in zip(df_chains["person_id"].values, df_chains["purpose"].values):
+    for person_id, purpose in zip(
+        df_chains["person_id"].values, df_chains["purpose"].values
+    ):
         if not person_id == current_person_id:
             if not current_person_id is None:
                 records.append((current_person_id, current_chain))
@@ -36,11 +44,11 @@ def aggregate_chains(df_chains):
 
     records.append((current_person_id, current_chain))
 
-    df_chains = pd.DataFrame.from_records(records, columns = ["person_id", "chain"])
+    df_chains = pd.DataFrame.from_records(records, columns=["person_id", "chain"])
 
-    #df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"w+", "w", x))
-    #df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"e+", "e", x))
-    #df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"h+", "h", x))
+    # df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"w+", "w", x))
+    # df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"e+", "e", x))
+    # df_chains["chain"] = df_chains["chain"].apply(lambda x: re.sub(r"h+", "h", x))
 
     df_chains["chain_length"] = df_chains["chain"].str.len()
 
diff --git a/analysis/debug/sc.py b/analysis/debug/sc.py
index 2f73f125..8bbeeaab 100644
--- a/analysis/debug/sc.py
+++ b/analysis/debug/sc.py
@@ -1,11 +1,13 @@
 import numpy as np
 import pandas as pd
 
+
 def configure(context):
-    context.stage("data.census.filtered", alias = "census")
-    context.stage("data.hts.selected", alias = "hts")
+    context.stage("data.census.filtered", alias="census")
+    context.stage("data.hts.selected", alias="hts")
     context.config("output_path")
 
+
 def execute(context):
     df_census = 
context.stage("census") df_hts = context.stage("hts")[1] @@ -19,14 +21,16 @@ def execute(context): f_census = df_census["socioprofessional_class"] == value f_hts = df_hts["socioprofessional_class"] == value - df_output.append({ - "value": value, - "census_count": np.count_nonzero(f_census), - "hts_count": np.count_nonzero(f_hts), - "census_weight": df_census[f_census]["weight"].sum(), - "hts_weight": df_hts[f_hts]["person_weight"].sum() - }) + df_output.append( + { + "value": value, + "census_count": np.count_nonzero(f_census), + "hts_count": np.count_nonzero(f_hts), + "census_weight": df_census[f_census]["weight"].sum(), + "hts_weight": df_hts[f_hts]["person_weight"].sum(), + } + ) pd.DataFrame.from_records(df_output).to_csv( - "{}/debug_sc.csv".format(context.config("output_path")), - sep = ";", index = False) + "{}/debug_sc.csv".format(context.config("output_path")), sep=";", index=False + ) diff --git a/analysis/grid/comparison_flow_volume.py b/analysis/grid/comparison_flow_volume.py index b2506ea1..4201d3d6 100644 --- a/analysis/grid/comparison_flow_volume.py +++ b/analysis/grid/comparison_flow_volume.py @@ -1,116 +1,268 @@ import pandas as pd import geopandas as gpd -import plotly.express as px +import plotly.express as px SAMPLING_RATE = 0.05 + def configure(context): - if not context.config("analysis_from_file",False) : + if not context.config("analysis_from_file", False): context.stage("synthesis.population.trips") context.stage("synthesis.population.spatial.locations") context.stage("synthesis.population.enriched") context.stage("data.spatial.departments") - context.config("comparison_file_prefix",None) + context.config("comparison_file_prefix", None) context.config("output_prefix", "ile_de_france_") context.config("output_formats", ["csv", "gpkg"]) context.config("output_path") context.config("data_path") -def stat_grid(df_trips,df_locations,df_persons,df_grid): - + +def stat_grid(df_trips, df_locations, df_persons, df_grid): + # Write spatial trips - df_spatial = pd.merge(df_trips, df_locations[[ - "person_id", "activity_index", "geometry" - ]].rename(columns = { - "activity_index": "following_activity_index", - }), how = "left", on = ["person_id", "following_activity_index"]) - df_spatial = pd.merge(df_spatial,df_persons,how = "left", on = ["person_id",]) - df_spatial = gpd.GeoDataFrame(df_spatial, crs = "EPSG:2154").to_crs("4326") - - df_stats = gpd.sjoin(df_grid,df_spatial,how="left") - return df_stats[['id_carr_1km', 'geometry','person_id', 'following_purpose', 'household_id', 'age']] + df_spatial = pd.merge( + df_trips, + df_locations[["person_id", "activity_index", "geometry"]].rename( + columns={ + "activity_index": "following_activity_index", + } + ), + how="left", + on=["person_id", "following_activity_index"], + ) + df_spatial = pd.merge( + df_spatial, + df_persons, + how="left", + on=[ + "person_id", + ], + ) + df_spatial = gpd.GeoDataFrame(df_spatial, crs="EPSG:2154").to_crs("4326") + + df_stats = gpd.sjoin(df_grid, df_spatial, how="left") + return df_stats[ + [ + "id_carr_1km", + "geometry", + "person_id", + "following_purpose", + "household_id", + "age", + ] + ] + + def execute(context): - + figures = { - "Yrs:0-10":{"min_age": 0, "max_age": 10,}, - "Yrs:11-14":{"min_age": 11, "max_age": 14,}, - "Yrs:15-18":{"min_age": 15, "max_age": 17,}, - "Yrs:18-25":{"min_age": 18, "max_age": 25,}, - "Yrs:25-50":{"min_age": 26, "max_age": 50,}, - "Yrs:50-65":{"min_age": 51, "max_age": 65,}, - "Yrs:65-75":{"min_age": 66, "max_age": 75,}, - "Yrs:75+":{"min_age": 76, "max_age": 
110,},} - comparison_file = context.config("output_prefix") if context.config("comparison_file_prefix") is None else context.config("comparison_file_prefix") - + "Yrs:0-10": { + "min_age": 0, + "max_age": 10, + }, + "Yrs:11-14": { + "min_age": 11, + "max_age": 14, + }, + "Yrs:15-18": { + "min_age": 15, + "max_age": 17, + }, + "Yrs:18-25": { + "min_age": 18, + "max_age": 25, + }, + "Yrs:25-50": { + "min_age": 26, + "max_age": 50, + }, + "Yrs:50-65": { + "min_age": 51, + "max_age": 65, + }, + "Yrs:65-75": { + "min_age": 66, + "max_age": 75, + }, + "Yrs:75+": { + "min_age": 76, + "max_age": 110, + }, + } + comparison_file = ( + context.config("output_prefix") + if context.config("comparison_file_prefix") is None + else context.config("comparison_file_prefix") + ) + if not context.config("analysis_from_file"): print("Récupération simu données ...") # from simulation cache df_trips = context.stage("synthesis.population.trips") - df_persons = context.stage("synthesis.population.enriched")[["person_id", "household_id","age"]] - df_locations = context.stage("synthesis.population.spatial.locations")[[ - "person_id", "activity_index", "geometry" - ]] + df_persons = context.stage("synthesis.population.enriched")[ + ["person_id", "household_id", "age"] + ] + df_locations = context.stage("synthesis.population.spatial.locations")[ + ["person_id", "activity_index", "geometry"] + ] df_trips["preceding_activity_index"] = df_trips["trip_index"] df_trips["following_activity_index"] = df_trips["trip_index"] + 1 - else : + else: # from file trips, activites and person print("Récupération données ...") - df_trips = pd.read_csv(f'{context.config("output_path")}/{context.config("output_prefix")}trips.csv',sep=';')[["person_id","trip_index" ,"following_activity_index","following_purpose"]] - df_locations = gpd.read_parquet(f'{context.config("output_path")}/{context.config("output_prefix")}activities.geoparquet') if "geoparquet" in context.config("output_formats") else gpd.read_file(f'{context.config("output_path")}/{context.config("output_prefix")}activities.gpkg') - df_persons = pd.read_csv(f'{context.config("output_path")}/{context.config("output_prefix")}persons.csv',sep=';')[["person_id", "household_id","age"]] + df_trips = pd.read_csv( + f'{context.config("output_path")}/{context.config("output_prefix")}trips.csv', + sep=";", + )[["person_id", "trip_index", "following_activity_index", "following_purpose"]] + df_locations = ( + gpd.read_parquet( + f'{context.config("output_path")}/{context.config("output_prefix")}activities.geoparquet' + ) + if "geoparquet" in context.config("output_formats") + else gpd.read_file( + f'{context.config("output_path")}/{context.config("output_prefix")}activities.gpkg' + ) + ) + df_persons = pd.read_csv( + f'{context.config("output_path")}/{context.config("output_prefix")}persons.csv', + sep=";", + )[["person_id", "household_id", "age"]] print("Récupération comp données ...") - df_trips_comp = pd.read_csv(f'{context.config("output_path")}/{comparison_file}trips.csv',sep=';')[["person_id","trip_index" ,"following_activity_index","following_purpose"]] - df_locations_comp = gpd.read_parquet(f'{context.config("output_path")}/{comparison_file}activities.geoparquet') if "geoparquet" in context.config("output_formats") else gpd.read_file(f'{context.config("output_path")}/{comparison_file}activities.gpkg') - df_persons_comp = pd.read_csv(f'{context.config("output_path")}/{comparison_file}persons.csv',sep=';')[["person_id", "household_id","age"]] - + df_trips_comp = pd.read_csv( + 
f'{context.config("output_path")}/{comparison_file}trips.csv', sep=";" + )[["person_id", "trip_index", "following_activity_index", "following_purpose"]] + df_locations_comp = ( + gpd.read_parquet( + f'{context.config("output_path")}/{comparison_file}activities.geoparquet' + ) + if "geoparquet" in context.config("output_formats") + else gpd.read_file( + f'{context.config("output_path")}/{comparison_file}activities.gpkg' + ) + ) + df_persons_comp = pd.read_csv( + f'{context.config("output_path")}/{comparison_file}persons.csv', sep=";" + )[["person_id", "household_id", "age"]] + list_purpose = list(df_trips["following_purpose"].unique()) # grid 1km of location data df_departments = context.stage("data.spatial.departments") poly_dep = df_departments.unary_union df_grids = gpd.read_file( - f'{context.config("data_path")}/grid/grille200m_metropole.gpkg', - mask=poly_dep, - ) + f'{context.config("data_path")}/grid/grille200m_metropole.gpkg', + mask=poly_dep, + ) df_grids = df_grids.to_crs("4326") - df_grid = df_grids[["id_carr_1km","geometry"]].dissolve(by="id_carr_1km").reset_index() + df_grid = ( + df_grids[["id_carr_1km", "geometry"]].dissolve(by="id_carr_1km").reset_index() + ) - df_stats = stat_grid(df_trips,df_locations,df_persons,df_grid) - df_grids = stat_grid(df_trips_comp,df_locations_comp,df_persons_comp,df_grid) - point = df_grid.unary_union.centroid # a changé avec ploy_dep + df_stats = stat_grid(df_trips, df_locations, df_persons, df_grid) + df_grids = stat_grid(df_trips_comp, df_locations_comp, df_persons_comp, df_grid) + point = df_grid.unary_union.centroid # a changé avec ploy_dep print("Printing grids...") for prefix, figure in figures.items(): - df_select_age = df_stats[df_stats["age"].between(figure["min_age"],figure["max_age"])] - df_select_age = df_select_age.dissolve(by=["id_carr_1km","following_purpose"],aggfunc="count").reset_index() + df_select_age = df_stats[ + df_stats["age"].between(figure["min_age"], figure["max_age"]) + ] + df_select_age = df_select_age.dissolve( + by=["id_carr_1km", "following_purpose"], aggfunc="count" + ).reset_index() df_select_age = df_select_age[~(df_select_age["geometry"].isna())] - df_select_age["following_purpose"] = df_select_age["following_purpose"].astype('str') + df_select_age["following_purpose"] = df_select_age["following_purpose"].astype( + "str" + ) - df_grids_age = df_grids[df_grids["age"].between(figure["min_age"],figure["max_age"])] - df_grids_age = df_grids_age.dissolve(by=["id_carr_1km","following_purpose"],aggfunc="count").reset_index() + df_grids_age = df_grids[ + df_grids["age"].between(figure["min_age"], figure["max_age"]) + ] + df_grids_age = df_grids_age.dissolve( + by=["id_carr_1km", "following_purpose"], aggfunc="count" + ).reset_index() df_grids_age = df_grids_age[~(df_grids_age["geometry"].isna())] - df_grids_age["following_purpose"] = df_grids_age["following_purpose"].astype('str') - - for purpose in list_purpose : - df_select = df_select_age[df_select_age["following_purpose"]==purpose].rename(columns={"person_id":"count"}) - df_grids_select = df_grids_age[df_grids_age["following_purpose"]==purpose].rename(columns={"person_id":"count"}) - if context.config("output_prefix") == comparison_file : - df_select = gpd.sjoin(df_select,df_grid,how='right',predicate="contains").fillna(0) - df_select = df_select[df_select["count"] != 0] - fig = px.choropleth_mapbox(df_select,geojson=df_select.geometry,locations=df_select.index,color="count", opacity= 0.7,color_continuous_scale='reds', - mapbox_style = 
'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Localisation flow distribution for {prefix} group with {purpose} purpose") - fig.write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html') - else : - df_grids_select = gpd.sjoin(df_grids_select,df_grid,how='right',predicate="contains").fillna(0) - df_select = gpd.sjoin(df_select,df_grids_select.drop(columns=[ 'index_left']),how='right',predicate="contains").rename(columns={"count_left":"volume_studied_simu","count_right":"volume_compared_simu"}).fillna(0) - df_select["volume_difference"] = df_select["volume_studied_simu"] - df_select["volume_compared_simu"] - df_select = df_select[(df_select["volume_studied_simu"] != 0 )| (df_select["volume_compared_simu"] != 0)] - df_select["pourcentage_vol"] = df_select["volume_difference"] / df_select["volume_compared_simu"] - px.choropleth_mapbox(df_select,geojson=df_select.geometry,locations=df_select.index,color="volume_difference", opacity= 0.7,color_continuous_scale="picnic", color_continuous_midpoint= 0,hover_name="id_carr_1km_right", hover_data=["volume_studied_simu", "volume_compared_simu","pourcentage_vol"], - mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose").write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html') - - \ No newline at end of file + df_grids_age["following_purpose"] = df_grids_age["following_purpose"].astype( + "str" + ) + + for purpose in list_purpose: + df_select = df_select_age[ + df_select_age["following_purpose"] == purpose + ].rename(columns={"person_id": "count"}) + df_grids_select = df_grids_age[ + df_grids_age["following_purpose"] == purpose + ].rename(columns={"person_id": "count"}) + if context.config("output_prefix") == comparison_file: + df_select = gpd.sjoin( + df_select, df_grid, how="right", predicate="contains" + ).fillna(0) + df_select = df_select[df_select["count"] != 0] + fig = px.choropleth_mapbox( + df_select, + geojson=df_select.geometry, + locations=df_select.index, + color="count", + opacity=0.7, + color_continuous_scale="reds", + mapbox_style="open-street-map", + center=dict(lat=point.y, lon=point.x), + title=f"Localisation flow distribution for {prefix} group with {purpose} purpose", + ) + fig.write_html( + f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html' + ) + else: + df_grids_select = gpd.sjoin( + df_grids_select, df_grid, how="right", predicate="contains" + ).fillna(0) + df_select = ( + gpd.sjoin( + df_select, + df_grids_select.drop(columns=["index_left"]), + how="right", + predicate="contains", + ) + .rename( + columns={ + "count_left": "volume_studied_simu", + "count_right": "volume_compared_simu", + } + ) + .fillna(0) + ) + df_select["volume_difference"] = ( + df_select["volume_studied_simu"] - df_select["volume_compared_simu"] + ) + df_select = df_select[ + (df_select["volume_studied_simu"] != 0) + | (df_select["volume_compared_simu"] != 0) + ] + df_select["pourcentage_vol"] = ( + df_select["volume_difference"] / df_select["volume_compared_simu"] + ) + px.choropleth_mapbox( + df_select, + geojson=df_select.geometry, + locations=df_select.index, + color="volume_difference", + opacity=0.7, + color_continuous_scale="picnic", + color_continuous_midpoint=0, + hover_name="id_carr_1km_right", + hover_data=[ + "volume_studied_simu", + "volume_compared_simu", + "pourcentage_vol", + 
], + mapbox_style="open-street-map", + center=dict(lat=point.y, lon=point.x), + title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose", + ).write_html( + f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html' + ) diff --git a/analysis/marginals.py b/analysis/marginals.py index 98baf359..7e78720f 100644 --- a/analysis/marginals.py +++ b/analysis/marginals.py @@ -20,25 +20,44 @@ CENSUS_PERSON_MARGINALS = GENERAL_PERSON_MARGINALS + [("socioprofessional_class",)] CENSUS_HOUSEHOLD_MARGINALS = GENERAL_HOUSEHOLD_MARGINALS -HTS_PERSON_MARGINALS = GENERAL_PERSON_MARGINALS + [("has_license",), ("has_pt_subscription",)] +HTS_PERSON_MARGINALS = GENERAL_PERSON_MARGINALS + [ + ("has_license",), + ("has_pt_subscription",), +] HTS_HOUSEHOLD_MARGINALS = GENERAL_HOUSEHOLD_MARGINALS + [("number_of_bikes_class",)] SOCIOPROFESIONAL_CLASS_LABELS = [ - "???", "Agriculture", "Independent", "Science", "Intermediate", "Employee", "Worker", "Retired", "Other" + "???", + "Agriculture", + "Independent", + "Science", + "Intermediate", + "Employee", + "Worker", + "Retired", + "Other", ] + def prepare_classes(df): if "age" in df: - df["age_class"] = np.digitize(df["age"], AGE_CLASS_BOUNDS, right = True) + df["age_class"] = np.digitize(df["age"], AGE_CLASS_BOUNDS, right=True) if "household_size" in df: - df["household_size_class"] = np.digitize(df["household_size"], HOUSEHOLD_SIZE_BOUNDS, right = True) + df["household_size_class"] = np.digitize( + df["household_size"], HOUSEHOLD_SIZE_BOUNDS, right=True + ) if "number_of_vehicles" in df: - df["number_of_vehicles_class"] = np.digitize(df["number_of_vehicles"], NUMBER_OF_VEHICLES_BOUNDS, right = True) + df["number_of_vehicles_class"] = np.digitize( + df["number_of_vehicles"], NUMBER_OF_VEHICLES_BOUNDS, right=True + ) if "number_of_bikes" in df: - df["number_of_bikes_class"] = np.digitize(df["number_of_bikes"], NUMBER_OF_BIKES_BOUNDS, right = True) + df["number_of_bikes_class"] = np.digitize( + df["number_of_bikes"], NUMBER_OF_BIKES_BOUNDS, right=True + ) + def cross(*marginals): result = [] @@ -56,6 +75,7 @@ def cross(*marginals): return list(set(result)) + def combine(*marginals): result = [] @@ -64,21 +84,22 @@ def combine(*marginals): return list(set(result)) + ALL_PERSON_MARGINALS = combine(CENSUS_PERSON_MARGINALS, HTS_PERSON_MARGINALS) ALL_HOUSEHOLD_MARGINALS = combine(CENSUS_HOUSEHOLD_MARGINALS, HTS_HOUSEHOLD_MARGINALS) SPATIAL_MARGINALS = [("departement_id",), ("commune_id",)] ANALYSIS_PERSON_MARGINALS = combine( - ALL_PERSON_MARGINALS, ALL_HOUSEHOLD_MARGINALS, + ALL_PERSON_MARGINALS, + ALL_HOUSEHOLD_MARGINALS, cross(ALL_PERSON_MARGINALS, ALL_PERSON_MARGINALS), cross(ALL_HOUSEHOLD_MARGINALS, ALL_HOUSEHOLD_MARGINALS), - cross(ALL_PERSON_MARGINALS, ALL_HOUSEHOLD_MARGINALS) + cross(ALL_PERSON_MARGINALS, ALL_HOUSEHOLD_MARGINALS), ) ANALYSIS_HOUSEHOLD_MARGINALS = combine( - ALL_HOUSEHOLD_MARGINALS, - cross(ALL_HOUSEHOLD_MARGINALS, ALL_HOUSEHOLD_MARGINALS) + ALL_HOUSEHOLD_MARGINALS, cross(ALL_HOUSEHOLD_MARGINALS, ALL_HOUSEHOLD_MARGINALS) ) SPATIAL_PERSON_MARGINALS = combine( diff --git a/analysis/methods/income/compare_methods.py b/analysis/methods/income/compare_methods.py index d8573521..f4f545cd 100644 --- a/analysis/methods/income/compare_methods.py +++ b/analysis/methods/income/compare_methods.py @@ -29,10 +29,17 @@ def execute(context): df_population = add_household_type_attribute(df_population) # get most populated commune - commune_id = df_population.groupby(["commune_id"], 
observed=True)["commune_id"].count().drop("undefined").idxmax() + commune_id = ( + df_population.groupby(["commune_id"], observed=True)["commune_id"] + .count() + .drop("undefined") + .idxmax() + ) # get income distributions by attributes - income_df = context.stage("data.income.municipality").query(f"commune_id == '{commune_id}'") + income_df = context.stage("data.income.municipality").query( + f"commune_id == '{commune_id}'" + ) income_df = income_df.rename( columns={ "value": "modality", @@ -48,22 +55,22 @@ def execute(context): } ) - households_with_attributes = df_population[[ - "household_id", "commune_id", "size", "family_comp" - ]].drop_duplicates("household_id") + households_with_attributes = df_population[ + ["household_id", "commune_id", "size", "family_comp"] + ].drop_duplicates("household_id") # get enriched population with different methods uniform_pop_df = context.stage("uniform") uniform_pop_df = uniform_pop_df.merge(households_with_attributes, on="household_id") uniform_pop_df["household_income"] = ( - uniform_pop_df["household_income"] * 12 / uniform_pop_df["consumption_units"] + uniform_pop_df["household_income"] * 12 / uniform_pop_df["consumption_units"] ) uniform_pop_df = uniform_pop_df.query(f"commune_id == '{commune_id}'") bhepop2_pop_df = context.stage("bhepop2") bhepop2_pop_df = bhepop2_pop_df.merge(households_with_attributes, on="household_id") bhepop2_pop_df["household_income"] = ( - bhepop2_pop_df["household_income"] * 12 / bhepop2_pop_df["consumption_units"] + bhepop2_pop_df["household_income"] * 12 / bhepop2_pop_df["consumption_units"] ) bhepop2_pop_df = bhepop2_pop_df.query(f"commune_id == '{commune_id}'") @@ -76,28 +83,29 @@ def execute(context): ["size", "family_comp"], 0, relative_maximum=MAXIMUM_INCOME_FACTOR, - delta_min=1000 + delta_min=1000, ) # check output folder existence - compare_output_path = os.path.join(context.config("output_path"), COMPARE_INCOME_FOLDER) + compare_output_path = os.path.join( + context.config("output_path"), COMPARE_INCOME_FOLDER + ) if not os.path.exists(compare_output_path): os.mkdir(compare_output_path) # create an analysis instance analysis = marginal_distributions_source.compare_with_populations( - { - "Uniform": uniform_pop_df, - "Bhepop2": bhepop2_pop_df - }, + {"Uniform": uniform_pop_df, "Bhepop2": bhepop2_pop_df}, feature_name="household_income", - output_folder=compare_output_path + output_folder=compare_output_path, + ) + analysis.plot_title_format = ( + analysis.plot_title_format + f" \n(commune={commune_id})" ) - analysis.plot_title_format = analysis.plot_title_format + f" \n(commune={commune_id})" analysis.generate_analysis_plots() analysis.generate_analysis_error_table() - print(f"Generated compared analysis of income assignation methods in {compare_output_path}") - - + print( + f"Generated compared analysis of income assignation methods in {compare_output_path}" + ) diff --git a/analysis/reference/census/sociodemographics.py b/analysis/reference/census/sociodemographics.py index 47c6204d..203c8d4f 100644 --- a/analysis/reference/census/sociodemographics.py +++ b/analysis/reference/census/sociodemographics.py @@ -1,34 +1,39 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): context.stage("data.census.filtered") + def execute(context): person_marginals = marginals.combine( marginals.TOTAL_MARGINAL, - marginals.CENSUS_PERSON_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS, - - marginals.cross(marginals.CENSUS_PERSON_MARGINALS, marginals.CENSUS_PERSON_MARGINALS), 
- marginals.cross(marginals.CENSUS_HOUSEHOLD_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS), - - marginals.cross(marginals.CENSUS_PERSON_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS), - + marginals.cross( + marginals.CENSUS_PERSON_MARGINALS, marginals.CENSUS_PERSON_MARGINALS + ), + marginals.cross( + marginals.CENSUS_HOUSEHOLD_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS + ), + marginals.cross( + marginals.CENSUS_PERSON_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS + ), marginals.SPATIAL_MARGINALS, - marginals.cross(marginals.SPATIAL_MARGINALS, marginals.CENSUS_PERSON_MARGINALS) + marginals.cross(marginals.SPATIAL_MARGINALS, marginals.CENSUS_PERSON_MARGINALS), ) household_marginals = marginals.combine( marginals.TOTAL_MARGINAL, - marginals.CENSUS_HOUSEHOLD_MARGINALS, - - marginals.cross(marginals.CENSUS_HOUSEHOLD_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS), - + marginals.cross( + marginals.CENSUS_HOUSEHOLD_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS + ), marginals.SPATIAL_MARGINALS, - marginals.cross(marginals.SPATIAL_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS) + marginals.cross( + marginals.SPATIAL_MARGINALS, marginals.CENSUS_HOUSEHOLD_MARGINALS + ), ) df_persons = context.stage("data.census.filtered") @@ -37,6 +42,6 @@ def execute(context): df_households = df_persons.drop_duplicates("household_id").copy() return dict( - person = stats.marginalize(df_persons, person_marginals), - household = stats.marginalize(df_households, household_marginals) + person=stats.marginalize(df_persons, person_marginals), + household=stats.marginalize(df_households, household_marginals), ) diff --git a/analysis/reference/hts/activities.py b/analysis/reference/hts/activities.py index f1268709..b098e95a 100644 --- a/analysis/reference/hts/activities.py +++ b/analysis/reference/hts/activities.py @@ -1,14 +1,21 @@ import pandas as pd import numpy as np + def configure(context): - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") + PURPOSE_MAPPING = { - "home": "h", "work": "w", "education": "e", - "shop": "s", "leisure": "l", "other": "o" + "home": "h", + "work": "w", + "education": "e", + "shop": "s", + "leisure": "l", + "other": "o", } + def execute(context): df_households, df_persons, df_activities = context.stage("hts") @@ -36,13 +43,37 @@ def execute(context): df_last["is_first"] = False df_last["is_last"] = True - df_activities = pd.concat([ - df_activities[["person_id", "activity_id", "purpose", "start_time", "end_time", "is_first", "is_last"]], - df_last[["person_id", "activity_id", "purpose", "start_time", "end_time", "is_first", "is_last"]] - ]).sort_values(by = ["person_id", "activity_id"]) + df_activities = pd.concat( + [ + df_activities[ + [ + "person_id", + "activity_id", + "purpose", + "start_time", + "end_time", + "is_first", + "is_last", + ] + ], + df_last[ + [ + "person_id", + "activity_id", + "purpose", + "start_time", + "end_time", + "is_first", + "is_last", + ] + ], + ] + ).sort_values(by=["person_id", "activity_id"]) # Add activities for people without trips - df_missing = df_persons[~df_persons["person_id"].isin(df_activities["person_id"])][["person_id"]] + df_missing = df_persons[~df_persons["person_id"].isin(df_activities["person_id"])][ + ["person_id"] + ] df_missing["activity_id"] = 0 df_missing["purpose"] = "home" diff --git a/analysis/reference/hts/chains.py b/analysis/reference/hts/chains.py index 9f7cd0dc..b10730d5 100644 --- a/analysis/reference/hts/chains.py +++ b/analysis/reference/hts/chains.py @@ -5,29 
+5,47 @@ import analysis.statistics as stats import analysis.marginals as marginals -from analysis.chains import aggregate_chains, CHAIN_MARGINALS, CHAIN_LENGTH_LIMIT, CHAIN_TOP_K +from analysis.chains import ( + aggregate_chains, + CHAIN_MARGINALS, + CHAIN_LENGTH_LIMIT, + CHAIN_TOP_K, +) + def configure(context): context.stage("analysis.reference.hts.activities") - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") + def execute(context): - df_chains = context.stage("analysis.reference.hts.activities")[[ - "person_id", "activity_id", "purpose" - ]].sort_values(by = ["person_id", "activity_id"]) + df_chains = context.stage("analysis.reference.hts.activities")[ + ["person_id", "activity_id", "purpose"] + ].sort_values(by=["person_id", "activity_id"]) df_chains = aggregate_chains(df_chains) df_population = context.stage("hts")[1] marginals.prepare_classes(df_population) - df_chains = pd.merge(df_population[["person_id", "age_class", "sex", "person_weight", "age"]], df_chains, on = "person_id") - df_chains["chain_length_class"] = np.minimum(df_chains["chain_length"], CHAIN_LENGTH_LIMIT) - - top_k_chains = df_chains.groupby("chain")["person_weight"].sum().reset_index().sort_values( - by = "person_weight", ascending = False - ).head(CHAIN_TOP_K)["chain"].values + df_chains = pd.merge( + df_population[["person_id", "age_class", "sex", "person_weight", "age"]], + df_chains, + on="person_id", + ) + df_chains["chain_length_class"] = np.minimum( + df_chains["chain_length"], CHAIN_LENGTH_LIMIT + ) + + top_k_chains = ( + df_chains.groupby("chain")["person_weight"] + .sum() + .reset_index() + .sort_values(by="person_weight", ascending=False) + .head(CHAIN_TOP_K)["chain"] + .values + ) df_chains = df_chains[df_chains["chain"].isin(top_k_chains)] df_chains["age_range"] = (df_chains["age"] >= 18) & (df_chains["age"] <= 40) - return stats.marginalize(df_chains, CHAIN_MARGINALS, weight_column = "person_weight") + return stats.marginalize(df_chains, CHAIN_MARGINALS, weight_column="person_weight") diff --git a/analysis/reference/hts/commute_distance.py b/analysis/reference/hts/commute_distance.py index 70cd8931..72897de6 100644 --- a/analysis/reference/hts/commute_distance.py +++ b/analysis/reference/hts/commute_distance.py @@ -5,33 +5,49 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") + def execute(context): - df_weight = context.stage("hts")[1][["person_id", "person_weight"]].rename(columns = { "person_weight": "weight" }) - df_trips = pd.merge(context.stage("hts")[2], df_weight, on = "person_id") + df_weight = context.stage("hts")[1][["person_id", "person_weight"]].rename( + columns={"person_weight": "weight"} + ) + df_trips = pd.merge(context.stage("hts")[2], df_weight, on="person_id") # Prepare data frames df_work = df_trips[ - ((df_trips["preceding_purpose"] == "home") & (df_trips["following_purpose"] == "work")) | - ((df_trips["preceding_purpose"] == "work") & (df_trips["following_purpose"] == "home")) - ].drop_duplicates("person_id", keep = "first")[["euclidean_distance", "weight"]] + ( + (df_trips["preceding_purpose"] == "home") + & (df_trips["following_purpose"] == "work") + ) + | ( + (df_trips["preceding_purpose"] == "work") + & (df_trips["following_purpose"] == "home") + ) + ].drop_duplicates("person_id", keep="first")[["euclidean_distance", "weight"]] df_education = df_trips[ - 
((df_trips["preceding_purpose"] == "home") & (df_trips["following_purpose"] == "education")) | - ((df_trips["preceding_purpose"] == "education") & (df_trips["following_purpose"] == "home")) - ].drop_duplicates("person_id", keep = "first")[["euclidean_distance", "weight"]] + ( + (df_trips["preceding_purpose"] == "home") + & (df_trips["following_purpose"] == "education") + ) + | ( + (df_trips["preceding_purpose"] == "education") + & (df_trips["following_purpose"] == "home") + ) + ].drop_duplicates("person_id", keep="first")[["euclidean_distance", "weight"]] # Prepare distributions - df_work = df_work.sort_values(by = "euclidean_distance") + df_work = df_work.sort_values(by="euclidean_distance") df_work["cdf"] = np.cumsum(df_work["weight"]) df_work["cdf"] /= df_work["cdf"].max() df_work = df_work[["euclidean_distance", "cdf"]] - df_education = df_education.sort_values(by = "euclidean_distance") + df_education = df_education.sort_values(by="euclidean_distance") df_education["cdf"] = np.cumsum(df_education["weight"]) df_education["cdf"] /= df_education["cdf"].max() df_education = df_education[["euclidean_distance", "cdf"]] - return dict(work = df_work, education = df_education) + return dict(work=df_work, education=df_education) diff --git a/analysis/reference/hts/commute_flow.py b/analysis/reference/hts/commute_flow.py index 5a922409..6cf2722f 100644 --- a/analysis/reference/hts/commute_flow.py +++ b/analysis/reference/hts/commute_flow.py @@ -5,64 +5,115 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") -def execute(context): - df_weight = context.stage("hts")[1][["person_id", "person_weight"]].rename(columns = { "person_weight": "weight" }) - df_trips = context.stage("hts")[2][[ - "person_id", "origin_departement_id", "destination_departement_id", - "preceding_purpose", "following_purpose" - ]] +def execute(context): + df_weight = context.stage("hts")[1][["person_id", "person_weight"]].rename( + columns={"person_weight": "weight"} + ) + + df_trips = context.stage("hts")[2][ + [ + "person_id", + "origin_departement_id", + "destination_departement_id", + "preceding_purpose", + "following_purpose", + ] + ] # Prepare homes - df_homes = df_trips[df_trips["preceding_purpose"] == "home"][["person_id", "origin_departement_id"]].rename( - columns = { "origin_departement_id": "home" } - ).drop_duplicates("person_id") + df_homes = ( + df_trips[df_trips["preceding_purpose"] == "home"][ + ["person_id", "origin_departement_id"] + ] + .rename(columns={"origin_departement_id": "home"}) + .drop_duplicates("person_id") + ) # Calculate work - df_work = df_trips[df_trips["following_purpose"] == "work"][["person_id", "destination_departement_id"]].rename( - columns = { "destination_departement_id": "work" } - ).drop_duplicates("person_id") + df_work = ( + df_trips[df_trips["following_purpose"] == "work"][ + ["person_id", "destination_departement_id"] + ] + .rename(columns={"destination_departement_id": "work"}) + .drop_duplicates("person_id") + ) - df_work = pd.merge(df_homes, df_work, on = "person_id") - df_work = pd.merge(df_work, df_weight, on = "person_id", how = "left") + df_work = pd.merge(df_homes, df_work, on="person_id") + df_work = pd.merge(df_work, df_weight, on="person_id", how="left") df_work = df_work.groupby(["home", "work"])["weight"].sum() df_work = df_work.reset_index() # Calculate education - df_education = 
df_trips[df_trips["following_purpose"] == "education"][["person_id", "destination_departement_id"]].rename( - columns = { "destination_departement_id": "education" } - ).drop_duplicates("person_id") + df_education = ( + df_trips[df_trips["following_purpose"] == "education"][ + ["person_id", "destination_departement_id"] + ] + .rename(columns={"destination_departement_id": "education"}) + .drop_duplicates("person_id") + ) - df_education = pd.merge(df_homes, df_education, on = "person_id") - df_education = pd.merge(df_education, df_weight, on = "person_id", how = "left") + df_education = pd.merge(df_homes, df_education, on="person_id") + df_education = pd.merge(df_education, df_weight, on="person_id", how="left") df_education = df_education.groupby(["home", "education"])["weight"].sum() df_education = df_education.reset_index() # Calculate corrections for employed non-movers - df_existing = context.stage("hts")[1][["employed", "departement_id", "person_weight"]].rename(columns = { "person_weight": "weight", "departement_id": "home" }) + df_existing = context.stage("hts")[1][ + ["employed", "departement_id", "person_weight"] + ].rename(columns={"person_weight": "weight", "departement_id": "home"}) df_existing = df_existing[df_existing["employed"]] - df_existing = df_existing.groupby("home")["weight"].sum().reset_index().rename(columns = { "weight": "existing" }) - - df_origin = df_work.groupby("home")["weight"].sum().reset_index().rename(columns = { "weight": "active" }) - - df_work_correction = pd.merge(df_existing, df_origin, on = "home") - df_work_correction["factor"] = df_work_correction["active"] / df_work_correction["existing"] + df_existing = ( + df_existing.groupby("home")["weight"] + .sum() + .reset_index() + .rename(columns={"weight": "existing"}) + ) + + df_origin = ( + df_work.groupby("home")["weight"] + .sum() + .reset_index() + .rename(columns={"weight": "active"}) + ) + + df_work_correction = pd.merge(df_existing, df_origin, on="home") + df_work_correction["factor"] = ( + df_work_correction["active"] / df_work_correction["existing"] + ) df_work_correction = df_work_correction[["home", "factor"]] # Calculate corrections for studying non-movers - df_existing = context.stage("hts")[1][["studies", "departement_id", "person_weight"]].rename(columns = { "person_weight": "weight", "departement_id": "home" }) + df_existing = context.stage("hts")[1][ + ["studies", "departement_id", "person_weight"] + ].rename(columns={"person_weight": "weight", "departement_id": "home"}) df_existing = df_existing[df_existing["studies"]] - df_existing = df_existing.groupby("home")["weight"].sum().reset_index().rename(columns = { "weight": "existing" }) - - df_origin = df_education.groupby("home")["weight"].sum().reset_index().rename(columns = { "weight": "active" }) - - df_education_correction = pd.merge(df_existing, df_origin, on = "home") - df_education_correction["factor"] = df_education_correction["active"] / df_education_correction["existing"] + df_existing = ( + df_existing.groupby("home")["weight"] + .sum() + .reset_index() + .rename(columns={"weight": "existing"}) + ) + + df_origin = ( + df_education.groupby("home")["weight"] + .sum() + .reset_index() + .rename(columns={"weight": "active"}) + ) + + df_education_correction = pd.merge(df_existing, df_origin, on="home") + df_education_correction["factor"] = ( + df_education_correction["active"] / df_education_correction["existing"] + ) df_education_correction = df_education_correction[["home", "factor"]] - return dict(work = df_work, education = 
df_education), dict(work = df_work_correction, education = df_education_correction) + return dict(work=df_work, education=df_education), dict( + work=df_work_correction, education=df_education_correction + ) diff --git a/analysis/reference/hts/mode_distances.py b/analysis/reference/hts/mode_distances.py index 6b556bd5..f347a72c 100644 --- a/analysis/reference/hts/mode_distances.py +++ b/analysis/reference/hts/mode_distances.py @@ -1,9 +1,11 @@ import pandas as pd import numpy as np + def configure(context): context.stage("data.hts.selected") + def execute(context): df_households, df_persons, df_trips = context.stage("data.hts.selected") df = pd.merge(df_trips, df_persons[["person_id", "person_weight"]]) @@ -12,11 +14,13 @@ def execute(context): df["travel_time"] = df["arrival_time"] - df["departure_time"] primary_activities = ["home", "work", "education"] - #primary_activities = [] - df = df[~( - df["preceding_purpose"].isin(primary_activities) & - df["following_purpose"].isin(primary_activities) - )] + # primary_activities = [] + df = df[ + ~( + df["preceding_purpose"].isin(primary_activities) + & df["following_purpose"].isin(primary_activities) + ) + ] data = dict() @@ -32,6 +36,6 @@ def execute(context): cdf = np.cumsum(weights[sorter]) cdf /= cdf[-1] - data[mode] = dict(values = values, cdf = cdf) + data[mode] = dict(values=values, cdf=cdf) return data diff --git a/analysis/reference/hts/sociodemographics.py b/analysis/reference/hts/sociodemographics.py index d6acb58f..ad64a5d9 100644 --- a/analysis/reference/hts/sociodemographics.py +++ b/analysis/reference/hts/sociodemographics.py @@ -2,8 +2,10 @@ import analysis.marginals as marginals import pandas as pd + def configure(context): - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") + def execute(context): df_households, df_persons, _ = context.stage("hts") @@ -13,7 +15,7 @@ def execute(context): household_columns -= person_columns household_columns.add("household_id") - df = pd.merge(df_persons, df_households[household_columns], on = "household_id") + df = pd.merge(df_persons, df_households[household_columns], on="household_id") assert len(df_persons) == len(df) df_persons = df @@ -21,36 +23,36 @@ def execute(context): person_marginals = marginals.combine( marginals.TOTAL_MARGINAL, - marginals.HTS_PERSON_MARGINALS, marginals.HTS_HOUSEHOLD_MARGINALS, - marginals.cross(marginals.HTS_PERSON_MARGINALS, marginals.HTS_PERSON_MARGINALS), - marginals.cross(marginals.HTS_HOUSEHOLD_MARGINALS, marginals.HTS_HOUSEHOLD_MARGINALS), - - marginals.cross(marginals.HTS_PERSON_MARGINALS, marginals.HTS_HOUSEHOLD_MARGINALS), - + marginals.cross( + marginals.HTS_HOUSEHOLD_MARGINALS, marginals.HTS_HOUSEHOLD_MARGINALS + ), + marginals.cross( + marginals.HTS_PERSON_MARGINALS, marginals.HTS_HOUSEHOLD_MARGINALS + ), spatial_marginals, - marginals.cross(spatial_marginals, marginals.HTS_PERSON_MARGINALS) + marginals.cross(spatial_marginals, marginals.HTS_PERSON_MARGINALS), ) household_marginals = marginals.combine( marginals.TOTAL_MARGINAL, - marginals.HTS_HOUSEHOLD_MARGINALS, - marginals.cross(marginals.HTS_HOUSEHOLD_MARGINALS, marginals.HTS_HOUSEHOLD_MARGINALS), - + marginals.cross( + marginals.HTS_HOUSEHOLD_MARGINALS, marginals.HTS_HOUSEHOLD_MARGINALS + ), spatial_marginals, - marginals.cross(spatial_marginals, marginals.HTS_HOUSEHOLD_MARGINALS) + marginals.cross(spatial_marginals, marginals.HTS_HOUSEHOLD_MARGINALS), ) marginals.prepare_classes(df_persons) df_households = 
df_persons.drop_duplicates("household_id").copy() - df_persons = df_persons.rename(columns = { "person_weight": "weight" }) - df_households = df_households.rename(columns = { "household_weight": "weight" }) + df_persons = df_persons.rename(columns={"person_weight": "weight"}) + df_households = df_households.rename(columns={"household_weight": "weight"}) return dict( - person = stats.marginalize(df_persons, person_marginals), - household = stats.marginalize(df_households, household_marginals) + person=stats.marginalize(df_persons, person_marginals), + household=stats.marginalize(df_households, household_marginals), ) diff --git a/analysis/reference/income.py b/analysis/reference/income.py index 5b4e068d..c75e184a 100644 --- a/analysis/reference/income.py +++ b/analysis/reference/income.py @@ -4,11 +4,13 @@ import data.hts.egt.cleaned import data.hts.entd.cleaned + def configure(context): context.stage("data.hts.entd.cleaned") context.stage("data.hts.egt.cleaned") context.stage("data.income.region") + def calculate_cdf(df): weights = df["household_weight"].values incomes = df["income"].values @@ -16,33 +18,53 @@ def calculate_cdf(df): sorter = np.argsort(incomes) cdf = np.cumsum(weights[sorter]) / np.sum(weights) - return dict(income = incomes[sorter], cdf = cdf) + return dict(income=incomes[sorter], cdf=cdf) + def execute(context): # Calculate ENTD income distribution - df_entd = context.stage("data.hts.entd.cleaned")[0][["household_weight", "income_class", "consumption_units"]].copy() + df_entd = context.stage("data.hts.entd.cleaned")[0][ + ["household_weight", "income_class", "consumption_units"] + ].copy() entd_upper_bounds = data.hts.entd.cleaned.INCOME_CLASS_BOUNDS entd_lower_bounds = [0] + entd_upper_bounds[:-1] - df_entd["income"] = 12 * 0.5 * df_entd["income_class"].apply(lambda k: entd_lower_bounds[k] + entd_upper_bounds[k] if k >= 0 else np.nan) + df_entd["income"] = ( + 12 + * 0.5 + * df_entd["income_class"].apply( + lambda k: entd_lower_bounds[k] + entd_upper_bounds[k] if k >= 0 else np.nan + ) + ) df_entd = pd.DataFrame(calculate_cdf(df_entd)) df_entd["source"] = "entd" # Calculate EGT income distribution - df_egt = context.stage("data.hts.egt.cleaned")[0][["household_weight", "income_class", "consumption_units"]].copy() + df_egt = context.stage("data.hts.egt.cleaned")[0][ + ["household_weight", "income_class", "consumption_units"] + ].copy() egt_upper_bounds = data.hts.egt.cleaned.INCOME_CLASS_BOUNDS egt_lower_bounds = [0] + egt_upper_bounds[:-1] - df_egt["income"] = 12 * 0.5 * df_egt["income_class"].apply(lambda k: egt_lower_bounds[k] + egt_upper_bounds[k] if k >= 0 else np.nan) + df_egt["income"] = ( + 12 + * 0.5 + * df_egt["income_class"].apply( + lambda k: egt_lower_bounds[k] + egt_upper_bounds[k] if k >= 0 else np.nan + ) + ) df_egt["income"] /= df_egt["consumption_units"] df_egt = pd.DataFrame(calculate_cdf(df_egt)) df_egt["source"] = "egt" # Calcultae FiLo income distribution df_filo = context.stage("data.income.region") - df_filo = pd.DataFrame(dict( - income = np.array([0.0] + df_filo.tolist()), cdf = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) - )) + df_filo = pd.DataFrame( + dict( + income=np.array([0.0] + df_filo.tolist()), + cdf=np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]), + ) + ) df_filo["source"] = "filo" return pd.concat([df_entd, df_egt, df_filo]) diff --git a/analysis/reference/od/commute_distance.py b/analysis/reference/od/commute_distance.py index 14a04eae..9bd3b13f 100644 --- a/analysis/reference/od/commute_distance.py +++ 
b/analysis/reference/od/commute_distance.py @@ -1,19 +1,21 @@ import pandas as pd import numpy as np + def configure(context): context.stage("data.od.cleaned") context.stage("data.spatial.centroid_distances") + def execute(context): df_distances = context.stage("data.spatial.centroid_distances") result = {} for df_data, name in zip(context.stage("data.od.cleaned"), ("work", "education")): - df_data = pd.merge(df_data, df_distances, on = ["origin_id", "destination_id"]) + df_data = pd.merge(df_data, df_distances, on=["origin_id", "destination_id"]) df_data = df_data[["centroid_distance", "weight"]] - df_data = df_data.sort_values(by = "centroid_distance") + df_data = df_data.sort_values(by="centroid_distance") df_data["cdf"] = np.cumsum(df_data["weight"]) df_data["cdf"] /= df_data["cdf"].max() df_data = df_data[["centroid_distance", "cdf"]] diff --git a/analysis/reference/od/commute_flow.py b/analysis/reference/od/commute_flow.py index 0a693a0f..1a8b1981 100644 --- a/analysis/reference/od/commute_flow.py +++ b/analysis/reference/od/commute_flow.py @@ -1,36 +1,56 @@ import pandas as pd + def configure(context): context.stage("data.od.cleaned") context.stage("data.spatial.municipalities") + def execute(context): - df_codes = context.stage("data.spatial.municipalities")[[ - "commune_id", "departement_id" - ]] + df_codes = context.stage("data.spatial.municipalities")[ + ["commune_id", "departement_id"] + ] result = {} for df_data, name in zip(context.stage("data.od.cleaned"), ("work", "education")): df_data["origin_id"] = df_data["origin_id"].cat.remove_unused_categories() - df_data["destination_id"] = df_data["destination_id"].cat.remove_unused_categories() - - df_data = pd.merge(df_data, df_codes.rename(columns = { - "commune_id": "origin_id", - "departement_id": "origin_departement_id" - }), how = "left", on = "origin_id") - - df_data = pd.merge(df_data, df_codes.rename(columns = { - "commune_id": "destination_id", - "departement_id": "destination_departement_id" - }), how = "left", on = "destination_id") - - df_data = df_data[[ - "origin_departement_id", "destination_departement_id", "weight" - ]].rename(columns = { - "origin_departement_id": "home", - "destination_departement_id": name - }) + df_data["destination_id"] = df_data[ + "destination_id" + ].cat.remove_unused_categories() + + df_data = pd.merge( + df_data, + df_codes.rename( + columns={ + "commune_id": "origin_id", + "departement_id": "origin_departement_id", + } + ), + how="left", + on="origin_id", + ) + + df_data = pd.merge( + df_data, + df_codes.rename( + columns={ + "commune_id": "destination_id", + "departement_id": "destination_departement_id", + } + ), + how="left", + on="destination_id", + ) + + df_data = df_data[ + ["origin_departement_id", "destination_departement_id", "weight"] + ].rename( + columns={ + "origin_departement_id": "home", + "destination_departement_id": name, + } + ) df_data["home"] = df_data["home"].cat.remove_unused_categories() df_data[name] = df_data[name].cat.remove_unused_categories() diff --git a/analysis/statistics.py b/analysis/statistics.py index 498b9bff..1f577e42 100644 --- a/analysis/statistics.py +++ b/analysis/statistics.py @@ -4,11 +4,13 @@ import numpy as np import pandas as pd -@numba.jit(nopython = True, parallel = True) + +@numba.jit(nopython=True, parallel=True) def _combine_filter(filters): return np.logical_and.reduce(filters) -def marginalize(df, marginals, weight_column = "weight", count_column = "weight"): + +def marginalize(df, marginals, weight_column="weight", 
count_column="weight"): """ This function takes a data frame and a list of marginals in the form @@ -58,16 +60,26 @@ def marginalize(df, marginals, weight_column = "weight", count_column = "weight" results = {} for columns in marginals: - if len(columns) == 0: # Total is requested + if len(columns) == 0: # Total is requested total = len(df) if weight_column is None else df[weight_column].sum() - results[columns] = pd.DataFrame.from_records([["value", total]], columns = ["total", count_column]) + results[columns] = pd.DataFrame.from_records( + [["value", total]], columns=["total", count_column] + ) else: marginal_records = [] - value_index_lists = [np.arange(len(unique_values[column])) for column in columns] + value_index_lists = [ + np.arange(len(unique_values[column])) for column in columns + ] for value_indices in itertools.product(*value_index_lists): - marginal_values = [unique_values[column][value_index] for column, value_index in zip(columns, value_indices)] - marginal_filters = [filters[column][value_index] for column, value_index in zip(columns, value_indices)] + marginal_values = [ + unique_values[column][value_index] + for column, value_index in zip(columns, value_indices) + ] + marginal_filters = [ + filters[column][value_index] + for column, value_index in zip(columns, value_indices) + ] f = np.logical_and.reduce(marginal_filters) if weight_column is None: @@ -77,18 +89,19 @@ def marginalize(df, marginals, weight_column = "weight", count_column = "weight" marginal_records.append(marginal_values + [marginal_count]) - marginal_records = pd.DataFrame.from_records(marginal_records, columns = list(columns) + [count_column]) + marginal_records = pd.DataFrame.from_records( + marginal_records, columns=list(columns) + [count_column] + ) results[columns] = marginal_records return results + def apply_per_marginal(marginals, f): - return { - marginal: f(df) - for marginal, df in marginals.items() - } + return {marginal: f(df) for marginal, df in marginals.items()} + -def collect_sample(dfs, column = "realization"): +def collect_sample(dfs, column="realization"): """ This function combines multiple structurally equal data frames into one by adding an additional column denoting the number of the realization. 
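# Editor's illustrative sketch (not part of the diff): a minimal, hypothetical use of
# collect_sample as described by the docstring above. The two toy data frames and
# their values are assumptions made up for this example.
import pandas as pd

from analysis.statistics import collect_sample

# Two structurally identical marginal tables, e.g. produced by two bootstrap runs.
df_run_0 = pd.DataFrame({"age_class": [0, 1], "weight": [10.0, 20.0]})
df_run_1 = pd.DataFrame({"age_class": [0, 1], "weight": [12.0, 18.0]})

# The frames are stacked into one and each row is tagged with its realization index,
# so helpers such as analyze_sample can later group over the "realization" column.
df_sample = collect_sample([df_run_0, df_run_1], column="realization")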
@@ -108,7 +121,8 @@ def collect_sample(dfs, column = "realization"): return pd.concat(new_dfs) -def combine_marginals(realizations, column = "realization"): + +def combine_marginals(realizations, column="realization"): """ This function combines multiple realizations of the "marginalize" output into a new data structure that is equivalent to the one of "marginalize", but with @@ -117,7 +131,9 @@ def combine_marginals(realizations, column = "realization"): assert len(realizations) > 0 marginals = realizations[0].keys() - marginal_columns = { marginal: list(realizations[0][marginal].columns) for marginal in marginals } + marginal_columns = { + marginal: list(realizations[0][marginal].columns) for marginal in marginals + } # Check that all realizations have the same structure as the first for realization in realizations: @@ -130,21 +146,33 @@ def combine_marginals(realizations, column = "realization"): sample = {} for marginal in marginals: - sample[marginal] = collect_sample([realization[marginal] for realization in realizations], column) + sample[marginal] = collect_sample( + [realization[marginal] for realization in realizations], column + ) return sample -def bootstrap(df, bootstrap_size, random, realization_column = "realization", bootstrap_sample_size = None): + +def bootstrap( + df, + bootstrap_size, + random, + realization_column="realization", + bootstrap_sample_size=None, +): unique_realizations = np.unique(df[realization_column]) realizations = df[realization_column].values - indices = [list(np.where(realizations == realization)[0]) for realization in unique_realizations] + indices = [ + list(np.where(realizations == realization)[0]) + for realization in unique_realizations + ] lengths = [len(i) for i in indices] if bootstrap_sample_size is None: bootstrap_sample_size = len(indices) - counts = random.randint(len(indices), size = (bootstrap_size, bootstrap_sample_size)) + counts = random.randint(len(indices), size=(bootstrap_size, bootstrap_sample_size)) for selection in counts: selection_indices = [] @@ -159,17 +187,23 @@ def bootstrap(df, bootstrap_size, random, realization_column = "realization", bo yield df_sample -def apply_bootstrap(df, bootstrap_size, random, f, realization_column = "realization"): + +def apply_bootstrap(df, bootstrap_size, random, f, realization_column="realization"): df_bootstrap = [] - for bootstrap_realization, df_sample in enumerate(bootstrap(df, bootstrap_size, random, realization_column)): + for bootstrap_realization, df_sample in enumerate( + bootstrap(df, bootstrap_size, random, realization_column) + ): df_sample = f(df_sample) df_sample[realization_column] = bootstrap_realization df_bootstrap.append(df_sample) return pd.concat(df_bootstrap) -def analyze_sample(df, realization_column = "realization", columns = ["weight"], statistics = None): + +def analyze_sample( + df, realization_column="realization", columns=["weight"], statistics=None +): assert realization_column in df if columns is None or len(columns) == 0: @@ -183,128 +217,171 @@ def analyze_sample(df, realization_column = "realization", columns = ["weight"], assert column in df.columns group_columns = list(df.columns) - for column in columns: group_columns.remove(column) + for column in columns: + group_columns.remove(column) group_columns.remove(realization_column) if statistics is None: statistics = { column: [ - ("mean", "mean"), ("median", "median"), ("min", "min"), ("max", "max"), - ("q10", lambda x: x.quantile(0.1)), ("q90", lambda x: x.quantile(0.9)), - ("q5", lambda x: 
x.quantile(0.05)), ("q95", lambda x: x.quantile(0.95)) + ("mean", "mean"), + ("median", "median"), + ("min", "min"), + ("max", "max"), + ("q10", lambda x: x.quantile(0.1)), + ("q90", lambda x: x.quantile(0.9)), + ("q5", lambda x: x.quantile(0.05)), + ("q95", lambda x: x.quantile(0.95)), ] for column in columns } - df = df[group_columns + columns].groupby(group_columns).aggregate(statistics).reset_index() + df = ( + df[group_columns + columns] + .groupby(group_columns) + .aggregate(statistics) + .reset_index() + ) return df -def analyze_sample_and_flatten(df, realization_column = "realization", columns = ["weight"], statistics = None): + +def analyze_sample_and_flatten( + df, realization_column="realization", columns=["weight"], statistics=None +): df = analyze_sample(df, realization_column, columns, statistics) df.columns = [c[1] if c[0] == "weight" else c[0] for c in df.columns] return df -def sample_subsets(df, subset_size, random, realization_column = "realization"): + +def sample_subsets(df, subset_size, random, realization_column="realization"): realizations = len(np.unique(df[realization_column])) return bootstrap(df, realizations, random, realization_column, subset_size) -def average_subsets(df, subset_size, random, realization_column = "realization", weight_column = "weight"): + +def average_subsets( + df, subset_size, random, realization_column="realization", weight_column="weight" +): df_output = [] - for realization, df_subset in enumerate(sample_subsets(df, subset_size, random, realization_column)): - df_subset = analyze_sample(df_subset, realization_column, weight_column, [("weight", "mean")]) + for realization, df_subset in enumerate( + sample_subsets(df, subset_size, random, realization_column) + ): + df_subset = analyze_sample( + df_subset, realization_column, weight_column, [("weight", "mean")] + ) df_subset[realization_column] = realization df_output.append(df_subset) return pd.concat(df_output) + if __name__ == "__main__": + def create_sample(random_seed): random = np.random.RandomState(random_seed) index = np.arange(100) - ages = random.randint(10, size = 100) * 10 - gender = random.randint(2, size = 100) + ages = random.randint(10, size=100) * 10 + gender = random.randint(2, size=100) - df = pd.DataFrame.from_records(zip(index, ages, gender), columns = ["person", "age", "gender"]) - df["gender"] = df["gender"].map({ 0: "male", 1: "female" }).astype("category") + df = pd.DataFrame.from_records( + zip(index, ages, gender), columns=["person", "age", "gender"] + ) + df["gender"] = df["gender"].map({0: "male", 1: "female"}).astype("category") df["weight"] = 1.0 return df - df = pd.DataFrame.from_records([ - { "age": 20, "weight": 10.0, "abc": 10.0, "realization": 0 }, - { "age": 50, "weight": 50.0, "abc": 50.0, "realization": 0 }, - { "age": 20, "weight": 20.0, "abc": 20.0, "realization": 1 }, - { "age": 50, "weight": 60.0, "abc": 60.0, "realization": 1 }, - ]) + df = pd.DataFrame.from_records( + [ + {"age": 20, "weight": 10.0, "abc": 10.0, "realization": 0}, + {"age": 50, "weight": 50.0, "abc": 50.0, "realization": 0}, + {"age": 20, "weight": 20.0, "abc": 20.0, "realization": 1}, + {"age": 50, "weight": 60.0, "abc": 60.0, "realization": 1}, + ] + ) random = np.random.RandomState(0) statistics = { "weight": [("mean", "mean")], - "abc": [("q95", lambda x: x.quantile(0.95))] + "abc": [("q95", lambda x: x.quantile(0.95))], } - df = apply_bootstrap(df, 100, random, lambda df: analyze_sample(df, statistics = statistics, columns = ["weight", "abc"])) - - df = 
df.groupby("age").aggregate([ - ("mean", "mean"), - ("q10", lambda x: x.quantile(0.1)), - ("q90", lambda x: x.quantile(0.9)) - ]).reset_index() + df = apply_bootstrap( + df, + 100, + random, + lambda df: analyze_sample(df, statistics=statistics, columns=["weight", "abc"]), + ) + + df = ( + df.groupby("age") + .aggregate( + [ + ("mean", "mean"), + ("q10", lambda x: x.quantile(0.1)), + ("q90", lambda x: x.quantile(0.9)), + ] + ) + .reset_index() + ) print(df) - - - exit() random = np.random.RandomState(0) - #for df_subset in sample_subsets(df, 3, random): + # for df_subset in sample_subsets(df, 3, random): # print(df_subset) print(average_subsets(df, 3, random)) - print(apply_bootstrap(average_subsets(df, 3, random), 100, random, lambda df: analyze_sample(df))) + print( + apply_bootstrap( + average_subsets(df, 3, random), 100, random, lambda df: analyze_sample(df) + ) + ) exit() - #print(analyze(df)) + # print(analyze(df)) - #for df_sample in bootstrap(df, 100, random): + # for df_sample in bootstrap(df, 100, random): # df_sample = analyze(df_sample) # print(df_sample) - statistics = [ - ("precision", lambda x: np.mean(x < 55.0)) - ] - - df = apply_bootstrap(df, 100, random, lambda df: analyze_sample(df, statistics = statistics)) - df = df.groupby(["age"]).aggregate([ - ("mean", "mean"), - ("q10", lambda x: x.quantile(0.1)), - ("q90", lambda x: x.quantile(0.9)) - ]).reset_index() - - + statistics = [("precision", lambda x: np.mean(x < 55.0))] + + df = apply_bootstrap( + df, 100, random, lambda df: analyze_sample(df, statistics=statistics) + ) + df = ( + df.groupby(["age"]) + .aggregate( + [ + ("mean", "mean"), + ("q10", lambda x: x.quantile(0.1)), + ("q90", lambda x: x.quantile(0.9)), + ] + ) + .reset_index() + ) print(df) exit() print() - exit() sample = [create_sample(R) for R in range(2)] random = np.random.RandomState(5) - #marginals = [marginalize(df, [("age",), ("gender",), ("age", "gender"), tuple()]) for df in sample] + # marginals = [marginalize(df, [("age",), ("gender",), ("age", "gender"), tuple()]) for df in sample] marginals = [marginalize(df, [("gender",)]) for df in sample] marginals = collect_marginalized_sample(marginals) - metrics = bootstrap_sampled_marginals(marginals, 100, subset_size = 2, random = random) + metrics = bootstrap_sampled_marginals(marginals, 100, subset_size=2, random=random) print(metrics[("gender",)]) diff --git a/analysis/synthesis/commute_distance.py b/analysis/synthesis/commute_distance.py index b8a83a8e..ec9f8946 100644 --- a/analysis/synthesis/commute_distance.py +++ b/analysis/synthesis/commute_distance.py @@ -6,30 +6,52 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): acquisition_sample_size = context.config("acquisition_sample_size") - bs.configure(context, "synthesis.population.spatial.home.locations", acquisition_sample_size) - bs.configure(context, "synthesis.population.spatial.primary.locations", acquisition_sample_size) + bs.configure( + context, "synthesis.population.spatial.home.locations", acquisition_sample_size + ) + bs.configure( + context, + "synthesis.population.spatial.primary.locations", + acquisition_sample_size, + ) bs.configure(context, "synthesis.population.sampled", acquisition_sample_size) + def execute(context): acquisition_sample_size = context.config("acquisition_sample_size") feeder = zip( - bs.get_stages(context, "synthesis.population.spatial.home.locations", acquisition_sample_size), - bs.get_stages(context, "synthesis.population.spatial.primary.locations", 
acquisition_sample_size), + bs.get_stages( + context, + "synthesis.population.spatial.home.locations", + acquisition_sample_size, + ), + bs.get_stages( + context, + "synthesis.population.spatial.primary.locations", + acquisition_sample_size, + ), bs.get_stages(context, "synthesis.population.sampled", acquisition_sample_size), ) probabilities = np.linspace(0.0, 1.0, 20) - quantiles = { "work": [], "education": [] } + quantiles = {"work": [], "education": []} - with context.progress(label = "Processing commute data ...", total = acquisition_sample_size) as progress: + with context.progress( + label="Processing commute data ...", total=acquisition_sample_size + ) as progress: for df_home, df_spatial, df_persons in feeder: # Prepare home - df_home = pd.merge(df_home, df_persons[["person_id", "household_id"]], on = "household_id") - df_home = df_home[["person_id", "geometry"]].set_index("person_id").sort_index() + df_home = pd.merge( + df_home, df_persons[["person_id", "household_id"]], on="household_id" + ) + df_home = ( + df_home[["person_id", "geometry"]].set_index("person_id").sort_index() + ) assert len(df_home) == len(df_persons) for index, name in enumerate(("work", "education")): @@ -40,12 +62,11 @@ def execute(context): df_compare = df_home.loc[df_destination.index] assert len(df_destination) == len(df_compare) - distances = df_destination["geometry"].distance(df_compare["geometry"]) * 1e-3 + distances = ( + df_destination["geometry"].distance(df_compare["geometry"]) * 1e-3 + ) - quantiles[name].append([ - distances.quantile(p) - for p in probabilities - ]) + quantiles[name].append([distances.quantile(p) for p in probabilities]) progress.update() @@ -54,11 +75,11 @@ def execute(context): for name in ("work", "education"): data = np.array(quantiles[name]) - mean = np.mean(data, axis = 0) - min = np.min(data, axis = 0) - max = np.max(data, axis = 0) + mean = np.mean(data, axis=0) + min = np.min(data, axis=0) + max = np.max(data, axis=0) - df = pd.DataFrame(dict(mean = mean, min = min, max = max, cdf = probabilities)) + df = pd.DataFrame(dict(mean=mean, min=min, max=max, cdf=probabilities)) result[name] = df return result diff --git a/analysis/synthesis/commute_flow.py b/analysis/synthesis/commute_flow.py index 82119898..c96cd61b 100644 --- a/analysis/synthesis/commute_flow.py +++ b/analysis/synthesis/commute_flow.py @@ -5,56 +5,94 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): acquisition_sample_size = context.config("acquisition_sample_size") - bs.configure(context, "synthesis.population.spatial.home.zones", acquisition_sample_size) - bs.configure(context, "synthesis.population.spatial.primary.locations", acquisition_sample_size) + bs.configure( + context, "synthesis.population.spatial.home.zones", acquisition_sample_size + ) + bs.configure( + context, + "synthesis.population.spatial.primary.locations", + acquisition_sample_size, + ) bs.configure(context, "synthesis.population.sampled", acquisition_sample_size) context.stage("data.spatial.municipalities") + def execute(context): - df_codes = context.stage("data.spatial.municipalities")[[ - "commune_id", "departement_id" - ]] + df_codes = context.stage("data.spatial.municipalities")[ + ["commune_id", "departement_id"] + ] acquisition_sample_size = context.config("acquisition_sample_size") feeder = zip( - bs.get_stages(context, "synthesis.population.spatial.home.zones", acquisition_sample_size), - bs.get_stages(context, "synthesis.population.spatial.primary.locations", 
acquisition_sample_size), + bs.get_stages( + context, "synthesis.population.spatial.home.zones", acquisition_sample_size + ), + bs.get_stages( + context, + "synthesis.population.spatial.primary.locations", + acquisition_sample_size, + ), bs.get_stages(context, "synthesis.population.sampled", acquisition_sample_size), ) work_flows = [] education_flows = [] - with context.progress(label = "Processing commute data ...", total = acquisition_sample_size) as progress: + with context.progress( + label="Processing commute data ...", total=acquisition_sample_size + ) as progress: for realization, (df_home, df_spatial, df_persons) in enumerate(feeder): # Prepare home - df_home = pd.merge(df_persons[["person_id", "household_id"]], df_home, on = "household_id") - df_home = df_home[["person_id", "departement_id"]].rename(columns = { "departement_id": "home" }) + df_home = pd.merge( + df_persons[["person_id", "household_id"]], df_home, on="household_id" + ) + df_home = df_home[["person_id", "departement_id"]].rename( + columns={"departement_id": "home"} + ) # Prepare work df_work = df_spatial[0] - df_work = pd.merge(df_work, df_codes, how = "left", on = "commune_id") - df_work["departement_id"] = df_work["departement_id"].cat.remove_unused_categories() - df_work = df_work[["person_id", "departement_id"]].rename(columns = { "departement_id": "work" }) + df_work = pd.merge(df_work, df_codes, how="left", on="commune_id") + df_work["departement_id"] = df_work[ + "departement_id" + ].cat.remove_unused_categories() + df_work = df_work[["person_id", "departement_id"]].rename( + columns={"departement_id": "work"} + ) # Calculate work - df_work = pd.merge(df_home, df_work, on = "person_id").groupby(["home", "work"]).size().reset_index(name = "weight") + df_work = ( + pd.merge(df_home, df_work, on="person_id") + .groupby(["home", "work"]) + .size() + .reset_index(name="weight") + ) df_work["realization"] = realization work_flows.append(df_work) # Prepare work df_education = df_spatial[1] - df_education = pd.merge(df_education, df_codes, how = "left", on = "commune_id") - df_education["departement_id"] = df_education["departement_id"].cat.remove_unused_categories() - df_education = df_education[["person_id", "departement_id"]].rename(columns = { "departement_id": "education" }) + df_education = pd.merge(df_education, df_codes, how="left", on="commune_id") + df_education["departement_id"] = df_education[ + "departement_id" + ].cat.remove_unused_categories() + df_education = df_education[["person_id", "departement_id"]].rename( + columns={"departement_id": "education"} + ) # Calculate education - df_education = pd.merge(df_home, df_education, on = "person_id").groupby(["home", "education"]).size().reset_index(name = "weight") + df_education = ( + pd.merge(df_home, df_education, on="person_id") + .groupby(["home", "education"]) + .size() + .reset_index(name="weight") + ) df_education["realization"] = realization education_flows.append(df_education) @@ -66,4 +104,4 @@ def execute(context): df_work = stats.analyze_sample_and_flatten(df_work) df_education = stats.analyze_sample_and_flatten(df_education) - return dict(work = df_work, education = df_education) + return dict(work=df_work, education=df_education) diff --git a/analysis/synthesis/income.py b/analysis/synthesis/income.py index f37131e4..1a49af17 100644 --- a/analysis/synthesis/income.py +++ b/analysis/synthesis/income.py @@ -6,9 +6,13 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): acquisition_sample_size 
= context.config("acquisition_sample_size") - bs.configure(context, "synthesis.population.income.selected", acquisition_sample_size) + bs.configure( + context, "synthesis.population.income.selected", acquisition_sample_size + ) + def execute(context): acquisition_sample_size = context.config("acquisition_sample_size") @@ -16,16 +20,20 @@ def execute(context): probabilities = np.linspace(0.0, 1.0, 20) quantiles = [] - with context.progress(label = "Processing commute data ...", total = acquisition_sample_size) as progress: - for df_income in bs.get_stages(context, "synthesis.population.income.selected", acquisition_sample_size): + with context.progress( + label="Processing commute data ...", total=acquisition_sample_size + ) as progress: + for df_income in bs.get_stages( + context, "synthesis.population.income.selected", acquisition_sample_size + ): income = 12 * df_income["household_income"] / df_income["consumption_units"] quantiles.append([income.quantile(p) for p in probabilities]) progress.update() quantiles = np.array(quantiles) - mean = np.mean(quantiles, axis = 0) - min = np.min(quantiles, axis = 0) - max = np.max(quantiles, axis = 0) + mean = np.mean(quantiles, axis=0) + min = np.min(quantiles, axis=0) + max = np.max(quantiles, axis=0) - return pd.DataFrame(dict(mean = mean, min = min, max = max, cdf = probabilities)) + return pd.DataFrame(dict(mean=mean, min=min, max=max, cdf=probabilities)) diff --git a/analysis/synthesis/matching.py b/analysis/synthesis/matching.py index 1c66c14b..f3b33884 100644 --- a/analysis/synthesis/matching.py +++ b/analysis/synthesis/matching.py @@ -2,15 +2,21 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): acquisition_sample_size = context.config("acquisition_sample_size") random_seeds = (np.arange(acquisition_sample_size) * 1000 + 1000).astype(int) for index, random_seed in enumerate(random_seeds): - context.stage("synthesis.population.matched", { - "random_seed": int(random_seed), - "sampling_rate": context.config("sampling_rate") - }, alias = "seed_%d" % index) + context.stage( + "synthesis.population.matched", + { + "random_seed": int(random_seed), + "sampling_rate": context.config("sampling_rate"), + }, + alias="seed_%d" % index, + ) + def execute(context): acquisition_sample_size = context.config("acquisition_sample_size") @@ -26,6 +32,6 @@ def execute(context): aggregated[key].append(value) - aggregated = { k: np.array(v) for k, v in aggregated.items() } + aggregated = {k: np.array(v) for k, v in aggregated.items()} return aggregated diff --git a/analysis/synthesis/mode_distances.py b/analysis/synthesis/mode_distances.py index eea6b36d..e5f6a686 100644 --- a/analysis/synthesis/mode_distances.py +++ b/analysis/synthesis/mode_distances.py @@ -5,50 +5,80 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): acquisition_sample_size = context.config("acquisition_sample_size") - bs.configure(context, "synthesis.population.spatial.locations", acquisition_sample_size) + bs.configure( + context, "synthesis.population.spatial.locations", acquisition_sample_size + ) bs.configure(context, "synthesis.population.trips", acquisition_sample_size) + def execute(context): acquisition_sample_size = context.config("acquisition_sample_size") probabilities = np.linspace(0.0, 1.0, 20) modes = ["car", "car_passenger", "pt", "bike", "walk"] - quantiles = { mode : [] for mode in modes } + quantiles = {mode: [] for mode in modes} generator = zip( - 
bs.get_stages(context, "synthesis.population.spatial.locations", acquisition_sample_size), - bs.get_stages(context, "synthesis.population.trips", acquisition_sample_size) + bs.get_stages( + context, "synthesis.population.spatial.locations", acquisition_sample_size + ), + bs.get_stages(context, "synthesis.population.trips", acquisition_sample_size), ) - with context.progress(label = "Processing distance data ...", total = acquisition_sample_size) as progress: + with context.progress( + label="Processing distance data ...", total=acquisition_sample_size + ) as progress: for df_locations, df_trips in generator: # Load locations and calculate euclidean distances - df_locations = df_locations[["person_id", "activity_index", "geometry"]].rename(columns = { "activity_index": "trip_index" }) - df_locations["euclidean_distance"] = df_locations["geometry"].distance(df_locations["geometry"].shift(-1)) + df_locations = df_locations[ + ["person_id", "activity_index", "geometry"] + ].rename(columns={"activity_index": "trip_index"}) + df_locations["euclidean_distance"] = df_locations["geometry"].distance( + df_locations["geometry"].shift(-1) + ) # Merge mode into distances df_trips = pd.merge( - df_trips[["person_id", "trip_index", "mode", "preceding_purpose", "following_purpose", "departure_time", "arrival_time"]], - df_locations, on = ["person_id", "trip_index"], how = "inner" + df_trips[ + [ + "person_id", + "trip_index", + "mode", + "preceding_purpose", + "following_purpose", + "departure_time", + "arrival_time", + ] + ], + df_locations, + on=["person_id", "trip_index"], + how="inner", + ) + df_trips["travel_time"] = ( + df_trips["arrival_time"] - df_trips["departure_time"] ) - df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"] # Filter trips primary_activities = ["home", "work", "education"] - #primary_activities = [] - df_trips = df_trips[~( - df_trips["preceding_purpose"].isin(primary_activities) & - df_trips["following_purpose"].isin(primary_activities) - )] + # primary_activities = [] + df_trips = df_trips[ + ~( + df_trips["preceding_purpose"].isin(primary_activities) + & df_trips["following_purpose"].isin(primary_activities) + ) + ] # Calculate quantiles for mode in modes: df_mode = df_trips[df_trips["mode"] == mode] - quantiles[mode].append([df_mode["euclidean_distance"].quantile(p) for p in probabilities]) + quantiles[mode].append( + [df_mode["euclidean_distance"].quantile(p) for p in probabilities] + ) progress.update() @@ -58,14 +88,16 @@ def execute(context): df_data = [] for mode in modes: - mean = np.mean(quantiles[mode], axis = 0) - #min = np.percentile(quantiles[mode], 5, axis = 0) - #max = np.percentile(quantiles[mode], 95, axis = 0) + mean = np.mean(quantiles[mode], axis=0) + # min = np.percentile(quantiles[mode], 5, axis = 0) + # max = np.percentile(quantiles[mode], 95, axis = 0) - min = np.min(quantiles[mode], axis = 0) - max = np.max(quantiles[mode], axis = 0) + min = np.min(quantiles[mode], axis=0) + max = np.max(quantiles[mode], axis=0) - df_data.append(pd.DataFrame(dict(mean = mean, min = min, max = max, cdf = probabilities))) + df_data.append( + pd.DataFrame(dict(mean=mean, min=min, max=max, cdf=probabilities)) + ) df_data[-1]["mode"] = mode return pd.concat(df_data) diff --git a/analysis/synthesis/sociodemographics/chains.py b/analysis/synthesis/sociodemographics/chains.py index 3c90e9bc..c90c95c8 100644 --- a/analysis/synthesis/sociodemographics/chains.py +++ b/analysis/synthesis/sociodemographics/chains.py @@ -5,7 +5,13 @@ import 
analysis.statistics as stats import analysis.marginals as marginals -from analysis.chains import aggregate_chains, CHAIN_MARGINALS, CHAIN_LENGTH_LIMIT, CHAIN_TOP_K +from analysis.chains import ( + aggregate_chains, + CHAIN_MARGINALS, + CHAIN_LENGTH_LIMIT, + CHAIN_TOP_K, +) + def configure(context): acquisition_sample_size = context.config("acquisition_sample_size") @@ -13,26 +19,41 @@ def configure(context): bs.configure(context, "synthesis.population.sampled", acquisition_sample_size) bs.configure(context, "synthesis.population.activities", acquisition_sample_size) + def execute_parallel(context, data): acquisition_sample_size = context.config("acquisition_sample_size") df_population, df_chains = data - df_chains = df_chains[["person_id", "activity_index", "purpose"]].sort_values(by = ["person_id", "activity_index"]) + df_chains = df_chains[["person_id", "activity_index", "purpose"]].sort_values( + by=["person_id", "activity_index"] + ) df_chains = aggregate_chains(df_chains) marginals.prepare_classes(df_population) - df_chains = pd.merge(df_population[["person_id", "age_class", "sex", "age"]], df_chains, on = "person_id") - df_chains["chain_length_class"] = np.minimum(df_chains["chain_length"], CHAIN_LENGTH_LIMIT) + df_chains = pd.merge( + df_population[["person_id", "age_class", "sex", "age"]], + df_chains, + on="person_id", + ) + df_chains["chain_length_class"] = np.minimum( + df_chains["chain_length"], CHAIN_LENGTH_LIMIT + ) - top_k_chains = df_chains.groupby("chain").size().reset_index(name = "weight").sort_values( - by = "weight", ascending = False - ).head(CHAIN_TOP_K)["chain"].values + top_k_chains = ( + df_chains.groupby("chain") + .size() + .reset_index(name="weight") + .sort_values(by="weight", ascending=False) + .head(CHAIN_TOP_K)["chain"] + .values + ) df_chains = df_chains[df_chains["chain"].isin(top_k_chains)] df_chains["age_range"] = (df_chains["age"] >= 18) & (df_chains["age"] <= 40) context.progress.update() - return stats.marginalize(df_chains, CHAIN_MARGINALS, weight_column = None) + return stats.marginalize(df_chains, CHAIN_MARGINALS, weight_column=None) + def execute(context): acquisition_sample_size = context.config("acquisition_sample_size") @@ -41,10 +62,14 @@ def execute(context): feeder = zip( bs.get_stages(context, "synthesis.population.sampled", acquisition_sample_size), - bs.get_stages(context, "synthesis.population.activities", acquisition_sample_size) + bs.get_stages( + context, "synthesis.population.activities", acquisition_sample_size + ), ) - with context.progress(label = "Marginalizing chain data ...", total = acquisition_sample_size): + with context.progress( + label="Marginalizing chain data ...", total=acquisition_sample_size + ): with context.parallel() as parallel: data = list(parallel.imap_unordered(execute_parallel, feeder)) diff --git a/analysis/synthesis/sociodemographics/general.py b/analysis/synthesis/sociodemographics/general.py index c396231f..854e4360 100644 --- a/analysis/synthesis/sociodemographics/general.py +++ b/analysis/synthesis/sociodemographics/general.py @@ -2,26 +2,44 @@ import analysis.statistics as stats import analysis.marginals as marginals + def configure(context): acquisition_sample_size = context.config("acquisition_sample_size") bs.configure(context, "synthesis.population.enriched", acquisition_sample_size) + def execute(context): acquisition_sample_size = context.config("acquisition_sample_size") person_marginals = [] household_marginals = [] - for df in bs.get_stages(context, "synthesis.population.enriched", 
acquisition_sample_size): + for df in bs.get_stages( + context, "synthesis.population.enriched", acquisition_sample_size + ): marginals.prepare_classes(df) - person_marginals.append(stats.marginalize(df, marginals.ANALYSIS_PERSON_MARGINALS, weight_column = None)) - household_marginals.append(stats.marginalize(df.drop_duplicates("household_id"), marginals.ANALYSIS_HOUSEHOLD_MARGINALS, weight_column = None)) + person_marginals.append( + stats.marginalize( + df, marginals.ANALYSIS_PERSON_MARGINALS, weight_column=None + ) + ) + household_marginals.append( + stats.marginalize( + df.drop_duplicates("household_id"), + marginals.ANALYSIS_HOUSEHOLD_MARGINALS, + weight_column=None, + ) + ) person_marginals = stats.combine_marginals(person_marginals) household_marginals = stats.combine_marginals(household_marginals) - person_marginals = stats.apply_per_marginal(person_marginals, stats.analyze_sample_and_flatten) - household_marginals = stats.apply_per_marginal(household_marginals, stats.analyze_sample_and_flatten) + person_marginals = stats.apply_per_marginal( + person_marginals, stats.analyze_sample_and_flatten + ) + household_marginals = stats.apply_per_marginal( + household_marginals, stats.analyze_sample_and_flatten + ) - return dict(person = person_marginals, household = household_marginals) + return dict(person=person_marginals, household=household_marginals) diff --git a/analysis/synthesis/sociodemographics/spatial.py b/analysis/synthesis/sociodemographics/spatial.py index baba7e07..3204eea8 100644 --- a/analysis/synthesis/sociodemographics/spatial.py +++ b/analysis/synthesis/sociodemographics/spatial.py @@ -4,11 +4,15 @@ import pandas as pd + def configure(context): acquisition_sample_size = context.config("acquisition_sample_size") bs.configure(context, "synthesis.population.enriched", acquisition_sample_size) - bs.configure(context, "synthesis.population.spatial.home.zones", acquisition_sample_size) + bs.configure( + context, "synthesis.population.spatial.home.zones", acquisition_sample_size + ) + def execute(context): acquisition_sample_size = context.config("acquisition_sample_size") @@ -17,21 +21,39 @@ def execute(context): household_marginals = [] feeder = zip( - bs.get_stages(context, "synthesis.population.enriched", acquisition_sample_size), - bs.get_stages(context, "synthesis.population.spatial.home.zones", acquisition_sample_size) + bs.get_stages( + context, "synthesis.population.enriched", acquisition_sample_size + ), + bs.get_stages( + context, "synthesis.population.spatial.home.zones", acquisition_sample_size + ), ) for df, df_home in feeder: df = pd.merge(df, df_home[["household_id", "departement_id", "commune_id"]]) marginals.prepare_classes(df) - person_marginals.append(stats.marginalize(df, marginals.SPATIAL_PERSON_MARGINALS, weight_column = None)) - household_marginals.append(stats.marginalize(df.drop_duplicates("household_id"), marginals.SPATIAL_HOUSEHOLD_MARGINALS, weight_column = None)) + person_marginals.append( + stats.marginalize( + df, marginals.SPATIAL_PERSON_MARGINALS, weight_column=None + ) + ) + household_marginals.append( + stats.marginalize( + df.drop_duplicates("household_id"), + marginals.SPATIAL_HOUSEHOLD_MARGINALS, + weight_column=None, + ) + ) person_marginals = stats.combine_marginals(person_marginals) household_marginals = stats.combine_marginals(household_marginals) - person_marginals = stats.apply_per_marginal(person_marginals, stats.analyze_sample_and_flatten) - household_marginals = stats.apply_per_marginal(household_marginals, 
stats.analyze_sample_and_flatten) + person_marginals = stats.apply_per_marginal( + person_marginals, stats.analyze_sample_and_flatten + ) + household_marginals = stats.apply_per_marginal( + household_marginals, stats.analyze_sample_and_flatten + ) - return dict(person = person_marginals, household = household_marginals) + return dict(person=person_marginals, household=household_marginals) diff --git a/analysis/synthesis/statistics/marginal.py b/analysis/synthesis/statistics/marginal.py index 7e140d5c..8afcec37 100644 --- a/analysis/synthesis/statistics/marginal.py +++ b/analysis/synthesis/statistics/marginal.py @@ -5,17 +5,25 @@ import analysis.statistics as stats MARGINALS = [ - ("age_class",), ("sex",), ("employed",), ("studies",), - ("socioprofessional_class",), ("age_class", "employed") + ("age_class",), + ("sex",), + ("employed",), + ("studies",), + ("socioprofessional_class",), + ("age_class", "employed"), ] + def configure(context): context.config("random_seed") - context.stage("synthesis.population.sampled", dict( - random_seed = context.config("random_seed") - ), alias = "sample") + context.stage( + "synthesis.population.sampled", + dict(random_seed=context.config("random_seed")), + alias="sample", + ) + def execute(context): df = context.stage("sample") marginals.prepare_classes(df) - return stats.marginalize(df, MARGINALS, weight_column = None) + return stats.marginalize(df, MARGINALS, weight_column=None) diff --git a/analysis/synthesis/statistics/monte_carlo.py b/analysis/synthesis/statistics/monte_carlo.py index 23b9892d..84d0bd8c 100644 --- a/analysis/synthesis/statistics/monte_carlo.py +++ b/analysis/synthesis/statistics/monte_carlo.py @@ -11,23 +11,33 @@ from analysis.synthesis.statistics.marginal import MARGINALS + def configure(context): context.stage("analysis.reference.census.sociodemographics") for sampling_rate in SAMPLING_RATES: - bt.configure(context, "analysis.synthesis.statistics.marginal", ACQUISITION_SAMPLE_SIZE, dict( - sampling_rate = sampling_rate - ), alias = "sample_%f" % sampling_rate) + bt.configure( + context, + "analysis.synthesis.statistics.marginal", + ACQUISITION_SAMPLE_SIZE, + dict(sampling_rate=sampling_rate), + alias="sample_%f" % sampling_rate, + ) + STATISTICS = [ - ("mean", "mean"), ("q5", lambda x: x.quantile(0.05)), ("q95", lambda x: x.quantile(0.95)) + ("mean", "mean"), + ("q5", lambda x: x.quantile(0.05)), + ("q95", lambda x: x.quantile(0.95)), ] STATISTICS = { - "weight": STATISTICS, "error": STATISTICS, - "error_probability": [("mean", "mean")] + "weight": STATISTICS, + "error": STATISTICS, + "error_probability": [("mean", "mean")], } + def process(context, k): reference = context.data("reference") partial_marginals = context.data("partial_marginals") @@ -40,12 +50,23 @@ def process(context, k): df_marginal = k_marginals[marginal] df_reference = reference[marginal] - df_marginal = pd.merge(df_marginal, df_reference.rename(columns = { "weight": "reference" }), on = marginal) + df_marginal = pd.merge( + df_marginal, + df_reference.rename(columns={"weight": "reference"}), + on=marginal, + ) df_marginal["weight"] /= sampling_rate df_marginal["error"] = df_marginal["weight"] / df_marginal["reference"] - 1 - df_marginal["error_probability"] = np.abs(df_marginal["error"]) <= ERROR_THRESHOLD + df_marginal["error_probability"] = ( + np.abs(df_marginal["error"]) <= ERROR_THRESHOLD + ) - df = df_marginal[list(marginal) + ["weight", "error", "error_probability"]].groupby(list(marginal)).aggregate(STATISTICS).reset_index() + df = ( + 
df_marginal[list(marginal) + ["weight", "error", "error_probability"]] + .groupby(list(marginal)) + .aggregate(STATISTICS) + .reset_index() + ) df["samples"] = k df["sampling_rate"] = sampling_rate @@ -55,19 +76,36 @@ def process(context, k): return output + def execute(context): reference = context.stage("analysis.reference.census.sociodemographics")["person"] - output = { marginal: [] for marginal in MARGINALS } + output = {marginal: [] for marginal in MARGINALS} total = len(SAMPLING_RATES) * len(MARGINALS) * ACQUISITION_SAMPLE_SIZE - with context.progress(label = "Running Monte Carlo analysis ...", total = total) as progress: + with context.progress( + label="Running Monte Carlo analysis ...", total=total + ) as progress: for sampling_rate in SAMPLING_RATES: - partial_marginals = list(bt.get_stages(context, "sample_%f" % sampling_rate, sample_size = ACQUISITION_SAMPLE_SIZE)) - - with context.parallel(data = dict(partial_marginals = partial_marginals, reference = reference, sampling_rate = sampling_rate)) as parallel: - - for partial_output in parallel.imap_unordered(process, np.arange(1, ACQUISITION_SAMPLE_SIZE + 1)): + partial_marginals = list( + bt.get_stages( + context, + "sample_%f" % sampling_rate, + sample_size=ACQUISITION_SAMPLE_SIZE, + ) + ) + + with context.parallel( + data=dict( + partial_marginals=partial_marginals, + reference=reference, + sampling_rate=sampling_rate, + ) + ) as parallel: + + for partial_output in parallel.imap_unordered( + process, np.arange(1, ACQUISITION_SAMPLE_SIZE + 1) + ): for marginal in MARGINALS: output[marginal].append(partial_output[marginal]) diff --git a/data/ban/raw.py b/data/ban/raw.py index 764c6c8f..7f97064f 100644 --- a/data/ban/raw.py +++ b/data/ban/raw.py @@ -7,17 +7,16 @@ This stage loads the raw data from the new French address registry (BAN). 
""" + def configure(context): context.stage("data.spatial.codes") context.config("data_path") context.config("ban_path", "ban_idf") -BAN_DTYPES = { - "code_insee": str, - "x": float, - "y": float -} + +BAN_DTYPES = {"code_insee": str, "x": float, "y": float} + def execute(context): # Find relevant departments @@ -27,12 +26,19 @@ def execute(context): # Load BAN df_ban = [] - for source_path in find_ban("{}/{}".format(context.config("data_path"), context.config("ban_path"))): + for source_path in find_ban( + "{}/{}".format(context.config("data_path"), context.config("ban_path")) + ): print("Reading {} ...".format(source_path)) - df_partial = pd.read_csv(source_path, - compression = "gzip", sep = ";", usecols = BAN_DTYPES.keys(), dtype = BAN_DTYPES) - + df_partial = pd.read_csv( + source_path, + compression="gzip", + sep=";", + usecols=BAN_DTYPES.keys(), + dtype=BAN_DTYPES, + ) + # Filter by departments df_partial["department_id"] = df_partial["code_insee"].str[:2] df_partial = df_partial[["department_id", "x", "y"]] @@ -40,25 +46,30 @@ def execute(context): if len(df_partial) > 0: df_ban.append(df_partial) - + df_ban = pd.concat(df_ban) df_ban = gpd.GeoDataFrame( - df_ban, geometry = gpd.points_from_xy(df_ban.x, df_ban.y), crs = "EPSG:2154") - + df_ban, geometry=gpd.points_from_xy(df_ban.x, df_ban.y), crs="EPSG:2154" + ) + # Check that we cover all requested departments at least once for department_id in requested_departments: assert np.count_nonzero(df_ban["department_id"] == department_id) > 0 return df_ban[["geometry"]] + def find_ban(path): candidates = sorted(list(glob.glob("{}/*.csv.gz".format(path)))) if len(candidates) == 0: raise RuntimeError("BAN data is not available in {}".format(path)) - + return candidates + def validate(context): - paths = find_ban("{}/{}".format(context.config("data_path"), context.config("ban_path"))) + paths = find_ban( + "{}/{}".format(context.config("data_path"), context.config("ban_path")) + ) return sum([os.path.getsize(path) for path in paths]) diff --git a/data/bdtopo/output.py b/data/bdtopo/output.py index 214fd465..c1bb95c8 100644 --- a/data/bdtopo/output.py +++ b/data/bdtopo/output.py @@ -1,14 +1,17 @@ import geopandas as gpd + def configure(context): context.config("output_path") context.config("output_prefix", "ile_de_france_") context.stage("data.bdtopo.raw") + def execute(context): df_buildings = context.stage("data.bdtopo.raw") - df_buildings.to_file("%s/%sbdtopo.gpkg" % ( - context.config("output_path"), context.config("output_prefix") - )) + df_buildings.to_file( + "%s/%sbdtopo.gpkg" + % (context.config("output_path"), context.config("output_prefix")) + ) diff --git a/data/bdtopo/raw.py b/data/bdtopo/raw.py index 354545ec..e794f895 100644 --- a/data/bdtopo/raw.py +++ b/data/bdtopo/raw.py @@ -11,13 +11,15 @@ """ This stage loads the raw data from the French building registry (BD-TOPO). 
""" - + + def configure(context): context.config("data_path") context.config("bdtopo_path", "bdtopo_idf") context.stage("data.spatial.departments") + def get_department_string(department_id): department_id = str(department_id) @@ -28,11 +30,14 @@ def get_department_string(department_id): else: raise RuntimeError("Department identifier should have at least two characters") + def execute(context): df_departments = context.stage("data.spatial.departments") print("Expecting data for {} departments".format(len(df_departments))) - - source_paths = find_bdtopo("{}/{}".format(context.config("data_path"), context.config("bdtopo_path"))) + + source_paths = find_bdtopo( + "{}/{}".format(context.config("data_path"), context.config("bdtopo_path")) + ) df_bdtopo = [] known_ids = set() @@ -43,8 +48,10 @@ def execute(context): with py7zr.SevenZipFile(source_path) as archive: # Find the path inside the archive - internal_path = [path for path in archive.getnames() if path.endswith(".gpkg")] - + internal_path = [ + path for path in archive.getnames() if path.endswith(".gpkg") + ] + if len(internal_path) != 1: print(" Skipping: No unambiguous geometry source found!") @@ -54,20 +61,26 @@ def execute(context): geometry_path = "{}/{}".format(context.path(), internal_path[0]) if geometry_path is not None: - with context.progress(label = " Reading ...") as progress: - data = { "cleabs": [], "nombre_de_logements": [], "geometry": [] } - with fiona.open(geometry_path, layer = "batiment") as package: + with context.progress(label=" Reading ...") as progress: + data = {"cleabs": [], "nombre_de_logements": [], "geometry": []} + with fiona.open(geometry_path, layer="batiment") as package: for item in package: data["cleabs"].append(item["properties"]["cleabs"]) - data["nombre_de_logements"].append(item["properties"]["nombre_de_logements"]) + data["nombre_de_logements"].append( + item["properties"]["nombre_de_logements"] + ) data["geometry"].append(geo.shape(item["geometry"])) progress.update() df_buildings = pd.DataFrame(data) - df_buildings = gpd.GeoDataFrame(df_buildings, crs = "EPSG:2154") - - df_buildings["building_id"] = df_buildings["cleabs"].apply(lambda x: int(x[8:])) - df_buildings["housing"] = df_buildings["nombre_de_logements"].fillna(0).astype(int) + df_buildings = gpd.GeoDataFrame(df_buildings, crs="EPSG:2154") + + df_buildings["building_id"] = df_buildings["cleabs"].apply( + lambda x: int(x[8:]) + ) + df_buildings["housing"] = ( + df_buildings["nombre_de_logements"].fillna(0).astype(int) + ) df_buildings["centroid"] = df_buildings["geometry"].centroid df_buildings = df_buildings.set_geometry("centroid") @@ -77,22 +90,36 @@ def execute(context): initial_count = len(df_buildings) df_buildings = df_buildings[df_buildings["housing"] > 0] final_count = len(df_buildings) - print(" {}/{} filtered by dwellings".format(initial_count - final_count, initial_count)) + print( + " {}/{} filtered by dwellings".format( + initial_count - final_count, initial_count + ) + ) initial_count = len(df_buildings) df_buildings = df_buildings[~df_buildings["building_id"].isin(known_ids)] final_count = len(df_buildings) - print(" {}/{} filtered duplicates".format(initial_count - final_count, initial_count)) + print( + " {}/{} filtered duplicates".format( + initial_count - final_count, initial_count + ) + ) initial_count = len(df_buildings) - df_buildings = gpd.sjoin(df_buildings, df_departments, predicate = "within") + df_buildings = gpd.sjoin(df_buildings, df_departments, predicate="within") final_count = len(df_buildings) - print(" 
{}/{} filtered spatially".format(initial_count - final_count, initial_count)) + print( + " {}/{} filtered spatially".format( + initial_count - final_count, initial_count + ) + ) df_buildings["department_id"] = df_buildings["departement_id"] df_buildings = df_buildings.set_geometry("geometry") - df_bdtopo.append(df_buildings[["building_id", "housing", "department_id", "geometry"]]) + df_bdtopo.append( + df_buildings[["building_id", "housing", "department_id", "geometry"]] + ) known_ids |= set(df_buildings["building_id"].unique()) os.remove(geometry_path) @@ -104,14 +131,18 @@ def execute(context): return df_bdtopo[["building_id", "housing", "geometry"]] + def find_bdtopo(path): candidates = sorted(list(glob.glob("{}/*.7z".format(path)))) if len(candidates) == 0: raise RuntimeError("BD TOPO data is not available in {}".format(path)) - + return candidates + def validate(context): - paths = find_bdtopo("{}/{}".format(context.config("data_path"), context.config("bdtopo_path"))) + paths = find_bdtopo( + "{}/{}".format(context.config("data_path"), context.config("bdtopo_path")) + ) return sum([os.path.getsize(path) for path in paths]) diff --git a/data/bpe/cleaned.py b/data/bpe/cleaned.py index 30e1cad3..9797729e 100644 --- a/data/bpe/cleaned.py +++ b/data/bpe/cleaned.py @@ -10,6 +10,7 @@ - Simplify activity types for all enterprises """ + def configure(context): context.stage("data.bpe.raw") @@ -18,32 +19,38 @@ def configure(context): context.config("bpe_random_seed", 0) + ACTIVITY_TYPE_MAP = [ - ("A", "other"), # Police, post office, etc ... - ("A504", "leisure"), # Restaurant - ("B", "shop"), # Shopping - ("C", "education"), # Education - ("D", "other"), # Health - ("E", "other"), # Transport - ("F", "leisure"), # Sports & Culture - ("G", "other"), # Tourism, hotels, etc. (Hôtel = G102) + ("A", "other"), # Police, post office, etc ... + ("A504", "leisure"), # Restaurant + ("B", "shop"), # Shopping + ("C", "education"), # Education + ("D", "other"), # Health + ("E", "other"), # Transport + ("F", "leisure"), # Sports & Culture + ("G", "other"), # Tourism, hotels, etc. 
(Hôtel = G102)
 ]
 
+
 def find_outside(context, commune_id):
     df_municipalities = context.data("df_municipalities")
     df = context.data("df")
     df = df[df["commune_id"] == commune_id]
 
-    zone = df_municipalities[df_municipalities["commune_id"] == commune_id]["geometry"].values[0]
+    zone = df_municipalities[df_municipalities["commune_id"] == commune_id][
+        "geometry"
+    ].values[0]
 
     indices = [
-        index for index, x, y in df[["x", "y"]].itertuples()
+        index
+        for index, x, y in df[["x", "y"]].itertuples()
         if not zone.contains(geo.Point(x, y))
     ]
 
     context.progress.update()
     return indices
 
+
 def execute(context):
     df = context.stage("data.bpe.raw")
 
@@ -57,9 +64,9 @@ def execute(context):
 
     df["activity_type"] = df["activity_type"].astype("category")
 
-    #Add
-    df = df.rename(columns={"TYPEQU":"education_type"})
-    df["weight"] = 500
+    # Add
+    df = df.rename(columns={"TYPEQU": "education_type"})
+    df["weight"] = 500
     # Clean coordinates
     df["x"] = df["LAMBERT_X"].astype(str).str.replace(",", ".").astype(float)
     df["y"] = df["LAMBERT_Y"].astype(str).str.replace(",", ".").astype(float)
@@ -77,20 +84,29 @@ def execute(context):
 
     df["commune_id"] = df["DEPCOM"].astype("category")
 
-    print("Found %d/%d (%.2f%%) observations without IRIS" % (
-        (df["iris_id"] == "undefined").sum(), len(df), 100 * (df["iris_id"] == "undefined").mean()
-    ))
+    print(
+        "Found %d/%d (%.2f%%) observations without IRIS"
+        % (
+            (df["iris_id"] == "undefined").sum(),
+            len(df),
+            100 * (df["iris_id"] == "undefined").mean(),
+        )
+    )
 
     # Check whether all communes in BPE are within our set of requested data
     df_municipalities = context.stage("data.spatial.municipalities")
 
-    excess_communes = set(df["commune_id"].unique()) - set(df_municipalities["commune_id"].unique())
+    excess_communes = set(df["commune_id"].unique()) - set(
+        df_municipalities["commune_id"].unique()
+    )
 
     if len(excess_communes) > 0:
         raise RuntimeError("Found additional communes: %s" % excess_communes)
 
     # We notice that we have some additional IRIS. Make sure they will be placed randomly in their commune later. 
df_iris = context.stage("data.spatial.iris") - excess_iris = set(df[df["iris_id"] != "undefined"]["iris_id"].unique()) - set(df_iris["iris_id"].unique()) + excess_iris = set(df[df["iris_id"] != "undefined"]["iris_id"].unique()) - set( + df_iris["iris_id"].unique() + ) df.loc[df["iris_id"].isin(excess_iris), "iris_id"] = "undefined" print("Excess IRIS without valid code:", excess_iris) @@ -100,19 +116,42 @@ def execute(context): f_undefined = df["iris_id"] == "undefined" f_missing = df["x"].isna() - print("Found %d/%d (%.2f%%) observations without coordinate" % ( - ((f_missing & ~f_undefined).sum(), len(df), 100 * (f_missing & ~f_undefined).mean() - ))) + print( + "Found %d/%d (%.2f%%) observations without coordinate" + % ( + ( + (f_missing & ~f_undefined).sum(), + len(df), + 100 * (f_missing & ~f_undefined).mean(), + ) + ) + ) if np.count_nonzero(f_missing & ~f_undefined) > 0: # Impute missing coordinates for known IRIS - df.update(spatial_utils.sample_from_zones( - context, df_iris, df[f_missing & ~f_undefined], "iris_id", random, label = "Imputing IRIS coordinates ...")) + df.update( + spatial_utils.sample_from_zones( + context, + df_iris, + df[f_missing & ~f_undefined], + "iris_id", + random, + label="Imputing IRIS coordinates ...", + ) + ) if np.count_nonzero(f_missing & f_undefined) > 0: # Impute missing coordinates for unknown IRIS - df.update(spatial_utils.sample_from_zones( - context, df_municipalities, df[f_missing & f_undefined], "commune_id", random, label = "Imputing municipality coordinates ...")) + df.update( + spatial_utils.sample_from_zones( + context, + df_municipalities, + df[f_missing & f_undefined], + "commune_id", + random, + label="Imputing municipality coordinates ...", + ) + ) # Consolidate df["imputed"] = f_missing @@ -122,8 +161,12 @@ def execute(context): # the respective municipality. Find them and move them back in. 
outside_indices = [] - with context.progress(label = "Finding outside observations ...", total = len(df["commune_id"].unique())): - with context.parallel(dict(df = df, df_municipalities = df_municipalities)) as parallel: + with context.progress( + label="Finding outside observations ...", total=len(df["commune_id"].unique()) + ): + with context.parallel( + dict(df=df, df_municipalities=df_municipalities) + ) as parallel: for partial in parallel.imap(find_outside, df["commune_id"].unique()): outside_indices += partial @@ -131,14 +174,33 @@ def execute(context): df.loc[outside_indices, "x"] = np.nan df.loc[outside_indices, "y"] = np.nan - df.update(spatial_utils.sample_from_zones( - context, df_municipalities, df.loc[outside_indices], "commune_id", random, label = "Fixing outside locations ...")) + df.update( + spatial_utils.sample_from_zones( + context, + df_municipalities, + df.loc[outside_indices], + "commune_id", + random, + label="Fixing outside locations ...", + ) + ) df.loc[outside_indices, "imputed"] = True # Package up data set - df = df[["enterprise_id", "activity_type","education_type", "commune_id", "imputed", "x", "y","weight"]] + df = df[ + [ + "enterprise_id", + "activity_type", + "education_type", + "commune_id", + "imputed", + "x", + "y", + "weight", + ] + ] - df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y),crs="EPSG:2154") + df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y), crs="EPSG:2154") return df diff --git a/data/bpe/raw.py b/data/bpe/raw.py index 98135631..95429e10 100644 --- a/data/bpe/raw.py +++ b/data/bpe/raw.py @@ -6,27 +6,38 @@ This stage loads the raw data from the French service registry. """ + def configure(context): context.config("data_path") context.config("bpe_path", "bpe_2021/bpe21_ensemble_xy_csv.zip") context.config("bpe_csv", "bpe21_ensemble_xy.csv") context.stage("data.spatial.codes") + def execute(context): df_records = [] df_codes = context.stage("data.spatial.codes") requested_departements = df_codes["departement_id"].unique() - with context.progress(label = "Reading BPE ...") as progress: - with zipfile.ZipFile("{}/{}".format(context.config("data_path"), context.config("bpe_path"))) as archive: + with context.progress(label="Reading BPE ...") as progress: + with zipfile.ZipFile( + "{}/{}".format(context.config("data_path"), context.config("bpe_path")) + ) as archive: with archive.open(context.config("bpe_csv")) as f: - csv = pd.read_csv(f, usecols = [ - "DCIRIS", "LAMBERT_X", "LAMBERT_Y", - "TYPEQU", "DEPCOM", "DEP" - ], sep = ";", - dtype = dict(DEPCOM = str, DEP = str, DCIRIS = str), - chunksize = 10240 + csv = pd.read_csv( + f, + usecols=[ + "DCIRIS", + "LAMBERT_X", + "LAMBERT_Y", + "TYPEQU", + "DEPCOM", + "DEP", + ], + sep=";", + dtype=dict(DEPCOM=str, DEP=str, DCIRIS=str), + chunksize=10240, ) for df_chunk in csv: @@ -39,8 +50,13 @@ def execute(context): return pd.concat(df_records) + def validate(context): - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("bpe_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("bpe_path")) + ): raise RuntimeError("BPE data is not available") - return os.path.getsize("%s/%s" % (context.config("data_path"), context.config("bpe_path"))) + return os.path.getsize( + "%s/%s" % (context.config("data_path"), context.config("bpe_path")) + ) diff --git a/data/census/cleaned.py b/data/census/cleaned.py index 789d0adb..1d4d4f95 100644 --- a/data/census/cleaned.py +++ b/data/census/cleaned.py @@ -9,6 +9,7 @@ - Clean 
up spatial information and sociodemographic attributes """ + def configure(context): context.stage("data.census.raw") context.stage("data.spatial.codes") @@ -16,27 +17,32 @@ def configure(context): if context.config("use_urban_type", False): context.stage("data.spatial.urban_type") + def execute(context): df = context.stage("data.census.raw") # Construct household IDs for persons with NUMMI != Z df_household_ids = df[["CANTVILLE", "NUMMI"]] df_household_ids = df_household_ids[df_household_ids["NUMMI"] != "Z"] - df_household_ids["temporary"] = df_household_ids["CANTVILLE"] + df_household_ids["NUMMI"] + df_household_ids["temporary"] = ( + df_household_ids["CANTVILLE"] + df_household_ids["NUMMI"] + ) df_household_ids = df_household_ids.drop_duplicates("temporary") df_household_ids["household_id"] = np.arange(len(df_household_ids)) - df = pd.merge(df, df_household_ids, on = ["CANTVILLE", "NUMMI"], how = "left") + df = pd.merge(df, df_household_ids, on=["CANTVILLE", "NUMMI"], how="left") # Fill up undefined household ids (those where NUMMI == Z) f = np.isnan(df["household_id"]) - df.loc[f, "household_id"] = np.arange(np.count_nonzero(f)) + df["household_id"].max() + 1 + df.loc[f, "household_id"] = ( + np.arange(np.count_nonzero(f)) + df["household_id"].max() + 1 + ) df["household_id"] = df["household_id"].astype(int) # Put person IDs df["person_id"] = np.arange(len(df)) # Sorting - df = df.sort_values(by = ["household_id", "person_id"]) + df = df.sort_values(by=["household_id", "person_id"]) # Spatial information df["departement_id"] = df["DEPT"].astype("category") @@ -52,7 +58,9 @@ def execute(context): df["iris_id"] = df["iris_id"].astype("category") # Age - df["age"] = df["AGED"].apply(lambda x: "0" if x == "000" else x.lstrip("0")).astype(int) + df["age"] = ( + df["AGED"].apply(lambda x: "0" if x == "000" else x.lstrip("0")).astype(int) + ) # Clean COUPLE df["couple"] = df["COUPLE"] == "1" @@ -81,42 +89,63 @@ def execute(context): df["studies"] = df["ETUD"] == "1" # Number of vehicles - df["number_of_vehicles"] = df["VOIT"].apply( - lambda x: str(x).replace("Z", "0").replace("X", "0") - ).astype(int) - - df["number_of_vehicles"] += df["DEROU"].apply( - lambda x: str(x).replace("U", "0").replace("Z", "0").replace("X", "0") - ).astype(int) + df["number_of_vehicles"] = ( + df["VOIT"] + .apply(lambda x: str(x).replace("Z", "0").replace("X", "0")) + .astype(int) + ) + + df["number_of_vehicles"] += ( + df["DEROU"] + .apply(lambda x: str(x).replace("U", "0").replace("Z", "0").replace("X", "0")) + .astype(int) + ) # Household size - df_size = df[["household_id"]].groupby("household_id").size().reset_index(name = "household_size") + df_size = ( + df[["household_id"]] + .groupby("household_id") + .size() + .reset_index(name="household_size") + ) df = pd.merge(df, df_size) # Socioprofessional category df["socioprofessional_class"] = df["CS1"].astype(int) # Consumption units - df = pd.merge(df, hts.calculate_consumption_units(df), on = "household_id") - - df = df[[ - "person_id", "household_id", "weight", - "iris_id", "commune_id", "departement_id", - "age", "sex", "couple", - "commute_mode", "employed", - "studies", "number_of_vehicles", "household_size", - "consumption_units", "socioprofessional_class" - ]] + df = pd.merge(df, hts.calculate_consumption_units(df), on="household_id") + + df = df[ + [ + "person_id", + "household_id", + "weight", + "iris_id", + "commune_id", + "departement_id", + "age", + "sex", + "couple", + "commute_mode", + "employed", + "studies", + "number_of_vehicles", + 
"household_size", + "consumption_units", + "socioprofessional_class", + ] + ] if context.config("use_urban_type"): - df_urban_type = context.stage("data.spatial.urban_type")[[ - "commune_id", "urban_type" - ]] - + df_urban_type = context.stage("data.spatial.urban_type")[ + ["commune_id", "urban_type"] + ] + # Impute urban type - df = pd.merge(df, df_urban_type, on = "commune_id", how = "left") + df = pd.merge(df, df_urban_type, on="commune_id", how="left") df.loc[df["commune_id"] == "undefined", "urban_type"] = "none" df["commune_id"] = df["commune_id"].astype("category") - assert ~np.any(df["urban_type"].isna()) + assert ~np.any(df["urban_type"].isna()) return df diff --git a/data/census/filtered.py b/data/census/filtered.py index ecd3bdcd..ac1f5771 100644 --- a/data/census/filtered.py +++ b/data/census/filtered.py @@ -7,10 +7,12 @@ Île-de-France. """ + def configure(context): context.stage("data.census.cleaned") context.stage("data.spatial.codes") + def execute(context): df = context.stage("data.census.cleaned") @@ -20,7 +22,9 @@ def execute(context): requested_departements = df_codes["departement_id"].unique() df = df[df["departement_id"].isin(requested_departements)] - excess_communes = set(df["commune_id"].unique()) - set(df_codes["commune_id"].unique()) + excess_communes = set(df["commune_id"].unique()) - set( + df_codes["commune_id"].unique() + ) if not excess_communes == {"undefined"}: raise RuntimeError("Found additional communes: %s" % excess_communes) diff --git a/data/census/projection.py b/data/census/projection.py index dc9a8f9f..8a26b816 100644 --- a/data/census/projection.py +++ b/data/census/projection.py @@ -5,28 +5,32 @@ This stage loads and cleans projection data about the French population. """ + def configure(context): context.config("data_path") context.config("projection_path", "projection_2021") context.config("projection_scenario", "00_central") context.config("projection_year", None) + def execute(context): source_path = "{}/{}/{}.xlsx".format( - context.config("data_path"), - context.config("projection_path"), - context.config("projection_scenario")) - + context.config("data_path"), + context.config("projection_path"), + context.config("projection_scenario"), + ) + projection_year = int(context.config("projection_year")) - df_all = pd.read_excel( - source_path, sheet_name = "population", skiprows = 1).iloc[:107] - - df_male = pd.read_excel( - source_path, sheet_name = "populationH", skiprows = 1).iloc[:107] - - df_female = pd.read_excel( - source_path, sheet_name = "populationF", skiprows = 1).iloc[:107] + df_all = pd.read_excel(source_path, sheet_name="population", skiprows=1).iloc[:107] + + df_male = pd.read_excel(source_path, sheet_name="populationH", skiprows=1).iloc[ + :107 + ] + + df_female = pd.read_excel(source_path, sheet_name="populationF", skiprows=1).iloc[ + :107 + ] df_male["sex"] = "male" df_female["sex"] = "female" @@ -35,10 +39,9 @@ def execute(context): assert df_male["Âge au 1er janvier"].iloc[-1] == "Total des hommes" assert df_female["Âge au 1er janvier"].iloc[-1] == "Total des femmes" - df_sex = pd.concat([ - df_male.iloc[-1:], - df_female.iloc[-1:] - ]).drop(columns = ["Âge au 1er janvier"])[["sex", projection_year]] + df_sex = pd.concat([df_male.iloc[-1:], df_female.iloc[-1:]]).drop( + columns=["Âge au 1er janvier"] + )[["sex", projection_year]] df_sex.columns = ["sex", "projection"] df_age = df_all[["Âge au 1er janvier", projection_year]].iloc[:-1] @@ -48,28 +51,28 @@ def execute(context): df_female = df_female[["Âge au 1er janvier", 
"sex", projection_year]].iloc[:-1] df_male.columns = ["age", "sex", "projection"] - df_female.columns = ["age","sex", "projection"] + df_female.columns = ["age", "sex", "projection"] df_cross = pd.concat([df_male, df_female]) df_cross["sex"] = df_cross["sex"].astype("category") - df_total = df_all.iloc[-1:].drop(columns = ["Âge au 1er janvier"])[[projection_year]] + df_total = df_all.iloc[-1:].drop(columns=["Âge au 1er janvier"])[[projection_year]] df_total.columns = ["projection"] - return { - "total": df_total, "sex": df_sex, "age": df_age, "cross": df_cross - } + return {"total": df_total, "sex": df_sex, "age": df_age, "cross": df_cross} + def validate(context): if context.config("projection_year") is not None: source_path = "{}/{}/{}.xlsx".format( - context.config("data_path"), - context.config("projection_path"), - context.config("projection_scenario")) + context.config("data_path"), + context.config("projection_path"), + context.config("projection_scenario"), + ) if not os.path.exists(source_path): raise RuntimeError("Projection data is not available") return os.path.getsize(source_path) - + return 0 diff --git a/data/census/raw.py b/data/census/raw.py index 73eebd4a..93099f36 100644 --- a/data/census/raw.py +++ b/data/census/raw.py @@ -6,6 +6,7 @@ This stage loads the raw data from the French population census. """ + def configure(context): context.stage("data.spatial.codes") @@ -15,24 +16,26 @@ def configure(context): context.config("projection_year", None) + COLUMNS_DTYPES = { - "CANTVILLE":"str", - "NUMMI":"str", - "AGED":"str", - "COUPLE":"str", - "CS1":"str", - "DEPT":"str", - "ETUD":"str", - "IPONDI":"str", - "IRIS":"str", - "REGION":"str", - "SEXE":"str", - "TACT":"str", - "TRANS":"str", - "VOIT":"str", - "DEROU":"str" + "CANTVILLE": "str", + "NUMMI": "str", + "AGED": "str", + "COUPLE": "str", + "CS1": "str", + "DEPT": "str", + "ETUD": "str", + "IPONDI": "str", + "IRIS": "str", + "REGION": "str", + "SEXE": "str", + "TACT": "str", + "TRANS": "str", + "VOIT": "str", + "DEROU": "str", } + def execute(context): df_records = [] df_codes = context.stage("data.spatial.codes") @@ -42,20 +45,26 @@ def execute(context): # only pre-filter if we don't need to reweight the census later prefilter_departments = context.config("projection_year") is None - with context.progress(label = "Reading census ...") as progress: + with context.progress(label="Reading census ...") as progress: with zipfile.ZipFile( - "{}/{}".format(context.config("data_path"), context.config("census_path"))) as archive: + "{}/{}".format(context.config("data_path"), context.config("census_path")) + ) as archive: with archive.open(context.config("census_csv")) as f: - csv = pd.read_csv(f, - usecols = COLUMNS_DTYPES.keys(), sep = ";", - dtype = COLUMNS_DTYPES, - chunksize = 10240) - + csv = pd.read_csv( + f, + usecols=COLUMNS_DTYPES.keys(), + sep=";", + dtype=COLUMNS_DTYPES, + chunksize=10240, + ) + for df_chunk in csv: progress.update(len(df_chunk)) - + if prefilter_departments: - df_chunk = df_chunk[df_chunk["DEPT"].isin(requested_departements)] + df_chunk = df_chunk[ + df_chunk["DEPT"].isin(requested_departements) + ] if len(df_chunk) > 0: df_records.append(df_chunk) @@ -64,7 +73,11 @@ def execute(context): def validate(context): - if not os.path.exists("{}/{}".format(context.config("data_path"), context.config("census_path"))): + if not os.path.exists( + "{}/{}".format(context.config("data_path"), context.config("census_path")) + ): raise RuntimeError("RP 2019 data is not available") - return 
os.path.getsize("{}/{}".format(context.config("data_path"), context.config("census_path")))
+    return os.path.getsize(
+        "{}/{}".format(context.config("data_path"), context.config("census_path"))
+    )
diff --git a/data/external/education.py b/data/external/education.py
index 78950ce1..7c384a65 100644
--- a/data/external/education.py
+++ b/data/external/education.py
@@ -3,6 +3,7 @@
 import pandas as pd
 import geopandas as gpd
 
+
 def configure(context):
     context.stage("data.bpe.cleaned")
     context.stage("data.spatial.municipalities")
@@ -10,24 +11,35 @@ def configure(context):
     context.config("data_path")
     context.config("education_file", "education/education_addresses.geojson")
 
+
 def execute(context):
-    df_locations = context.stage("data.bpe.cleaned")[[
-        "activity_type", "education_type", "commune_id","weight", "geometry"
-    ]]
+    df_locations = context.stage("data.bpe.cleaned")[
+        ["activity_type", "education_type", "commune_id", "weight", "geometry"]
+    ]
 
     df_locations = df_locations[df_locations["activity_type"] == "education"]
-    df_locations = df_locations[["activity_type","education_type", "commune_id", "geometry"]].copy()
-    df_locations["fake"] = False
-
-    df_zones = context.stage("data.spatial.municipalities")
-    required_communes = set(df_zones["commune_id"].unique())
+    df_locations = df_locations[
+        ["activity_type", "education_type", "commune_id", "geometry"]
+    ].copy()
+    df_locations["fake"] = False
 
+    df_zones = context.stage("data.spatial.municipalities")
+    required_communes = set(df_zones["commune_id"].unique())
 
-    df_education = gpd.read_file("{}/{}".format(context.config("data_path"), context.config("education_file")))[["education_type", "commune_id","weight", "geometry"]]
+    df_education = gpd.read_file(
+        "{}/{}".format(context.config("data_path"), context.config("education_file"))
+    )[["education_type", "commune_id", "weight", "geometry"]]
 
     df_education["fake"] = False
     df_education = df_education.to_crs("2154")
     df_education["activity_type"] = "education"
 
     list_type = set(df_education["education_type"].unique())
 
-    df_locations = pd.concat([df_locations[~(df_locations["education_type"].str.startswith(tuple(list_type)))],df_education[df_education["commune_id"].isin(required_communes)]])
+    df_locations = pd.concat(
+        [
+            df_locations[
+                ~(df_locations["education_type"].str.startswith(tuple(list_type)))
+            ],
+            df_education[df_education["commune_id"].isin(required_communes)],
+        ]
+    )
 
     return df_locations
diff --git a/data/gtfs/cleaned.py b/data/gtfs/cleaned.py
index 81d0475e..f883fe08 100644
--- a/data/gtfs/cleaned.py
+++ b/data/gtfs/cleaned.py
@@ -6,15 +6,19 @@
 selected regions and departments) and merges them together.
 """
 
+
 def configure(context):
     context.config("data_path")
     context.config("gtfs_path", "gtfs_idf")
 
     context.stage("data.spatial.municipalities")
 
+
 def execute(context):
-    input_files = get_input_files("{}/{}".format(context.config("data_path"), context.config("gtfs_path")))
-
+    input_files = get_input_files(
+        "{}/{}".format(context.config("data_path"), context.config("gtfs_path"))
+    )
+
     # Prepare bounding area
     df_area = context.stage("data.spatial.municipalities")
 
@@ -25,7 +29,9 @@ def execute(context):
         feed = gtfs.cut_feed(feed, df_area)
 
         # This was fixed in pt2matsim, so we can remove it once a new release (> 20.7) is available. 
- feed = gtfs.despace_stop_ids(feed) # Necessary as MATSim does not like stops/links with spaces + feed = gtfs.despace_stop_ids( + feed + ) # Necessary as MATSim does not like stops/links with spaces feeds.append(feed) @@ -34,10 +40,27 @@ def execute(context): # Fix for pt2matsim (will be fixed after PR #173) # Order of week days must be fixed - days = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"] + days = [ + "monday", + "tuesday", + "wednesday", + "thursday", + "friday", + "saturday", + "sunday", + ] columns = list(merged_feed["calendar"].columns) - for day in days: columns.remove(day) - columns += ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"] + for day in days: + columns.remove(day) + columns += [ + "monday", + "tuesday", + "wednesday", + "thursday", + "friday", + "saturday", + "sunday", + ] merged_feed["calendar"] = merged_feed["calendar"][columns] # Write feed (not as a ZIP, but as files, for pt2matsim) @@ -45,6 +68,7 @@ def execute(context): return "gtfs" + def get_input_files(base_path): gtfs_paths = [ str(child) @@ -54,11 +78,14 @@ def get_input_files(base_path): if len(gtfs_paths) == 0: raise RuntimeError("Did not find any GTFS data (.zip) in {}".format(base_path)) - + return gtfs_paths + def validate(context): - input_files = get_input_files("{}/{}".format(context.config("data_path"), context.config("gtfs_path"))) + input_files = get_input_files( + "{}/{}".format(context.config("data_path"), context.config("gtfs_path")) + ) total_size = 0 for path in input_files: diff --git a/data/gtfs/output.py b/data/gtfs/output.py index 68c98ca9..f9fbf66f 100644 --- a/data/gtfs/output.py +++ b/data/gtfs/output.py @@ -4,18 +4,22 @@ Writes out the consolidated GTFS feed """ + def configure(context): context.config("output_path") context.config("output_prefix") context.stage("data.gtfs.cleaned") + def execute(context): source_path = "%s/output" % context.path("data.gtfs.cleaned") output_path = "%s/%sgtfs.zip" % ( - context.config("output_path"), context.config("output_prefix")) + context.config("output_path"), + context.config("output_prefix"), + ) - f = zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) + f = zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) print(source_path) for path in sorted(list(glob.glob("%s/*.txt" % source_path))): diff --git a/data/gtfs/utils.py b/data/gtfs/utils.py index 10585a7c..a95ba1e5 100644 --- a/data/gtfs/utils.py +++ b/data/gtfs/utils.py @@ -5,16 +5,24 @@ import os import numpy as np -REQUIRED_SLOTS = [ - "agency", "stops", "routes", "trips", "stop_times" -] +REQUIRED_SLOTS = ["agency", "stops", "routes", "trips", "stop_times"] OPTIONAL_SLOTS = [ - "calendar", "calendar_dates", "fare_attributes", "fare_rules", - "shapes", "frequencies", "transfers", "pathways", "levels", - "feed_info", "translations", "attributions" + "calendar", + "calendar_dates", + "fare_attributes", + "fare_rules", + "shapes", + "frequencies", + "transfers", + "pathways", + "levels", + "feed_info", + "translations", + "attributions", ] + def read_feed(path): feed = {} @@ -38,8 +46,13 @@ def read_feed(path): if not "%s%s.txt" % (prefix, slot) in available_slots: raise RuntimeError("Missing GTFS information: %s" % slot) - if not "%scalendar.txt" % prefix in available_slots and not "%scalendar_dates.txt" % prefix in available_slots: - raise RuntimeError("At least calendar.txt or calendar_dates.txt must be specified.") + if ( + not "%scalendar.txt" % prefix in available_slots + and not "%scalendar_dates.txt" % prefix in 
available_slots + ): + raise RuntimeError( + "At least calendar.txt or calendar_dates.txt must be specified." + ) print("Loading GTFS data from %s ..." % path) @@ -48,22 +61,27 @@ def read_feed(path): print(" Loading %s.txt ..." % slot) with zip.open("%s%s.txt" % (prefix, slot)) as f: - feed[slot] = pd.read_csv(f, skipinitialspace = True) + feed[slot] = pd.read_csv(f, skipinitialspace=True) else: print(" Not loading %s.txt" % slot) # Some cleanup for slot in ("calendar", "calendar_dates", "trips"): - if slot in feed and "service_id" in feed[slot] and pd.api.types.is_string_dtype(feed[slot]["service_id"]): + if ( + slot in feed + and "service_id" in feed[slot] + and pd.api.types.is_string_dtype(feed[slot]["service_id"]) + ): initial_count = len(feed[slot]) feed[slot] = feed[slot][feed[slot]["service_id"].str.len() > 0] final_count = len(feed[slot]) if final_count != initial_count: - print("WARNING Removed %d/%d entries from %s with empty service_id" % ( - initial_count - final_count, initial_count, slot - )) + print( + "WARNING Removed %d/%d entries from %s with empty service_id" + % (initial_count - final_count, initial_count, slot) + ) if "stops" in feed: df_stops = feed["stops"] @@ -83,7 +101,9 @@ def read_feed(path): print("WARNING NaN numbers for min_transfer_time in transfers") df_transfers = df_transfers[~f] - df_transfers["min_transfer_time"] = df_transfers["min_transfer_time"].astype(int) + df_transfers["min_transfer_time"] = df_transfers["min_transfer_time"].astype( + int + ) feed["transfers"] = df_transfers if "agency" in feed: @@ -99,17 +119,19 @@ def read_feed(path): df_routes.loc[df_routes["agency_id"].isna(), "agency_id"] = agency_id - if "shapes" in feed: del feed["shapes"] + if "shapes" in feed: + del feed["shapes"] feed["trips"]["shape_id"] = np.nan # Fixes for Nantes PDL for item in feed.keys(): - feed[item] = feed[item].drop(columns = [ - c for c in feed[item].columns if c.startswith("ext_") - ]) + feed[item] = feed[item].drop( + columns=[c for c in feed[item].columns if c.startswith("ext_")] + ) return feed + def write_feed(feed, path): print("Writing GTFS data to %s ..." % path) @@ -121,7 +143,7 @@ def write_feed(feed, path): # We cannot write directly to the file handle as it # is binary, but pandas only writes in text mode. - zip.writestr("%s.txt" % slot, feed[slot].to_csv(index = None)) + zip.writestr("%s.txt" % slot, feed[slot].to_csv(index=None)) else: if not os.path.exists(path): @@ -134,9 +156,10 @@ def write_feed(feed, path): if slot in feed: with open("%s/%s.txt" % (path, slot), "w+", encoding="utf-8") as f: print(" Writing %s.txt ..." 
% slot) - feed[slot].to_csv(f, index = None, lineterminator='\n') + feed[slot].to_csv(f, index=None, lineterminator="\n") + -def cut_feed(feed, df_area, crs = None): +def cut_feed(feed, df_area, crs=None): feed = copy_feed(feed) df_stops = feed["stops"] @@ -148,11 +171,10 @@ def cut_feed(feed, df_area, crs = None): df_stations = df_stops[df_stops["location_type"] == 1].copy() df_stations["geometry"] = [ - geo.Point(*xy) - for xy in zip(df_stations["stop_lon"], df_stations["stop_lat"]) + geo.Point(*xy) for xy in zip(df_stations["stop_lon"], df_stations["stop_lat"]) ] - df_stations = gpd.GeoDataFrame(df_stations, crs = "EPSG:4326") + df_stations = gpd.GeoDataFrame(df_stations, crs="EPSG:4326") if not crs is None: print("Converting stops to custom CRS", crs) @@ -164,20 +186,22 @@ def cut_feed(feed, df_area, crs = None): print("Filtering stations ...") initial_count = len(df_stations) - df_stations = gpd.sjoin(df_stations, df_area, predicate = "within") + df_stations = gpd.sjoin(df_stations, df_area, predicate="within") final_count = len(df_stations) - print("Found %d/%d stations inside the specified area" % (final_count, initial_count)) + print( + "Found %d/%d stations inside the specified area" % (final_count, initial_count) + ) inside_stations = df_stations["stop_id"] # 1) Remove stations that are not inside stations and not have a parent stop df_stops = feed["stops"] df_stops = df_stops[ - df_stops["parent_station"].isin(inside_stations) | - ( - df_stops["parent_station"].isna() & - df_stops["stop_id"].isin(inside_stations) + df_stops["parent_station"].isin(inside_stations) + | ( + df_stops["parent_station"].isna() + & df_stops["stop_id"].isin(inside_stations) ) ] @@ -186,15 +210,17 @@ def cut_feed(feed, df_area, crs = None): # 2) Remove stop times df_times = feed["stop_times"] - df_times = df_times[df_times["stop_id"].astype(str).isin(remaining_stops.astype(str))] + df_times = df_times[ + df_times["stop_id"].astype(str).isin(remaining_stops.astype(str)) + ] feed["stop_times"] = df_times.copy() # 3) Remove transfers if "transfers" in feed: df_transfers = feed["transfers"] df_transfers = df_transfers[ - df_transfers["from_stop_id"].isin(remaining_stops) & - df_transfers["to_stop_id"].isin(remaining_stops) + df_transfers["from_stop_id"].isin(remaining_stops) + & df_transfers["to_stop_id"].isin(remaining_stops) ] feed["transfers"] = df_transfers.copy() @@ -202,8 +228,8 @@ def cut_feed(feed, df_area, crs = None): if "pathways" in feed: df_pathways = feed["pathways"] df_pathways = df_pathways[ - df_pathways["from_stop_id"].isin(remaining_stops) & - df_pathways["to_stop_id"].isin(remaining_stops) + df_pathways["from_stop_id"].isin(remaining_stops) + & df_pathways["to_stop_id"].isin(remaining_stops) ] feed["pathways"] = df_pathways.copy() @@ -212,9 +238,7 @@ def cut_feed(feed, df_area, crs = None): remaining_trips = trip_counts[trip_counts > 1].index.values df_trips = feed["trips"] - df_trips = df_trips[ - df_trips["trip_id"].isin(remaining_trips) - ] + df_trips = df_trips[df_trips["trip_id"].isin(remaining_trips)] feed["trips"] = df_trips.copy() feed["stop_times"] = feed["stop_times"][ @@ -224,44 +248,73 @@ def cut_feed(feed, df_area, crs = None): # 6) Remove frequencies if "frequencies" in feed: df_frequencies = feed["frequencies"] - df_frequencies = df_frequencies[ - df_frequencies["trip_id"].isin(remaining_trips) - ] + df_frequencies = df_frequencies[df_frequencies["trip_id"].isin(remaining_trips)] feed["frequencies"] = df_frequencies.copy() return feed + SLOT_COLLISIONS = [ - { "slot": 
"agency", "identifier": "agency_id", "references": [ - ("routes", "agency_id"), ("fare_attributes", "agency_id")] }, - { "slot": "stops", "identifier": "stop_id", "references": [ - ("stops", "parent_station"), ("stop_times", "stop_id"), - ("transfers", "from_stop_id"), ("transfers", "to_stop_id"), - ("pathways", "from_stop_id"), ("pathways", "to_stop_id")] }, - { "slot": "routes", "identifier": "route_id", "references": [ - ("trips", "route_id"), ("fare_rules", "route_id"), - ("attributions", "route_id")] }, - { "slot": "trips", "identifier": "trip_id", "references": [ - ("stop_times", "trip_id"), ("frequencies", "trip_id"), - ("attributions", "trip_id")] }, - { "slot": "calendar", "identifier": "service_id", "references": [ - ("calendar_dates", "service_id"), ("trips", "service_id")] }, - { "slot": "calendar_dates", "identifier": "service_id", "references": [ - ("trips", "service_id"), ("calendar", "service_id")] }, - { "slot": "fare_attributes", "identifier": "fare_id", "references": [ - ("fare_rules", "fare_id")] }, - { "slot": "shapes", "identifier": "shape_id", "references": [ - ("trips", "shape_id")] }, - { "slot": "pathways", "identifier": "pathway_id", "references": [] }, - { "slot": "levels", "identifier": "level_id", "references": [ - ("stops", "level_id")] }, - { "slot": "attributions", "identifier": "attribution_id" }, + { + "slot": "agency", + "identifier": "agency_id", + "references": [("routes", "agency_id"), ("fare_attributes", "agency_id")], + }, + { + "slot": "stops", + "identifier": "stop_id", + "references": [ + ("stops", "parent_station"), + ("stop_times", "stop_id"), + ("transfers", "from_stop_id"), + ("transfers", "to_stop_id"), + ("pathways", "from_stop_id"), + ("pathways", "to_stop_id"), + ], + }, + { + "slot": "routes", + "identifier": "route_id", + "references": [ + ("trips", "route_id"), + ("fare_rules", "route_id"), + ("attributions", "route_id"), + ], + }, + { + "slot": "trips", + "identifier": "trip_id", + "references": [ + ("stop_times", "trip_id"), + ("frequencies", "trip_id"), + ("attributions", "trip_id"), + ], + }, + { + "slot": "calendar", + "identifier": "service_id", + "references": [("calendar_dates", "service_id"), ("trips", "service_id")], + }, + { + "slot": "calendar_dates", + "identifier": "service_id", + "references": [("trips", "service_id"), ("calendar", "service_id")], + }, + { + "slot": "fare_attributes", + "identifier": "fare_id", + "references": [("fare_rules", "fare_id")], + }, + {"slot": "shapes", "identifier": "shape_id", "references": [("trips", "shape_id")]}, + {"slot": "pathways", "identifier": "pathway_id", "references": []}, + {"slot": "levels", "identifier": "level_id", "references": [("stops", "level_id")]}, + {"slot": "attributions", "identifier": "attribution_id"}, ] + def copy_feed(feed): - return { - slot: feed[slot].copy() for slot in feed - } + return {slot: feed[slot].copy() for slot in feed} + def merge_feeds(feeds): result = {} @@ -271,7 +324,8 @@ def merge_feeds(feeds): return result -def merge_two_feeds(first, second, suffix = "_merged"): + +def merge_two_feeds(first, second, suffix="_merged"): feed = {} print("Merging GTFS data ...") @@ -284,35 +338,52 @@ def merge_two_feeds(first, second, suffix = "_merged"): df_first = first[collision["slot"]] df_second = second[collision["slot"]] - df_first[collision["identifier"]] = df_first[collision["identifier"]].astype(str) - df_second[collision["identifier"]] = df_second[collision["identifier"]].astype(str) - - df_concat = pd.concat([df_first, df_second], sort = 
True).drop_duplicates() - duplicate_ids = list(df_concat[df_concat[collision["identifier"]].duplicated()][ - collision["identifier"]].astype(str).unique()) + df_first[collision["identifier"]] = df_first[ + collision["identifier"] + ].astype(str) + df_second[collision["identifier"]] = df_second[ + collision["identifier"] + ].astype(str) + + df_concat = pd.concat([df_first, df_second], sort=True).drop_duplicates() + duplicate_ids = list( + df_concat[df_concat[collision["identifier"]].duplicated()][ + collision["identifier"] + ] + .astype(str) + .unique() + ) if len(duplicate_ids) > 0: - print(" Found %d duplicate identifiers in %s" % ( - len(duplicate_ids), collision["slot"])) + print( + " Found %d duplicate identifiers in %s" + % (len(duplicate_ids), collision["slot"]) + ) replacement_ids = [str(id) + suffix for id in duplicate_ids] - df_second[collision["identifier"]] = df_second[collision["identifier"]].replace( - duplicate_ids, replacement_ids - ) + df_second[collision["identifier"]] = df_second[ + collision["identifier"] + ].replace(duplicate_ids, replacement_ids) for ref_slot, ref_identifier in collision["references"]: if ref_slot in first and ref_slot in second: - first[ref_slot][ref_identifier] = first[ref_slot][ref_identifier].astype(str) - second[ref_slot][ref_identifier] = second[ref_slot][ref_identifier].astype(str) + first[ref_slot][ref_identifier] = first[ref_slot][ + ref_identifier + ].astype(str) + second[ref_slot][ref_identifier] = second[ref_slot][ + ref_identifier + ].astype(str) - second[ref_slot][ref_identifier] = second[ref_slot][ref_identifier].replace( - duplicate_ids, replacement_ids - ) + second[ref_slot][ref_identifier] = second[ref_slot][ + ref_identifier + ].replace(duplicate_ids, replacement_ids) for slot in REQUIRED_SLOTS + OPTIONAL_SLOTS: if slot in first and slot in second: - feed[slot] = pd.concat([first[slot], second[slot]], sort = True).drop_duplicates() + feed[slot] = pd.concat( + [first[slot], second[slot]], sort=True + ).drop_duplicates() elif slot in first: feed[slot] = first[slot].copy() elif slot in second: @@ -320,7 +391,8 @@ def merge_two_feeds(first, second, suffix = "_merged"): return feed -def despace_stop_ids(feed, replacement = ":::"): + +def despace_stop_ids(feed, replacement=":::"): feed = copy_feed(feed) references = None @@ -332,14 +404,20 @@ def despace_stop_ids(feed, replacement = ":::"): df_stops = feed["stops"] df_stops["stop_id"] = df_stops["stop_id"].astype(str) - search_ids = list(df_stops[df_stops["stop_id"].str.contains(" ")]["stop_id"].unique()) + search_ids = list( + df_stops[df_stops["stop_id"].str.contains(" ")]["stop_id"].unique() + ) replacement_ids = [item.replace(" ", replacement) for item in search_ids] df_stops["stop_id"] = df_stops["stop_id"].replace(search_ids, replacement_ids) for reference_slot, reference_field in references: if reference_slot in feed: - feed[reference_slot][reference_field] = feed[reference_slot][reference_field].astype(str).replace(search_ids, replacement_ids) + feed[reference_slot][reference_field] = ( + feed[reference_slot][reference_field] + .astype(str) + .replace(search_ids, replacement_ids) + ) print("De-spaced %d/%d stops" % (len(search_ids), len(df_stops))) diff --git a/data/hts/commute_distance.py b/data/hts/commute_distance.py index 2a83893d..249201ef 100644 --- a/data/hts/commute_distance.py +++ b/data/hts/commute_distance.py @@ -1,25 +1,37 @@ import pandas as pd import numpy as np + def configure(context): context.config("random_seed") context.stage("data.hts.selected") + def 
get_commuting_distance(df_persons, df_trips, activity_type, random): if "euclidean_distance" in df_trips: distance_slot = "euclidean_distance" distance_factor = 1.0 else: distance_slot = "routed_distance" - distance_factor = 1.0 # / 1.3 + distance_factor = 1.0 # / 1.3 # Add commuting distances - df_commute_distance = df_trips[ - ((df_trips["preceding_purpose"] == "home") & (df_trips["following_purpose"] == activity_type)) | - ((df_trips["preceding_purpose"] == activity_type) & (df_trips["following_purpose"] == "home")) - ].drop_duplicates("person_id", keep = "first")[["person_id", distance_slot]].rename(columns = { distance_slot: "commute_distance" }) + df_commute_distance = ( + df_trips[ + ( + (df_trips["preceding_purpose"] == "home") + & (df_trips["following_purpose"] == activity_type) + ) + | ( + (df_trips["preceding_purpose"] == activity_type) + & (df_trips["following_purpose"] == "home") + ) + ] + .drop_duplicates("person_id", keep="first")[["person_id", distance_slot]] + .rename(columns={distance_slot: "commute_distance"}) + ) - df_persons = pd.merge(df_persons, df_commute_distance, on = "person_id", how = "left") + df_persons = pd.merge(df_persons, df_commute_distance, on="person_id", how="left") # For the ones without commuting distance, sample from the distribution f_missing = df_persons["commute_distance"].isna() @@ -39,7 +51,7 @@ def get_commuting_distance(df_persons, df_trips, activity_type, random): indices = [ np.searchsorted(cdf, r) - for r in random.random_sample(size = np.count_nonzero(f_missing)) + for r in random.random_sample(size=np.count_nonzero(f_missing)) ] df_persons.loc[f_missing, "commute_distance"] = values[indices] @@ -50,17 +62,19 @@ def get_commuting_distance(df_persons, df_trips, activity_type, random): # Attach euclidean factor df_persons["commute_distance"] *= distance_factor - print("Missing %s commute distances: %.2f%%" % ( - activity_type, 100 * np.count_nonzero(f_missing) / len(f_missing) - )) + print( + "Missing %s commute distances: %.2f%%" + % (activity_type, 100 * np.count_nonzero(f_missing) / len(f_missing)) + ) return df_persons + def execute(context): df_households, df_persons, df_trips = context.stage("data.hts.selected") random = np.random.RandomState(context.config("random_seed")) return dict( - work = get_commuting_distance(df_persons, df_trips, "work", random), - education = get_commuting_distance(df_persons, df_trips, "education", random) + work=get_commuting_distance(df_persons, df_trips, "work", random), + education=get_commuting_distance(df_persons, df_trips, "education", random), ) diff --git a/data/hts/comparison.py b/data/hts/comparison.py index 3b59979a..891d4c41 100644 --- a/data/hts/comparison.py +++ b/data/hts/comparison.py @@ -7,28 +7,34 @@ Comparison of various attributes between EGT, ENTD and census. 
""" + def configure(context): context.stage("data.hts.egt.filtered") context.stage("data.hts.entd.filtered") context.stage("data.census.filtered") + def combine(htss): households, persons, trips = [], [], [] for name, (df_hts_households, df_hts_persons, df_hts_trips) in htss.items(): - df_hts_households = pd.DataFrame(df_hts_households, copy = True) - df_hts_persons = pd.DataFrame(df_hts_persons, copy = True) - df_hts_trips = pd.DataFrame(df_hts_trips, copy = True) + df_hts_households = pd.DataFrame(df_hts_households, copy=True) + df_hts_persons = pd.DataFrame(df_hts_persons, copy=True) + df_hts_trips = pd.DataFrame(df_hts_trips, copy=True) df_hts_households["hts"] = name df_hts_persons["hts"] = name df_hts_trips["hts"] = name if "routed_distance" in df_hts_trips: - df_hts_trips = df_hts_trips.rename(columns = { "routed_distance": "hts_distance" }) + df_hts_trips = df_hts_trips.rename( + columns={"routed_distance": "hts_distance"} + ) df_hts_trips["distance_type"] = "routed" elif "euclidean_distance" in df_hts_trips: - df_hts_trips = df_hts_trips.rename(columns = { "euclidean_distance": "hts_distance" }) + df_hts_trips = df_hts_trips.rename( + columns={"euclidean_distance": "hts_distance"} + ) df_hts_trips["distance_type"] = "euclidean" else: raise RuntimeError("No distance slot available") @@ -39,11 +45,12 @@ def combine(htss): return pd.concat(households), pd.concat(persons), pd.concat(trips) + def execute(context): egt = context.stage("data.hts.egt.filtered") entd = context.stage("data.hts.entd.filtered") - htss = dict(egt = egt, entd = entd) + htss = dict(egt=egt, entd=entd) names = sorted(list(htss.keys())) # Make data set of all HTS @@ -61,41 +68,80 @@ def execute(context): "number_of_households": np.count_nonzero(f_hts_households), "number_of_persons": np.count_nonzero(f_hts_persons), "number_of_trips": np.count_nonzero(f_hts_trips), - "weighted_number_of_households": df_households[f_hts_households]["household_weight"].sum(), - "weighted_number_of_persons": df_persons[f_hts_persons]["person_weight"].sum(), + "weighted_number_of_households": df_households[f_hts_households][ + "household_weight" + ].sum(), + "weighted_number_of_persons": df_persons[f_hts_persons][ + "person_weight" + ].sum(), "weighted_number_of_trips": df_trips[f_hts_trips]["trip_weight"].sum(), - "weighted_number_of_trips_per_mobile_person": (df_persons[f_hts_persons & f_any_trips]["number_of_trips"] * df_persons[f_hts_persons & f_any_trips]["trip_weight"]).sum() / df_persons[f_hts_persons & f_any_trips]["trip_weight"].sum(), - "share_of_students": (df_persons[f_hts_persons]["studies"] * df_persons[f_hts_persons]["person_weight"]).sum() / df_persons[f_hts_persons]["person_weight"].sum(), - "share_of_employed": (df_persons[f_hts_persons]["employed"] * df_persons[f_hts_persons]["person_weight"]).sum() / df_persons[f_hts_persons]["person_weight"].sum(), - "number_of_activity_chains": len(df_trips[f_hts_trips]["person_id"].unique()), - "number_of_activity_chains": len(df_trips[f_hts_trips]["person_id"].unique()), + "weighted_number_of_trips_per_mobile_person": ( + df_persons[f_hts_persons & f_any_trips]["number_of_trips"] + * df_persons[f_hts_persons & f_any_trips]["trip_weight"] + ).sum() + / df_persons[f_hts_persons & f_any_trips]["trip_weight"].sum(), + "share_of_students": ( + df_persons[f_hts_persons]["studies"] + * df_persons[f_hts_persons]["person_weight"] + ).sum() + / df_persons[f_hts_persons]["person_weight"].sum(), + "share_of_employed": ( + df_persons[f_hts_persons]["employed"] + * 
df_persons[f_hts_persons]["person_weight"] + ).sum() + / df_persons[f_hts_persons]["person_weight"].sum(), + "number_of_activity_chains": len( + df_trips[f_hts_trips]["person_id"].unique() + ), + "number_of_activity_chains": len( + df_trips[f_hts_trips]["person_id"].unique() + ), } # Trip distance distribution - df_trips["distance_class"] = np.digitize(df_trips["hts_distance"], np.arange(1, 10) * 1000) - df_distance = df_trips.groupby(["hts", "distance_class"])["trip_weight"].sum().reset_index(name = "trip_weight") + df_trips["distance_class"] = np.digitize( + df_trips["hts_distance"], np.arange(1, 10) * 1000 + ) + df_distance = ( + df_trips.groupby(["hts", "distance_class"])["trip_weight"] + .sum() + .reset_index(name="trip_weight") + ) # Age distribution AGE_BOUNDS = [14, 29, 44, 59, 74, 1000] - df_persons["age_class"] = np.digitize(df_persons["age"], AGE_BOUNDS, right = True) - df_age = df_persons.groupby(["hts", "age_class"])["person_weight"].sum().reset_index(name = "person_weight") - - df_census = pd.DataFrame(context.stage("data.census.filtered")[["age", "studies", "weight", "employed"]], copy = True) + df_persons["age_class"] = np.digitize(df_persons["age"], AGE_BOUNDS, right=True) + df_age = ( + df_persons.groupby(["hts", "age_class"])["person_weight"] + .sum() + .reset_index(name="person_weight") + ) + + df_census = pd.DataFrame( + context.stage("data.census.filtered")[["age", "studies", "weight", "employed"]], + copy=True, + ) df_census["hts"] = "census" - df_census["age_class"] = np.digitize(df_census["age"], AGE_BOUNDS, right = True) - df_age_census = df_census.groupby(["hts", "age_class"])["weight"].sum().reset_index(name = "person_weight") + df_census["age_class"] = np.digitize(df_census["age"], AGE_BOUNDS, right=True) + df_age_census = ( + df_census.groupby(["hts", "age_class"])["weight"] + .sum() + .reset_index(name="person_weight") + ) df_age = pd.concat([df_age, df_age_census]) # Add student and employment share for census info["census"] = { - "share_of_students": (df_census["studies"] * df_census["weight"]).sum() / df_census["weight"].sum(), - "share_of_employed": (df_census["employed"] * df_census["weight"]).sum() / df_census["weight"].sum() + "share_of_students": (df_census["studies"] * df_census["weight"]).sum() + / df_census["weight"].sum(), + "share_of_employed": (df_census["employed"] * df_census["weight"]).sum() + / df_census["weight"].sum(), } return { "info": info, "distance_distribution": df_distance, - "age_distribution": df_age + "age_distribution": df_age, } diff --git a/data/hts/edgt_44/cleaned.py b/data/hts/edgt_44/cleaned.py index 1fa9b526..f9a95888 100644 --- a/data/hts/edgt_44/cleaned.py +++ b/data/hts/edgt_44/cleaned.py @@ -6,26 +6,54 @@ This stage cleans the Loire Atlantique EDGT. 
""" + def configure(context): context.stage("data.hts.edgt_44.raw") + PURPOSE_MAP = { "home": [1, 2], "work": [11, 12, 13, 81], "education": [21, 22, 23, 24, 25, 26, 27, 28, 29], "shop": [30, 31, 32, 33, 34, 35, 82], "leisure": [51, 52, 53, 54], - "other": [41, 42, 43, 44, 45, 61, 62, 63, 64, 71, 72, 73, 74, 91] + "other": [41, 42, 43, 44, 45, 61, 62, 63, 64, 71, 72, 73, 74, 91], } MODES_MAP = { "car": [13, 15, 21, 81], "car_passenger": [14, 16, 22, 82], - "pt": [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 51, 52, 53, 61, 71, 72, 73, 91, 92, 94, 95], + "pt": [ + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 51, + 52, + 53, + 61, + 71, + 72, + 73, + 91, + 92, + 94, + 95, + ], "bike": [11, 17, 12, 18, 93, 19], - "walk": [1, 2] # Actually, 2 is not really explained, but we assume it is walk + "walk": [1, 2], # Actually, 2 is not really explained, but we assume it is walk } + def execute(context): df_households, df_persons, df_trips = context.stage("data.hts.edgt_44.raw") @@ -33,9 +61,13 @@ def execute(context): df_households["departement_id"] = "44" # Transform original IDs to integer (they are hierarchichal) - df_households["edgt_household_id"] = (df_households["ECH"] + df_households["MTIR"]).astype(int) + df_households["edgt_household_id"] = ( + df_households["ECH"] + df_households["MTIR"] + ).astype(int) df_persons["edgt_person_id"] = df_persons["PER"].astype(int) - df_persons["edgt_household_id"] = (df_persons["ECH"] + df_persons["PTIR"]).astype(int) + df_persons["edgt_household_id"] = (df_persons["ECH"] + df_persons["PTIR"]).astype( + int + ) df_trips["edgt_person_id"] = df_trips["PER"].astype(int) df_trips["edgt_household_id"] = (df_trips["ECH"] + df_trips["DTIR"]).astype(int) df_trips["edgt_trip_id"] = df_trips["NDEP"].astype(int) @@ -44,15 +76,19 @@ def execute(context): df_households["household_id"] = np.arange(len(df_households)) df_persons = pd.merge( - df_persons, df_households[["edgt_household_id", "household_id", "departement_id"]], - on = ["edgt_household_id"] - ).sort_values(by = ["household_id", "edgt_person_id"]) + df_persons, + df_households[["edgt_household_id", "household_id", "departement_id"]], + on=["edgt_household_id"], + ).sort_values(by=["household_id", "edgt_person_id"]) df_persons["person_id"] = np.arange(len(df_persons)) df_trips = pd.merge( - df_trips, df_persons[["edgt_person_id", "edgt_household_id", "person_id", "household_id"]], - on = ["edgt_person_id", "edgt_household_id"] - ).sort_values(by = ["household_id", "person_id", "edgt_trip_id"]) + df_trips, + df_persons[ + ["edgt_person_id", "edgt_household_id", "person_id", "household_id"] + ], + on=["edgt_person_id", "edgt_household_id"], + ).sort_values(by=["household_id", "person_id", "edgt_trip_id"]) df_trips["trip_id"] = np.arange(len(df_trips)) # Trip flags @@ -71,8 +107,10 @@ def execute(context): df_persons["sex"] = df_persons["sex"].astype("category") # Household size - df_size = df_persons.groupby("household_id").size().reset_index(name = "household_size") - df_households = pd.merge(df_households, df_size, on = "household_id") + df_size = ( + df_persons.groupby("household_id").size().reset_index(name="household_size") + ) + df_households = pd.merge(df_households, df_size, on="household_id") # Clean departement df_trips["origin_departement_id"] = "44" @@ -80,8 +118,12 @@ def execute(context): df_households["departement_id"] = df_households["departement_id"].astype("category") df_persons["departement_id"] = df_persons["departement_id"].astype("category") - 
df_trips["origin_departement_id"] = df_trips["origin_departement_id"].astype("category") - df_trips["destination_departement_id"] = df_trips["destination_departement_id"].astype("category") + df_trips["origin_departement_id"] = df_trips["origin_departement_id"].astype( + "category" + ) + df_trips["destination_departement_id"] = df_trips[ + "destination_departement_id" + ].astype("category") # Clean employment df_persons["employed"] = df_persons["P7"].isin(["1", "2"]) @@ -91,7 +133,9 @@ def execute(context): # Number of vehicles df_households["number_of_vehicles"] = df_households["M6"] + df_households["M5"] - df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype(int) + df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype( + int + ) df_households["number_of_bikes"] = df_households["M7"].astype(int) # License @@ -100,7 +144,7 @@ def execute(context): # Has subscription (not availabile in EDGT 44) df_persons["has_pt_subscription"] = False - # Survey respondents + # Survey respondents # PENQ 1 : fully awnsered the travel questionary section, having a chain or non-movers # PENQ 2 : nonrespondent of travel questionary section df_persons.loc[df_persons["PENQ"] == 1, "travel_respondent"] = True @@ -138,13 +182,13 @@ def execute(context): df_trips["routed_distance"] = df_trips["DIST"] # Trip times - df_trips["departure_time"] = 3600.0 * df_trips["D4A"] # hour - df_trips["departure_time"] += 60.0 * df_trips["D4B"] # minute + df_trips["departure_time"] = 3600.0 * df_trips["D4A"] # hour + df_trips["departure_time"] += 60.0 * df_trips["D4B"] # minute - df_trips["arrival_time"] = 3600.0 * df_trips["D8A"] # hour - df_trips["arrival_time"] += 60.0 * df_trips["D8B"] # minute + df_trips["arrival_time"] = 3600.0 * df_trips["D8A"] # hour + df_trips["arrival_time"] += 60.0 * df_trips["D8B"] # minute - df_trips = df_trips.sort_values(by = ["household_id", "person_id", "trip_id"]) + df_trips = df_trips.sort_values(by=["household_id", "person_id", "trip_id"]) df_trips = hts.fix_trip_times(df_trips) # Durations @@ -153,16 +197,25 @@ def execute(context): # Add weight to trips df_trips = pd.merge( - df_trips, df_persons[["person_id", "COEQ"]], on = "person_id", how = "left" - ).rename(columns = { "COEQ": "trip_weight" }) + df_trips, df_persons[["person_id", "COEQ"]], on="person_id", how="left" + ).rename(columns={"COEQ": "trip_weight"}) df_persons["trip_weight"] = df_persons["COEQ"] # Chain length - df_count = df_trips[["person_id"]].groupby("person_id").size().reset_index(name = "number_of_trips") + df_count = ( + df_trips[["person_id"]] + .groupby("person_id") + .size() + .reset_index(name="number_of_trips") + ) # People with at least one trip (number_of_trips > 0) - df_persons = pd.merge(df_persons, df_count, on = "person_id", how = "left") + df_persons = pd.merge(df_persons, df_count, on="person_id", how="left") # People that awnsered the travel questionary section but stayed at home (number_of_trips = 0) - df_persons.loc[(df_persons["travel_respondent"] == True) & (df_persons["number_of_trips"].isna()), "number_of_trips"] = 0 + df_persons.loc[ + (df_persons["travel_respondent"] == True) + & (df_persons["number_of_trips"].isna()), + "number_of_trips", + ] = 0 # Nonrespondent of travel questionary section (number_of_trips = -1) df_persons["number_of_trips"] = df_persons["number_of_trips"].fillna(-1).astype(int) @@ -173,11 +226,15 @@ def execute(context): # Calculate consumption units hts.check_household_size(df_households, df_persons) - df_households = 
pd.merge(df_households, hts.calculate_consumption_units(df_persons), on = "household_id") + df_households = pd.merge( + df_households, hts.calculate_consumption_units(df_persons), on="household_id" + ) # Socioprofessional class df_persons["socioprofessional_class"] = df_persons["P9"].fillna(8).astype(int) - df_persons.loc[df_persons["socioprofessional_class"] > 6, "socioprofessional_class"] = 8 + df_persons.loc[ + df_persons["socioprofessional_class"] > 6, "socioprofessional_class" + ] = 8 df_persons.loc[df_persons["P7"] == "7", "socioprofessional_class"] = 7 # Check departure and arrival times diff --git a/data/hts/edgt_44/filtered.py b/data/hts/edgt_44/filtered.py index df52ab89..cebf650a 100644 --- a/data/hts/edgt_44/filtered.py +++ b/data/hts/edgt_44/filtered.py @@ -5,17 +5,20 @@ This stage filters out observations which live or work outside of the area. """ + def configure(context): context.stage("data.hts.edgt_44.cleaned") context.stage("data.spatial.codes") - - context.config("filter_hts",True) + + context.config("filter_hts", True) + + def execute(context): - filter_edgt = context.config("filter_hts") + filter_edgt = context.config("filter_hts") df_codes = context.stage("data.spatial.codes") df_households, df_persons, df_trips = context.stage("data.hts.edgt_44.cleaned") - if filter_edgt : + if filter_edgt: # Filter for non-residents requested_departments = df_codes["departement_id"].unique() f = df_persons["departement_id"].astype(str).isin(requested_departments) @@ -24,15 +27,26 @@ def execute(context): # Filter for people going outside of the area remove_ids = set() - remove_ids |= set(df_trips[ - ~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments) - ]["person_id"].unique()) + remove_ids |= set( + df_trips[ + ~df_trips["origin_departement_id"] + .astype(str) + .isin(requested_departments) + | ~df_trips["destination_departement_id"] + .astype(str) + .isin(requested_departments) + ]["person_id"].unique() + ) df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)] # Only keep trips and households that still have a person - df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())] - df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])] + df_trips = df_trips[ + df_trips["person_id"].isin(df_persons["person_id"].unique()) + ] + df_households = df_households[ + df_households["household_id"].isin(df_persons["household_id"]) + ] # Finish up df_households = df_households[hts.HOUSEHOLD_COLUMNS] diff --git a/data/hts/edgt_44/format.py b/data/hts/edgt_44/format.py index 0dd224e1..402a5e59 100644 --- a/data/hts/edgt_44/format.py +++ b/data/hts/edgt_44/format.py @@ -24,12 +24,22 @@ (50, 1, "M6", "NOMBRE DE DEUX OU TROIS ROUES MOTORISES A DISPOSITION"), (51, 2, "M7", "NOMBRE DE VELOS A DISPOSITION"), (53, 4, "MLA", "ANNEE INSTALLATION DANS LE LOGEMENT"), - (57, 2, "MLB1", "PREMIER CRITERE DE CHOIX DE LA RESIDENCE ACTUELLE (sans classement)"), - (59, 2, "MLB2", "DEUXIEME CRITERE DE CHOIX DE LA RESIDENCE ACTUELLE (sans classement)"), + ( + 57, + 2, + "MLB1", + "PREMIER CRITERE DE CHOIX DE LA RESIDENCE ACTUELLE (sans classement)", + ), + ( + 59, + 2, + "MLB2", + "DEUXIEME CRITERE DE CHOIX DE LA RESIDENCE ACTUELLE (sans classement)", + ), (61, 5, "MLC", "ANCIENNE COMMUNE DE RESIDENCE"), (66, 1, "MLD", "TYPE D'OCCUPATION DE L'ANCIEN LOGEMENT (le ménage était-il ?)"), (67, 8, "COEM", "COEFFICIENT DE REDRESSEMENT MENAGE"), - 
(75, 1, "MFIN", "FIN FICHIER MENAGE") + (75, 1, "MFIN", "FIN FICHIER MENAGE"), ] PERSON_FORMAT = [ @@ -50,15 +60,35 @@ (24, 1, "P9", "PCS"), (25, 1, "P12", "TRAVAIL OU ETUDES A DOMICILE"), (26, 6, "P13A", "LIEU DE TRAVAIL OU D'ETUDES (OCCUPATION PRINCIPALE)"), - (32, 1, "P15", "DISPOSITION D'UNE VOITURE EN GÉNÉRAL (DÉPLACEMENTS DOMICILE TRAVAIL OU ÉTUDES)"), - (33, 1, "P17", "PROBLÈMES DE STATIONNEMENT EN GÉNÉRAL (SUR LIEU DE TRAVAIL OU D'ÉTUDES)"), - (34, 1, "P17A", "DIFFICULTÉS DE STATIONNEMENT SUR OU À PROXIMITÉ DE VOTRE LIEU DE TRAVAIL OU DE VOTRE LIEU D'ÉTUDES"), + ( + 32, + 1, + "P15", + "DISPOSITION D'UNE VOITURE EN GÉNÉRAL (DÉPLACEMENTS DOMICILE TRAVAIL OU ÉTUDES)", + ), + ( + 33, + 1, + "P17", + "PROBLÈMES DE STATIONNEMENT EN GÉNÉRAL (SUR LIEU DE TRAVAIL OU D'ÉTUDES)", + ), + ( + 34, + 1, + "P17A", + "DIFFICULTÉS DE STATIONNEMENT SUR OU À PROXIMITÉ DE VOTRE LIEU DE TRAVAIL OU DE VOTRE LIEU D'ÉTUDES", + ), (35, 1, "P23A", "FRÉQUENCE D'UTILISATION EN SEMAINE : MARCHE A PIED"), (36, 1, "P20", "FRÉQUENCE D'UTILISATION EN SEMAINE : BICYCLETTE"), (37, 1, "P21", "FRÉQUENCE D'UTILISATION EN SEMAINE : 2 ROUES À MOTEUR CONDUCTEUR"), (38, 1, "P23", "FRÉQUENCE D'UTILISATION EN SEMAINE : VOITURE CONDUCTEUR"), (39, 1, "P24", "FRÉQUENCE D'UTILISATION EN SEMAINE : VOITURE PASSAGER"), - (40, 1, "P25", "FRÉQUENCE D'UTILISATION EN SEMAINE : RESEAUX DE TRANSPORT EN COMMUN (TRAM, BUS, CAR…)"), + ( + 40, + 1, + "P25", + "FRÉQUENCE D'UTILISATION EN SEMAINE : RESEAUX DE TRANSPORT EN COMMUN (TRAM, BUS, CAR…)", + ), (41, 1, "P19", "SITUATION DE LA PERSONNE LA VEILLE"), (42, 1, "P19A", "SITUATION DES ACTIFS LA VEILLE"), (43, 1, "PL27", "FRÉQUENCE D'UTILISATION EN SEMAINE : TRAIN"), @@ -67,7 +97,7 @@ (46, 6, "DP13", "Distance DOMICILE-TRAVAIL"), (52, 8, "COEP", "COEFFICIENT DE REDRESSEMENT TOUTES PERSONNES"), (60, 8, "COEQ", "COEFFICIENT DE REDRESSEMENT PERSONNES ENQUETEES"), - (68, 1, "PFIN", "FIN FICHIER PERSONNE") + (68, 1, "PFIN", "FIN FICHIER PERSONNE"), ] TRIP_FORMAT = [ @@ -95,5 +125,5 @@ (54, 8, "DOIB", "DISTANCE VOL OISEAU (en mètres)"), (62, 8, "DIST", "DiSTANCE PARCOURUE (en mètres)"), (70, 8, "DISP", "DiSTANCE PARCOURUE dans périmètre (en mètres)"), - (78, 1, "DFIN", "FIN FICHIER DEPLACEMENT") + (78, 1, "DFIN", "FIN FICHIER DEPLACEMENT"), ] diff --git a/data/hts/edgt_44/raw.py b/data/hts/edgt_44/raw.py index cb58bd27..5a55d8cb 100644 --- a/data/hts/edgt_44/raw.py +++ b/data/hts/edgt_44/raw.py @@ -10,35 +10,66 @@ Adapted from the first implementation by Valentin Le Besond (IFSTTAR Nantes) """ + def configure(context): context.config("data_path") + from .format import HOUSEHOLD_FORMAT, PERSON_FORMAT, TRIP_FORMAT HOUSEHOLD_COLUMNS = { - "MP2": str, "MTIR": str, "ECH": str, "COEM": float, - "M6": int, "M7": int, "M5": int + "MP2": str, + "MTIR": str, + "ECH": str, + "COEM": float, + "M6": int, + "M7": int, + "M5": int, } PERSON_COLUMNS = { - "ECH": str, "PTIR": str, "PER": int, "PP2": str, "PENQ": int, - "P3": int, "P2": int, "P4": int, - "P7": str, "P12": str, - "P9": str, "P5": str, - "COEP": float, "COEQ": float, "P1": int + "ECH": str, + "PTIR": str, + "PER": int, + "PP2": str, + "PENQ": int, + "P3": int, + "P2": int, + "P4": int, + "P7": str, + "P12": str, + "P9": str, + "P5": str, + "COEP": float, + "COEQ": float, + "P1": int, } TRIP_COLUMNS = { - "ECH": str, "DTIR": str, "PER": int, "NDEP": int, "DP2": str, - "D2A": int, "D5A": int, "D3": str, "D4A": int, "D4B": int, - "D7": str, "D8A": int, "D8B": int, - "D8C": int, "MODP": int, "DOIB": int, "DIST": int + "ECH": str, + "DTIR": str, + "PER": 
int, + "NDEP": int, + "DP2": str, + "D2A": int, + "D5A": int, + "D3": str, + "D4A": int, + "D4B": int, + "D7": str, + "D8A": int, + "D8B": int, + "D8C": int, + "MODP": int, + "DOIB": int, + "DIST": int, } + def execute(context): # Load households df_household_dictionary = pd.DataFrame.from_records( - HOUSEHOLD_FORMAT, columns = ["position", "size", "variable", "description"] + HOUSEHOLD_FORMAT, columns=["position", "size", "variable", "description"] ) column_widths = df_household_dictionary["size"].values @@ -46,13 +77,17 @@ def execute(context): df_households = pd.read_fwf( "%s/edgt_44_2015/02a_EDGT_44_MENAGE_FAF_TEL_2015-08-07_modifZF.txt" - % context.config("data_path"), widths = column_widths, header = None, - names = column_names, usecols = list(HOUSEHOLD_COLUMNS.keys()), dtype = HOUSEHOLD_COLUMNS + % context.config("data_path"), + widths=column_widths, + header=None, + names=column_names, + usecols=list(HOUSEHOLD_COLUMNS.keys()), + dtype=HOUSEHOLD_COLUMNS, ) # Load persons df_person_dictionary = pd.DataFrame.from_records( - PERSON_FORMAT, columns = ["position", "size", "variable", "description"] + PERSON_FORMAT, columns=["position", "size", "variable", "description"] ) column_widths = df_person_dictionary["size"].values @@ -60,13 +95,17 @@ def execute(context): df_persons = pd.read_fwf( "%s/edgt_44_2015/02b_EDGT_44_PERSO_FAF_TEL_ModifPCS_2016-04-14.txt" - % context.config("data_path"), widths = column_widths, header = None, - names = column_names, usecols = list(PERSON_COLUMNS.keys()), dtype = PERSON_COLUMNS + % context.config("data_path"), + widths=column_widths, + header=None, + names=column_names, + usecols=list(PERSON_COLUMNS.keys()), + dtype=PERSON_COLUMNS, ) # Load trips df_trip_dictionary = pd.DataFrame.from_records( - TRIP_FORMAT, columns = ["position", "size", "variable", "description"] + TRIP_FORMAT, columns=["position", "size", "variable", "description"] ) column_widths = df_trip_dictionary["size"].values @@ -74,21 +113,29 @@ def execute(context): df_trips = pd.read_fwf( "%s/edgt_44_2015/02c_EDGT_44_DEPLA_FAF_TEL_DIST_2015-11-10.txt" - % context.config("data_path"), widths = column_widths, header = None, - names = column_names, usecols = list(TRIP_COLUMNS.keys()), dtype = TRIP_COLUMNS + % context.config("data_path"), + widths=column_widths, + header=None, + names=column_names, + usecols=list(TRIP_COLUMNS.keys()), + dtype=TRIP_COLUMNS, ) return df_households, df_persons, df_trips + FILES = [ "02a_EDGT_44_MENAGE_FAF_TEL_2015-08-07_modifZF.txt", "02b_EDGT_44_PERSO_FAF_TEL_ModifPCS_2016-04-14.txt", "02c_EDGT_44_DEPLA_FAF_TEL_DIST_2015-11-10.txt", ] + def validate(context): for name in FILES: - if not os.path.exists("%s/edgt_44_2015/%s" % (context.config("data_path"), name)): + if not os.path.exists( + "%s/edgt_44_2015/%s" % (context.config("data_path"), name) + ): raise RuntimeError("File missing from EDGT: %s" % name) return [ diff --git a/data/hts/edgt_44/reweighted.py b/data/hts/edgt_44/reweighted.py index 1bbcbd4d..647ccdef 100644 --- a/data/hts/edgt_44/reweighted.py +++ b/data/hts/edgt_44/reweighted.py @@ -1,8 +1,10 @@ import numpy as np + def configure(context): context.stage("data.hts.edgt_44.filtered") + def execute(context): df_households, df_persons, df_trips = context.stage("data.hts.edgt_44.filtered") diff --git a/data/hts/edgt_lyon/cleaned_adisp.py b/data/hts/edgt_lyon/cleaned_adisp.py index eed34608..905df318 100644 --- a/data/hts/edgt_lyon/cleaned_adisp.py +++ b/data/hts/edgt_lyon/cleaned_adisp.py @@ -7,43 +7,76 @@ This stage cleans the Lyon EDGT. 
""" + def configure(context): context.stage("data.hts.edgt_lyon.raw_adisp") + PURPOSE_MAP = { "home": [1, 2], "work": [11, 12, 13, 14, 81], "education": [21, 22, 23, 24, 25, 26, 27, 28, 29, 96, 97], "shop": [30, 31, 32, 33, 34, 35, 82, 98], "leisure": [51, 52, 53, 54], - "other": [41, 42, 43, 61, 62, 63, 64, 71, 72, 73, 74, 91] + "other": [41, 42, 43, 61, 62, 63, 64, 71, 72, 73, 74, 91], } MODES_MAP = { - "car": [10, 13, 15, 21, 81], # 10 is (driving) an ambulance + "car": [10, 13, 15, 21, 81], # 10 is (driving) an ambulance "car_passenger": [14, 16, 22, 82], - "pt": [31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 51, 52, 53, 61, 71, 91, 92, 94, 95], + "pt": [ + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 51, + 52, + 53, + 61, + 71, + 91, + 92, + 94, + 95, + ], "bike": [11, 17, 12, 18, 93], - "walk": [1, 2] # Actually, 2 is not really explained, but we assume it is walk + "walk": [1, 2], # Actually, 2 is not really explained, but we assume it is walk } + def execute(context): - df_households, df_persons, df_trips, df_spatial = context.stage("data.hts.edgt_lyon.raw_adisp") + df_households, df_persons, df_trips, df_spatial = context.stage( + "data.hts.edgt_lyon.raw_adisp" + ) # Merge departement into households df_spatial = df_spatial[["ZF__2015", "DepCom"]].copy() - df_spatial["ZFM"] = df_spatial["ZF__2015"].astype(str).str.pad(width=8, side='left', fillchar='0') + df_spatial["ZFM"] = ( + df_spatial["ZF__2015"].astype(str).str.pad(width=8, side="left", fillchar="0") + ) df_spatial["departement_id"] = df_spatial["DepCom"].str[:2] df_spatial = df_spatial[["ZFM", "departement_id"]] # Attention, some households get lost here! - df_households = pd.merge(df_households, df_spatial, on = "ZFM", how = "left") + df_households = pd.merge(df_households, df_spatial, on="ZFM", how="left") df_households["departement_id"] = df_households["departement_id"].fillna("unknown") # Transform original IDs to integer (they are hierarchichal) - df_households["edgt_household_id"] = (df_households["ZFM"] + df_households["ECH"]).astype(int) + df_households["edgt_household_id"] = ( + df_households["ZFM"] + df_households["ECH"] + ).astype(int) df_persons["edgt_person_id"] = df_persons["PER"].astype(int) - df_persons["edgt_household_id"] = (df_persons["ZFP"] + df_persons["ECH"]).astype(int) + df_persons["edgt_household_id"] = (df_persons["ZFP"] + df_persons["ECH"]).astype( + int + ) df_trips["edgt_person_id"] = df_trips["PER"].astype(int) df_trips["edgt_household_id"] = (df_trips["ZFD"] + df_trips["ECH"]).astype(int) df_trips["edgt_trip_id"] = df_trips["NDEP"].astype(int) @@ -52,15 +85,19 @@ def execute(context): df_households["household_id"] = np.arange(len(df_households)) df_persons = pd.merge( - df_persons, df_households[["edgt_household_id", "household_id", "departement_id"]], - on = ["edgt_household_id"] - ).sort_values(by = ["household_id", "edgt_person_id"]) + df_persons, + df_households[["edgt_household_id", "household_id", "departement_id"]], + on=["edgt_household_id"], + ).sort_values(by=["household_id", "edgt_person_id"]) df_persons["person_id"] = np.arange(len(df_persons)) df_trips = pd.merge( - df_trips, df_persons[["edgt_person_id", "edgt_household_id", "person_id", "household_id"]], - on = ["edgt_person_id", "edgt_household_id"] - ).sort_values(by = ["household_id", "person_id", "edgt_trip_id"]) + df_trips, + df_persons[ + ["edgt_person_id", "edgt_household_id", "person_id", "household_id"] + ], + on=["edgt_person_id", "edgt_household_id"], + ).sort_values(by=["household_id", 
"person_id", "edgt_trip_id"]) df_trips["trip_id"] = np.arange(len(df_trips)) # Trip flags @@ -79,25 +116,45 @@ def execute(context): df_persons["sex"] = df_persons["sex"].astype("category") # Household size - df_size = df_persons.groupby("household_id").size().reset_index(name = "household_size") - df_households = pd.merge(df_households, df_size, on = "household_id") + df_size = ( + df_persons.groupby("household_id").size().reset_index(name="household_size") + ) + df_households = pd.merge(df_households, df_size, on="household_id") # Clean departement - df_trips = pd.merge(df_trips, df_spatial.rename(columns = { - "ZFM": "D3", "departement_id": "origin_departement_id" - }), on = "D3", how = "left") + df_trips = pd.merge( + df_trips, + df_spatial.rename( + columns={"ZFM": "D3", "departement_id": "origin_departement_id"} + ), + on="D3", + how="left", + ) - df_trips = pd.merge(df_trips, df_spatial.rename(columns = { - "ZFM": "D7", "departement_id": "destination_departement_id" - }), on = "D7", how = "left") + df_trips = pd.merge( + df_trips, + df_spatial.rename( + columns={"ZFM": "D7", "departement_id": "destination_departement_id"} + ), + on="D7", + how="left", + ) - df_trips["origin_departement_id"] = df_trips["origin_departement_id"].fillna("unknown") - df_trips["destination_departement_id"] = df_trips["destination_departement_id"].fillna("unknown") + df_trips["origin_departement_id"] = df_trips["origin_departement_id"].fillna( + "unknown" + ) + df_trips["destination_departement_id"] = df_trips[ + "destination_departement_id" + ].fillna("unknown") df_households["departement_id"] = df_households["departement_id"].astype("category") df_persons["departement_id"] = df_persons["departement_id"].astype("category") - df_trips["origin_departement_id"] = df_trips["origin_departement_id"].astype("category") - df_trips["destination_departement_id"] = df_trips["destination_departement_id"].astype("category") + df_trips["origin_departement_id"] = df_trips["origin_departement_id"].astype( + "category" + ) + df_trips["destination_departement_id"] = df_trips[ + "destination_departement_id" + ].astype("category") # Clean employment df_persons["employed"] = df_persons["P9"].isin(["1", "2"]) @@ -107,16 +164,20 @@ def execute(context): # Number of vehicles df_households["number_of_vehicles"] = df_households["M6"] + df_households["M14"] - df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype(int) + df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype( + int + ) df_households["number_of_bikes"] = df_households["M21"].astype(int) # License df_persons["has_license"] = df_persons["P7"] == "1" # Has subscription - df_persons["has_pt_subscription"] = df_persons["P12"].isin(["1", "2", "3", "5", "6"]) + df_persons["has_pt_subscription"] = df_persons["P12"].isin( + ["1", "2", "3", "5", "6"] + ) - # Survey respondents + # Survey respondents # PENQ 1 : fully awnsered the travel questionary section, having a chain or non-movers # PENQ 2 : nonrespondent of travel questionary section df_persons["PENQ"] = df_persons["PENQ"].fillna("2").astype("int") @@ -151,13 +212,13 @@ def execute(context): df_trips["routed_distance"] = df_trips["D12"] # Trip times - df_trips["departure_time"] = 3600.0 * (df_trips["D4"] // 100) # hour - df_trips["departure_time"] += 60.0 * (df_trips["D4"] % 100) # minute + df_trips["departure_time"] = 3600.0 * (df_trips["D4"] // 100) # hour + df_trips["departure_time"] += 60.0 * (df_trips["D4"] % 100) # minute - df_trips["arrival_time"] = 3600.0 * 
(df_trips["D8"] // 100) # hour - df_trips["arrival_time"] += 60.0 * (df_trips["D8"] % 100) # minute + df_trips["arrival_time"] = 3600.0 * (df_trips["D8"] // 100) # hour + df_trips["arrival_time"] += 60.0 * (df_trips["D8"] % 100) # minute - df_trips = df_trips.sort_values(by = ["household_id", "person_id", "trip_id"]) + df_trips = df_trips.sort_values(by=["household_id", "person_id", "trip_id"]) df_trips = hts.fix_trip_times(df_trips) # Durations @@ -166,16 +227,25 @@ def execute(context): # Add weight to trips df_trips = pd.merge( - df_trips, df_persons[["person_id", "COE1"]], on = "person_id", how = "left" - ).rename(columns = { "COE1": "trip_weight" }) + df_trips, df_persons[["person_id", "COE1"]], on="person_id", how="left" + ).rename(columns={"COE1": "trip_weight"}) df_persons["trip_weight"] = df_persons["COE1"] # Chain length - df_count = df_trips[["person_id"]].groupby("person_id").size().reset_index(name = "number_of_trips") + df_count = ( + df_trips[["person_id"]] + .groupby("person_id") + .size() + .reset_index(name="number_of_trips") + ) # People with at least one trip (number_of_trips > 0) - df_persons = pd.merge(df_persons, df_count, on = "person_id", how = "left") + df_persons = pd.merge(df_persons, df_count, on="person_id", how="left") # People that answered the travel questionary section but stayed at home (number_of_trips = 0) - df_persons.loc[(df_persons["travel_respondent"] == True) & (df_persons["number_of_trips"].isna()), "number_of_trips"] = 0 + df_persons.loc[ + (df_persons["travel_respondent"] == True) + & (df_persons["number_of_trips"].isna()), + "number_of_trips", + ] = 0 # Nonrespondent of travel questionary section (number_of_trips = -1) df_persons["number_of_trips"] = df_persons["number_of_trips"].fillna(-1).astype(int) @@ -186,7 +256,9 @@ def execute(context): # Calculate consumption units hts.check_household_size(df_households, df_persons) - df_households = pd.merge(df_households, hts.calculate_consumption_units(df_persons), on = "household_id") + df_households = pd.merge( + df_households, hts.calculate_consumption_units(df_persons), on="household_id" + ) # Socioprofessional class df_persons["socioprofessional_class"] = df_persons["PCSC"].fillna(8).astype(int) diff --git a/data/hts/edgt_lyon/cleaned_cerema.py b/data/hts/edgt_lyon/cleaned_cerema.py index d452820b..850a53ac 100644 --- a/data/hts/edgt_lyon/cleaned_cerema.py +++ b/data/hts/edgt_lyon/cleaned_cerema.py @@ -6,28 +6,55 @@ This stage cleans the Lyon EDGT. 
""" + def configure(context): context.stage("data.hts.edgt_lyon.raw_cerema") + PURPOSE_MAP = { "home": [1, 2], "work": [11, 12, 13, 81], "education": [21, 22, 23, 24, 25, 26, 27, 28, 29], "shop": [30, 31, 32, 33, 34, 35, 82], "leisure": [51, 52, 53, 54], - "other": [41, 42, 43, 61, 62, 63, 64, 71, 72, 73, 74, 91] + "other": [41, 42, 43, 61, 62, 63, 64, 71, 72, 73, 74, 91], } MODES_MAP = { "car": [13, 15, 21, 81], "car_passenger": [14, 16, 22, 82], - "pt": [31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 51, 52, 53, 61, 71, 91, 92, 94, 95], + "pt": [ + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 51, + 52, + 53, + 61, + 71, + 91, + 92, + 94, + 95, + ], "bike": [11, 17, 12, 18, 93], - "walk": [1, 2] # Actually, 2 is not really explained, but we assume it is walk + "walk": [1, 2], # Actually, 2 is not really explained, but we assume it is walk } + def execute(context): - df_households, df_persons, df_trips, df_spatial = context.stage("data.hts.edgt_lyon.raw_cerema") + df_households, df_persons, df_trips, df_spatial = context.stage( + "data.hts.edgt_lyon.raw_cerema" + ) # Merge departement into households df_spatial = df_spatial[["ZF__2015", "DepCom"]].copy() @@ -36,13 +63,17 @@ def execute(context): df_spatial = df_spatial[["MP2", "departement_id"]] # Attention, some households get lost here! - df_households = pd.merge(df_households, df_spatial, on = "MP2", how = "left") + df_households = pd.merge(df_households, df_spatial, on="MP2", how="left") df_households["departement_id"] = df_households["departement_id"].fillna("unknown") # Transform original IDs to integer (they are hierarchichal) - df_households["edgt_household_id"] = (df_households["ECH"] + df_households["MP2"]).astype(int) + df_households["edgt_household_id"] = ( + df_households["ECH"] + df_households["MP2"] + ).astype(int) df_persons["edgt_person_id"] = df_persons["PER"].astype(int) - df_persons["edgt_household_id"] = (df_persons["ECH"] + df_persons["PP2"]).astype(int) + df_persons["edgt_household_id"] = (df_persons["ECH"] + df_persons["PP2"]).astype( + int + ) df_trips["edgt_person_id"] = df_trips["PER"].astype(int) df_trips["edgt_household_id"] = (df_trips["ECH"] + df_trips["DP2"]).astype(int) df_trips["edgt_trip_id"] = df_trips["NDEP"].astype(int) @@ -51,15 +82,19 @@ def execute(context): df_households["household_id"] = np.arange(len(df_households)) df_persons = pd.merge( - df_persons, df_households[["edgt_household_id", "household_id", "departement_id"]], - on = ["edgt_household_id"] - ).sort_values(by = ["household_id", "edgt_person_id"]) + df_persons, + df_households[["edgt_household_id", "household_id", "departement_id"]], + on=["edgt_household_id"], + ).sort_values(by=["household_id", "edgt_person_id"]) df_persons["person_id"] = np.arange(len(df_persons)) df_trips = pd.merge( - df_trips, df_persons[["edgt_person_id", "edgt_household_id", "person_id", "household_id"]], - on = ["edgt_person_id", "edgt_household_id"] - ).sort_values(by = ["household_id", "person_id", "edgt_trip_id"]) + df_trips, + df_persons[ + ["edgt_person_id", "edgt_household_id", "person_id", "household_id"] + ], + on=["edgt_person_id", "edgt_household_id"], + ).sort_values(by=["household_id", "person_id", "edgt_trip_id"]) df_trips["trip_id"] = np.arange(len(df_trips)) # Trip flags @@ -78,25 +113,45 @@ def execute(context): df_persons["sex"] = df_persons["sex"].astype("category") # Household size - df_size = df_persons.groupby("household_id").size().reset_index(name = "household_size") - df_households = pd.merge(df_households, 
df_size, on = "household_id") + df_size = ( + df_persons.groupby("household_id").size().reset_index(name="household_size") + ) + df_households = pd.merge(df_households, df_size, on="household_id") # Clean departement - df_trips = pd.merge(df_trips, df_spatial.rename(columns = { - "MP2": "D3", "departement_id": "origin_departement_id" - }), on = "D3", how = "left") + df_trips = pd.merge( + df_trips, + df_spatial.rename( + columns={"MP2": "D3", "departement_id": "origin_departement_id"} + ), + on="D3", + how="left", + ) - df_trips = pd.merge(df_trips, df_spatial.rename(columns = { - "MP2": "D7", "departement_id": "destination_departement_id" - }), on = "D7", how = "left") + df_trips = pd.merge( + df_trips, + df_spatial.rename( + columns={"MP2": "D7", "departement_id": "destination_departement_id"} + ), + on="D7", + how="left", + ) - df_trips["origin_departement_id"] = df_trips["origin_departement_id"].fillna("unknown") - df_trips["destination_departement_id"] = df_trips["destination_departement_id"].fillna("unknown") + df_trips["origin_departement_id"] = df_trips["origin_departement_id"].fillna( + "unknown" + ) + df_trips["destination_departement_id"] = df_trips[ + "destination_departement_id" + ].fillna("unknown") df_households["departement_id"] = df_households["departement_id"].astype("category") df_persons["departement_id"] = df_persons["departement_id"].astype("category") - df_trips["origin_departement_id"] = df_trips["origin_departement_id"].astype("category") - df_trips["destination_departement_id"] = df_trips["destination_departement_id"].astype("category") + df_trips["origin_departement_id"] = df_trips["origin_departement_id"].astype( + "category" + ) + df_trips["destination_departement_id"] = df_trips[ + "destination_departement_id" + ].astype("category") # Clean employment df_persons["employed"] = df_persons["P7"].isin(["1", "2"]) @@ -106,7 +161,9 @@ def execute(context): # Number of vehicles df_households["number_of_vehicles"] = df_households["M6"] + df_households["M5"] - df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype(int) + df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype( + int + ) df_households["number_of_bikes"] = df_households["M7"].astype(int) # License @@ -115,7 +172,7 @@ def execute(context): # Has subscription df_persons["has_pt_subscription"] = df_persons["P10"].isin(["1", "2", "3"]) - # Survey respondents + # Survey respondents # PENQ 1 : fully awnsered the travel questionary section, having a chain or non-movers # PENQ 2 : nonrespondent of travel questionary section df_persons["PENQ"] = df_persons["PENQ"].fillna("2").astype(int) @@ -149,13 +206,13 @@ def execute(context): df_trips["routed_distance"] = df_trips["DIST"] # Trip times - df_trips["departure_time"] = 3600.0 * (df_trips["D4"] // 100) # hour - df_trips["departure_time"] += 60.0 * (df_trips["D4"] % 100) # minute + df_trips["departure_time"] = 3600.0 * (df_trips["D4"] // 100) # hour + df_trips["departure_time"] += 60.0 * (df_trips["D4"] % 100) # minute - df_trips["arrival_time"] = 3600.0 * (df_trips["D8"] // 100) # hour - df_trips["arrival_time"] += 60.0 * (df_trips["D8"] % 100) # minute + df_trips["arrival_time"] = 3600.0 * (df_trips["D8"] // 100) # hour + df_trips["arrival_time"] += 60.0 * (df_trips["D8"] % 100) # minute - df_trips = df_trips.sort_values(by = ["household_id", "person_id", "trip_id"]) + df_trips = df_trips.sort_values(by=["household_id", "person_id", "trip_id"]) df_trips = hts.fix_trip_times(df_trips) # Durations @@ -164,18 
+221,26 @@ def execute(context): # Add weight to trips df_trips = pd.merge( - df_trips, df_persons[["person_id", "COEQ"]], on = "person_id", how = "left" - ).rename(columns = { "COEQ": "trip_weight" }) + df_trips, df_persons[["person_id", "COEQ"]], on="person_id", how="left" + ).rename(columns={"COEQ": "trip_weight"}) df_persons["trip_weight"] = df_persons["COEQ"] # Chain length - df_count = df_trips[["person_id"]].groupby("person_id").size().reset_index(name = "number_of_trips") + df_count = ( + df_trips[["person_id"]] + .groupby("person_id") + .size() + .reset_index(name="number_of_trips") + ) # People with at least one trip (number_of_trips > 0) - df_persons = pd.merge(df_persons, df_count, on = "person_id", how = "left") - + df_persons = pd.merge(df_persons, df_count, on="person_id", how="left") + # People who answered the travel questionnaire section but stayed at home (number_of_trips = 0) - df_persons.loc[df_persons["travel_respondent"] & df_persons["number_of_trips"].isna(), "number_of_trips"] = 0 + df_persons.loc[ + df_persons["travel_respondent"] & df_persons["number_of_trips"].isna(), + "number_of_trips", + ] = 0 # Non-respondents of the travel questionnaire section (number_of_trips = -1) df_persons["number_of_trips"] = df_persons["number_of_trips"].fillna(-1).astype(int) @@ -187,11 +252,15 @@ def execute(context): # Calculate consumption units hts.check_household_size(df_households, df_persons) - df_households = pd.merge(df_households, hts.calculate_consumption_units(df_persons), on = "household_id") + df_households = pd.merge( + df_households, hts.calculate_consumption_units(df_persons), on="household_id" + ) # Socioprofessional class df_persons["socioprofessional_class"] = df_persons["P9"].fillna(8).astype(int) - df_persons.loc[df_persons["socioprofessional_class"] > 6, "socioprofessional_class"] = 8 + df_persons.loc[ + df_persons["socioprofessional_class"] > 6, "socioprofessional_class" + ] = 8 df_persons.loc[df_persons["P7"] == "7", "socioprofessional_class"] = 7 # Check departure and arrival times diff --git a/data/hts/edgt_lyon/filtered.py b/data/hts/edgt_lyon/filtered.py index cf957685..bac0a1a3 100644 --- a/data/hts/edgt_lyon/filtered.py +++ b/data/hts/edgt_lyon/filtered.py @@ -5,27 +5,39 @@ This stage filters out observations which live or work outside of the area.
""" + def configure(context): edgt_lyon_source = context.config("edgt_lyon_source", "unchosen") if edgt_lyon_source == "unchosen": - raise RuntimeError("Using 'hts: edgt_lyon' without specifying 'edgt_lyon_source' (either 'cerema' or 'adisp')") + raise RuntimeError( + "Using 'hts: edgt_lyon' without specifying 'edgt_lyon_source' (either 'cerema' or 'adisp')" + ) elif edgt_lyon_source == "adisp": - context.stage("data.hts.edgt_lyon.cleaned_adisp", alias="data.hts.edgt_lyon.cleaned") + context.stage( + "data.hts.edgt_lyon.cleaned_adisp", alias="data.hts.edgt_lyon.cleaned" + ) elif edgt_lyon_source == "cerema": - context.stage("data.hts.edgt_lyon.cleaned_cerema", alias="data.hts.edgt_lyon.cleaned") + context.stage( + "data.hts.edgt_lyon.cleaned_cerema", alias="data.hts.edgt_lyon.cleaned" + ) else: - raise RuntimeError("Unknown Lyon EDGT source (only 'cerema' and 'adisp' are supported): %s" % edgt_lyon_source) - + raise RuntimeError( + "Unknown Lyon EDGT source (only 'cerema' and 'adisp' are supported): %s" + % edgt_lyon_source + ) + context.stage("data.spatial.codes") - - context.config("filter_hts",True) + + context.config("filter_hts", True) + + def execute(context): filter_edgt = context.config("filter_hts") df_codes = context.stage("data.spatial.codes") df_households, df_persons, df_trips = context.stage("data.hts.edgt_lyon.cleaned") - if filter_edgt : + if filter_edgt: # Filter for non-residents requested_departments = df_codes["departement_id"].unique() f = df_persons["departement_id"].astype(str).isin(requested_departments) @@ -34,15 +46,26 @@ def execute(context): # Filter for people going outside of the area remove_ids = set() - remove_ids |= set(df_trips[ - ~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments) - ]["person_id"].unique()) + remove_ids |= set( + df_trips[ + ~df_trips["origin_departement_id"] + .astype(str) + .isin(requested_departments) + | ~df_trips["destination_departement_id"] + .astype(str) + .isin(requested_departments) + ]["person_id"].unique() + ) df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)] # Only keep trips and households that still have a person - df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())] - df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])] + df_trips = df_trips[ + df_trips["person_id"].isin(df_persons["person_id"].unique()) + ] + df_households = df_households[ + df_households["household_id"].isin(df_persons["household_id"]) + ] # Finish up df_households = df_households[hts.HOUSEHOLD_COLUMNS] diff --git a/data/hts/edgt_lyon/raw_adisp.py b/data/hts/edgt_lyon/raw_adisp.py index 5ec5f111..31f9d877 100644 --- a/data/hts/edgt_lyon/raw_adisp.py +++ b/data/hts/edgt_lyon/raw_adisp.py @@ -10,77 +10,121 @@ Adapted from the first implementation by Valentin Le Besond (IFSTTAR Nantes) """ + def configure(context): context.config("data_path") + HOUSEHOLD_COLUMNS = { - "ECH": str, "ZFM": str, # id - "M6": int, "M21": int, "M14": int, # number_of_cars, number_of_bikes, number_of_motorbikes - "COE0": float # weights + "ECH": str, + "ZFM": str, # id + "M6": int, + "M21": int, + "M14": int, # number_of_cars, number_of_bikes, number_of_motorbikes + "COE0": float, # weights } PERSON_COLUMNS = { - "ECH": str, "PER": int, "ZFP": str, # id - "PENQ": str, # respondents of travel questionary section - "P2": int, "P4": int, # sex, age - "P9": str, # employed, studies - "P7": str, 
"P12": str, # has_license, has_pt_subscription - "PCSC": str, # socioprofessional_class - "COEP": float, "COE1": float # weights + "ECH": str, + "PER": int, + "ZFP": str, # id + "PENQ": str, # respondents of travel questionary section + "P2": int, + "P4": int, # sex, age + "P9": str, # employed, studies + "P7": str, + "P12": str, # has_license, has_pt_subscription + "PCSC": str, # socioprofessional_class + "COEP": float, + "COE1": float, # weights } TRIP_COLUMNS = { - "ECH": str, "PER": int, "NDEP": int, "ZFD": str, # id - "D2A": int, "D5A": int, # preceding_purpose, following_purpose - "D3": str, "D7": str, # origin_zone, destination_zone - "D4": int, "D8": int, # time_departure, time_arrival - "MODP": int, "D11": int, "D12": int # mode, euclidean_distance, routed_distance + "ECH": str, + "PER": int, + "NDEP": int, + "ZFD": str, # id + "D2A": int, + "D5A": int, # preceding_purpose, following_purpose + "D3": str, + "D7": str, # origin_zone, destination_zone + "D4": int, + "D8": int, # time_departure, time_arrival + "MODP": int, + "D11": int, + "D12": int, # mode, euclidean_distance, routed_distance } + def execute(context): # Load households - df_households = pd.concat([ - pd.read_csv( - "%s/edgt_lyon_2015/lyon_2015_std_faf_men.csv" - % context.config("data_path"), sep=";", usecols = list(HOUSEHOLD_COLUMNS.keys()), dtype = HOUSEHOLD_COLUMNS - ), - pd.read_csv( - "%s/edgt_lyon_2015/lyon_2015_std_tel_men.csv" - % context.config("data_path"), sep=";", usecols = list(HOUSEHOLD_COLUMNS.keys()), dtype = HOUSEHOLD_COLUMNS - ) - ]) + df_households = pd.concat( + [ + pd.read_csv( + "%s/edgt_lyon_2015/lyon_2015_std_faf_men.csv" + % context.config("data_path"), + sep=";", + usecols=list(HOUSEHOLD_COLUMNS.keys()), + dtype=HOUSEHOLD_COLUMNS, + ), + pd.read_csv( + "%s/edgt_lyon_2015/lyon_2015_std_tel_men.csv" + % context.config("data_path"), + sep=";", + usecols=list(HOUSEHOLD_COLUMNS.keys()), + dtype=HOUSEHOLD_COLUMNS, + ), + ] + ) # Load persons - df_persons = pd.concat([ - pd.read_csv( - "%s/edgt_lyon_2015/lyon_2015_std_faf_pers.csv" - % context.config("data_path"), sep=";", usecols = list(PERSON_COLUMNS.keys()), dtype = PERSON_COLUMNS - ), - pd.read_csv( - "%s/edgt_lyon_2015/lyon_2015_std_tel_pers.csv" - % context.config("data_path"), sep=";", usecols = list(PERSON_COLUMNS.keys()), dtype = PERSON_COLUMNS - ) - ]) + df_persons = pd.concat( + [ + pd.read_csv( + "%s/edgt_lyon_2015/lyon_2015_std_faf_pers.csv" + % context.config("data_path"), + sep=";", + usecols=list(PERSON_COLUMNS.keys()), + dtype=PERSON_COLUMNS, + ), + pd.read_csv( + "%s/edgt_lyon_2015/lyon_2015_std_tel_pers.csv" + % context.config("data_path"), + sep=";", + usecols=list(PERSON_COLUMNS.keys()), + dtype=PERSON_COLUMNS, + ), + ] + ) # Load trips - df_trips = pd.concat([ - pd.read_csv( - "%s/edgt_lyon_2015/lyon_2015_std_faf_depl.csv" - % context.config("data_path"), sep=";", usecols = list(TRIP_COLUMNS.keys()), dtype = TRIP_COLUMNS - ), - pd.read_csv( - "%s/edgt_lyon_2015/lyon_2015_std_tel_depl.csv" - % context.config("data_path"), sep=";", usecols = list(TRIP_COLUMNS.keys()), dtype = TRIP_COLUMNS - ) - ]) + df_trips = pd.concat( + [ + pd.read_csv( + "%s/edgt_lyon_2015/lyon_2015_std_faf_depl.csv" + % context.config("data_path"), + sep=";", + usecols=list(TRIP_COLUMNS.keys()), + dtype=TRIP_COLUMNS, + ), + pd.read_csv( + "%s/edgt_lyon_2015/lyon_2015_std_tel_depl.csv" + % context.config("data_path"), + sep=";", + usecols=list(TRIP_COLUMNS.keys()), + dtype=TRIP_COLUMNS, + ), + ] + ) # Load spatial data df_spatial = gpd.read_file( - 
"%s/edgt_lyon_2015/EDGT_AML2015_ZF_GT.TAB" - % context.config("data_path")) + "%s/edgt_lyon_2015/EDGT_AML2015_ZF_GT.TAB" % context.config("data_path") + ) return df_households, df_persons, df_trips, df_spatial + FILES = [ "lyon_2015_std_faf_men.csv", "lyon_2015_std_tel_men.csv", @@ -92,12 +136,15 @@ def execute(context): "EDGT_AML2015_ZF_GT.ID", "EDGT_AML2015_ZF_GT.IND", "EDGT_AML2015_ZF_GT.MAP", - "EDGT_AML2015_ZF_GT.TAB" + "EDGT_AML2015_ZF_GT.TAB", ] + def validate(context): for name in FILES: - if not os.path.exists("%s/edgt_lyon_2015/%s" % (context.config("data_path"), name)): + if not os.path.exists( + "%s/edgt_lyon_2015/%s" % (context.config("data_path"), name) + ): raise RuntimeError("File missing from EDGT: %s" % name) return [ diff --git a/data/hts/edgt_lyon/raw_cerema.py b/data/hts/edgt_lyon/raw_cerema.py index c48b28a3..76e3835d 100644 --- a/data/hts/edgt_lyon/raw_cerema.py +++ b/data/hts/edgt_lyon/raw_cerema.py @@ -10,82 +10,134 @@ Adapted from the first implementation by Valentin Le Besond (IFSTTAR Nantes) """ + def configure(context): context.config("data_path") + HOUSEHOLD_COLUMNS = { - "MP2": str, "ECH": str, "COEM": float, - "M6": int, "M7": int, "M5": int + "MP2": str, + "ECH": str, + "COEM": float, + "M6": int, + "M7": int, + "M5": int, } PERSON_COLUMNS = { - "ECH": str, "PER": int, "PP2": str, "PENQ": str, - "P3": int, "P2": int, "P4": int, - "P7": str, "P12": str, - "P10": str, "P9": str, "P5": str, - "COEP": float, "COEQ": float, "P1": int + "ECH": str, + "PER": int, + "PP2": str, + "PENQ": str, + "P3": int, + "P2": int, + "P4": int, + "P7": str, + "P12": str, + "P10": str, + "P9": str, + "P5": str, + "COEP": float, + "COEQ": float, + "P1": int, } TRIP_COLUMNS = { - "ECH": str, "PER": int, "NDEP": int, "DP2": str, - "D2A": int, "D5A": int, "D3": str, "D4": int, - "D7": str, "D8": int, - "D8C": int, "MODP": int, "DOIB": int, "DIST": int + "ECH": str, + "PER": int, + "NDEP": int, + "DP2": str, + "D2A": int, + "D5A": int, + "D3": str, + "D4": int, + "D7": str, + "D8": int, + "D8C": int, + "MODP": int, + "DOIB": int, + "DIST": int, } + def execute(context): # Load households df_household_dictionary = pd.read_excel( "%s/edgt_lyon_2015/EDGT-AML-2015_Total_Dessin&Dictionnaire.xls" - % context.config("data_path"), skiprows = 1, nrows = 21, - usecols = [1,2], names = ["size", "variable"]) + % context.config("data_path"), + skiprows=1, + nrows=21, + usecols=[1, 2], + names=["size", "variable"], + ) column_widths = df_household_dictionary["size"].values column_names = df_household_dictionary["variable"].values df_households = pd.read_fwf( "%s/edgt_lyon_2015/EDGT_AML_MENAGE_FAF_TEL_2015-08-03.txt" - % context.config("data_path"), widths = column_widths, header = None, - names = column_names, usecols = list(HOUSEHOLD_COLUMNS.keys()), dtype = HOUSEHOLD_COLUMNS + % context.config("data_path"), + widths=column_widths, + header=None, + names=column_names, + usecols=list(HOUSEHOLD_COLUMNS.keys()), + dtype=HOUSEHOLD_COLUMNS, ) # Load persons df_person_dictionary = pd.read_excel( "%s/edgt_lyon_2015/EDGT-AML-2015_Total_Dessin&Dictionnaire.xls" - % context.config("data_path"), skiprows = 25, nrows = 34, - usecols = [1,2], names = ["size", "variable"]) + % context.config("data_path"), + skiprows=25, + nrows=34, + usecols=[1, 2], + names=["size", "variable"], + ) column_widths = df_person_dictionary["size"].values column_names = df_person_dictionary["variable"].values df_persons = pd.read_fwf( "%s/edgt_lyon_2015/EDGT_AML_PERSO_DIST_DT_2015-10-27.txt" - % context.config("data_path"), widths = 
column_widths, header = None, - names = column_names, usecols = list(PERSON_COLUMNS.keys()), dtype = PERSON_COLUMNS + % context.config("data_path"), + widths=column_widths, + header=None, + names=column_names, + usecols=list(PERSON_COLUMNS.keys()), + dtype=PERSON_COLUMNS, ) # Load trips df_trip_dictionary = pd.read_excel( "%s/edgt_lyon_2015/EDGT-AML-2015_Total_Dessin&Dictionnaire.xls" - % context.config("data_path"), skiprows = 62, nrows = 24, - usecols = [1,2], names = ["size", "variable"]) + % context.config("data_path"), + skiprows=62, + nrows=24, + usecols=[1, 2], + names=["size", "variable"], + ) column_widths = df_trip_dictionary["size"].values column_names = df_trip_dictionary["variable"].values df_trips = pd.read_fwf( "%s/edgt_lyon_2015/EDGT_AML_DEPLA_DIST_2015-10-27.txt" - % context.config("data_path"), widths = column_widths, header = None, - names = column_names, usecols = list(TRIP_COLUMNS.keys()), dtype = TRIP_COLUMNS + % context.config("data_path"), + widths=column_widths, + header=None, + names=column_names, + usecols=list(TRIP_COLUMNS.keys()), + dtype=TRIP_COLUMNS, ) # Load spatial data df_spatial = gpd.read_file( - "%s/edgt_lyon_2015/EDGT_AML2015_ZF_GT.TAB" - % context.config("data_path")) + "%s/edgt_lyon_2015/EDGT_AML2015_ZF_GT.TAB" % context.config("data_path") + ) return df_households, df_persons, df_trips, df_spatial + FILES = [ "EDGT_AML_MENAGE_FAF_TEL_2015-08-03.txt", "EDGT_AML_PERSO_DIST_DT_2015-10-27.txt", @@ -95,12 +147,15 @@ def execute(context): "EDGT_AML2015_ZF_GT.ID", "EDGT_AML2015_ZF_GT.IND", "EDGT_AML2015_ZF_GT.MAP", - "EDGT_AML2015_ZF_GT.TAB" + "EDGT_AML2015_ZF_GT.TAB", ] + def validate(context): for name in FILES: - if not os.path.exists("%s/edgt_lyon_2015/%s" % (context.config("data_path"), name)): + if not os.path.exists( + "%s/edgt_lyon_2015/%s" % (context.config("data_path"), name) + ): raise RuntimeError("File missing from EDGT: %s" % name) return [ diff --git a/data/hts/edgt_lyon/reweighted.py b/data/hts/edgt_lyon/reweighted.py index f858d79b..368c5558 100644 --- a/data/hts/edgt_lyon/reweighted.py +++ b/data/hts/edgt_lyon/reweighted.py @@ -1,8 +1,10 @@ import numpy as np + def configure(context): context.stage("data.hts.edgt_lyon.filtered") + def execute(context): df_households, df_persons, df_trips = context.stage("data.hts.edgt_lyon.filtered") diff --git a/data/hts/egt/cleaned.py b/data/hts/egt/cleaned.py index 490320c9..7f54f317 100644 --- a/data/hts/egt/cleaned.py +++ b/data/hts/egt/cleaned.py @@ -7,43 +7,46 @@ This stage cleans the regional HTS. 
""" + def configure(context): context.stage("data.hts.egt.raw") if context.config("use_urban_type", False): context.stage("data.spatial.urban_type") + INCOME_CLASS_BOUNDS = [800, 1200, 1600, 2000, 2400, 3000, 3500, 4500, 5500, 1e6] PURPOSE_MAP = { - 1 : "home", - 2 : "work", - 3 : "work", - 4 : "education", - 5 : "shop", - 6 : "other", - 7 : "other", - 8 : "leisure" + 1: "home", + 2: "work", + 3: "work", + 4: "education", + 5: "shop", + 6: "other", + 7: "other", + 8: "leisure", # 9 : "other" # default } MODES_MAP = { - 1 : "pt", - 2 : "car", - 3 : "car_passenger", - 4 : "car", - 5 : "bike", - #6 : "pt", # default (other) - 7 : "walk" + 1: "pt", + 2: "car", + 3: "car_passenger", + 4: "car", + 5: "bike", + # 6 : "pt", # default (other) + 7: "walk", } + def execute(context): df_households, df_persons, df_trips = context.stage("data.hts.egt.raw") # Make copies - df_households = pd.DataFrame(df_households, copy = True) - df_persons = pd.DataFrame(df_persons, copy = True) - df_trips = pd.DataFrame(df_trips, copy = True) + df_households = pd.DataFrame(df_households, copy=True) + df_persons = pd.DataFrame(df_persons, copy=True) + df_trips = pd.DataFrame(df_trips, copy=True) # Transform original IDs to integer (they are hierarchichal) df_households["egt_household_id"] = df_households["NQUEST"].astype(int) @@ -57,14 +60,16 @@ def execute(context): df_households["household_id"] = np.arange(len(df_households)) df_persons = pd.merge( - df_persons, df_households[["egt_household_id", "household_id"]], - on = "egt_household_id" + df_persons, + df_households[["egt_household_id", "household_id"]], + on="egt_household_id", ) df_persons["person_id"] = np.arange(len(df_persons)) df_trips = pd.merge( - df_trips, df_persons[["egt_person_id", "egt_household_id", "person_id", "household_id"]], - on = ["egt_person_id", "egt_household_id"] + df_trips, + df_persons[["egt_person_id", "egt_household_id", "person_id", "household_id"]], + on=["egt_person_id", "egt_household_id"], ) df_trips["trip_id"] = np.arange(len(df_trips)) @@ -88,9 +93,13 @@ def execute(context): # Clean departement df_persons["departement_id"] = df_persons["RESDEP"].astype(str).astype("category") - df_households["departement_id"] = df_households["RESDEP"].astype(str).astype("category") + df_households["departement_id"] = ( + df_households["RESDEP"].astype(str).astype("category") + ) df_trips["origin_departement_id"] = df_trips["ORDEP"].astype(str).astype("category") - df_trips["destination_departement_id"] = df_trips["DESTDEP"].astype(str).astype("category") + df_trips["destination_departement_id"] = ( + df_trips["DESTDEP"].astype(str).astype("category") + ) # Clean employment df_persons["employed"] = df_persons["OCCP"].isin([1.0, 2.0]) @@ -99,38 +108,50 @@ def execute(context): df_persons["studies"] = df_persons["OCCP"].isin([3.0, 4.0, 5.0]) # Number of vehicles - df_households["number_of_vehicles"] = df_households["NB_2RM"] + df_households["NB_VD"] - df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype(int) + df_households["number_of_vehicles"] = ( + df_households["NB_2RM"] + df_households["NB_VD"] + ) + df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype( + int + ) df_households["number_of_bikes"] = df_households["NB_VELO"].astype(int) # License - df_persons["has_license"] = (df_persons["PERMVP"] == 1) | (df_persons["PERM2RM"] == 1) + df_persons["has_license"] = (df_persons["PERMVP"] == 1) | ( + df_persons["PERM2RM"] == 1 + ) # Has subscription df_persons["has_pt_subscription"] = 
df_persons["ABONTC"] > 1 # Household income df_households["income_class"] = df_households["REVENU"] - 1 - df_households.loc[df_households["income_class"].isin([10.0, 11.0, np.nan]), "income_class"] = -1 + df_households.loc[ + df_households["income_class"].isin([10.0, 11.0, np.nan]), "income_class" + ] = -1 df_households["income_class"] = df_households["income_class"].astype(int) # Impute urban type if context.config("use_urban_type"): - df_urban_type = context.stage("data.spatial.urban_type")[[ - "commune_id", "urban_type" - ]] + df_urban_type = context.stage("data.spatial.urban_type")[ + ["commune_id", "urban_type"] + ] # Household municipality df_households["commune_id"] = df_households["RESCOMM"].astype(str) - df_persons = pd.merge(df_persons, df_households[["household_id", "commune_id"]], how = "left") + df_persons = pd.merge( + df_persons, df_households[["household_id", "commune_id"]], how="left" + ) assert np.all(~df_persons["commune_id"].isna()) - + # Impute urban type - df_persons = pd.merge(df_persons, df_urban_type, on = "commune_id", how = "left") - df_persons["urban_type"] = df_persons["urban_type"].fillna("none").astype("category") + df_persons = pd.merge(df_persons, df_urban_type, on="commune_id", how="left") + df_persons["urban_type"] = ( + df_persons["urban_type"].fillna("none").astype("category") + ) - df_households.drop(columns = ["commune_id"]) - df_persons.drop(columns = ["commune_id"]) + df_households.drop(columns=["commune_id"]) + df_persons.drop(columns=["commune_id"]) # Trip purpose df_trips["following_purpose"] = "other" @@ -165,8 +186,8 @@ def execute(context): # Add weight to trips df_trips = pd.merge( - df_trips, df_persons[["person_id", "person_weight"]], on = "person_id", how = "left" - ).rename(columns = { "person_weight": "trip_weight" }) + df_trips, df_persons[["person_id", "person_weight"]], on="person_id", how="left" + ).rename(columns={"person_weight": "trip_weight"}) df_persons["trip_weight"] = df_persons["person_weight"] # Chain length @@ -179,7 +200,9 @@ def execute(context): # Calculate consumption units hts.check_household_size(df_households, df_persons) - df_households = pd.merge(df_households, hts.calculate_consumption_units(df_persons), on = "household_id") + df_households = pd.merge( + df_households, hts.calculate_consumption_units(df_persons), on="household_id" + ) # Socioprofessional class df_persons["socioprofessional_class"] = df_persons["CS8"].fillna(8).astype(int) @@ -194,19 +217,29 @@ def execute(context): nan_count = np.count_nonzero(f) total_count = len(df_persons) - print("Dropping %d/%d persons because of NaN values in departure and arrival times" % (nan_count, total_count)) + print( + "Dropping %d/%d persons because of NaN values in departure and arrival times" + % (nan_count, total_count) + ) df_persons = df_persons[~f] df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())] - df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])] + df_households = df_households[ + df_households["household_id"].isin(df_persons["household_id"]) + ] # Fix activity types (because of inconsistent EGT data and removing in the timing fixing step) hts.fix_activity_types(df_trips) return df_households, df_persons, df_trips + def calculate_income_class(df): assert "household_income" in df assert "consumption_units" in df - return np.digitize(df["household_income"] / df["consumption_units"], INCOME_CLASS_BOUNDS, right = True) + return np.digitize( + df["household_income"] / 
df["consumption_units"], + INCOME_CLASS_BOUNDS, + right=True, + ) diff --git a/data/hts/egt/filtered.py b/data/hts/egt/filtered.py index 29f06604..54701c26 100644 --- a/data/hts/egt/filtered.py +++ b/data/hts/egt/filtered.py @@ -6,49 +6,70 @@ Île-de-France. """ + def configure(context): context.stage("data.hts.egt.cleaned") context.stage("data.spatial.codes") - context.config("filter_hts",True) + context.config("filter_hts", True) + + def execute(context): - filter_egt = context.config("filter_hts") + filter_egt = context.config("filter_hts") df_codes = context.stage("data.spatial.codes") df_households, df_persons, df_trips = context.stage("data.hts.egt.cleaned") - if filter_egt : + if filter_egt: # Filter for non-residents requested_departments = df_codes["departement_id"].unique() - f = df_persons["departement_id"].astype(str).isin(requested_departments) # pandas bug! + f = ( + df_persons["departement_id"].astype(str).isin(requested_departments) + ) # pandas bug! df_persons = df_persons[f] # Filter for people going outside of the area (because they have NaN distances) remove_ids = set() - remove_ids |= set(df_trips[ - ~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments) - ]["person_id"].unique()) + remove_ids |= set( + df_trips[ + ~df_trips["origin_departement_id"] + .astype(str) + .isin(requested_departments) + | ~df_trips["destination_departement_id"] + .astype(str) + .isin(requested_departments) + ]["person_id"].unique() + ) - remove_ids |= set(df_persons[ - ~df_persons["departement_id"].isin(requested_departments) - ]) + remove_ids |= set( + df_persons[~df_persons["departement_id"].isin(requested_departments)] + ) df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)] # Only keep trips and households that still have a person - df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())] - df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])] + df_trips = df_trips[ + df_trips["person_id"].isin(df_persons["person_id"].unique()) + ] + df_households = df_households[ + df_households["household_id"].isin(df_persons["household_id"]) + ] # Finish up household_columns = hts.HOUSEHOLD_COLUMNS + ["income_class"] + ["egt_household_id"] df_households = df_households[household_columns] - + person_columns = hts.PERSON_COLUMNS + ["egt_household_id", "egt_person_id"] - if "urban_type" in df_persons: person_columns.append("urban_type") + if "urban_type" in df_persons: + person_columns.append("urban_type") df_persons = df_persons[person_columns] - - trip_columns = hts.TRIP_COLUMNS + ["euclidean_distance"] + ["egt_household_id", "egt_person_id", "egt_trip_id"] + + trip_columns = ( + hts.TRIP_COLUMNS + + ["euclidean_distance"] + + ["egt_household_id", "egt_person_id", "egt_trip_id"] + ) df_trips = df_trips[trip_columns] hts.check(df_households, df_persons, df_trips) diff --git a/data/hts/egt/raw.py b/data/hts/egt/raw.py index 53b88a21..2ce75ed8 100644 --- a/data/hts/egt/raw.py +++ b/data/hts/egt/raw.py @@ -7,49 +7,97 @@ """ MENAGES_COLUMNS = [ - "RESDEP", "NQUEST", "POIDSM", "NB_VELO", "NB_VD", "REVENU", "RESCOMM", - "NB_2RM", "MNP" + "RESDEP", + "NQUEST", + "POIDSM", + "NB_VELO", + "NB_VD", + "REVENU", + "RESCOMM", + "NB_2RM", + "MNP", ] PERSONNES_COLUMNS = [ - "RESDEP", "NP", "POIDSP", "NQUEST", "SEXE", "AGE", "PERMVP", - "ABONTC", "OCCP", "PERM2RM", "NBDEPL", "CS8" + "RESDEP", + "NP", + "POIDSP", + "NQUEST", + "SEXE", + "AGE", 
+ "PERMVP", + "ABONTC", + "OCCP", + "PERM2RM", + "NBDEPL", + "CS8", ] DEPLACEMENTS_COLUMNS = [ - "NQUEST", "NP", "ND", - "ORDEP", "DESTDEP", "ORH", "DESTH", "ORM", "DESTM", "ORCOMM", "DESTCOMM", - "DPORTEE", "MODP_H7", "DESTMOT_H9", "ORMOT_H9" + "NQUEST", + "NP", + "ND", + "ORDEP", + "DESTDEP", + "ORH", + "DESTH", + "ORM", + "DESTM", + "ORCOMM", + "DESTCOMM", + "DPORTEE", + "MODP_H7", + "DESTMOT_H9", + "ORMOT_H9", ] + def configure(context): context.config("data_path") + def execute(context): df_menages = pd.read_csv( "%s/egt_2010/Menages_semaine.csv" % context.config("data_path"), - sep = ",", encoding = "latin1", usecols = MENAGES_COLUMNS + sep=",", + encoding="latin1", + usecols=MENAGES_COLUMNS, ) df_personnes = pd.read_csv( "%s/egt_2010/Personnes_semaine.csv" % context.config("data_path"), - sep = ",", encoding = "latin1", usecols = PERSONNES_COLUMNS + sep=",", + encoding="latin1", + usecols=PERSONNES_COLUMNS, ) df_deplacements = pd.read_csv( "%s/egt_2010/Deplacements_semaine.csv" % context.config("data_path"), - sep = ",", encoding = "latin1", usecols = DEPLACEMENTS_COLUMNS + sep=",", + encoding="latin1", + usecols=DEPLACEMENTS_COLUMNS, ) return df_menages, df_personnes, df_deplacements + def validate(context): - for name in ("Menages_semaine.csv", "Personnes_semaine.csv", "Deplacements_semaine.csv"): + for name in ( + "Menages_semaine.csv", + "Personnes_semaine.csv", + "Deplacements_semaine.csv", + ): if not os.path.exists("%s/egt_2010/%s" % (context.config("data_path"), name)): raise RuntimeError("File missing from EGT: %s" % name) return [ - os.path.getsize("%s/egt_2010/Menages_semaine.csv" % context.config("data_path")), - os.path.getsize("%s/egt_2010/Personnes_semaine.csv" % context.config("data_path")), - os.path.getsize("%s/egt_2010/Deplacements_semaine.csv" % context.config("data_path")) + os.path.getsize( + "%s/egt_2010/Menages_semaine.csv" % context.config("data_path") + ), + os.path.getsize( + "%s/egt_2010/Personnes_semaine.csv" % context.config("data_path") + ), + os.path.getsize( + "%s/egt_2010/Deplacements_semaine.csv" % context.config("data_path") + ), ] diff --git a/data/hts/entd/cleaned.py b/data/hts/entd/cleaned.py index 51bfd966..51618eee 100644 --- a/data/hts/entd/cleaned.py +++ b/data/hts/entd/cleaned.py @@ -7,10 +7,27 @@ This stage cleans the national HTS. 
""" + def configure(context): context.stage("data.hts.entd.raw") -INCOME_CLASS_BOUNDS = [400, 600, 800, 1000, 1200, 1500, 1800, 2000, 2500, 3000, 4000, 6000, 10000, 1e6] + +INCOME_CLASS_BOUNDS = [ + 400, + 600, + 800, + 1000, + 1200, + 1500, + 1800, + 2000, + 2500, + 3000, + 4000, + 6000, + 10000, + 1e6, +] PURPOSE_MAP = [ ("1", "home"), @@ -22,38 +39,47 @@ def configure(context): ("6", "other"), ("7", "leisure"), ("8", "leisure"), - ("9", "work") + ("9", "work"), ] MODES_MAP = [ ("1", "walk"), - ("2", "car"), # - ("2.20", "bike"), # bike - ("2.23", "car_passenger"), # motorcycle passenger - ("2.25", "car_passenger"), # same + ("2", "car"), # + ("2.20", "bike"), # bike + ("2.23", "car_passenger"), # motorcycle passenger + ("2.25", "car_passenger"), # same ("3", "car"), ("3.32", "car_passenger"), - ("4", "pt"), # taxi + ("4", "pt"), # taxi ("5", "pt"), ("6", "pt"), - ("7", "pt"), # Plane - ("8", "pt"), # Boat -# ("9", "pt") # Other + ("7", "pt"), # Plane + ("8", "pt"), # Boat + # ("9", "pt") # Other ] + def convert_time(x): - return np.dot(np.array(x.split(":"), dtype = float), [3600.0, 60.0, 1.0]) + return np.dot(np.array(x.split(":"), dtype=float), [3600.0, 60.0, 1.0]) + def execute(context): - df_individu, df_tcm_individu, df_menage, df_tcm_menage, df_deploc = context.stage("data.hts.entd.raw") + df_individu, df_tcm_individu, df_menage, df_tcm_menage, df_deploc = context.stage( + "data.hts.entd.raw" + ) # Make copies - df_persons = pd.DataFrame(df_tcm_individu, copy = True) - df_households = pd.DataFrame(df_tcm_menage, copy = True) - df_trips = pd.DataFrame(df_deploc, copy = True) + df_persons = pd.DataFrame(df_tcm_individu, copy=True) + df_households = pd.DataFrame(df_tcm_menage, copy=True) + df_trips = pd.DataFrame(df_deploc, copy=True) # Get weights for persons that actually have trips - df_persons = pd.merge(df_persons, df_trips[["IDENT_IND", "PONDKI"]].drop_duplicates("IDENT_IND"), on = "IDENT_IND", how = "left") + df_persons = pd.merge( + df_persons, + df_trips[["IDENT_IND", "PONDKI"]].drop_duplicates("IDENT_IND"), + on="IDENT_IND", + how="left", + ) df_persons["is_kish"] = ~df_persons["PONDKI"].isna() df_persons["trip_weight"] = df_persons["PONDKI"].fillna(0.0) @@ -64,13 +90,21 @@ def execute(context): print("Filtering out %d non-reference day trips" % np.count_nonzero(~f)) # Merge in additional information from ENTD - df_households = pd.merge(df_households, df_menage[[ - "idENT_MEN", "V1_JNBVEH", "V1_JNBMOTO", "V1_JNBCYCLO", "V1_JNBVELOADT" - ]], on = "idENT_MEN", how = "left") + df_households = pd.merge( + df_households, + df_menage[ + ["idENT_MEN", "V1_JNBVEH", "V1_JNBMOTO", "V1_JNBCYCLO", "V1_JNBVELOADT"] + ], + on="idENT_MEN", + how="left", + ) - df_persons = pd.merge(df_persons, df_individu[[ - "IDENT_IND", "V1_GPERMIS", "V1_GPERMIS2R", "V1_ICARTABON" - ]], on = "IDENT_IND", how = "left") + df_persons = pd.merge( + df_persons, + df_individu[["IDENT_IND", "V1_GPERMIS", "V1_GPERMIS2R", "V1_ICARTABON"]], + on="IDENT_IND", + how="left", + ) # Transform original IDs to integer (they are hierarchichal) df_persons["entd_person_id"] = df_persons["IDENT_IND"].astype(int) @@ -82,14 +116,16 @@ def execute(context): df_households["household_id"] = np.arange(len(df_households)) df_persons = pd.merge( - df_persons, df_households[["entd_household_id", "household_id"]], - on = "entd_household_id" + df_persons, + df_households[["entd_household_id", "household_id"]], + on="entd_household_id", ) df_persons["person_id"] = np.arange(len(df_persons)) df_trips = pd.merge( - df_trips, 
df_persons[["entd_person_id", "person_id", "household_id"]], - on = ["entd_person_id"] + df_trips, + df_persons[["entd_person_id", "person_id", "household_id"]], + on=["entd_person_id"], ) df_trips["trip_id"] = np.arange(len(df_trips)) @@ -109,19 +145,24 @@ def execute(context): df_households["household_size"] = df_households["NPERS"] # Clean departement - df_households["departement_id"] = df_households["DEP"].fillna("undefined").astype("category") - df_persons["departement_id"] = df_persons["DEP"].fillna("undefined").astype("category") + df_households["departement_id"] = ( + df_households["DEP"].fillna("undefined").astype("category") + ) + df_persons["departement_id"] = ( + df_persons["DEP"].fillna("undefined").astype("category") + ) - df_trips["origin_departement_id"] = df_trips["V2_MORIDEP"].fillna("undefined").astype("category") - df_trips["destination_departement_id"] = df_trips["V2_MDESDEP"].fillna("undefined").astype("category") + df_trips["origin_departement_id"] = ( + df_trips["V2_MORIDEP"].fillna("undefined").astype("category") + ) + df_trips["destination_departement_id"] = ( + df_trips["V2_MDESDEP"].fillna("undefined").astype("category") + ) # Clean urban type - df_households["urban_type"] = df_households["numcom_UU2010"].replace({ - "B": "suburb", - "C": "central_city", - "I": "isolated_city", - "R": "none" - }) + df_households["urban_type"] = df_households["numcom_UU2010"].replace( + {"B": "suburb", "C": "central_city", "I": "isolated_city", "R": "none"} + ) assert np.all(~df_households["urban_type"].isna()) df_households["urban_type"] = df_households["urban_type"].astype("category") @@ -139,32 +180,67 @@ def execute(context): df_households["number_of_vehicles"] += df_households["V1_JNBVEH"].fillna(0) df_households["number_of_vehicles"] += df_households["V1_JNBMOTO"].fillna(0) df_households["number_of_vehicles"] += df_households["V1_JNBCYCLO"].fillna(0) - df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype(int) + df_households["number_of_vehicles"] = df_households["number_of_vehicles"].astype( + int + ) - df_households["number_of_bikes"] = df_households["V1_JNBVELOADT"].fillna(0).astype(int) + df_households["number_of_bikes"] = ( + df_households["V1_JNBVELOADT"].fillna(0).astype(int) + ) # License - df_persons["has_license"] = (df_persons["V1_GPERMIS"] == 1) | (df_persons["V1_GPERMIS2R"] == 1) + df_persons["has_license"] = (df_persons["V1_GPERMIS"] == 1) | ( + df_persons["V1_GPERMIS2R"] == 1 + ) # Has subscription df_persons["has_pt_subscription"] = df_persons["V1_ICARTABON"] == 1 # Household income df_households["income_class"] = -1 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("Moins de 400"), "income_class"] = 0 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 400"), "income_class"] = 1 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 600"), "income_class"] = 2 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 800"), "income_class"] = 3 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 1 000"), "income_class"] = 4 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 1 200"), "income_class"] = 5 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 1 500"), "income_class"] = 6 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 1 800"), "income_class"] = 7 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 2 000"), 
"income_class"] = 8 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 2 500"), "income_class"] = 9 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 3 000"), "income_class"] = 10 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 4 000"), "income_class"] = 11 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("De 6 000"), "income_class"] = 12 - df_households.loc[df_households["TrancheRevenuMensuel"].str.startswith("10 000"), "income_class"] = 13 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("Moins de 400"), + "income_class", + ] = 0 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 400"), "income_class" + ] = 1 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 600"), "income_class" + ] = 2 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 800"), "income_class" + ] = 3 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 1 000"), "income_class" + ] = 4 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 1 200"), "income_class" + ] = 5 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 1 500"), "income_class" + ] = 6 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 1 800"), "income_class" + ] = 7 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 2 000"), "income_class" + ] = 8 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 2 500"), "income_class" + ] = 9 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 3 000"), "income_class" + ] = 10 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 4 000"), "income_class" + ] = 11 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("De 6 000"), "income_class" + ] = 12 + df_households.loc[ + df_households["TrancheRevenuMensuel"].str.startswith("10 000"), "income_class" + ] = 13 df_households["income_class"] = df_households["income_class"].astype(int) # Trip purpose @@ -173,11 +249,13 @@ def execute(context): for prefix, activity_type in PURPOSE_MAP: df_trips.loc[ - df_trips["V2_MMOTIFDES"].astype(str).str.startswith(prefix), "following_purpose" + df_trips["V2_MMOTIFDES"].astype(str).str.startswith(prefix), + "following_purpose", ] = activity_type df_trips.loc[ - df_trips["V2_MMOTIFORI"].astype(str).str.startswith(prefix), "preceding_purpose" + df_trips["V2_MMOTIFORI"].astype(str).str.startswith(prefix), + "preceding_purpose", ] = activity_type df_trips["following_purpose"] = df_trips["following_purpose"].astype("category") @@ -187,15 +265,17 @@ def execute(context): df_trips["mode"] = "pt" for prefix, mode in MODES_MAP: - df_trips.loc[ - df_trips["V2_MTP"].astype(str).str.startswith(prefix), "mode" - ] = mode + df_trips.loc[df_trips["V2_MTP"].astype(str).str.startswith(prefix), "mode"] = ( + mode + ) df_trips["mode"] = df_trips["mode"].astype("category") # Further trip attributes df_trips["routed_distance"] = df_trips["V2_MDISTTOT"] * 1000.0 - df_trips["routed_distance"] = df_trips["routed_distance"].fillna(0.0) # This should be just one within Île-de-France + df_trips["routed_distance"] = df_trips["routed_distance"].fillna( + 0.0 + ) # This should be just one within Île-de-France # Only leave weekday trips f = df_trips["V2_TYPJOUR"] == 1 @@ -205,10 +285,14 @@ def execute(context): 
# Only leave one day per person initial_count = len(df_trips) - df_first_day = df_trips[["person_id", "IDENT_JOUR"]].sort_values( - by = ["person_id", "IDENT_JOUR"] - ).drop_duplicates("person_id") - df_trips = pd.merge(df_trips, df_first_day, how = "inner", on = ["person_id", "IDENT_JOUR"]) + df_first_day = ( + df_trips[["person_id", "IDENT_JOUR"]] + .sort_values(by=["person_id", "IDENT_JOUR"]) + .drop_duplicates("person_id") + ) + df_trips = pd.merge( + df_trips, df_first_day, how="inner", on=["person_id", "IDENT_JOUR"] + ) final_count = len(df_trips) print("Removed %d trips for non-primary days" % (initial_count - final_count)) @@ -217,7 +301,9 @@ def execute(context): df_trips = hts.compute_first_last(df_trips) # Trip times - df_trips["departure_time"] = df_trips["V2_MORIHDEP"].apply(convert_time).astype(float) + df_trips["departure_time"] = ( + df_trips["V2_MORIHDEP"].apply(convert_time).astype(float) + ) df_trips["arrival_time"] = df_trips["V2_MDESHARR"].apply(convert_time).astype(float) df_trips = hts.fix_trip_times(df_trips) @@ -230,11 +316,17 @@ def execute(context): # Chain length df_persons = pd.merge( - df_persons, df_trips[["person_id", "NDEP"]].drop_duplicates("person_id").rename(columns = { "NDEP": "number_of_trips" }), - on = "person_id", how = "left" + df_persons, + df_trips[["person_id", "NDEP"]] + .drop_duplicates("person_id") + .rename(columns={"NDEP": "number_of_trips"}), + on="person_id", + how="left", ) df_persons["number_of_trips"] = df_persons["number_of_trips"].fillna(-1).astype(int) - df_persons.loc[(df_persons["number_of_trips"] == -1) & df_persons["is_kish"], "number_of_trips"] = 0 + df_persons.loc[ + (df_persons["number_of_trips"] == -1) & df_persons["is_kish"], "number_of_trips" + ] = 0 # Passenger attribute df_persons["is_passenger"] = df_persons["person_id"].isin( @@ -243,18 +335,23 @@ def execute(context): # Calculate consumption units hts.check_household_size(df_households, df_persons) - df_households = pd.merge(df_households, hts.calculate_consumption_units(df_persons), on = "household_id") + df_households = pd.merge( + df_households, hts.calculate_consumption_units(df_persons), on="household_id" + ) # Socioprofessional class - df_persons["socioprofessional_class"] = df_persons["CS24"].fillna(80).astype(int) // 10 + df_persons["socioprofessional_class"] = ( + df_persons["CS24"].fillna(80).astype(int) // 10 + ) # Fix activity types (because of 1 inconsistent ENTD data) hts.fix_activity_types(df_trips) return df_households, df_persons, df_trips + def calculate_income_class(df): assert "household_income" in df assert "consumption_units" in df - return np.digitize(df["household_income"], INCOME_CLASS_BOUNDS, right = True) + return np.digitize(df["household_income"], INCOME_CLASS_BOUNDS, right=True) diff --git a/data/hts/entd/filtered.py b/data/hts/entd/filtered.py index e9bb2ca1..71d36485 100644 --- a/data/hts/entd/filtered.py +++ b/data/hts/entd/filtered.py @@ -6,17 +6,20 @@ Île-de-France. 
""" + def configure(context): context.stage("data.hts.entd.cleaned") context.stage("data.spatial.codes") - context.config("filter_hts",True) + context.config("filter_hts", True) + + def execute(context): - filter_entd = context.config("filter_hts") + filter_entd = context.config("filter_hts") df_codes = context.stage("data.spatial.codes") df_households, df_persons, df_trips = context.stage("data.hts.entd.cleaned") - if filter_entd : + if filter_entd: # Filter for non-residents requested_departments = df_codes["departement_id"].unique() f = df_persons["departement_id"].astype(str).isin(requested_departments) @@ -25,18 +28,31 @@ def execute(context): # Filter for people going outside of the area (because they have NaN distances) remove_ids = set() - remove_ids |= set(df_trips[ - ~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments) - ]["person_id"].unique()) + remove_ids |= set( + df_trips[ + ~df_trips["origin_departement_id"] + .astype(str) + .isin(requested_departments) + | ~df_trips["destination_departement_id"] + .astype(str) + .isin(requested_departments) + ]["person_id"].unique() + ) df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)] # Only keep trips and households that still have a person - df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())] - df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])] + df_trips = df_trips[ + df_trips["person_id"].isin(df_persons["person_id"].unique()) + ] + df_households = df_households[ + df_households["household_id"].isin(df_persons["household_id"]) + ] # Finish up - df_households = df_households[hts.HOUSEHOLD_COLUMNS + ["urban_type", "income_class"]] + df_households = df_households[ + hts.HOUSEHOLD_COLUMNS + ["urban_type", "income_class"] + ] df_persons = df_persons[hts.PERSON_COLUMNS] df_trips = df_trips[hts.TRIP_COLUMNS + ["routed_distance"]] diff --git a/data/hts/entd/raw.py b/data/hts/entd/raw.py index f4bdd91a..16b1ab85 100644 --- a/data/hts/entd/raw.py +++ b/data/hts/entd/raw.py @@ -7,80 +7,132 @@ """ Q_MENAGE_COLUMNS = [ - "DEP", "idENT_MEN", "PONDV1", "RG", + "DEP", + "idENT_MEN", + "PONDV1", + "RG", "V1_JNBVELOADT", - "V1_JNBVEH", "V1_JNBMOTO", "V1_JNBCYCLO" + "V1_JNBVEH", + "V1_JNBMOTO", + "V1_JNBCYCLO", ] Q_TCM_MENAGE_COLUMNS = [ - "NPERS", "PONDV1", "TrancheRevenuMensuel", - "DEP", "idENT_MEN", "RG", "numcom_UU2010" + "NPERS", + "PONDV1", + "TrancheRevenuMensuel", + "DEP", + "idENT_MEN", + "RG", + "numcom_UU2010", ] Q_INDIVIDU_COLUMNS = [ - "IDENT_IND", "idENT_MEN", - "RG", "V1_GPERMIS", "V1_ICARTABON", - "V1_GPERMIS2R" + "IDENT_IND", + "idENT_MEN", + "RG", + "V1_GPERMIS", + "V1_ICARTABON", + "V1_GPERMIS2R", ] Q_TCM_INDIVIDU_COLUMNS = [ - "AGE", "ETUDES", "IDENT_IND", "IDENT_MEN", - "PONDV1", "CS24", "SEXE", "DEP", "SITUA", + "AGE", + "ETUDES", + "IDENT_IND", + "IDENT_MEN", + "PONDV1", + "CS24", + "SEXE", + "DEP", + "SITUA", ] K_DEPLOC_COLUMNS = [ - "IDENT_IND", "V2_MMOTIFDES", "V2_MMOTIFORI", - "V2_TYPJOUR", "V2_MORIHDEP", "V2_MDESHARR", "V2_MDISTTOT", - "IDENT_JOUR", "V2_MTP", - "V2_MDESDEP", "V2_MORIDEP", "NDEP", "V2_MOBILREF", - "PONDKI" + "IDENT_IND", + "V2_MMOTIFDES", + "V2_MMOTIFORI", + "V2_TYPJOUR", + "V2_MORIHDEP", + "V2_MDESHARR", + "V2_MDISTTOT", + "IDENT_JOUR", + "V2_MTP", + "V2_MDESDEP", + "V2_MORIDEP", + "NDEP", + "V2_MOBILREF", + "PONDKI", ] + def configure(context): context.config("data_path") + def execute(context): df_individu = 
pd.read_csv( "%s/entd_2008/Q_individu.csv" % context.config("data_path"), - sep = ";", encoding = "latin1", usecols = Q_INDIVIDU_COLUMNS, - dtype = { "DEP": str } + sep=";", + encoding="latin1", + usecols=Q_INDIVIDU_COLUMNS, + dtype={"DEP": str}, ) df_tcm_individu = pd.read_csv( "%s/entd_2008/Q_tcm_individu.csv" % context.config("data_path"), - sep = ";", encoding = "latin1", usecols = Q_TCM_INDIVIDU_COLUMNS, - dtype = { "DEP": str } + sep=";", + encoding="latin1", + usecols=Q_TCM_INDIVIDU_COLUMNS, + dtype={"DEP": str}, ) df_menage = pd.read_csv( "%s/entd_2008/Q_menage.csv" % context.config("data_path"), - sep = ";", encoding = "latin1", usecols = Q_MENAGE_COLUMNS, - dtype = { "DEP": str } + sep=";", + encoding="latin1", + usecols=Q_MENAGE_COLUMNS, + dtype={"DEP": str}, ) df_tcm_menage = pd.read_csv( "%s/entd_2008/Q_tcm_menage_0.csv" % context.config("data_path"), - sep = ";", encoding = "latin1", usecols = Q_TCM_MENAGE_COLUMNS, - dtype = { "DEP": str } + sep=";", + encoding="latin1", + usecols=Q_TCM_MENAGE_COLUMNS, + dtype={"DEP": str}, ) df_deploc = pd.read_csv( "%s/entd_2008/K_deploc.csv" % context.config("data_path"), - sep = ";", encoding = "latin1", usecols = K_DEPLOC_COLUMNS, - dtype = { "DEP": str, "V2_MTP": str } + sep=";", + encoding="latin1", + usecols=K_DEPLOC_COLUMNS, + dtype={"DEP": str, "V2_MTP": str}, ) return df_individu, df_tcm_individu, df_menage, df_tcm_menage, df_deploc + def validate(context): - for name in ("Q_individu.csv", "Q_tcm_individu.csv", "Q_menage.csv", "Q_tcm_menage_0.csv", "K_deploc.csv"): + for name in ( + "Q_individu.csv", + "Q_tcm_individu.csv", + "Q_menage.csv", + "Q_tcm_menage_0.csv", + "K_deploc.csv", + ): if not os.path.exists("%s/entd_2008/%s" % (context.config("data_path"), name)): raise RuntimeError("File missing from ENTD: %s" % name) return [ os.path.getsize("%s/entd_2008/Q_individu.csv" % context.config("data_path")), - os.path.getsize("%s/entd_2008/Q_tcm_individu.csv" % context.config("data_path")), + os.path.getsize( + "%s/entd_2008/Q_tcm_individu.csv" % context.config("data_path") + ), os.path.getsize("%s/entd_2008/Q_menage.csv" % context.config("data_path")), - os.path.getsize("%s/entd_2008/Q_tcm_menage_0.csv" % context.config("data_path")), - os.path.getsize("%s/entd_2008/K_deploc.csv" % context.config("data_path")) + os.path.getsize( + "%s/entd_2008/Q_tcm_menage_0.csv" % context.config("data_path") + ), + os.path.getsize("%s/entd_2008/K_deploc.csv" % context.config("data_path")), ] diff --git a/data/hts/entd/reweighted.py b/data/hts/entd/reweighted.py index 517a3ca9..2367e68d 100644 --- a/data/hts/entd/reweighted.py +++ b/data/hts/entd/reweighted.py @@ -1,8 +1,10 @@ import numpy as np + def configure(context): context.stage("data.hts.entd.filtered") + def execute(context): df_households, df_persons, df_trips = context.stage("data.hts.entd.filtered") diff --git a/data/hts/hts.py b/data/hts/hts.py index 86bc0365..59ce73e7 100644 --- a/data/hts/hts.py +++ b/data/hts/hts.py @@ -1,6 +1,7 @@ import pandas as pd import numpy as np + def swap_departure_arrival_times(df, f): assert "arrival_time" in df assert "departure_time" in df @@ -11,6 +12,7 @@ def swap_departure_arrival_times(df, f): df.loc[f, "departure_time"] = arrival_times df.loc[f, "arrival_time"] = departure_times + def fix_trip_times(df_trips): """ - Negative duration: @@ -22,7 +24,16 @@ def fix_trip_times(df_trips): - Intresecting trips """ - columns = ["trip_id", "person_id", "departure_time", "arrival_time", "preceding_purpose", "following_purpose", "is_first_trip", 
"is_last_trip"] + columns = [ + "trip_id", + "person_id", + "departure_time", + "arrival_time", + "preceding_purpose", + "following_purpose", + "is_first_trip", + "is_last_trip", + ] df_main = df_trips df_next = df_main.shift(-1) df_previous = df_main.shift(1) @@ -33,9 +44,16 @@ def fix_trip_times(df_trips): # 1.1) Departure and arrival time may have been swapped, and chain is consistent f_swap = np.copy(f_negative) - f_swap &= (df_main["arrival_time"] > df_previous["arrival_time"]) | df_main["is_first_trip"] - f_swap &= (df_main["departure_time"] < df_next["departure_time"]) | df_main["is_last_trip"] - print(" of which %d can swap departure and arrival time without conflicts with previous or following trip" % np.count_nonzero(f_swap)) + f_swap &= (df_main["arrival_time"] > df_previous["arrival_time"]) | df_main[ + "is_first_trip" + ] + f_swap &= (df_main["departure_time"] < df_next["departure_time"]) | df_main[ + "is_last_trip" + ] + print( + " of which %d can swap departure and arrival time without conflicts with previous or following trip" + % np.count_nonzero(f_swap) + ) swap_departure_arrival_times(df_main, f_swap) f_negative[f_swap] = False @@ -44,13 +62,19 @@ def fix_trip_times(df_trips): # However, the offset duration is unlikely to be a trip over midnight offset = df_main["departure_time"] - df_main["arrival_time"] f_swap = (offset > 0) & (offset < 10 * 3600) - print(" of which %d are unlikely to cover midnight, so we swap arrival and departure time although there are conflicts" % np.count_nonzero(f_swap)) + print( + " of which %d are unlikely to cover midnight, so we swap arrival and departure time although there are conflicts" + % np.count_nonzero(f_swap) + ) swap_departure_arrival_times(df_main, f_swap) f_negative[f_swap] = False # 1.3) Covering midnight -> Shift arrival time - print(" of which %d seem to cover midnight, so we shift arrival time by 24h" % np.count_nonzero(f_negative)) + print( + " of which %d seem to cover midnight, so we shift arrival time by 24h" + % np.count_nonzero(f_negative) + ) df_main.loc[f_negative, "arrival_time"] += 24 * 3600.0 # 2) Current trip is after following trip @@ -83,10 +107,16 @@ def fix_trip_times(df_trips): # Intersecting trips f = ~df_main["is_last_trip"] f &= df_main["arrival_time"] > df_next["departure_time"] - print("Found %d occurences where current trip ends after next trip starts" % np.count_nonzero(f)) + print( + "Found %d occurences where current trip ends after next trip starts" + % np.count_nonzero(f) + ) f &= df_main["departure_time"] <= df_next["departure_time"] - print(" of which we're able to shorten %d to make it consistent" % np.count_nonzero(f)) + print( + " of which we're able to shorten %d to make it consistent" + % np.count_nonzero(f) + ) df_main.loc[f, "arrival_time"] = df_next["departure_time"] # Included trips (moving the first one to the start of the following trip and setting duration to zero) @@ -95,10 +125,14 @@ def fix_trip_times(df_trips): f &= df_main["arrival_time"] <= df_next["arrival_time"] df_main.loc[f, "departure_time"] = df_next["departure_time"] df_main.loc[f, "arrival_time"] = df_next["departure_time"] - print("Found %d occurences where current trip is included in next trip" % np.count_nonzero(f)) + print( + "Found %d occurences where current trip is included in next trip" + % np.count_nonzero(f) + ) return df_main + def check_trip_times(df_trips): print("Validating trip times...") any_errors = False @@ -168,31 +202,43 @@ def check_trip_times(df_trips): print(" => All trip times are consistent!") 
return True + def fix_activity_types(df_trips): - f = (df_trips["preceding_purpose"] != df_trips["following_purpose"].shift(1)) & ~df_trips["is_first_trip"] - df_trips.loc[f, "preceding_purpose"] = df_trips.shift(1)["following_purpose"][f].values + f = ( + df_trips["preceding_purpose"] != df_trips["following_purpose"].shift(1) + ) & ~df_trips["is_first_trip"] + df_trips.loc[f, "preceding_purpose"] = df_trips.shift(1)["following_purpose"][ + f + ].values print("Fixing %d inconsistent activity types" % np.count_nonzero(f)) check_activity_types(df_trips) + def check_activity_types(df_trips): - f = (df_trips["following_purpose"] != df_trips["preceding_purpose"].shift(-1)) & ~df_trips["is_last_trip"] - f |= (df_trips["following_purpose"].shift(1) != df_trips["preceding_purpose"]) & ~df_trips["is_first_trip"] + f = ( + df_trips["following_purpose"] != df_trips["preceding_purpose"].shift(-1) + ) & ~df_trips["is_last_trip"] + f |= ( + df_trips["following_purpose"].shift(1) != df_trips["preceding_purpose"] + ) & ~df_trips["is_first_trip"] error_count = np.count_nonzero(f) print("Trips with inconsistent activity types: %d" % error_count) return error_count == 0 + def compute_first_last(df_trips): assert "person_id" in df_trips - df_trips = df_trips.sort_values(by = ["person_id", "trip_id"]) + df_trips = df_trips.sort_values(by=["person_id", "trip_id"]) df_trips["is_first_trip"] = df_trips["person_id"].ne(df_trips["person_id"].shift(1)) df_trips["is_last_trip"] = df_trips["person_id"].ne(df_trips["person_id"].shift(-1)) return df_trips + def compute_activity_duration(df_trips): assert "departure_time" in df_trips assert "arrival_time" in df_trips @@ -201,13 +247,17 @@ def compute_activity_duration(df_trips): df_trips["activity_duration"] = df_next["departure_time"] - df_trips["arrival_time"] df_trips.loc[df_trips["is_last_trip"], "activity_duration"] = np.nan + def check_household_size(df_households, df_persons): - df_size = df_persons.groupby("household_id").size().reset_index(name = "count") - df_size = pd.merge(df_households[["household_id", "household_size"]], df_size, on = "household_id") + df_size = df_persons.groupby("household_id").size().reset_index(name="count") + df_size = pd.merge( + df_households[["household_id", "household_size"]], df_size, on="household_id" + ) assert len(df_size) == len(df_households) assert (df_size["household_size"] == df_size["count"]).all() + def calculate_consumption_units(df_persons): df_units = df_persons[["household_id", "age"]].copy() df_units["under_14"] = df_units["age"] < 14 @@ -220,28 +270,52 @@ def calculate_consumption_units(df_persons): return df_units[["household_id", "consumption_units"]] + HOUSEHOLD_COLUMNS = [ - "household_id", "household_weight", "household_size", - "number_of_vehicles", "number_of_bikes", "departement_id", - "consumption_units", # "income_class" + "household_id", + "household_weight", + "household_size", + "number_of_vehicles", + "number_of_bikes", + "departement_id", + "consumption_units", # "income_class" ] PERSON_COLUMNS = [ - "person_id", "household_id", "person_weight", - "age", "sex", "employed", "studies", - "has_license", "has_pt_subscription", - "number_of_trips", "departement_id", "trip_weight", - "is_passenger", "socioprofessional_class" + "person_id", + "household_id", + "person_weight", + "age", + "sex", + "employed", + "studies", + "has_license", + "has_pt_subscription", + "number_of_trips", + "departement_id", + "trip_weight", + "is_passenger", + "socioprofessional_class", ] TRIP_COLUMNS = [ - "person_id", 
"trip_id", "trip_weight", - "departure_time", "arrival_time", - "trip_duration", "activity_duration", - "following_purpose", "preceding_purpose", "is_last_trip", "is_first_trip", - "mode", "origin_departement_id", "destination_departement_id" + "person_id", + "trip_id", + "trip_weight", + "departure_time", + "arrival_time", + "trip_duration", + "activity_duration", + "following_purpose", + "preceding_purpose", + "is_last_trip", + "is_first_trip", + "mode", + "origin_departement_id", + "destination_departement_id", ] + def check(df_households, df_persons, df_trips): assert check_trip_times(df_trips) assert check_activity_types(df_trips) diff --git a/data/hts/output.py b/data/hts/output.py index cee14cad..1ee0eca3 100644 --- a/data/hts/output.py +++ b/data/hts/output.py @@ -9,23 +9,34 @@ pipeline. """ + def configure(context): context.stage("data.hts.selected") context.config("output_path") context.config("output_prefix", "ile_de_france_") + def execute(context): df_households, df_persons, df_trips = context.stage("data.hts.selected") - df_households.to_csv("%s/%shts_households.csv" % ( - context.config("output_path"), context.config("output_prefix") - ), sep = ";", index = False) + df_households.to_csv( + "%s/%shts_households.csv" + % (context.config("output_path"), context.config("output_prefix")), + sep=";", + index=False, + ) - df_persons.to_csv("%s/%shts_persons.csv" % ( - context.config("output_path"), context.config("output_prefix") - ), sep = ";", index = False) + df_persons.to_csv( + "%s/%shts_persons.csv" + % (context.config("output_path"), context.config("output_prefix")), + sep=";", + index=False, + ) - df_trips.to_csv("%s/%shts_trips.csv" % ( - context.config("output_path"), context.config("output_prefix") - ), sep = ";", index = False) + df_trips.to_csv( + "%s/%shts_trips.csv" + % (context.config("output_path"), context.config("output_prefix")), + sep=";", + index=False, + ) diff --git a/data/hts/selected.py b/data/hts/selected.py index d5c5bd43..1832fbc7 100644 --- a/data/hts/selected.py +++ b/data/hts/selected.py @@ -1,19 +1,21 @@ import pandas as pd import numpy as np + def configure(context): hts = context.config("hts") if hts == "egt": - context.stage("data.hts.egt.filtered", alias = "hts") + context.stage("data.hts.egt.filtered", alias="hts") elif hts == "entd": - context.stage("data.hts.entd.reweighted", alias = "hts") + context.stage("data.hts.entd.reweighted", alias="hts") elif hts == "edgt_lyon": - context.stage("data.hts.edgt_lyon.reweighted", alias = "hts") + context.stage("data.hts.edgt_lyon.reweighted", alias="hts") elif hts == "edgt_44": - context.stage("data.hts.edgt_44.reweighted", alias = "hts") + context.stage("data.hts.edgt_44.reweighted", alias="hts") else: raise RuntimeError("Unknown HTS: %s" % hts) + def execute(context): return context.stage("hts") diff --git a/data/income/municipality.py b/data/income/municipality.py index 7bf65015..76fbb2f2 100644 --- a/data/income/municipality.py +++ b/data/income/municipality.py @@ -19,21 +19,44 @@ EQASIM_INCOME_ATTRIBUTES = ["size", "family_comp"] # final columns of the income DataFrame -INCOME_DF_COLUMNS = ["commune_id", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "attribute", "value", "is_imputed", "is_missing", "reference_median"] +INCOME_DF_COLUMNS = [ + "commune_id", + "q1", + "q2", + "q3", + "q4", + "q5", + "q6", + "q7", + "q8", + "q9", + "attribute", + "value", + "is_imputed", + "is_missing", + "reference_median", +] def configure(context): context.config("data_path") 
context.stage("data.spatial.municipalities") - context.config("income_com_path", "filosofi_2019/indic-struct-distrib-revenu-2019-COMMUNES.zip") + context.config( + "income_com_path", "filosofi_2019/indic-struct-distrib-revenu-2019-COMMUNES.zip" + ) context.config("income_com_xlsx", "FILO2019_DISP_COM.xlsx") context.config("income_year", 19) -def _income_distributions_from_filosofi_ensemble_sheet(filsofi_sheets, year, df_municipalities): +def _income_distributions_from_filosofi_ensemble_sheet( + filsofi_sheets, year, df_municipalities +): requested_communes = set(df_municipalities["commune_id"].unique()) - df = filsofi_sheets["ENSEMBLE"][["CODGEO"] + [("D%d" % q) + year if q != 5 else "Q2" + year for q in range(1, 10)]] + df = filsofi_sheets["ENSEMBLE"][ + ["CODGEO"] + + [("D%d" % q) + year if q != 5 else "Q2" + year for q in range(1, 10)] + ] df.columns = ["commune_id", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9"] df.loc[:, "reference_median"] = df["q5"].values @@ -42,13 +65,21 @@ def _income_distributions_from_filosofi_ensemble_sheet(filsofi_sheets, year, df_ # Find communes without data df["commune_id"] = df["commune_id"].astype("category") - missing_communes = set(df_municipalities["commune_id"].unique()) - set(df["commune_id"].unique()) - print("Found %d/%d municipalities that are missing" % (len(missing_communes), len(requested_communes))) + missing_communes = set(df_municipalities["commune_id"].unique()) - set( + df["commune_id"].unique() + ) + print( + "Found %d/%d municipalities that are missing" + % (len(missing_communes), len(requested_communes)) + ) # Find communes without full distribution df["is_imputed"] = df["q2"].isna() df["is_missing"] = False - print("Found %d/%d municipalities which do not have full distribution" % (sum(df["is_imputed"]), len(requested_communes))) + print( + "Found %d/%d municipalities which do not have full distribution" + % (sum(df["is_imputed"]), len(requested_communes)) + ) # First, find suitable distribution for incomplete cases by finding the one with the most similar median incomplete_medians = df[df["is_imputed"]]["q5"].values @@ -56,29 +87,44 @@ def _income_distributions_from_filosofi_ensemble_sheet(filsofi_sheets, year, df_ df_complete = df[~df["is_imputed"]] complete_medians = df_complete["q5"].values - indices = np.argmin(np.abs(complete_medians[:, np.newaxis] - incomplete_medians[np.newaxis, :]), axis = 0) + indices = np.argmin( + np.abs(complete_medians[:, np.newaxis] - incomplete_medians[np.newaxis, :]), + axis=0, + ) for k in range(1, 10): - df.loc[df["is_imputed"], "q%d" % k] = df_complete.iloc[indices]["q%d" % k].values + df.loc[df["is_imputed"], "q%d" % k] = df_complete.iloc[indices][ + "q%d" % k + ].values # Second, add missing municipalities by neirest neighbor # ... build tree of existing communes - df_existing = df_municipalities[df_municipalities["commune_id"].astype(str).isin(df["commune_id"])] # pandas Bug - coordinates = np.vstack([df_existing["geometry"].centroid.x, df_existing["geometry"].centroid.y]).T + df_existing = df_municipalities[ + df_municipalities["commune_id"].astype(str).isin(df["commune_id"]) + ] # pandas Bug + coordinates = np.vstack( + [df_existing["geometry"].centroid.x, df_existing["geometry"].centroid.y] + ).T kd_tree = KDTree(coordinates) # ... 
query tree for missing communes - df_missing = df_municipalities[df_municipalities["commune_id"].astype(str).isin(missing_communes)] # pandas Bug + df_missing = df_municipalities[ + df_municipalities["commune_id"].astype(str).isin(missing_communes) + ] # pandas Bug if len(df_missing) > 0: - coordinates = np.vstack([df_missing["geometry"].centroid.x, df_missing["geometry"].centroid.y]).T + coordinates = np.vstack( + [df_missing["geometry"].centroid.x, df_missing["geometry"].centroid.y] + ).T indices = kd_tree.query(coordinates)[1].flatten() # ... build data frame of imputed communes - df_reconstructed = pd.concat([ - df[df["commune_id"] == df_existing.iloc[index]["commune_id"]] - for index in indices - ]) + df_reconstructed = pd.concat( + [ + df[df["commune_id"] == df_existing.iloc[index]["commune_id"]] + for index in indices + ] + ) df_reconstructed["commune_id"] = df_missing["commune_id"].values df_reconstructed["is_imputed"] = True df_reconstructed["is_missing"] = True @@ -97,11 +143,15 @@ def _income_distributions_from_filosofi_ensemble_sheet(filsofi_sheets, year, df_ return df[INCOME_DF_COLUMNS] -def _income_distributions_from_filosofi_attribute_sheets(filsofi_sheets, year, df_municipalities, attributes): +def _income_distributions_from_filosofi_attribute_sheets( + filsofi_sheets, year, df_municipalities, attributes +): requested_communes = set(df_municipalities["commune_id"].unique()) # read attributes - df_with_attributes = read_filosofi_attributes(filsofi_sheets, year, attributes, requested_communes) + df_with_attributes = read_filosofi_attributes( + filsofi_sheets, year, attributes, requested_communes + ) df_with_attributes.rename( columns={ @@ -139,8 +189,8 @@ def _read_filosofi_excel(context): sheet_list = sheet_list + [x["sheet"] for x in attr["modalities"]] # open and read income data file - with zipfile.ZipFile("{}/{}".format( - context.config("data_path"), context.config("income_com_path")) + with zipfile.ZipFile( + "{}/{}".format(context.config("data_path"), context.config("income_com_path")) ) as archive: with archive.open(context.config("income_com_xlsx")) as f: df = pd.read_excel(f, sheet_name=sheet_list, skiprows=5) @@ -159,17 +209,25 @@ def execute(context): filosofi_excel, attributes = _read_filosofi_excel(context) # Read ENSEMBLE sheet: global distributions, by commune - ensemble_distributions = _income_distributions_from_filosofi_ensemble_sheet(filosofi_excel, year, df_municipalities) + ensemble_distributions = _income_distributions_from_filosofi_ensemble_sheet( + filosofi_excel, year, df_municipalities + ) # Read attribute sheets: distributions on individuals with specific attribute values # (ex: sheet TYPMENR_2 corresponds to households with `family_comp`=`Single_wom`) - attribute_distributions = _income_distributions_from_filosofi_attribute_sheets(filosofi_excel, year, df_municipalities, attributes) + attribute_distributions = _income_distributions_from_filosofi_attribute_sheets( + filosofi_excel, year, df_municipalities, attributes + ) return pd.concat([ensemble_distributions, attribute_distributions]) def validate(context): - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("income_com_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("income_com_path")) + ): raise RuntimeError("Municipality Filosofi data is not available") - return os.path.getsize("%s/%s" % (context.config("data_path"), context.config("income_com_path"))) + return os.path.getsize( + "%s/%s" % (context.config("data_path"), 
context.config("income_com_path")) + ) diff --git a/data/income/region.py b/data/income/region.py index 29643d0c..bb062ae3 100644 --- a/data/income/region.py +++ b/data/income/region.py @@ -6,19 +6,22 @@ Loads the regional aggregated income distribution. """ + def configure(context): context.config("data_path") - context.config("income_reg_path", "filosofi_2019/indic-struct-distrib-revenu-2019-SUPRA.zip") + context.config( + "income_reg_path", "filosofi_2019/indic-struct-distrib-revenu-2019-SUPRA.zip" + ) context.config("income_reg_xlsx", "FILO2019_DISP_REG.xlsx") context.config("income_year", 19) + def execute(context): - with zipfile.ZipFile("{}/{}".format( - context.config("data_path"), context.config("income_reg_path"))) as archive: + with zipfile.ZipFile( + "{}/{}".format(context.config("data_path"), context.config("income_reg_path")) + ) as archive: with archive.open(context.config("income_reg_xlsx")) as f: - df = pd.read_excel(f, - sheet_name = "ENSEMBLE", skiprows = 5 - ) + df = pd.read_excel(f, sheet_name="ENSEMBLE", skiprows=5) values = df[df["CODGEO"] == 11][ [ @@ -29,8 +32,13 @@ def execute(context): return values + def validate(context): - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("income_reg_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("income_reg_path")) + ): raise RuntimeError("Regional Filosofi data is not available") - return os.path.getsize("%s/%s" % (context.config("data_path"), context.config("income_reg_path"))) + return os.path.getsize( + "%s/%s" % (context.config("data_path"), context.config("income_reg_path")) + ) diff --git a/data/od/cleaned.py b/data/od/cleaned.py index e13348f1..2690cab9 100644 --- a/data/od/cleaned.py +++ b/data/od/cleaned.py @@ -6,25 +6,34 @@ and education. 
""" + def configure(context): context.stage("data.od.raw") context.stage("data.spatial.codes") -RENAME = { "COMMUNE" : "origin_id", "DCLT" : "destination_id", "IPONDI" : "weight", "DCETUF" : "destination_id" } + +RENAME = { + "COMMUNE": "origin_id", + "DCLT": "destination_id", + "IPONDI": "weight", + "DCETUF": "destination_id", +} + def execute(context): - - + # Load data df_work, df_education = context.stage("data.od.raw") # Renaming - df_work = df_work.rename(RENAME, axis = 1) - df_education = df_education.rename(RENAME, axis = 1) + df_work = df_work.rename(RENAME, axis=1) + df_education = df_education.rename(RENAME, axis=1) # Fix arrondissements df_work.loc[~df_work["ARM"].str.contains("Z"), "origin_id"] = df_work["ARM"] - df_education.loc[~df_education["ARM"].str.contains("Z"), "origin_id"] = df_education["ARM"] + df_education.loc[~df_education["ARM"].str.contains("Z"), "origin_id"] = ( + df_education["ARM"] + ) # Verify spatial data for work df_codes = context.stage("data.spatial.codes") @@ -32,7 +41,9 @@ def execute(context): df_work["origin_id"] = df_work["origin_id"].astype("category") df_work["destination_id"] = df_work["destination_id"].astype("category") - excess_communes = (set(df_work["origin_id"].unique()) | set(df_work["destination_id"].unique())) - set(df_codes["commune_id"].unique()) + excess_communes = ( + set(df_work["origin_id"].unique()) | set(df_work["destination_id"].unique()) + ) - set(df_codes["commune_id"].unique()) if len(excess_communes) > 0: raise RuntimeError("Found additional communes: %s" % excess_communes) @@ -42,7 +53,10 @@ def execute(context): df_education["origin_id"] = df_education["origin_id"].astype("category") df_education["destination_id"] = df_education["destination_id"].astype("category") - excess_communes = (set(df_education["origin_id"].unique()) | set(df_education["destination_id"].unique())) - set(df_codes["commune_id"].unique()) + excess_communes = ( + set(df_education["origin_id"].unique()) + | set(df_education["destination_id"].unique()) + ) - set(df_codes["commune_id"].unique()) if len(excess_communes) > 0: raise RuntimeError("Found additional communes: %s" % excess_communes) @@ -55,7 +69,7 @@ def execute(context): df_work.loc[df_work["TRANS"] == 5, "commute_mode"] = "car" df_work.loc[df_work["TRANS"] == 6, "commute_mode"] = "pt" df_work["commute_mode"] = df_work["commute_mode"].astype("category") - + assert not np.any(df_work["commute_mode"].isna()) # Clean age range for education @@ -65,15 +79,23 @@ def execute(context): df_education.loc[df_education["AGEREV10"] == 15, "age_range"] = "high_school" df_education.loc[df_education["AGEREV10"] >= 18, "age_range"] = "higher_education" df_education["age_range"] = df_education["age_range"].astype("category") - + assert not np.any(df_education["age_range"].isna()) # Aggregate the flows print("Aggregating work ...") - df_work = df_work.groupby(["origin_id", "destination_id", "commute_mode"])["weight"].sum().reset_index() + df_work = ( + df_work.groupby(["origin_id", "destination_id", "commute_mode"])["weight"] + .sum() + .reset_index() + ) print("Aggregating education ...") - df_education = df_education.groupby(["origin_id", "destination_id","age_range"])["weight"].sum().reset_index() + df_education = ( + df_education.groupby(["origin_id", "destination_id", "age_range"])["weight"] + .sum() + .reset_index() + ) df_work["weight"] = df_work["weight"].fillna(0.0) df_education["weight"] = df_education["weight"].fillna(0.0) diff --git a/data/od/raw.py b/data/od/raw.py index 41bc515b..fb70cce9 100644 
--- a/data/od/raw.py +++ b/data/od/raw.py @@ -6,6 +6,7 @@ Loads raw OD data from French census data. """ + def configure(context): context.stage("data.spatial.codes") context.config("data_path") @@ -14,27 +15,34 @@ def configure(context): context.config("od_pro_csv", "FD_MOBPRO_2019.csv") context.config("od_sco_csv", "FD_MOBSCO_2019.csv") + def execute(context): df_codes = context.stage("data.spatial.codes") requested_communes = df_codes["commune_id"].unique() # First, load work - with context.progress(label = "Reading work flows ...") as progress: + with context.progress(label="Reading work flows ...") as progress: df_records = [] COLUMNS_DTYPES = { - "COMMUNE":"str", - "ARM":"str", - "TRANS":"int", - "IPONDI":"float", - "DCLT":"str" + "COMMUNE": "str", + "ARM": "str", + "TRANS": "int", + "IPONDI": "float", + "DCLT": "str", } with zipfile.ZipFile( - "{}/{}".format(context.config("data_path"), context.config("od_pro_path"))) as archive: + "{}/{}".format(context.config("data_path"), context.config("od_pro_path")) + ) as archive: with archive.open(context.config("od_pro_csv")) as f: - csv = pd.read_csv(f, usecols = COLUMNS_DTYPES.keys(), - dtype = COLUMNS_DTYPES, sep = ";",chunksize = 10240) + csv = pd.read_csv( + f, + usecols=COLUMNS_DTYPES.keys(), + dtype=COLUMNS_DTYPES, + sep=";", + chunksize=10240, + ) for df_chunk in csv: progress.update(len(df_chunk)) @@ -50,22 +58,28 @@ def execute(context): work = pd.concat(df_records) # Second, load education - with context.progress(label = "Reading education flows ...") as progress: + with context.progress(label="Reading education flows ...") as progress: df_records = [] COLUMNS_DTYPES = { - "COMMUNE":"str", - "ARM":"str", - "IPONDI":"float", - "DCETUF":"str", - "AGEREV10":"int" + "COMMUNE": "str", + "ARM": "str", + "IPONDI": "float", + "DCETUF": "str", + "AGEREV10": "int", } with zipfile.ZipFile( - "{}/{}".format(context.config("data_path"), context.config("od_sco_path"))) as archive: + "{}/{}".format(context.config("data_path"), context.config("od_sco_path")) + ) as archive: with archive.open(context.config("od_sco_csv")) as f: - csv = pd.read_csv(f, usecols = COLUMNS_DTYPES.keys(), - dtype = COLUMNS_DTYPES, sep = ";",chunksize = 10240) + csv = pd.read_csv( + f, + usecols=COLUMNS_DTYPES.keys(), + dtype=COLUMNS_DTYPES, + sep=";", + chunksize=10240, + ) for df_chunk in csv: progress.update(len(df_chunk)) @@ -84,13 +98,21 @@ def execute(context): def validate(context): - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("od_pro_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("od_pro_path")) + ): raise RuntimeError("RP MOBPRO data is not available") - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("od_sco_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("od_sco_path")) + ): raise RuntimeError("RP MOBSCO data is not available") return [ - os.path.getsize("%s/%s" % (context.config("data_path"), context.config("od_pro_path"))), - os.path.getsize("%s/%s" % (context.config("data_path"), context.config("od_sco_path"))) + os.path.getsize( + "%s/%s" % (context.config("data_path"), context.config("od_pro_path")) + ), + os.path.getsize( + "%s/%s" % (context.config("data_path"), context.config("od_sco_path")) + ), ] diff --git a/data/od/weighted.py b/data/od/weighted.py index f50702f6..e9c5e86e 100644 --- a/data/od/weighted.py +++ b/data/od/weighted.py @@ -9,13 +9,15 @@ Potential TODO: Do this by mode of transport! 
""" + def configure(context): context.stage("data.od.cleaned") context.stage("data.spatial.codes") - context.config("education_location_source","bpe") + context.config("education_location_source", "bpe") + -def fix_origins(df, commune_ids, purpose,category): +def fix_origins(df, commune_ids, purpose, category): existing_ids = set(np.unique(df["origin_id"])) missing_ids = commune_ids - existing_ids categories = set(np.unique(df[category])) @@ -23,14 +25,27 @@ def fix_origins(df, commune_ids, purpose,category): rows = [] for origin_id in missing_ids: for destination_id in commune_ids: - for category_name in categories : - rows.append((origin_id, destination_id, category_name, 1.0 if origin_id == destination_id else 0.0)) + for category_name in categories: + rows.append( + ( + origin_id, + destination_id, + category_name, + 1.0 if origin_id == destination_id else 0.0, + ) + ) print("Fixing %d origins for %s" % (len(missing_ids), purpose)) - return pd.concat([df, pd.DataFrame.from_records( - rows, columns = ["origin_id", "destination_id", category, "weight"] - )]).sort_values(["origin_id", "destination_id"]) + return pd.concat( + [ + df, + pd.DataFrame.from_records( + rows, columns=["origin_id", "destination_id", category, "weight"] + ), + ] + ).sort_values(["origin_id", "destination_id"]) + def execute(context): df_codes = context.stage("data.spatial.codes") @@ -40,22 +55,44 @@ def execute(context): df_work, df_education = context.stage("data.od.cleaned") # Add missing origins - df_work = fix_origins(df_work, commune_ids, "work","commute_mode") - df_education = fix_origins(df_education, commune_ids, "education","age_range") + df_work = fix_origins(df_work, commune_ids, "work", "commute_mode") + df_education = fix_origins(df_education, commune_ids, "education", "age_range") # Aggregate work (we do not consider different modes at the moment) - df_work = df_work[["origin_id", "destination_id", "weight"]].groupby(["origin_id", "destination_id"]).sum().reset_index() - + df_work = ( + df_work[["origin_id", "destination_id", "weight"]] + .groupby(["origin_id", "destination_id"]) + .sum() + .reset_index() + ) + # Compute totals - df_total = df_work[["origin_id", "weight"]].groupby("origin_id").sum().reset_index().rename({ "weight" : "total" }, axis = 1) - df_work = pd.merge(df_work, df_total, on = "origin_id") + df_total = ( + df_work[["origin_id", "weight"]] + .groupby("origin_id") + .sum() + .reset_index() + .rename({"weight": "total"}, axis=1) + ) + df_work = pd.merge(df_work, df_total, on="origin_id") + + df_total = ( + df_education[["origin_id", "age_range", "weight"]] + .groupby(["origin_id", "age_range"]) + .sum() + .reset_index() + .rename({"weight": "total"}, axis=1) + ) + df_education = pd.merge(df_education, df_total, on=["origin_id", "age_range"]) - df_total = df_education[["origin_id","age_range", "weight"]].groupby(["origin_id","age_range"]).sum().reset_index().rename({ "weight" : "total" }, axis = 1) - df_education = pd.merge(df_education, df_total, on = ["origin_id","age_range"]) - - if context.config("education_location_source") == 'bpe': + if context.config("education_location_source") == "bpe": # Aggregate education (we do not consider different age range with bpe source) - df_education = df_education[["origin_id", "destination_id", "weight","total"]].groupby(["origin_id", "destination_id"]).sum().reset_index() + df_education = ( + df_education[["origin_id", "destination_id", "weight", "total"]] + .groupby(["origin_id", "destination_id"]) + .sum() + .reset_index() + ) # Compute 
weight df_work["weight"] /= df_work["total"] df_education["weight"] /= df_education["total"] @@ -63,5 +100,5 @@ def execute(context): del df_work["total"] del df_education["total"] df_education = df_education.fillna(0.0) - + return df_work, df_education diff --git a/data/osm/cleaned.py b/data/osm/cleaned.py index c15de109..f7fb95cf 100644 --- a/data/osm/cleaned.py +++ b/data/osm/cleaned.py @@ -18,6 +18,7 @@ Additionally, the stage cuts the OSM data to the requested region of the pipeline. """ + def configure(context): context.config("data_path") context.config("osm_path", "osm_idf") @@ -28,11 +29,12 @@ def configure(context): context.stage("data.osm.osmosis") context.stage("data.spatial.municipalities") -def write_poly(df, path, geometry_column = "geometry"): + +def write_poly(df, path, geometry_column="geometry"): df = df.to_crs("EPSG:4326") df["aggregate"] = 0 - area = df.dissolve(by = "aggregate")[geometry_column].values[0] + area = df.dissolve(by="aggregate")[geometry_column].values[0] if not hasattr(area, "exterior"): print("Selected area is not connected -> Using convex hull.") @@ -51,9 +53,12 @@ def write_poly(df, path, geometry_column = "geometry"): with open(path, "w+") as f: f.write("\n".join(data)) + def execute(context): - input_files = get_input_files("{}/{}".format(context.config("data_path"), context.config("osm_path"))) - + input_files = get_input_files( + "{}/{}".format(context.config("data_path"), context.config("osm_path")) + ) + # Prepare bounding area df_area = context.stage("data.spatial.municipalities") write_poly(df_area, "%s/boundary.poly" % context.path()) @@ -70,12 +75,22 @@ def execute(context): absolute_path = os.path.abspath(path) - data.osm.osmosis.run(context, [ - "--read-%s" % mode, absolute_path, - "--tag-filter", "accept-ways", "highway=%s" % highway_tags, "railway=%s" % railway_tags, - "--bounding-polygon", "file=%s/boundary.poly" % context.path(), "completeWays=yes", - "--write-pbf", "filtered_%d.osm.pbf" % index - ]) + data.osm.osmosis.run( + context, + [ + "--read-%s" % mode, + absolute_path, + "--tag-filter", + "accept-ways", + "highway=%s" % highway_tags, + "railway=%s" % railway_tags, + "--bounding-polygon", + "file=%s/boundary.poly" % context.path(), + "completeWays=yes", + "--write-pbf", + "filtered_%d.osm.pbf" % index, + ], + ) # Merge filtered files if there are multiple ones print("Merging and compressing OSM data...") @@ -98,17 +113,23 @@ def execute(context): return "output.osm.gz" + def get_input_files(base_path): osm_paths = sorted(list(glob.glob("{}/*.osm.pbf".format(base_path)))) osm_paths += sorted(list(glob.glob("{}/*.osm.xml".format(base_path)))) if len(osm_paths) == 0: - raise RuntimeError("Did not find any OSM data (.osm.pbf) in {}".format(base_path)) - + raise RuntimeError( + "Did not find any OSM data (.osm.pbf) in {}".format(base_path) + ) + return osm_paths + def validate(context): - input_files = get_input_files("{}/{}".format(context.config("data_path"), context.config("osm_path"))) + input_files = get_input_files( + "{}/{}".format(context.config("data_path"), context.config("osm_path")) + ) total_size = 0 for path in input_files: diff --git a/data/osm/osmosis.py b/data/osm/osmosis.py index 3913ddf4..39959d5e 100644 --- a/data/osm/osmosis.py +++ b/data/osm/osmosis.py @@ -1,15 +1,17 @@ import subprocess as sp import shutil, os + def configure(context): context.config("osmosis_binary", "osmosis") context.config("java_binary", "java") context.config("java_memory", "50G") -def run(context, arguments = [], cwd = None): + +def 
run(context, arguments=[], cwd=None): """ - This function calls osmosis. + This function calls osmosis. """ # Make sure there is a dependency context.stage("data.osm.osmosis") @@ -18,9 +20,7 @@ def run(context, arguments = [], cwd = None): cwd = context.path() # Prepare command line - command_line = [ - shutil.which(context.config("osmosis_binary")) - ] + arguments + command_line = [shutil.which(context.config("osmosis_binary"))] + arguments # Prepare environment environment = os.environ.copy() @@ -28,20 +28,23 @@ def run(context, arguments = [], cwd = None): environment["JAVACMD_OPTIONS"] = "-Xmx%s" % context.config("java_memory") # Run Osmosis - return_code = sp.check_call(command_line, cwd = cwd, env = environment) + return_code = sp.check_call(command_line, cwd=cwd, env=environment) if not return_code == 0: raise RuntimeError("Osmosis return code: %d" % return_code) + def validate(context): if shutil.which(context.config("osmosis_binary")) in ["", None]: - raise RuntimeError("Cannot find Osmosis binary at: %s" % context.config("osmosis_binary")) + raise RuntimeError( + "Cannot find Osmosis binary at: %s" % context.config("osmosis_binary") + ) - if not b"0.48." in sp.check_output([ - shutil.which(context.config("osmosis_binary")), - "-v" - ], stderr = sp.STDOUT): + if not b"0.48." in sp.check_output( + [shutil.which(context.config("osmosis_binary")), "-v"], stderr=sp.STDOUT + ): print("WARNING! Osmosis of at least version 0.48.x is recommended!") + def execute(context): pass diff --git a/data/sirene/cleaned.py b/data/sirene/cleaned.py index 9bef6da5..0c5ef575 100644 --- a/data/sirene/cleaned.py +++ b/data/sirene/cleaned.py @@ -4,26 +4,27 @@ """ Clean the SIRENE enterprise census. """ - + + def configure(context): - context.stage("data.sirene.raw_siren", ephemeral = True) - context.stage("data.sirene.raw_siret", ephemeral = True) + context.stage("data.sirene.raw_siren", ephemeral=True) + context.stage("data.sirene.raw_siret", ephemeral=True) context.stage("data.spatial.codes") context.config("exclude_no_employee", False) + def execute(context): df_sirene_establishments = context.stage("data.sirene.raw_siret") df_sirene_headquarters = context.stage("data.sirene.raw_siren") - # Filter out establishments without a corresponding headquarter - df_sirene = df_sirene_establishments[df_sirene_establishments["siren"].isin(df_sirene_headquarters["siren"])].copy() + df_sirene = df_sirene_establishments[ + df_sirene_establishments["siren"].isin(df_sirene_headquarters["siren"]) + ].copy() # Remove inactive enterprises - df_sirene = df_sirene[ - df_sirene["etatAdministratifEtablissement"] == "A" - ].copy() - + df_sirene = df_sirene[df_sirene["etatAdministratifEtablissement"] == "A"].copy() + if context.config("exclude_no_employee"): # exclude "NN", "00", and NaN df_sirene = df_sirene[ @@ -32,37 +33,93 @@ def execute(context): ].copy() # Define work place weights by person under salary .... 
- df_sirene["minimum_employees"] = 1 # Includes "NN", "00", and NaN - df_sirene["maximum_employees"] = 1 # Includes "NN", "00", and NaN - - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "01", "minimum_employees"] = 1 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "01", "maximum_employees"] = 2 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "02", "minimum_employees"] = 3 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "02", "maximum_employees"] = 5 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "03", "minimum_employees"] = 6 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "03", "maximum_employees"] = 9 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "11", "minimum_employees"] = 10 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "11", "maximum_employees"] = 19 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "12", "minimum_employees"] = 20 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "12", "maximum_employees"] = 49 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "21", "minimum_employees"] = 50 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "21", "maximum_employees"] = 99 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "22", "minimum_employees"] = 100 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "22", "maximum_employees"] = 199 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "31", "minimum_employees"] = 200 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "31", "maximum_employees"] = 249 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "32", "minimum_employees"] = 250 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "32", "maximum_employees"] = 499 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "41", "minimum_employees"] = 500 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "41", "maximum_employees"] = 999 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "42", "minimum_employees"] = 1000 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "42", "maximum_employees"] = 1999 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "51", "minimum_employees"] = 2000 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "51", "maximum_employees"] = 4999 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "52", "minimum_employees"] = 5000 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "52", "maximum_employees"] = 9999 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "53", "minimum_employees"] = 10000 - df_sirene.loc[df_sirene["trancheEffectifsEtablissement"] == "53", "maximum_employees"] = np.inf + df_sirene["minimum_employees"] = 1 # Includes "NN", "00", and NaN + df_sirene["maximum_employees"] = 1 # Includes "NN", "00", and NaN + + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "01", "minimum_employees" + ] = 1 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "01", "maximum_employees" + ] = 2 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "02", "minimum_employees" + ] = 3 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "02", "maximum_employees" + ] = 5 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "03", "minimum_employees" + ] = 6 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "03", "maximum_employees" + ] = 9 + df_sirene.loc[ + 
df_sirene["trancheEffectifsEtablissement"] == "11", "minimum_employees" + ] = 10 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "11", "maximum_employees" + ] = 19 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "12", "minimum_employees" + ] = 20 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "12", "maximum_employees" + ] = 49 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "21", "minimum_employees" + ] = 50 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "21", "maximum_employees" + ] = 99 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "22", "minimum_employees" + ] = 100 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "22", "maximum_employees" + ] = 199 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "31", "minimum_employees" + ] = 200 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "31", "maximum_employees" + ] = 249 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "32", "minimum_employees" + ] = 250 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "32", "maximum_employees" + ] = 499 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "41", "minimum_employees" + ] = 500 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "41", "maximum_employees" + ] = 999 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "42", "minimum_employees" + ] = 1000 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "42", "maximum_employees" + ] = 1999 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "51", "minimum_employees" + ] = 2000 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "51", "maximum_employees" + ] = 4999 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "52", "minimum_employees" + ] = 5000 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "52", "maximum_employees" + ] = 9999 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "53", "minimum_employees" + ] = 10000 + df_sirene.loc[ + df_sirene["trancheEffectifsEtablissement"] == "53", "maximum_employees" + ] = np.inf # Add activity classification df_sirene["ape"] = df_sirene["activitePrincipaleEtablissement"] @@ -80,15 +137,24 @@ def execute(context): if len(excess_communes) > 5: raise RuntimeError("Found more than 5 excess municipalities in SIRENE data") - df_sirene = df_sirene[["siren", "commune_id", "minimum_employees", "maximum_employees", "ape", "siret"]] + df_sirene = df_sirene[ + [ + "siren", + "commune_id", + "minimum_employees", + "maximum_employees", + "ape", + "siret", + ] + ] # Add law status initial_count = len(df_sirene) - df_sirene = pd.merge(df_sirene, df_sirene_headquarters, on = "siren") + df_sirene = pd.merge(df_sirene, df_sirene_headquarters, on="siren") df_sirene["law_status"] = df_sirene["categorieJuridiqueUniteLegale"] - df_sirene = df_sirene.drop(columns = ["categorieJuridiqueUniteLegale", "siren"]) + df_sirene = df_sirene.drop(columns=["categorieJuridiqueUniteLegale", "siren"]) final_count = len(df_sirene) assert initial_count == final_count diff --git a/data/sirene/localized.py b/data/sirene/localized.py index 243b51c7..e2111c1d 100644 --- a/data/sirene/localized.py +++ b/data/sirene/localized.py @@ -6,6 +6,8 @@ Should we consider using location accuracy variable to optimize process? 
""" + + def configure(context): context.stage("data.sirene.cleaned") context.stage("data.sirene.raw_geoloc") @@ -15,19 +17,20 @@ def execute(context): df_sirene = context.stage("data.sirene.cleaned") df_siret_geoloc = context.stage("data.sirene.raw_geoloc") - # merging geographical SIREN file (containing only SIRET and location) with full SIREN file (all variables and processed) - df_siret_geoloc.set_index(("siret"),inplace=True,verify_integrity=True) - df_sirene.set_index(("siret"),inplace=True,verify_integrity=True) + df_siret_geoloc.set_index(("siret"), inplace=True, verify_integrity=True) + df_sirene.set_index(("siret"), inplace=True, verify_integrity=True) df_siret_geoloc.sort_index(inplace=True) df_sirene.sort_index(inplace=True) - df_sirene = df_sirene.join(df_siret_geoloc,how="left") - df_sirene.dropna(subset=['x', 'y'],inplace=True) - + df_sirene = df_sirene.join(df_siret_geoloc, how="left") + df_sirene.dropna(subset=["x", "y"], inplace=True) # convert to geopandas dataframe with Lambert 93, EPSG:2154 french official projection - df_sirene = gpd.GeoDataFrame(df_sirene, geometry=gpd.points_from_xy(df_sirene.x, df_sirene.y),crs="EPSG:2154") - + df_sirene = gpd.GeoDataFrame( + df_sirene, + geometry=gpd.points_from_xy(df_sirene.x, df_sirene.y), + crs="EPSG:2154", + ) return df_sirene diff --git a/data/sirene/output.py b/data/sirene/output.py index a64a9a27..87de6fe9 100644 --- a/data/sirene/output.py +++ b/data/sirene/output.py @@ -3,16 +3,20 @@ makes it easy to extract the data set from the pipeline. """ + def configure(context): context.stage("data.sirene.localized") context.config("output_path") context.config("output_prefix", "ile_de_france_") + def execute(context): df_sirene = context.stage("data.sirene.localized") df_sirene["commune_id"] = df_sirene["commune_id"].astype(str) - df_sirene.to_file("%s/%ssirene.gpkg" % ( - context.config("output_path"), context.config("output_prefix")), driver = "GPKG") - + df_sirene.to_file( + "%s/%ssirene.gpkg" + % (context.config("output_path"), context.config("output_prefix")), + driver="GPKG", + ) diff --git a/data/sirene/raw_geoloc.py b/data/sirene/raw_geoloc.py index 7887710c..5537f499 100644 --- a/data/sirene/raw_geoloc.py +++ b/data/sirene/raw_geoloc.py @@ -5,10 +5,14 @@ This stage loads the geolocalization data for the French enterprise registry. 
""" + def configure(context): context.config("data_path") - context.config("siret_geo_path", "sirene/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.zip") - + context.config( + "siret_geo_path", + "sirene/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.zip", + ) + context.stage("data.spatial.codes") @@ -16,37 +20,47 @@ def execute(context): # Filter by departement df_codes = context.stage("data.spatial.codes") requested_departements = set(df_codes["departement_id"].unique()) - + COLUMNS_DTYPES = { - "siret":"int64", - "x":"float", - "y":"float", - "plg_code_commune":"str", + "siret": "int64", + "x": "float", + "y": "float", + "plg_code_commune": "str", } - df_siret_geoloc = pd.DataFrame(columns=["siret","x","y"]) - - with context.progress(label = "Reading geolocalized SIRET ...") as progress: - csv = pd.read_csv("%s/%s" % (context.config("data_path"), context.config("siret_geo_path")), - usecols = COLUMNS_DTYPES.keys(), sep=";",dtype = COLUMNS_DTYPES,chunksize = 10240) - - for df_chunk in csv: + df_siret_geoloc = pd.DataFrame(columns=["siret", "x", "y"]) + + with context.progress(label="Reading geolocalized SIRET ...") as progress: + csv = pd.read_csv( + "%s/%s" % (context.config("data_path"), context.config("siret_geo_path")), + usecols=COLUMNS_DTYPES.keys(), + sep=";", + dtype=COLUMNS_DTYPES, + chunksize=10240, + ) + + for df_chunk in csv: progress.update(len(df_chunk)) - - f = df_chunk["siret"].isna() # Just to get a mask - + + f = df_chunk["siret"].isna() # Just to get a mask + for departement in requested_departements: f |= df_chunk["plg_code_commune"].str.startswith(departement) - df_siret_geoloc = pd.concat([df_siret_geoloc, df_chunk[f]],ignore_index=True) + df_siret_geoloc = pd.concat( + [df_siret_geoloc, df_chunk[f]], ignore_index=True + ) return df_siret_geoloc - def validate(context): - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("siret_geo_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("siret_geo_path")) + ): raise RuntimeError("SIRENE: geolocaized SIRET data is not available") - return os.path.getsize("%s/%s" % (context.config("data_path"), context.config("siret_geo_path"))) + return os.path.getsize( + "%s/%s" % (context.config("data_path"), context.config("siret_geo_path")) + ) diff --git a/data/sirene/raw_siren.py b/data/sirene/raw_siren.py index 0a7d0ae5..a612f1ce 100644 --- a/data/sirene/raw_siren.py +++ b/data/sirene/raw_siren.py @@ -5,41 +5,48 @@ This stage loads the raw data from the French enterprise registry. 
""" + def configure(context): context.config("data_path") context.config("siren_path", "sirene/StockUniteLegale_utf8.zip") context.stage("data.sirene.raw_siret") + def execute(context): relevant_siren = context.stage("data.sirene.raw_siret")["siren"].unique() df_siren = [] - - COLUMNS_DTYPES = { - "siren":"int32", - "categorieJuridiqueUniteLegale":"str", + "siren": "int32", + "categorieJuridiqueUniteLegale": "str", } - - with context.progress(label = "Reading SIREN...") as progress: - csv = pd.read_csv("%s/%s" % (context.config("data_path"), context.config("siren_path")), - usecols = COLUMNS_DTYPES.keys(), dtype = COLUMNS_DTYPES,chunksize = 10240) + + with context.progress(label="Reading SIREN...") as progress: + csv = pd.read_csv( + "%s/%s" % (context.config("data_path"), context.config("siren_path")), + usecols=COLUMNS_DTYPES.keys(), + dtype=COLUMNS_DTYPES, + chunksize=10240, + ) for df_chunk in csv: progress.update(len(df_chunk)) - df_chunk = df_chunk[ - df_chunk["siren"].isin(relevant_siren) - ] + df_chunk = df_chunk[df_chunk["siren"].isin(relevant_siren)] if len(df_chunk) > 0: df_siren.append(df_chunk) return pd.concat(df_siren) + def validate(context): - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("siren_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("siren_path")) + ): raise RuntimeError("SIRENE: SIREN data is not available") - return os.path.getsize("%s/%s" % (context.config("data_path"), context.config("siren_path"))) + return os.path.getsize( + "%s/%s" % (context.config("data_path"), context.config("siren_path")) + ) diff --git a/data/sirene/raw_siret.py b/data/sirene/raw_siret.py index 7b10713a..0bbadbcd 100644 --- a/data/sirene/raw_siret.py +++ b/data/sirene/raw_siret.py @@ -5,12 +5,14 @@ This stage loads the raw data from the French enterprise registry. 
""" + def configure(context): context.config("data_path") context.config("siret_path", "sirene/StockEtablissement_utf8.zip") context.stage("data.spatial.codes") + def execute(context): # Filter by departement df_codes = context.stage("data.spatial.codes") @@ -18,24 +20,27 @@ def execute(context): df_siret = [] - COLUMNS_DTYPES = { - "siren":"int32", - "siret":"int64", - "codeCommuneEtablissement":"str", - "activitePrincipaleEtablissement":"str", - "trancheEffectifsEtablissement":"str", - "etatAdministratifEtablissement":"str" + "siren": "int32", + "siret": "int64", + "codeCommuneEtablissement": "str", + "activitePrincipaleEtablissement": "str", + "trancheEffectifsEtablissement": "str", + "etatAdministratifEtablissement": "str", } - - with context.progress(label = "Reading SIRET...") as progress: - csv = pd.read_csv("%s/%s" % (context.config("data_path"), context.config("siret_path")), - usecols = COLUMNS_DTYPES.keys(), dtype = COLUMNS_DTYPES,chunksize = 10240) + + with context.progress(label="Reading SIRET...") as progress: + csv = pd.read_csv( + "%s/%s" % (context.config("data_path"), context.config("siret_path")), + usecols=COLUMNS_DTYPES.keys(), + dtype=COLUMNS_DTYPES, + chunksize=10240, + ) for df_chunk in csv: progress.update(len(df_chunk)) - f = df_chunk["codeCommuneEtablissement"].isna() # Just to get a mask + f = df_chunk["codeCommuneEtablissement"].isna() # Just to get a mask for departement in requested_departements: f |= df_chunk["codeCommuneEtablissement"].str.startswith(departement) @@ -46,11 +51,15 @@ def execute(context): if len(df_chunk) > 0: df_siret.append(df_chunk) - return pd.concat(df_siret) + def validate(context): - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("siret_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("siret_path")) + ): raise RuntimeError("SIRENE: SIRET data is not available") - return os.path.getsize("%s/%s" % (context.config("data_path"), context.config("siret_path"))) + return os.path.getsize( + "%s/%s" % (context.config("data_path"), context.config("siret_path")) + ) diff --git a/data/spatial/centroid_distances.py b/data/spatial/centroid_distances.py index b84d6371..ff8507de 100644 --- a/data/spatial/centroid_distances.py +++ b/data/spatial/centroid_distances.py @@ -1,18 +1,32 @@ import pandas as pd + def configure(context): context.stage("data.spatial.municipalities") + def execute(context): df = context.stage("data.spatial.municipalities") records = [] - with context.progress(total = len(df)**2, label = "Calculating centroid distances ...") as progress: + with context.progress( + total=len(df) ** 2, label="Calculating centroid distances ..." 
+ ) as progress: for origin_id, origin_geometry in zip(df["commune_id"], df["geometry"]): - for destination_id, destination_geometry in zip(df["commune_id"], df["geometry"]): - records.append(( - origin_id, destination_id, origin_geometry.centroid.distance(destination_geometry.centroid) - )) + for destination_id, destination_geometry in zip( + df["commune_id"], df["geometry"] + ): + records.append( + ( + origin_id, + destination_id, + origin_geometry.centroid.distance( + destination_geometry.centroid + ), + ) + ) progress.update() - return pd.DataFrame.from_records(records, columns = ["origin_id", "destination_id", "centroid_distance"]) + return pd.DataFrame.from_records( + records, columns=["origin_id", "destination_id", "centroid_distance"] + ) diff --git a/data/spatial/code_changes.py b/data/spatial/code_changes.py index a65df499..4c80a724 100644 --- a/data/spatial/code_changes.py +++ b/data/spatial/code_changes.py @@ -10,43 +10,58 @@ YEAR = 2021 SOURCE = "codes_%d/reference_IRIS_geo%d.xlsx" % (YEAR, YEAR) + def configure(context): context.config("data_path") context.config("regions", [11]) context.config("departments", []) + def execute(context): # Load IRIS registry df_modifications = pd.read_excel( "%s/%s" % (context.config("data_path"), SOURCE), - skiprows = 5, sheet_name = "Modifications_IRIS" - )[["IRIS_INI", "IRIS_FIN", "COM_INI", "COM_FIN"]].rename(columns = { - "IRIS_INI": "initial_iris", "IRIS_FIN": "final_iris", - "COM_INI": "initial_commune", "COM_FIN": "final_commune" - }) + skiprows=5, + sheet_name="Modifications_IRIS", + )[["IRIS_INI", "IRIS_FIN", "COM_INI", "COM_FIN"]].rename( + columns={ + "IRIS_INI": "initial_iris", + "IRIS_FIN": "final_iris", + "COM_INI": "initial_commune", + "COM_FIN": "final_commune", + } + ) - df_modifications["initial_iris"] = df_modifications["initial_iris"].astype("category") + df_modifications["initial_iris"] = df_modifications["initial_iris"].astype( + "category" + ) df_modifications["final_iris"] = df_modifications["final_iris"].astype("category") - df_modifications["initial_commune"] = df_modifications["initial_commune"].astype("category") - df_modifications["final_commune"] = df_modifications["final_commune"].astype("category") + df_modifications["initial_commune"] = df_modifications["initial_commune"].astype( + "category" + ) + df_modifications["final_commune"] = df_modifications["final_commune"].astype( + "category" + ) return df_modifications + def validate(context): if not os.path.exists("%s/%s" % (context.config("data_path"), SOURCE)): raise RuntimeError("Spatial reference codes are not available") return os.path.getsize("%s/%s" % (context.config("data_path"), SOURCE)) + def update(df_changes, level, values): initial_slot = "initial_%s" % level final_slot = "final_%s" % level df_source = df_changes[df_changes[initial_slot].isin(values.unique())] - dictionary = { k: v for k, v in zip(df_source[initial_slot], df_source[final_slot]) } + dictionary = {k: v for k, v in zip(df_source[initial_slot], df_source[final_slot])} if len(dictionary) > 0: print("Updating %d deprecated zone identifiers ..." % len(dictionary)) - + return values.replace(dictionary) diff --git a/data/spatial/codes.py b/data/spatial/codes.py index 38200a14..c7049363 100644 --- a/data/spatial/codes.py +++ b/data/spatial/codes.py @@ -8,6 +8,7 @@ departement and région. 
""" + def configure(context): context.config("data_path") @@ -16,19 +17,23 @@ def configure(context): context.config("codes_path", "codes_2021/reference_IRIS_geo2021.zip") context.config("codes_xlsx", "reference_IRIS_geo2021.xlsx") + def execute(context): # Load IRIS registry with zipfile.ZipFile( - "{}/{}".format(context.config("data_path"), context.config("codes_path"))) as archive: + "{}/{}".format(context.config("data_path"), context.config("codes_path")) + ) as archive: with archive.open(context.config("codes_xlsx")) as f: - df_codes = pd.read_excel(f, - skiprows = 5, sheet_name = "Emboitements_IRIS" - )[["CODE_IRIS", "DEPCOM", "DEP", "REG"]].rename(columns = { - "CODE_IRIS": "iris_id", - "DEPCOM": "commune_id", - "DEP": "departement_id", - "REG": "region_id" - }) + df_codes = pd.read_excel(f, skiprows=5, sheet_name="Emboitements_IRIS")[ + ["CODE_IRIS", "DEPCOM", "DEP", "REG"] + ].rename( + columns={ + "CODE_IRIS": "iris_id", + "DEPCOM": "commune_id", + "DEP": "departement_id", + "REG": "region_id", + } + ) df_codes["iris_id"] = df_codes["iris_id"].astype("category") df_codes["commune_id"] = df_codes["commune_id"].astype("category") @@ -47,12 +52,19 @@ def execute(context): df_codes["iris_id"] = df_codes["iris_id"].cat.remove_unused_categories() df_codes["commune_id"] = df_codes["commune_id"].cat.remove_unused_categories() - df_codes["departement_id"] = df_codes["departement_id"].cat.remove_unused_categories() + df_codes["departement_id"] = df_codes[ + "departement_id" + ].cat.remove_unused_categories() return df_codes + def validate(context): - if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("codes_path"))): + if not os.path.exists( + "%s/%s" % (context.config("data_path"), context.config("codes_path")) + ): raise RuntimeError("Spatial reference codes are not available") - return os.path.getsize("%s/%s" % (context.config("data_path"), context.config("codes_path"))) + return os.path.getsize( + "%s/%s" % (context.config("data_path"), context.config("codes_path")) + ) diff --git a/data/spatial/departments.py b/data/spatial/departments.py index c055e51e..15b669af 100644 --- a/data/spatial/departments.py +++ b/data/spatial/departments.py @@ -7,11 +7,17 @@ Provides the municipality zoning system. """ + def configure(context): context.stage("data.spatial.municipalities") + def execute(context): - df_departements = context.stage("data.spatial.municipalities").dissolve( - by = "departement_id").drop(columns = ["commune_id", "has_iris"]).reset_index() + df_departements = ( + context.stage("data.spatial.municipalities") + .dissolve(by="departement_id") + .drop(columns=["commune_id", "has_iris"]) + .reset_index() + ) return df_departements diff --git a/data/spatial/iris.py b/data/spatial/iris.py index 8f10457a..56603084 100644 --- a/data/spatial/iris.py +++ b/data/spatial/iris.py @@ -8,35 +8,35 @@ Loads the IRIS zoning system. 
""" + def configure(context): context.config("data_path") context.config("iris_path", "iris_2021") context.stage("data.spatial.codes") + def execute(context): df_codes = context.stage("data.spatial.codes") - source_path = find_iris("{}/{}".format(context.config("data_path"), context.config("iris_path"))) + source_path = find_iris( + "{}/{}".format(context.config("data_path"), context.config("iris_path")) + ) with py7zr.SevenZipFile(source_path) as archive: - contour_paths = [ - path for path in archive.getnames() - if "LAMB93" in path - ] + contour_paths = [path for path in archive.getnames() if "LAMB93" in path] archive.extract(context.path(), contour_paths) - + shp_path = [path for path in contour_paths if path.endswith(".shp")] if len(shp_path) != 1: - raise RuntimeError("Cannot find IRIS shapes inside the archive, please report this as an error!") + raise RuntimeError( + "Cannot find IRIS shapes inside the archive, please report this as an error!" + ) - df_iris = gpd.read_file("{}/{}".format(context.path(), shp_path[0]))[[ - "CODE_IRIS", "INSEE_COM", "geometry" - ]].rename(columns = { - "CODE_IRIS": "iris_id", - "INSEE_COM": "commune_id" - }) + df_iris = gpd.read_file("{}/{}".format(context.path(), shp_path[0]))[ + ["CODE_IRIS", "INSEE_COM", "geometry"] + ].rename(columns={"CODE_IRIS": "iris_id", "INSEE_COM": "commune_id"}) df_iris.crs = "EPSG:2154" @@ -44,28 +44,35 @@ def execute(context): df_iris["commune_id"] = df_iris["commune_id"].astype("category") # Merge with requested codes and verify integrity - df_iris = pd.merge(df_iris, df_codes, on = ["iris_id", "commune_id"]) + df_iris = pd.merge(df_iris, df_codes, on=["iris_id", "commune_id"]) requested_iris = set(df_codes["iris_id"].unique()) merged_iris = set(df_iris["iris_id"].unique()) if requested_iris != merged_iris: - raise RuntimeError("Some IRIS are missing: %s" % (requested_iris - merged_iris,)) + raise RuntimeError( + "Some IRIS are missing: %s" % (requested_iris - merged_iris,) + ) return df_iris + def find_iris(path): candidates = sorted(list(glob.glob("{}/*.7z".format(path)))) if len(candidates) == 0: raise RuntimeError("IRIS data is not available in {}".format(path)) - + if len(candidates) > 1: - raise RuntimeError("Multiple candidates for IRIS are available in {}".format(path)) - + raise RuntimeError( + "Multiple candidates for IRIS are available in {}".format(path) + ) + return candidates[0] def validate(context): - path = find_iris("{}/{}".format(context.config("data_path"), context.config("iris_path"))) + path = find_iris( + "{}/{}".format(context.config("data_path"), context.config("iris_path")) + ) return os.path.getsize(path) diff --git a/data/spatial/municipalities.py b/data/spatial/municipalities.py index b46eb696..71c553cf 100644 --- a/data/spatial/municipalities.py +++ b/data/spatial/municipalities.py @@ -7,14 +7,20 @@ Provides the municipality zoning system. 
""" + def configure(context): context.stage("data.spatial.iris") + def execute(context): df_iris = context.stage("data.spatial.iris") df_iris["has_iris"] = ~df_iris["iris_id"].astype(str).str.endswith("0000") - df_municipalities = context.stage("data.spatial.iris").dissolve( - by = "commune_id").drop(columns = ["iris_id"]).reset_index() + df_municipalities = ( + context.stage("data.spatial.iris") + .dissolve(by="commune_id") + .drop(columns=["iris_id"]) + .reset_index() + ) return df_municipalities diff --git a/data/spatial/population.py b/data/spatial/population.py index 04ab94bb..624df8ce 100644 --- a/data/spatial/population.py +++ b/data/spatial/population.py @@ -6,6 +6,7 @@ Loads aggregate population data. """ + def configure(context): context.config("data_path") context.stage("data.spatial.codes") @@ -13,19 +14,28 @@ def configure(context): context.config("population_xlsx", "base-ic-evol-struct-pop-2019.xlsx") context.config("population_year", 19) + def execute(context): year = str(context.config("population_year")) with zipfile.ZipFile( - "{}/{}".format(context.config("data_path"), context.config("population_path"))) as archive: + "{}/{}".format(context.config("data_path"), context.config("population_path")) + ) as archive: with archive.open(context.config("population_xlsx")) as f: df_population = pd.read_excel( f, - skiprows = 5, sheet_name = "IRIS", usecols = ["IRIS", "COM", "DEP", "REG", "P%s_POP" % year] - ).rename(columns = { - "IRIS": "iris_id", "COM": "commune_id", "DEP": "departement_id", "REG": "region_id", - "P%s_POP" % year: "population" - }) + skiprows=5, + sheet_name="IRIS", + usecols=["IRIS", "COM", "DEP", "REG", "P%s_POP" % year], + ).rename( + columns={ + "IRIS": "iris_id", + "COM": "commune_id", + "DEP": "departement_id", + "REG": "region_id", + "P%s_POP" % year: "population", + } + ) df_population["iris_id"] = df_population["iris_id"].astype("category") df_population["commune_id"] = df_population["commune_id"].astype("category") @@ -34,19 +44,31 @@ def execute(context): # Merge into code data and verify integrity df_codes = context.stage("data.spatial.codes") - df_population = pd.merge(df_population, df_codes, on = ["iris_id", "commune_id", "departement_id", "region_id"]) + df_population = pd.merge( + df_population, + df_codes, + on=["iris_id", "commune_id", "departement_id", "region_id"], + ) requested_iris = set(df_codes["iris_id"].unique()) merged_iris = set(df_population["iris_id"].unique()) if requested_iris != merged_iris: - raise RuntimeError("Some IRIS are missing: %s" % (requested_iris - merged_iris,)) + raise RuntimeError( + "Some IRIS are missing: %s" % (requested_iris - merged_iris,) + ) + + return df_population[ + ["region_id", "departement_id", "commune_id", "iris_id", "population"] + ] - return df_population[["region_id", "departement_id", "commune_id", "iris_id", "population"]] def validate(context): - if not os.path.exists("{}/{}".format(context.config("data_path"), context.config("population_path"))): + if not os.path.exists( + "{}/{}".format(context.config("data_path"), context.config("population_path")) + ): raise RuntimeError("Aggregated census data is not available") - return os.path.getsize("{}/{}".format(context.config("data_path"), context.config("population_path"))) - \ No newline at end of file + return os.path.getsize( + "{}/{}".format(context.config("data_path"), context.config("population_path")) + ) diff --git a/data/spatial/urban_type.py b/data/spatial/urban_type.py index 7e5c0c26..4f80a954 100644 --- a/data/spatial/urban_type.py +++ 
b/data/spatial/urban_type.py
@@ -5,56 +5,73 @@

 # START Monkey patching openpyxl to parse INSEE file
 from openpyxl.styles.colors import WHITE, RGB
+
 __old_rgb_set__ = RGB.__set__

+
 def __rgb_set_fixed__(self, instance, value):
     try:
         __old_rgb_set__(self, instance, value)
     except ValueError as e:
-        if e.args[0] == 'Colors must be aRGB hex values':
+        if e.args[0] == "Colors must be aRGB hex values":
             __old_rgb_set__(self, instance, WHITE)

+
 RGB.__set__ = __rgb_set_fixed__
 # END Monkey patching openpyxl

 # Loads the input data for the urban type (unité urbaine)

+
 def configure(context):
     context.stage("data.spatial.municipalities")

     context.config("data_path")
     context.config("urban_type_path", "urban_type/UU2020_au_01-01-2023.zip")

+
 def execute(context):
-    with zipfile.ZipFile("{}/{}".format(
-        context.config("data_path"), context.config("urban_type_path"))) as archive:
+    with zipfile.ZipFile(
+        "{}/{}".format(context.config("data_path"), context.config("urban_type_path"))
+    ) as archive:
         assert len(archive.filelist) == 1
         with archive.open(archive.filelist[0]) as f:
-            df = pd.read_excel(f, sheet_name = "Composition_communale", skiprows = 5)
-
+            df = pd.read_excel(f, sheet_name="Composition_communale", skiprows=5)
+
     df = df[["CODGEO", "STATUT_2017"]].copy()
-    df = df.set_axis(["commune_id", "urban_type"], axis = "columns")
+    df = df.set_axis(["commune_id", "urban_type"], axis="columns")

     # Cities that have districts are not detailed in the UU file, only the whole city is mentioned
     # However the municipalities file details the districts with their respective INSEE codes
-    cities_with_districts = {"75056": [str(75101 + i) for i in (range(20))], # Paris
-                            "69123": [str(69001 + i) for i in range(9)], # Lyon
-                            "13055": [str(13201 + i) for i in range(15)]} # Marseilles
+    cities_with_districts = {
+        "75056": [str(75101 + i) for i in (range(20))],  # Paris
+        "69123": [str(69001 + i) for i in range(9)],  # Lyon
+        "13055": [str(13201 + i) for i in range(15)],  # Marseilles
+    }

     # Replacing each line of the UU file corresponding to a city with districts by multiple lines, one for each district
     for city_code in cities_with_districts:
         base_type = df[df["commune_id"] == city_code].iloc[0]["urban_type"]
         replacement_codes = cities_with_districts[city_code]

-        df = pd.concat([df, pd.DataFrame({
-            "commune_id": replacement_codes,
-            "urban_type": [base_type] * len(replacement_codes)
-        })])
-
+        df = pd.concat(
+            [
+                df,
+                pd.DataFrame(
+                    {
+                        "commune_id": replacement_codes,
+                        "urban_type": [base_type] * len(replacement_codes),
+                    }
+                ),
+            ]
+        )
+
     df = df[~df["commune_id"].isin(cities_with_districts.keys())]

     # Clean unités urbaines
-    df["urban_type"] = df["urban_type"].replace({"B":"suburb","C":"central_city","I":"isolated_city","H":"none"})
+    df["urban_type"] = df["urban_type"].replace(
+        {"B": "suburb", "C": "central_city", "I": "isolated_city", "H": "none"}
+    )

     assert np.all(~df["urban_type"].isna())
     df["urban_type"] = df["urban_type"].astype("category")
@@ -66,8 +83,13 @@ def execute(context):

     return df

+
 def validate(context):
-    if not os.path.exists("%s/%s" % (context.config("data_path"), context.config("urban_type_path"))):
+    if not os.path.exists(
+        "%s/%s" % (context.config("data_path"), context.config("urban_type_path"))
+    ):
         raise RuntimeError("Urban type data is not available")

-    return os.path.getsize("%s/%s" % (context.config("data_path"), context.config("urban_type_path")))
+    return os.path.getsize(
+        "%s/%s" % (context.config("data_path"), context.config("urban_type_path"))
+    )
diff --git a/data/spatial/utils.py
b/data/spatial/utils.py
index 29b272d4..048116b4 100644
--- a/data/spatial/utils.py
+++ b/data/spatial/utils.py
@@ -3,20 +3,23 @@
 import geopandas as gpd
 import pandas as pd
 
-def to_gpd(context, df, x = "x", y = "y", crs = "EPSG:2154", column = "geometry"):
+
+def to_gpd(context, df, x="x", y="y", crs="EPSG:2154", column="geometry"):
     df[column] = [
-        geo.Point(*coord) for coord in context.progress(
-            zip(df[x], df[y]), total = len(df),
-            label = "Converting coordinates"
-        )]
-    df = gpd.GeoDataFrame(df, crs = "EPSG:2154", geometry = column)
+        geo.Point(*coord)
+        for coord in context.progress(
+            zip(df[x], df[y]), total=len(df), label="Converting coordinates"
+        )
+    ]
+    df = gpd.GeoDataFrame(df, crs="EPSG:2154", geometry=column)
 
     if not df.crs == "EPSG:2154":
         df = df.to_crs("EPSG:2154")
 
     return df
 
-def sample_from_shape(shape, count, random, sample_size = None):
+
+def sample_from_shape(shape, count, random, sample_size=None):
     points = []
 
     if sample_size is None:
@@ -24,15 +27,16 @@ def sample_from_shape(shape, count, random, sample_size = None):
     while len(points) < count:
         minx, miny, maxx, maxy = shape.bounds
 
-        candidates = random.random_sample(size = (sample_size, 2))
-        candidates[:,0] = minx + candidates[:,0] * (maxx - minx)
-        candidates[:,1] = miny + candidates[:,1] * (maxy - miny)
+        candidates = random.random_sample(size=(sample_size, 2))
+        candidates[:, 0] = minx + candidates[:, 0] * (maxx - minx)
+        candidates[:, 1] = miny + candidates[:, 1] * (maxy - miny)
 
         candidates = [geo.Point(*point) for point in candidates]
         candidates = [point for point in candidates if shape.contains(point)]
         points += candidates
 
     return np.array([(point.x, point.y) for point in points[:count]])
 
+
 def _sample_from_zones(context, args):
     attribute_value, random_seed = args
@@ -46,9 +50,12 @@ def _sample_from_zones(context, args):
     f = df[attribute] == attribute_value
 
     coordinates = sample_from_shape(zone, np.count_nonzero(f), random)
-    return pd.DataFrame(coordinates, columns = ["x", "y"], index = f[f].index)
+    return pd.DataFrame(coordinates, columns=["x", "y"], index=f[f].index)
+
 
-def sample_from_zones(context, df_zones, df, attribute, random, label = "Sampling coordinates ..."):
+def sample_from_zones(
+    context, df_zones, df, attribute, random, label="Sampling coordinates ..."
+):
     assert attribute in df
     assert attribute in df_zones
@@ -57,8 +64,14 @@ def sample_from_zones(context, df_zones, df, attribute, random, label = "Samplin
 
     df_result = []
 
-    with context.parallel(dict(df_zones = df_zones, df = df, attribute = attribute)) as parallel:
-        for df_partial in context.progress(parallel.imap(_sample_from_zones, zip(unique_values, random_seeds)), label = label, total = len(unique_values)):
+    with context.parallel(
+        dict(df_zones=df_zones, df=df, attribute=attribute)
+    ) as parallel:
+        for df_partial in context.progress(
+            parallel.imap(_sample_from_zones, zip(unique_values, random_seeds)),
+            label=label,
+            total=len(unique_values),
+        ):
             df_result.append(df_partial)
 
     return pd.concat(df_result)
diff --git a/data/tiles/raw.py
index b42a5d33..7af35c73 100644
--- a/data/tiles/raw.py
+++ b/data/tiles/raw.py
@@ -9,6 +9,7 @@
 This stage loads the raw data on French population income, poverty and living standards provided as tiled data.
 """
 
+
 def configure(context):
     context.stage("data.spatial.departments")
     context.config("data_path")
@@ -62,4 +63,4 @@ def validate(context):
 
     return os.path.getsize(
         "{}/{}".format(context.config("data_path"), context.config("tiles_path"))
-    )
\ No newline at end of file
+    )
diff --git a/data/vehicles/raw.py
index 95a9fc31..b726ab63 100644
--- a/data/vehicles/raw.py
+++ b/data/vehicles/raw.py
@@ -9,31 +9,49 @@
 https://www.statistiques.developpement-durable.gouv.fr/donnees-sur-le-parc-automobile-francais-au-1er-janvier-2021
 """
 
+
 def configure(context):
     context.config("data_path")
     context.config("vehicles_path", "vehicles")
     context.config("vehicles_year", 2021)
 
     context.stage("data.spatial.codes")
 
+
 def execute(context):
     df_codes = context.stage("data.spatial.codes")
 
     # the downloaded excel files' meta-data actually have a badly formatted ISO datetime
-    # https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1659
-    with mock.patch.object(excel.ExcelReader, 'read_properties', lambda self: None):
+    # https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1659
+    with mock.patch.object(excel.ExcelReader, "read_properties", lambda self: None):
         year = str(context.config("vehicles_year"))
-
-        with zipfile.ZipFile("{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_communes.zip")) as archive:
+
+        with zipfile.ZipFile(
+            "{}/{}/{}".format(
+                context.config("data_path"),
+                context.config("vehicles_path"),
+                "parc_vp_communes.zip",
+            )
+        ) as archive:
             with archive.open("Parc_VP_Communes_{}.xlsx".format(year)) as f:
                 df_municipalities = pd.read_excel(f)
 
-        with zipfile.ZipFile("{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_regions.zip")) as archive:
+        with zipfile.ZipFile(
+            "{}/{}/{}".format(
+                context.config("data_path"),
+                context.config("vehicles_path"),
+                "parc_vp_regions.zip",
+            )
+        ) as archive:
             with archive.open("Parc_VP_Regions_{}.xlsx".format(year)) as f:
                 df_regions = pd.read_excel(f)
-
+
     df_municipalities["region_id"] = df_municipalities["Code région"].astype("category")
-    df_municipalities["departement_id"] = df_municipalities["Code départment"].astype("category")
-    df_municipalities["commune_id"] = df_municipalities["Code commune"].astype("category")
+    df_municipalities["departement_id"] = df_municipalities["Code départment"].astype(
+        "category"
+    )
+    df_municipalities["commune_id"] = df_municipalities["Code commune"].astype(
+        "category"
+    )
 
     df_regions["region_id"] = df_regions["Code région"].astype("category")
 
@@ -41,14 +59,22 @@ def execute(context):
     requested_regions = set(df_codes["region_id"].astype(str).unique())
 
     if len(requested_departements) > 0:
-        df_municipalities = df_municipalities[df_municipalities["departement_id"].isin(requested_departements)]
+        df_municipalities = df_municipalities[
+            df_municipalities["departement_id"].isin(requested_departements)
+        ]
 
     if len(requested_regions) > 0:
         df_regions = df_regions[df_regions["region_id"].isin(requested_regions)]
 
-    df_municipalities["region_id"] = df_municipalities["region_id"].cat.remove_unused_categories()
-    df_municipalities["departement_id"] = df_municipalities["departement_id"].cat.remove_unused_categories()
-    df_municipalities["commune_id"] = df_municipalities["commune_id"].cat.remove_unused_categories()
+    df_municipalities["region_id"] = df_municipalities[
+        "region_id"
+    ].cat.remove_unused_categories()
+    df_municipalities["departement_id"] = df_municipalities[
+        "departement_id"
+    ].cat.remove_unused_categories()
+
df_municipalities["commune_id"] = df_municipalities[ + "commune_id" + ].cat.remove_unused_categories() df_regions["region_id"] = df_regions["region_id"].cat.remove_unused_categories() @@ -65,19 +91,46 @@ def execute(context): df_regions["fleet"] = df_regions[count_column_name] df_regions["age"] = df_regions[age_column_name] - df_vehicle_fleet_counts = df_municipalities.groupby(["region_id", "commune_id", "critair","technology"])["fleet"].sum().reset_index().dropna() - df_vehicle_age_counts = df_regions.groupby(["region_id", "critair", "technology", "age"])["fleet"].sum().reset_index().dropna() + df_vehicle_fleet_counts = ( + df_municipalities.groupby(["region_id", "commune_id", "critair", "technology"])[ + "fleet" + ] + .sum() + .reset_index() + .dropna() + ) + df_vehicle_age_counts = ( + df_regions.groupby(["region_id", "critair", "technology", "age"])["fleet"] + .sum() + .reset_index() + .dropna() + ) return df_vehicle_fleet_counts, df_vehicle_age_counts + def validate(context): - municipalities_path = "{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_communes.zip") - regions_path = "{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_regions.zip") + municipalities_path = "{}/{}/{}".format( + context.config("data_path"), + context.config("vehicles_path"), + "parc_vp_communes.zip", + ) + regions_path = "{}/{}/{}".format( + context.config("data_path"), + context.config("vehicles_path"), + "parc_vp_regions.zip", + ) if not os.path.exists(municipalities_path): - raise RuntimeError("Municipalities vehicle data is not available at {}".format(municipalities_path)) - + raise RuntimeError( + "Municipalities vehicle data is not available at {}".format( + municipalities_path + ) + ) + if not os.path.exists(regions_path): - raise RuntimeError("Regions vehicle data is not available at {}".format(regions_path)) + raise RuntimeError( + "Regions vehicle data is not available at {}".format(regions_path) + ) return os.path.getsize(municipalities_path) + os.path.getsize(regions_path) diff --git a/data/vehicles/types.py b/data/vehicles/types.py index b10b8c65..f8ef6828 100644 --- a/data/vehicles/types.py +++ b/data/vehicles/types.py @@ -4,18 +4,28 @@ This stage creates the various type of vehicles needed for the simulation with HBEFA emissions """ -HBEFA_TECH = ['petrol', 'diesel'] -HBEFA_EURO = ['1', '2', '3', '4', '5', '6ab', '6c', '6d'] +HBEFA_TECH = ["petrol", "diesel"] +HBEFA_EURO = ["1", "2", "3", "4", "5", "6ab", "6c", "6d"] + def configure(context): pass + def execute(context): vehicle_types = [ { - 'type_id': 'default_car', 'nb_seats': 4, 'length': 5.0, 'width': 1.0, 'pce': 1.0, 'mode': "car", - 'hbefa_cat': "PASSENGER_CAR", 'hbefa_tech': "average", 'hbefa_size': "average", 'hbefa_emission': "average", + "type_id": "default_car", + "nb_seats": 4, + "length": 5.0, + "width": 1.0, + "pce": 1.0, + "mode": "car", + "hbefa_cat": "PASSENGER_CAR", + "hbefa_tech": "average", + "hbefa_size": "average", + "hbefa_emission": "average", } ] @@ -25,7 +35,7 @@ def execute(context): id = "car_%s_%s" % (technology, euro) - if technology == "diesel" and euro in ['2', '3']: + if technology == "diesel" and euro in ["2", "3"]: euro += " (DPF)" size = ">=2L" if technology == "petrol" else "<1,4L" @@ -35,10 +45,17 @@ def execute(context): emission = "PC %s Euro-%s" % (tech, euro) - vehicle_types.append({ - 'type_id': id, 'length': 7.5, 'width': 1.0, - 'hbefa_cat': "PASSENGER_CAR", 'hbefa_tech': tech, 'hbefa_size': size, 'hbefa_emission': emission, 
- }) + vehicle_types.append( + { + "type_id": id, + "length": 7.5, + "width": 1.0, + "hbefa_cat": "PASSENGER_CAR", + "hbefa_tech": tech, + "hbefa_size": size, + "hbefa_emission": emission, + } + ) df_types = pd.DataFrame.from_records(vehicle_types) - return df_types \ No newline at end of file + return df_types diff --git a/docs/verify_data.py b/docs/verify_data.py index f657dbff..777a6482 100644 --- a/docs/verify_data.py +++ b/docs/verify_data.py @@ -12,8 +12,8 @@ "https://www.insee.fr/fr/statistiques/6544333", "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVIZA_csv.zip", "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVIZD_csv.zip", - "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVIZE_csv.zip" - ] + "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVIZE_csv.zip", + ], }, { "name": "OD Matrices 2019", @@ -21,30 +21,30 @@ "https://www.insee.fr/fr/statistiques/6456056", "https://www.insee.fr/fr/statistiques/6456052", "https://www.insee.fr/fr/statistiques/fichier/6456056/RP2019_mobpro_csv.zip", - "https://www.insee.fr/fr/statistiques/fichier/6456052/RP2019_mobsco_csv.zip" - ] + "https://www.insee.fr/fr/statistiques/fichier/6456052/RP2019_mobsco_csv.zip", + ], }, { "name": "Population totals 2019", "urls": [ "https://www.insee.fr/fr/statistiques/6543200", - "https://www.insee.fr/fr/statistiques/fichier/6543200/base-ic-evol-struct-pop-2019_csv.zip" - ] + "https://www.insee.fr/fr/statistiques/fichier/6543200/base-ic-evol-struct-pop-2019_csv.zip", + ], }, { "name": "Filosofi 2019", "urls": [ "https://www.insee.fr/fr/statistiques/6036907", "https://www.insee.fr/fr/statistiques/fichier/6036907/indic-struct-distrib-revenu-2019-COMMUNES_csv.zip", - "https://www.insee.fr/fr/statistiques/fichier/6036907/indic-struct-distrib-revenu-2019-SUPRA_csv.zip" - ] + "https://www.insee.fr/fr/statistiques/fichier/6036907/indic-struct-distrib-revenu-2019-SUPRA_csv.zip", + ], }, { "name": "BPE 2021", "urls": [ "https://www.insee.fr/fr/statistiques/3568638", - "https://www.insee.fr/fr/statistiques/fichier/3568638/bpe21_ensemble_xy_csv.zip" - ] + "https://www.insee.fr/fr/statistiques/fichier/3568638/bpe21_ensemble_xy_csv.zip", + ], }, { "name": "ENTD 2008", @@ -55,35 +55,32 @@ "https://www.statistiques.developpement-durable.gouv.fr/sites/default/files/2019-01/Q_menage.csv", "https://www.statistiques.developpement-durable.gouv.fr/sites/default/files/2019-01/Q_individu.csv", "https://www.statistiques.developpement-durable.gouv.fr/sites/default/files/2019-01/Q_ind_lieu_teg.csv", - "https://www.statistiques.developpement-durable.gouv.fr/sites/default/files/2019-01/K_deploc.csv" - ] + "https://www.statistiques.developpement-durable.gouv.fr/sites/default/files/2019-01/K_deploc.csv", + ], }, { "name": "IRIS 2021", "urls": [ "https://geoservices.ign.fr/contoursiris", - "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/CONTOURS-IRIS-PACK_2021-01$CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01/file/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01.7z" - ] + "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/CONTOURS-IRIS-PACK_2021-01$CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01/file/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01.7z", + ], }, { "name": "Zoning 2021", "urls": [ "https://www.insee.fr/fr/information/2017499", - "https://www.insee.fr/fr/statistiques/fichier/2017499/reference_IRIS_geo2021.zip" - ] + "https://www.insee.fr/fr/statistiques/fichier/2017499/reference_IRIS_geo2021.zip", + ], }, { "name": "SIRENE", "urls": [ 
"https://www.data.gouv.fr/fr/datasets/base-sirene-des-entreprises-et-de-leurs-etablissements-siren-siret/" - ] + ], }, - { "name": "SIRET géolocalisé", - "urls": [ - "https://adresse.data.gouv.fr/donnees-nationales" - ] + "urls": ["https://adresse.data.gouv.fr/donnees-nationales"], }, # { # "name": "BD-TOPO", @@ -101,8 +98,8 @@ "https://download.geofabrik.de/europe/france/ile-de-france-latest.osm.pbf", "https://download.geofabrik.de/europe/france/rhone-alpes-latest.osm.pbf", "https://download.geofabrik.de/europe/france/languedoc-roussillon-latest.osm.pbf", - "https://download.geofabrik.de/europe/france/midi-pyrenees-latest.osm.pbf" - ] + "https://download.geofabrik.de/europe/france/midi-pyrenees-latest.osm.pbf", + ], }, { "name": "GTFS", @@ -115,9 +112,9 @@ "https://download.data.grandlyon.com/files/rdata/tcl_sytral.tcltheorique/GTFS_TCL.ZIP", "https://eu.ftp.opendatasoft.com/sncf/gtfs/export-ter-gtfs-last.zip", "https://eu.ftp.opendatasoft.com/sncf/gtfs/export-intercites-gtfs-last.zip", - "https://ressources.data.sncf.com/explore/dataset/horaires-des-train-voyages-tgvinouiouigo/files/538b55483fac4c1dad455022a0257014/download/" - ] - } + "https://ressources.data.sncf.com/explore/dataset/horaires-des-train-voyages-tgvinouiouigo/files/538b55483fac4c1dad455022a0257014/download/", + ], + }, ] # Start testing process @@ -125,7 +122,7 @@ from urllib.request import urlopen any_errors = False -sleep_time = 10 # s +sleep_time = 10 # s for test in tests: print("Testing %s ..." % test["name"]) diff --git a/documentation/info/collect.py b/documentation/info/collect.py index aca5c7f1..f0df1d72 100644 --- a/documentation/info/collect.py +++ b/documentation/info/collect.py @@ -1,6 +1,7 @@ import numpy as np import json + def configure(context): context.stage("data.hts.comparison") context.stage("data.census.cleaned") @@ -12,6 +13,7 @@ def configure(context): context.stage("data.census.filtered") context.stage("data.sirene.localized") + def execute(context): info = {} @@ -26,12 +28,30 @@ def execute(context): info["census"] = { "number_of_households": len(df_census["household_id"].unique()), "number_of_persons": len(df_census), - "weighted_number_of_households": df_census[["household_id", "weight"]].drop_duplicates("household_id")["weight"].sum(), + "weighted_number_of_households": df_census[["household_id", "weight"]] + .drop_duplicates("household_id")["weight"] + .sum(), "weighted_number_of_persons": df_census["weight"].sum(), - "share_of_households_without_iris": np.sum(df_households[~(df_households["iris_id"] != "undefined") & (df_households["commune_id"] != "undefined")]["weight"]) / np.sum(df_households["weight"]), - "share_of_households_without_commune": np.sum(df_households[~(df_households["iris_id"] != "undefined") & ~(df_households["commune_id"] != "undefined")]["weight"]) / np.sum(df_households["weight"]), - "filtered_households_share": context.get_info("data.census.filtered", "filtered_households_share"), - "filtered_persons_share": context.get_info("data.census.filtered", "filtered_persons_share"), + "share_of_households_without_iris": np.sum( + df_households[ + ~(df_households["iris_id"] != "undefined") + & (df_households["commune_id"] != "undefined") + ]["weight"] + ) + / np.sum(df_households["weight"]), + "share_of_households_without_commune": np.sum( + df_households[ + ~(df_households["iris_id"] != "undefined") + & ~(df_households["commune_id"] != "undefined") + ]["weight"] + ) + / np.sum(df_households["weight"]), + "filtered_households_share": context.get_info( + "data.census.filtered", 
"filtered_households_share" + ), + "filtered_persons_share": context.get_info( + "data.census.filtered", "filtered_persons_share" + ), } # OD data @@ -39,7 +59,7 @@ def execute(context): info["od"] = { "number_of_work_commutes": len(df_od_work), - "number_of_education_commutes": len(df_od_education) + "number_of_education_commutes": len(df_od_education), } # BPE @@ -48,8 +68,12 @@ def execute(context): info["bpe"] = { "number_of_enterprises": len(df_bpe), "number_of_shop_enterprises": int(np.sum(df_bpe["activity_type"] == "shop")), - "number_of_leisure_enterprises": int(np.sum(df_bpe["activity_type"] == "leisure")), - "number_of_education_enterprises": int(np.sum(df_bpe["activity_type"] == "education")), + "number_of_leisure_enterprises": int( + np.sum(df_bpe["activity_type"] == "leisure") + ), + "number_of_education_enterprises": int( + np.sum(df_bpe["activity_type"] == "education") + ), "number_of_other_enterprises": int(np.sum(df_bpe["activity_type"] == "other")), } @@ -58,28 +82,37 @@ def execute(context): info["zones"] = { "number_of_municipalities": len(df_codes["commune_id"].unique()), - "number_of_iris": len(df_codes["iris_id"].unique()) + "number_of_iris": len(df_codes["iris_id"].unique()), } with open("%s/zones.json" % context.cache_path, "w+") as f: - json.dump(info, f, indent = True) + json.dump(info, f, indent=True) # Income df_income_municipality = context.stage("data.income.municipality") - df_income_municipality = df_income_municipality[(df_income_municipality["attribute"] == "all") & (df_income_municipality["value"] == "all")] + df_income_municipality = df_income_municipality[ + (df_income_municipality["attribute"] == "all") + & (df_income_municipality["value"] == "all") + ] df_income_region = context.stage("data.income.region") info["income"] = { "minimum_median": int(df_income_municipality["q5"].min()), "maximum_median": int(df_income_municipality["q5"].max()), "median_region": int(df_income_region[4]), - "number_of_incomplete_distributions": int(np.sum(~df_income_municipality["is_missing"] & df_income_municipality["is_imputed"])), - "number_of_missing_distributions": int(np.sum(df_income_municipality["is_missing"])) + "number_of_incomplete_distributions": int( + np.sum( + ~df_income_municipality["is_missing"] + & df_income_municipality["is_imputed"] + ) + ), + "number_of_missing_distributions": int( + np.sum(df_income_municipality["is_missing"]) + ), } - # Output with open("%s/info.json" % context.cache_path, "w+") as f: - json.dump(info, f, indent = True) + json.dump(info, f, indent=True) return info diff --git a/documentation/info/tex.py b/documentation/info/tex.py index 1e42fc00..b0730bae 100644 --- a/documentation/info/tex.py +++ b/documentation/info/tex.py @@ -1,29 +1,54 @@ import numpy as np + def configure(context): context.stage("documentation.info.collect") + def execute(context): info = context.stage("documentation.info.collect") variables = { - "infoBpeNumberOfEnterprises": "{:,d}".format(info["bpe"]["number_of_enterprises"]), - "infoBpeNumberOfEducationEnterprises": "{:,d}".format(info["bpe"]["number_of_education_enterprises"]), - "infoBpeNumberOfShopEnterprises": "{:,d}".format(info["bpe"]["number_of_shop_enterprises"]), - "infoBpeNumberOfLeisureEnterprises": "{:,d}".format(info["bpe"]["number_of_leisure_enterprises"]), - "infoBpeNumberOfOtherEnterprises": "{:,d}".format(info["bpe"]["number_of_other_enterprises"]), - - "infoZonesNumberOfMunicipalities": "{:,d}".format(info["zones"]["number_of_municipalities"]), + "infoBpeNumberOfEnterprises": 
"{:,d}".format( + info["bpe"]["number_of_enterprises"] + ), + "infoBpeNumberOfEducationEnterprises": "{:,d}".format( + info["bpe"]["number_of_education_enterprises"] + ), + "infoBpeNumberOfShopEnterprises": "{:,d}".format( + info["bpe"]["number_of_shop_enterprises"] + ), + "infoBpeNumberOfLeisureEnterprises": "{:,d}".format( + info["bpe"]["number_of_leisure_enterprises"] + ), + "infoBpeNumberOfOtherEnterprises": "{:,d}".format( + info["bpe"]["number_of_other_enterprises"] + ), + "infoZonesNumberOfMunicipalities": "{:,d}".format( + info["zones"]["number_of_municipalities"] + ), "infoZonesNumberOfIris": "{:,d}".format(info["zones"]["number_of_iris"]), - - "infoIncomeMinimumMedian": "{:,.0f}".format(1e3 * np.round(info["income"]["minimum_median"] * 1e-3)), - "infoIncomeMaximumMedian": "{:,.0f}".format(1e3 * np.round(info["income"]["maximum_median"] * 1e-3)), - "infoIncomeMedianRegion": "{:,.0f}".format(1e3 * np.round(info["income"]["median_region"] * 1e-3)), - "infoIncomeNumberOfIncompleteDistributions": "{:,d}".format(info["income"]["number_of_incomplete_distributions"]), - "infoIncomeNumberOfMissingDistributions": "{:,d}".format(info["income"]["number_of_missing_distributions"]), - - "infoCensusFilteredHouseholds": "{:.2f}\\%".format(1e2 * info["census"]["filtered_households_share"]), - "infoCensusFilteredPersons": "{:.2f}\\%".format(1e2 * info["census"]["filtered_persons_share"]) + "infoIncomeMinimumMedian": "{:,.0f}".format( + 1e3 * np.round(info["income"]["minimum_median"] * 1e-3) + ), + "infoIncomeMaximumMedian": "{:,.0f}".format( + 1e3 * np.round(info["income"]["maximum_median"] * 1e-3) + ), + "infoIncomeMedianRegion": "{:,.0f}".format( + 1e3 * np.round(info["income"]["median_region"] * 1e-3) + ), + "infoIncomeNumberOfIncompleteDistributions": "{:,d}".format( + info["income"]["number_of_incomplete_distributions"] + ), + "infoIncomeNumberOfMissingDistributions": "{:,d}".format( + info["income"]["number_of_missing_distributions"] + ), + "infoCensusFilteredHouseholds": "{:.2f}\\%".format( + 1e2 * info["census"]["filtered_households_share"] + ), + "infoCensusFilteredPersons": "{:.2f}\\%".format( + 1e2 * info["census"]["filtered_persons_share"] + ), } latex = [] diff --git a/documentation/meta_output.py b/documentation/meta_output.py index e21bfbf6..2937e29c 100644 --- a/documentation/meta_output.py +++ b/documentation/meta_output.py @@ -1,6 +1,7 @@ import os, datetime, json import subprocess as sp + def configure(context): context.stage("matsim.runtime.git") context.config("output_path") @@ -9,6 +10,7 @@ def configure(context): for option in ("sampling_rate", "hts", "random_seed"): context.config(option) + def get_version(): version_path = os.path.dirname(os.path.realpath(__file__)) version_path = os.path.realpath("{}/../VERSION".format(version_path)) @@ -16,28 +18,39 @@ def get_version(): with open(version_path) as f: return f.read().strip() + def get_commit(): root_path = os.path.dirname(os.path.realpath(__file__)) root_path = os.path.realpath("{}/..".format(root_path)) try: - return sp.check_output(["git", "rev-parse", "HEAD"], cwd = root_path).strip().decode("utf-8") + return ( + sp.check_output(["git", "rev-parse", "HEAD"], cwd=root_path) + .strip() + .decode("utf-8") + ) except sp.CalledProcessError: return "unknown" + def execute(context): # Write meta information information = dict( - sampling_rate = context.config("sampling_rate"), - hts = context.config("hts"), - random_seed = context.config("random_seed"), - created = datetime.datetime.now(datetime.timezone.utc).isoformat(), - 
version = get_version(), - commit = get_commit() + sampling_rate=context.config("sampling_rate"), + hts=context.config("hts"), + random_seed=context.config("random_seed"), + created=datetime.datetime.now(datetime.timezone.utc).isoformat(), + version=get_version(), + commit=get_commit(), ) - with open("%s/%smeta.json" % (context.config("output_path"), context.config("output_prefix")), "w+") as f: - json.dump(information, f, indent = 4) + with open( + "%s/%smeta.json" + % (context.config("output_path"), context.config("output_prefix")), + "w+", + ) as f: + json.dump(information, f, indent=4) + def validate(context): return get_version() diff --git a/documentation/paper.py b/documentation/paper.py index 1f0a783f..fb630c74 100644 --- a/documentation/paper.py +++ b/documentation/paper.py @@ -1,8 +1,9 @@ import shutil + def configure(context): context.stage("documentation.plots.data.hts_comparison") - #context.stage("documentation.plots.theory.sampling_error") + # context.stage("documentation.plots.theory.sampling_error") context.stage("documentation.plots.monte_carlo") context.stage("documentation.plots.income") @@ -20,38 +21,73 @@ def configure(context): context.config("paper_path") + def execute(context): paper_path = context.config("paper_path") # Copy plots and tables mapping = { - "hts_comparison_distance.pdf": ("documentation.plots.data.hts_comparison", "distance_distribution.pdf"), - "hts_comparison_age.pdf": ("documentation.plots.data.hts_comparison", "age_distribution.pdf"), - - #"theory_sampling_error.pdf": ("documentation.plots.theory.sampling_error", "sampling_error.pdf"), - - #"sampling_sample_count.pdf": ("documentation.plots.sampling.sample_count", "sample_count.pdf"), - #"sampling_error_probability.pdf": ("documentation.plots.sampling.error_probability", "error_probability.pdf"), + "hts_comparison_distance.pdf": ( + "documentation.plots.data.hts_comparison", + "distance_distribution.pdf", + ), + "hts_comparison_age.pdf": ( + "documentation.plots.data.hts_comparison", + "age_distribution.pdf", + ), + # "theory_sampling_error.pdf": ("documentation.plots.theory.sampling_error", "sampling_error.pdf"), + # "sampling_sample_count.pdf": ("documentation.plots.sampling.sample_count", "sample_count.pdf"), + # "sampling_error_probability.pdf": ("documentation.plots.sampling.error_probability", "error_probability.pdf"), "monte_carlo.pdf": ("documentation.plots.monte_carlo", "monte_carlo.pdf"), - "monte_carlo_table.tex": ("documentation.plots.monte_carlo", "monte_carlo_table.tex"), - - "income_distributions.pdf": ("documentation.plots.income", "income_distributions.pdf"), - - "socdem_comparison_persons.pdf": ("documentation.plots.sociodemographics.general", "person.pdf"), - "socdem_comparison_households.pdf": ("documentation.plots.sociodemographics.general", "household.pdf"), - "socdem_spatial_comparison.pdf": ("documentation.plots.sociodemographics.local", "comparison.pdf"), - "activity_chain_comparison.pdf": ("documentation.plots.sociodemographics.chains", "activity_chains.pdf"), - - "commute_flow_bars.pdf": ("documentation.plots.commute_flow", "commute_flows.pdf"), - "commute_flow_boxplot.pdf": ("documentation.plots.commute_flow", "commute_flow_boxplot.pdf"), - "commute_distance_cdf.pdf": ("documentation.plots.commute_distance", "commute_distance_cdf.pdf"), - - "secloc_distributions.pdf": ("documentation.plots.secondary_locations", "input_distributions.pdf"), - "secloc_output.pdf": ("documentation.plots.secondary_locations", "distance_distributions.pdf"), - + "monte_carlo_table.tex": ( + 
"documentation.plots.monte_carlo", + "monte_carlo_table.tex", + ), + "income_distributions.pdf": ( + "documentation.plots.income", + "income_distributions.pdf", + ), + "socdem_comparison_persons.pdf": ( + "documentation.plots.sociodemographics.general", + "person.pdf", + ), + "socdem_comparison_households.pdf": ( + "documentation.plots.sociodemographics.general", + "household.pdf", + ), + "socdem_spatial_comparison.pdf": ( + "documentation.plots.sociodemographics.local", + "comparison.pdf", + ), + "activity_chain_comparison.pdf": ( + "documentation.plots.sociodemographics.chains", + "activity_chains.pdf", + ), + "commute_flow_bars.pdf": ( + "documentation.plots.commute_flow", + "commute_flows.pdf", + ), + "commute_flow_boxplot.pdf": ( + "documentation.plots.commute_flow", + "commute_flow_boxplot.pdf", + ), + "commute_distance_cdf.pdf": ( + "documentation.plots.commute_distance", + "commute_distance_cdf.pdf", + ), + "secloc_distributions.pdf": ( + "documentation.plots.secondary_locations", + "input_distributions.pdf", + ), + "secloc_output.pdf": ( + "documentation.plots.secondary_locations", + "distance_distributions.pdf", + ), "income.geojson": ("documentation.shapes", "income.geojson"), "info.tex": ("documentation.info.tex", "info.tex"), } for target, (stage, path) in mapping.items(): - shutil.copy("%s/%s" % (context.path(stage), path), "%s/%s" % (paper_path, target)) + shutil.copy( + "%s/%s" % (context.path(stage), path), "%s/%s" % (paper_path, target) + ) diff --git a/documentation/plots/commute_distance.py b/documentation/plots/commute_distance.py index 7d24fb3b..a11086de 100644 --- a/documentation/plots/commute_distance.py +++ b/documentation/plots/commute_distance.py @@ -7,12 +7,18 @@ SAMPLING_RATE = 0.05 + def configure(context): - context.stage("analysis.reference.hts.commute_distance", alias = "hts") - context.stage("analysis.synthesis.commute_distance", dict(sampling_rate = SAMPLING_RATE), alias = "data") - context.stage("analysis.reference.od.commute_distance", alias = "census") + context.stage("analysis.reference.hts.commute_distance", alias="hts") + context.stage( + "analysis.synthesis.commute_distance", + dict(sampling_rate=SAMPLING_RATE), + alias="data", + ) + context.stage("analysis.reference.od.commute_distance", alias="census") context.config("hts") + def execute(context): plotting.setup() @@ -21,32 +27,57 @@ def execute(context): census_data = context.stage("census") hts_name = context.config("hts") - plt.figure(figsize = plotting.SHORT_FIGSIZE) + plt.figure(figsize=plotting.SHORT_FIGSIZE) parts = [ - { "slot": "work", "linestyle": "-", "title": "Work" }, - { "slot": "education", "linestyle": "--", "title": "Educ." 
} + {"slot": "work", "linestyle": "-", "title": "Work"}, + {"slot": "education", "linestyle": "--", "title": "Educ."}, ] for part in parts: slot = part["slot"] - #plt.plot(census_data[slot]["centroid_distance"] * 1e-3, census_data[slot]["cdf"], color = plotting.COLORS["census"], linestyle = part["linestyle"], linewidth = 1.0) - - plt.plot(data[slot]["mean"], data[slot]["cdf"], color = "k", linestyle = part["linestyle"], linewidth = 1.0) - plt.fill_betweenx(data[slot]["cdf"], data[slot]["min"], data[slot]["max"], color = "k", linewidth = 0.0, alpha = 0.25) - - plt.plot(hts_data[slot]["euclidean_distance"] * 1e-3, hts_data[slot]["cdf"], color = plotting.COLORS[hts_name], linestyle = part["linestyle"], linewidth = 1.0) - - plt.plot([np.nan], color = "k", linewidth = 1.0, linestyle = part["linestyle"], label = part["title"]) - - plt.plot([np.nan], color = "k", linewidth = 1.0, label = "Synthetic") - plt.plot([np.nan], color = plotting.COLORS[hts_name], linewidth = 1.0, label = "HTS") + # plt.plot(census_data[slot]["centroid_distance"] * 1e-3, census_data[slot]["cdf"], color = plotting.COLORS["census"], linestyle = part["linestyle"], linewidth = 1.0) + + plt.plot( + data[slot]["mean"], + data[slot]["cdf"], + color="k", + linestyle=part["linestyle"], + linewidth=1.0, + ) + plt.fill_betweenx( + data[slot]["cdf"], + data[slot]["min"], + data[slot]["max"], + color="k", + linewidth=0.0, + alpha=0.25, + ) + + plt.plot( + hts_data[slot]["euclidean_distance"] * 1e-3, + hts_data[slot]["cdf"], + color=plotting.COLORS[hts_name], + linestyle=part["linestyle"], + linewidth=1.0, + ) + + plt.plot( + [np.nan], + color="k", + linewidth=1.0, + linestyle=part["linestyle"], + label=part["title"], + ) + + plt.plot([np.nan], color="k", linewidth=1.0, label="Synthetic") + plt.plot([np.nan], color=plotting.COLORS[hts_name], linewidth=1.0, label="HTS") plt.xlim([0, 40]) plt.ylim([0, 1]) - plt.legend(loc = "best", ncol = 2) + plt.legend(loc="best", ncol=2) plt.grid() plt.gca().set_axisbelow(True) diff --git a/documentation/plots/commute_flow.py b/documentation/plots/commute_flow.py index 2203b3f5..e3fe47f5 100644 --- a/documentation/plots/commute_flow.py +++ b/documentation/plots/commute_flow.py @@ -7,12 +7,18 @@ SAMPLING_RATE = 0.05 + def configure(context): context.config("hts") - context.stage("analysis.reference.od.commute_flow", alias = "census") - context.stage("analysis.reference.hts.commute_flow", alias = "hts") - context.stage("analysis.synthesis.commute_flow", dict(sampling_rate = SAMPLING_RATE), alias = "data") + context.stage("analysis.reference.od.commute_flow", alias="census") + context.stage("analysis.reference.hts.commute_flow", alias="hts") + context.stage( + "analysis.synthesis.commute_flow", + dict(sampling_rate=SAMPLING_RATE), + alias="data", + ) + def execute(context): plotting.setup() @@ -22,11 +28,11 @@ def execute(context): df_hts, df_correction = context.stage("hts") # PLOT: Work / education flows - plt.figure(figsize = plotting.WIDE_FIGSIZE) + plt.figure(figsize=plotting.WIDE_FIGSIZE) figures = [ - { "slot": "work", "title": "Work", "top": 12 }, - { "slot": "education", "title": "Education", "top": 12, "factor": 0.7 } + {"slot": "work", "title": "Work", "top": 12}, + {"slot": "education", "title": "Education", "top": 12, "factor": 0.7}, ] for index, figure in enumerate(figures): @@ -34,35 +40,77 @@ def execute(context): slot = figure["slot"] df = context.stage("data")[slot] - df = pd.merge(df, df_census[slot].rename(columns = { "weight": "reference" }), on = ["home", slot]) - df = pd.merge(df, 
df_correction[slot], on = "home") - df["scaled_reference"] = df["reference"] * (figure["factor"] if "factor" in figure else df["factor"]) + df = pd.merge( + df, + df_census[slot].rename(columns={"weight": "reference"}), + on=["home", slot], + ) + df = pd.merge(df, df_correction[slot], on="home") + df["scaled_reference"] = df["reference"] * ( + figure["factor"] if "factor" in figure else df["factor"] + ) count = figure["top"] - df = df.sort_values(by = "scaled_reference", ascending = False).head(count) - - plt.bar(np.arange(count), df["reference"], width = 0.4, align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["census"], alpha = 0.25) - plt.bar(np.arange(count), df["scaled_reference"], width = 0.4, label = "Census", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["census"]) - plt.bar(np.arange(count) + 0.4, df["mean"] / SAMPLING_RATE, width = 0.4, label = "Synthetic", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["synthetic"]) + df = df.sort_values(by="scaled_reference", ascending=False).head(count) + + plt.bar( + np.arange(count), + df["reference"], + width=0.4, + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["census"], + alpha=0.25, + ) + plt.bar( + np.arange(count), + df["scaled_reference"], + width=0.4, + label="Census", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["census"], + ) + plt.bar( + np.arange(count) + 0.4, + df["mean"] / SAMPLING_RATE, + width=0.4, + label="Synthetic", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["synthetic"], + ) for index, (min, max) in enumerate(zip(df["min"].values, df["max"].values)): index += 0.4 + 0.2 - plt.plot([index, index], [min / SAMPLING_RATE, max / SAMPLING_RATE], color = 'k', linewidth = 1.0) + plt.plot( + [index, index], + [min / SAMPLING_RATE, max / SAMPLING_RATE], + color="k", + linewidth=1.0, + ) plt.grid() plt.gca().set_axisbelow(True) - plt.gca().xaxis.grid(alpha = 0.0) + plt.gca().xaxis.grid(alpha=0.0) plt.gca().yaxis.set_major_locator(tck.FixedLocator(np.arange(100) * 1e5)) - plt.gca().yaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "%d" % (x * 1e-3,))) + plt.gca().yaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "%d" % (x * 1e-3,)) + ) origins, destinations = df["home"].values, df[figure["slot"]].values plt.gca().xaxis.set_major_locator(tck.FixedLocator(np.arange(count) + 0.4)) - plt.gca().xaxis.set_major_formatter(tck.FixedFormatter(["%s\n%s" % item for item in zip(origins, destinations)])) + plt.gca().xaxis.set_major_formatter( + tck.FixedFormatter(["%s\n%s" % item for item in zip(origins, destinations)]) + ) plt.ylabel("Commuters [x1000]") - plt.legend(loc = "best") + plt.legend(loc="best") plt.title(figure["title"]) plt.tight_layout() @@ -70,11 +118,17 @@ def execute(context): plt.close() # PLOT: Scatter - plt.figure(figsize = plotting.SHORT_FIGSIZE) + plt.figure(figsize=plotting.SHORT_FIGSIZE) parts = [ - { "slot": "work", "title": "Work", "marker": ".", "color": "k" }, - { "slot": "education", "title": "Education", "factor": 0.7, "marker": ".", "color": plotting.COLORS[hts_name] } + {"slot": "work", "title": "Work", "marker": ".", "color": "k"}, + { + "slot": "education", + "title": "Education", + "factor": 0.7, + "marker": ".", + "color": plotting.COLORS[hts_name], + }, ] minimum = np.inf @@ -84,17 +138,33 @@ def execute(context): slot = part["slot"] df = context.stage("data")[slot] - df = pd.merge(df, 
df_census[slot].rename(columns = { "weight": "reference" }), on = ["home", slot]) - df = pd.merge(df, df_correction[slot], on = "home") - df["scaled_reference"] = df["reference"] * (part["factor"] if "factor" in part else df["factor"]) - - plt.loglog(df["scaled_reference"], df["mean"] / SAMPLING_RATE, markersize = 2, marker = part["marker"], color = part["color"], linestyle = "none", label = part["title"]) + df = pd.merge( + df, + df_census[slot].rename(columns={"weight": "reference"}), + on=["home", slot], + ) + df = pd.merge(df, df_correction[slot], on="home") + df["scaled_reference"] = df["reference"] * ( + part["factor"] if "factor" in part else df["factor"] + ) + + plt.loglog( + df["scaled_reference"], + df["mean"] / SAMPLING_RATE, + markersize=2, + marker=part["marker"], + color=part["color"], + linestyle="none", + label=part["title"], + ) minimum = np.minimum(minimum, df["scaled_reference"].min() * 0.9) maximum = np.maximum(maximum, df["scaled_reference"].max() * 1.1) x = np.linspace(minimum, maximum, 100) - plt.fill_between(x, x * 0.8, x * 1.2, color = "k", alpha = 0.2, linewidth = 0.0, label = r"20% Error") + plt.fill_between( + x, x * 0.8, x * 1.2, color="k", alpha=0.2, linewidth=0.0, label=r"20% Error" + ) plt.xlim([minimum, maximum]) plt.ylim([minimum, maximum]) @@ -111,37 +181,60 @@ def execute(context): plt.close() # PLOT: Histogram - plt.figure(figsize = plotting.SHORT_FIGSIZE) + plt.figure(figsize=plotting.SHORT_FIGSIZE) parts = [ - { "slot": "work", "title": "Work" }, - { "slot": "education", "title": "Education", "factor": 0.7 } + {"slot": "work", "title": "Work"}, + {"slot": "education", "title": "Education", "factor": 0.7}, ] for index, part in enumerate(parts): slot = part["slot"] df = context.stage("data")[slot] - df = pd.merge(df, df_census[slot].rename(columns = { "weight": "reference" }), on = ["home", slot]) - df = pd.merge(df, df_correction[slot], on = "home") - df["scaled_reference"] = df["reference"] * (part["factor"] if "factor" in part else df["factor"]) - - df["difference"] = 100 * (df["mean"] / SAMPLING_RATE - df["scaled_reference"]) / df["scaled_reference"] + df = pd.merge( + df, + df_census[slot].rename(columns={"weight": "reference"}), + on=["home", slot], + ) + df = pd.merge(df, df_correction[slot], on="home") + df["scaled_reference"] = df["reference"] * ( + part["factor"] if "factor" in part else df["factor"] + ) + + df["difference"] = ( + 100 + * (df["mean"] / SAMPLING_RATE - df["scaled_reference"]) + / df["scaled_reference"] + ) min = df["difference"].min() max = df["difference"].max() mean = df["difference"].mean() values = df["difference"].values - outliers = values # values[(values < min) | (values > max)] - - plt.plot([index - 0.2, index + 0.2], [min, min], color = "k", linewidth = 1.0) - plt.plot([index - 0.2, index + 0.2], [max, max], color = "k", linewidth = 1.0) - plt.plot([index - 0.2, index + 0.2], [mean, mean], color = "k", linewidth = 1.0, linestyle = ":") - plt.plot([index - 0.2, index - 0.2], [min, max], color = "k", linewidth = 1.0) - plt.plot([index + 0.2, index + 0.2], [min, max], color = "k", linewidth = 1.0) - - plt.plot([index] * len(outliers), outliers, color = "k", marker = ".", markersize = 2, linestyle = "none") + outliers = values # values[(values < min) | (values > max)] + + plt.plot([index - 0.2, index + 0.2], [min, min], color="k", linewidth=1.0) + plt.plot([index - 0.2, index + 0.2], [max, max], color="k", linewidth=1.0) + plt.plot( + [index - 0.2, index + 0.2], + [mean, mean], + color="k", + linewidth=1.0, + 
linestyle=":", + ) + plt.plot([index - 0.2, index - 0.2], [min, max], color="k", linewidth=1.0) + plt.plot([index + 0.2, index + 0.2], [min, max], color="k", linewidth=1.0) + + plt.plot( + [index] * len(outliers), + outliers, + color="k", + marker=".", + markersize=2, + linestyle="none", + ) plt.gca().xaxis.set_major_locator(tck.FixedLocator([0, 1])) plt.gca().xaxis.set_major_formatter(tck.FixedFormatter(["Work", "Education"])) @@ -151,12 +244,14 @@ def execute(context): plt.xlim([-0.5, 1.5]) plt.grid() plt.gca().set_axisbelow(True) - plt.gca().xaxis.grid(alpha = 0.0) + plt.gca().xaxis.grid(alpha=0.0) - plt.bar([np.nan], [np.nan], color = "none", edgecolor = "k", linewidth = 1.0, label = "5% - 95%") - plt.plot([np.nan], color = "k", linestyle = ":", label = "Mean") + plt.bar( + [np.nan], [np.nan], color="none", edgecolor="k", linewidth=1.0, label="5% - 95%" + ) + plt.plot([np.nan], color="k", linestyle=":", label="Mean") - plt.legend(loc = "best") + plt.legend(loc="best") plt.tight_layout() plt.savefig("%s/commute_flow_boxplot.pdf" % context.path()) diff --git a/documentation/plots/data/hts_chains.py b/documentation/plots/data/hts_chains.py index 3da51ecf..1b47c36b 100644 --- a/documentation/plots/data/hts_chains.py +++ b/documentation/plots/data/hts_chains.py @@ -5,46 +5,74 @@ import matplotlib.ticker as tck import documentation.plotting as plotting + def configure(context): - context.stage("analysis.reference.hts.chains", { "hts": "egt" }, alias = "egt") - context.stage("analysis.reference.hts.chains", { "hts": "entd" }, alias = "entd") + context.stage("analysis.reference.hts.chains", {"hts": "egt"}, alias="egt") + context.stage("analysis.reference.hts.chains", {"hts": "entd"}, alias="entd") + def execute(context): plotting.setup() marginal = ("age_range", "sex", "chain") - df_egt = context.stage("egt")[marginal].rename(columns = { "weight": "egt" }) - df_entd = context.stage("entd")[marginal].rename(columns = { "weight": "entd" }) + df_egt = context.stage("egt")[marginal].rename(columns={"weight": "egt"}) + df_entd = context.stage("entd")[marginal].rename(columns={"weight": "entd"}) - df = pd.merge(df_egt, df_entd, on = ["age_range", "sex", "chain"]) + df = pd.merge(df_egt, df_entd, on=["age_range", "sex", "chain"]) df = df[df["age_range"]] - df_female = df[df["sex"] == "female"].sort_values(by = "egt", ascending = False).head(10) - df_male = df[df["sex"] == "male"].sort_values(by = "egt", ascending = False).head(10) + df_female = ( + df[df["sex"] == "female"].sort_values(by="egt", ascending=False).head(10) + ) + df_male = df[df["sex"] == "male"].sort_values(by="egt", ascending=False).head(10) - plt.figure(figsize = plotting.WIDE_FIGSIZE) + plt.figure(figsize=plotting.WIDE_FIGSIZE) - for index, (df, title) in enumerate(zip([df_male, df_female], ["Male (18-40)", "Female (18-40)"])): + for index, (df, title) in enumerate( + zip([df_male, df_female], ["Male (18-40)", "Female (18-40)"]) + ): plt.subplot(1, 2, index + 1) - plt.bar(np.arange(10), df["egt"], width = 0.4, label = "EGT", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["egt"]) - plt.bar(np.arange(10) + 0.4, df["entd"], width = 0.4, label = "ENTD", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["entd"]) + plt.bar( + np.arange(10), + df["egt"], + width=0.4, + label="EGT", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["egt"], + ) + plt.bar( + np.arange(10) + 0.4, + df["entd"], + width=0.4, + label="ENTD", + align="edge", + linewidth=0.5, + 
edgecolor="white", + color=plotting.COLORS["entd"], + ) plt.grid() plt.gca().set_axisbelow(True) - plt.gca().xaxis.grid(alpha = 0.0) + plt.gca().xaxis.grid(alpha=0.0) plt.gca().yaxis.set_major_locator(tck.FixedLocator(np.arange(100) * 1e5)) - plt.gca().yaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "%d" % (x * 1e-3,))) + plt.gca().yaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "%d" % (x * 1e-3,)) + ) plt.gca().xaxis.set_major_locator(tck.FixedLocator(np.arange(10) + 0.4)) - plt.gca().xaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "\n".join(df["chain"].values[p]).upper())) + plt.gca().xaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "\n".join(df["chain"].values[p]).upper()) + ) if index == 1: plt.gca().yaxis.set_major_formatter(tck.FixedFormatter([""] * 1000)) plt.gca().yaxis.get_label().set_visible(False) - plt.legend(loc = "best", title = title) + plt.legend(loc="best", title=title) if index == 0: plt.ylabel("Number of persons [x1000]") diff --git a/documentation/plots/data/hts_comparison.py b/documentation/plots/data/hts_comparison.py index 4cb2d35e..c98186d0 100644 --- a/documentation/plots/data/hts_comparison.py +++ b/documentation/plots/data/hts_comparison.py @@ -6,9 +6,11 @@ import documentation.plotting as plotting + def configure(context): context.stage("data.hts.comparison") + def execute(context): plotting.setup() @@ -22,20 +24,37 @@ def execute(context): plt.figure() - plt.bar(df_distance[f_entd]["distance_class"].values, df_distance[f_entd]["trip_weight"].values / 1e6, width = 0.4, label = "ENTD (Routed)", align = "edge", color = plotting.COLORS["entd"], linewidth = 0.5, edgecolor = "white") - plt.bar(df_distance[f_egt]["distance_class"].values + 0.4, df_distance[f_egt]["trip_weight"].values / 1e6, width = 0.4, label = "EGT (Euclidean)", align = "edge", color = plotting.COLORS["egt"], linewidth = 0.5, edgecolor = "white") + plt.bar( + df_distance[f_entd]["distance_class"].values, + df_distance[f_entd]["trip_weight"].values / 1e6, + width=0.4, + label="ENTD (Routed)", + align="edge", + color=plotting.COLORS["entd"], + linewidth=0.5, + edgecolor="white", + ) + plt.bar( + df_distance[f_egt]["distance_class"].values + 0.4, + df_distance[f_egt]["trip_weight"].values / 1e6, + width=0.4, + label="EGT (Euclidean)", + align="edge", + color=plotting.COLORS["egt"], + linewidth=0.5, + edgecolor="white", + ) plt.gca().xaxis.set_major_locator(tck.FixedLocator(np.arange(0, 10, 2) + 0.4)) - plt.gca().xaxis.set_major_formatter(tck.FixedFormatter(["<%dkm" % d for d in np.arange(1, 10, 2)])) - - plt.gca().annotate( - r"≥10 km", - xy = (10.0, 8.0), xycoords = 'data', ha = "right" + plt.gca().xaxis.set_major_formatter( + tck.FixedFormatter(["<%dkm" % d for d in np.arange(1, 10, 2)]) ) + plt.gca().annotate(r"≥10 km", xy=(10.0, 8.0), xycoords="data", ha="right") + plt.grid() plt.gca().set_axisbelow(True) - plt.gca().xaxis.grid(alpha = 0.0) + plt.gca().xaxis.grid(alpha=0.0) plt.xlabel("Trip distance") plt.ylabel("Number of trips [$10^6$]") @@ -55,37 +74,70 @@ def execute(context): plt.figure() - plt.bar(df_age[f_census]["age_class"].values, df_age[f_census]["person_weight"].values / 1e6, width = 0.25, label = "Census", align = "edge", color = plotting.COLORS["census"], linewidth = 0.5, edgecolor = "white") - plt.bar(df_age[f_entd]["age_class"].values + 0.25, df_age[f_entd]["person_weight"].values / 1e6, width = 0.25, label = "ENTD", align = "edge", color = plotting.COLORS["entd"], linewidth = 0.5, edgecolor = "white") - 
plt.bar(df_age[f_egt]["age_class"].values + 0.5, df_age[f_egt]["person_weight"].values / 1e6, width = 0.25, label = "EGT", align = "edge", color = plotting.COLORS["egt"], linewidth = 0.5, edgecolor = "white") + plt.bar( + df_age[f_census]["age_class"].values, + df_age[f_census]["person_weight"].values / 1e6, + width=0.25, + label="Census", + align="edge", + color=plotting.COLORS["census"], + linewidth=0.5, + edgecolor="white", + ) + plt.bar( + df_age[f_entd]["age_class"].values + 0.25, + df_age[f_entd]["person_weight"].values / 1e6, + width=0.25, + label="ENTD", + align="edge", + color=plotting.COLORS["entd"], + linewidth=0.5, + edgecolor="white", + ) + plt.bar( + df_age[f_egt]["age_class"].values + 0.5, + df_age[f_egt]["person_weight"].values / 1e6, + width=0.25, + label="EGT", + align="edge", + color=plotting.COLORS["egt"], + linewidth=0.5, + edgecolor="white", + ) plt.gca().xaxis.set_major_locator(tck.FixedLocator(np.arange(1000) + 0.75 / 2)) - plt.gca().xaxis.set_major_formatter(tck.FixedFormatter(["%d0s" % d for d in np.arange(1, 10, 2)])) + plt.gca().xaxis.set_major_formatter( + tck.FixedFormatter(["%d0s" % d for d in np.arange(1, 10, 2)]) + ) AGE_BOUNDS = ["<15", "15-29", "30-44", "45-59", "60-74", ">75"] plt.gca().xaxis.set_major_formatter(tck.FixedFormatter(AGE_BOUNDS)) plt.gca().annotate( "A", - xy = (1.5 + 0.5 * 0.25, 2.0), xycoords='data', - xytext = (1.5 + 0.5 * 0.25, 2.35), textcoords='data', - arrowprops = { "arrowstyle": "-|>", "facecolor": "black", "linewidth": 0.5 }, - bbox = { "pad": 0.0, "linewidth": 0.0, "facecolor": (1.0, 0.0, 0.0, 0.0) }, - ha = 'center' + xy=(1.5 + 0.5 * 0.25, 2.0), + xycoords="data", + xytext=(1.5 + 0.5 * 0.25, 2.35), + textcoords="data", + arrowprops={"arrowstyle": "-|>", "facecolor": "black", "linewidth": 0.5}, + bbox={"pad": 0.0, "linewidth": 0.0, "facecolor": (1.0, 0.0, 0.0, 0.0)}, + ha="center", ) plt.gca().annotate( "B", - xy = (4.25 + 0.5 * 0.25, 1.3), xycoords='data', - xytext = (4.25 + 0.5 * 0.25, 1.65), textcoords='data', - arrowprops = { "arrowstyle": "-|>", "facecolor": "black", "linewidth": 0.5 }, - bbox = { "pad": 0.0, "linewidth": 0.0, "facecolor": (1.0, 0.0, 0.0, 0.0) }, - ha = 'center' + xy=(4.25 + 0.5 * 0.25, 1.3), + xycoords="data", + xytext=(4.25 + 0.5 * 0.25, 1.65), + textcoords="data", + arrowprops={"arrowstyle": "-|>", "facecolor": "black", "linewidth": 0.5}, + bbox={"pad": 0.0, "linewidth": 0.0, "facecolor": (1.0, 0.0, 0.0, 0.0)}, + ha="center", ) plt.grid() plt.gca().set_axisbelow(True) - plt.gca().xaxis.grid(alpha = 0.0) + plt.gca().xaxis.grid(alpha=0.0) plt.xlabel("Age") plt.ylabel("Number of persons [x$10^6$]") diff --git a/documentation/plots/income.py b/documentation/plots/income.py index f24ae137..b5029c5b 100644 --- a/documentation/plots/income.py +++ b/documentation/plots/income.py @@ -8,18 +8,24 @@ SAMPLING_RATE = 0.05 + def configure(context): context.stage("data.income.municipality") - context.stage("analysis.synthesis.income", dict(sampling_rate = SAMPLING_RATE), alias = "data") + context.stage( + "analysis.synthesis.income", dict(sampling_rate=SAMPLING_RATE), alias="data" + ) context.stage("analysis.reference.income") + def execute(context): plotting.setup() # Income imputation df_income = context.stage("data.income.municipality") - df_income = df_income[(df_income["attribute"] == "all") & (df_income["value"] == "all")] + df_income = df_income[ + (df_income["attribute"] == "all") & (df_income["value"] == "all") + ] df_imputed = df_income[df_income["is_imputed"]] plt.figure() @@ -29,8 +35,21 @@ def 
execute(context): plt.plot([minimum, maximum], [minimum, maximum], "k--") f = ~df_imputed["is_missing"] - plt.plot(df_imputed[f]["reference_median"] * 1e-3, df_imputed[f]["q5"] * 1e-3, '.', markersize = 3, color = plotting.COLORSET[0], label = "y") - plt.plot(df_imputed[~f]["reference_median"] * 1e-3, df_imputed[~f]["q5"] * 1e-3, 'x', markersize = 3, color = plotting.COLORSET[1]) + plt.plot( + df_imputed[f]["reference_median"] * 1e-3, + df_imputed[f]["q5"] * 1e-3, + ".", + markersize=3, + color=plotting.COLORSET[0], + label="y", + ) + plt.plot( + df_imputed[~f]["reference_median"] * 1e-3, + df_imputed[~f]["q5"] * 1e-3, + "x", + markersize=3, + color=plotting.COLORSET[1], + ) plt.xlabel("Reference median income [1000 EUR]") plt.ylabel("Imputed median income [1000 EUR]") @@ -47,23 +66,57 @@ def execute(context): df_reference = context.stage("analysis.reference.income") f = df_reference["source"] == "entd" - plt.plot(df_reference[f]["income"].values * 1e-3, df_reference[f]["cdf"].values, color = plotting.COLORS["entd"], label = "ENTD", linewidth = 1.0) + plt.plot( + df_reference[f]["income"].values * 1e-3, + df_reference[f]["cdf"].values, + color=plotting.COLORS["entd"], + label="ENTD", + linewidth=1.0, + ) f = df_reference["source"] == "egt" - plt.plot(df_reference[f]["income"].values * 1e-3, df_reference[f]["cdf"].values, color = plotting.COLORS["egt"], label = "EGT", linewidth = 1.0) + plt.plot( + df_reference[f]["income"].values * 1e-3, + df_reference[f]["cdf"].values, + color=plotting.COLORS["egt"], + label="EGT", + linewidth=1.0, + ) f = df_reference["source"] == "filo" - plt.plot(df_reference[f]["income"].values * 1e-3, df_reference[f]["cdf"].values, color = plotting.COLORS["census"], label = "Tax data", linewidth = 1.0, marker = ".", markersize = 3) - - plt.plot(df_data["mean"].values * 1e-3, df_data["cdf"].values, color = "k", label = "Synthetic", linewidth = 1.0, linestyle = ":") - plt.fill_betweenx(df_data["cdf"].values, df_data["min"].values * 1e-3, df_data["max"].values * 1e-3, color = "k", linewidth = 0.0, alpha = 0.25) + plt.plot( + df_reference[f]["income"].values * 1e-3, + df_reference[f]["cdf"].values, + color=plotting.COLORS["census"], + label="Tax data", + linewidth=1.0, + marker=".", + markersize=3, + ) + + plt.plot( + df_data["mean"].values * 1e-3, + df_data["cdf"].values, + color="k", + label="Synthetic", + linewidth=1.0, + linestyle=":", + ) + plt.fill_betweenx( + df_data["cdf"].values, + df_data["min"].values * 1e-3, + df_data["max"].values * 1e-3, + color="k", + linewidth=0.0, + alpha=0.25, + ) plt.xlim([0, 60]) plt.xlabel("Household income [1000 EUR]") plt.ylabel("Cumulative density") - plt.legend(loc = "lower right") + plt.legend(loc="lower right") plt.grid() plt.tight_layout() diff --git a/documentation/plots/language.py b/documentation/plots/language.py index 03131b97..3d1f9d43 100644 --- a/documentation/plots/language.py +++ b/documentation/plots/language.py @@ -1,5 +1,7 @@ - def get_source(source): - if source == "egt": return "EGT" - if source == "entd": return "ENTD" - if source == "census": return "Census" + if source == "egt": + return "EGT" + if source == "entd": + return "ENTD" + if source == "census": + return "Census" diff --git a/documentation/plots/matching.py b/documentation/plots/matching.py index d1777a8e..05ae8b00 100644 --- a/documentation/plots/matching.py +++ b/documentation/plots/matching.py @@ -7,19 +7,25 @@ SAMPLING_RATE = 0.05 POPULATION_SAMPLES = 200 + def configure(context): - context.stage("analysis.matching", { - "sampling_rate": 
SAMPLING_RATE, - "analysis_populations": POPULATION_SAMPLES, - }, alias = "data") + context.stage( + "analysis.matching", + { + "sampling_rate": SAMPLING_RATE, + "analysis_populations": POPULATION_SAMPLES, + }, + alias="data", + ) + def execute(context): data = context.stage("data") variables = max(data.keys()) + 1 means = [np.mean(data[v] / data[0]) for v in range(variables)] - #mins = [np.percentile(data[v] / data[0], 10) for v in range(variables)] - #maxs = [np.percentile(data[v] / data[0], 90) for v in range(variables)] + # mins = [np.percentile(data[v] / data[0], 10) for v in range(variables)] + # maxs = [np.percentile(data[v] / data[0], 90) for v in range(variables)] mins = [np.min(data[v] / data[0]) for v in range(variables)] maxs = [np.max(data[v] / data[0]) for v in range(variables)] @@ -28,16 +34,27 @@ def execute(context): plotting.setup() plt.figure() - plt.bar(range(variables), means, color = plotting.COLORS["synthetic"]) + plt.bar(range(variables), means, color=plotting.COLORS["synthetic"]) for v, min, max in zip(range(variables), mins, maxs): - plt.plot([v, v,], [min, max], linewidth = 1, label = "90% Conf.", color = "k") + plt.plot( + [ + v, + v, + ], + [min, max], + linewidth=1, + label="90% Conf.", + color="k", + ) plt.xlabel("Variables") plt.ylabel("Matching rate") plt.gca().yaxis.set_major_locator(tck.FixedLocator(np.arange(100) * 0.2)) - plt.gca().yaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "%d%%" % (100 * x,))) + plt.gca().yaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "%d%%" % (100 * x,)) + ) plt.tight_layout() plt.savefig("%s/matching_rate.pdf" % context.path()) diff --git a/documentation/plots/monte_carlo.py b/documentation/plots/monte_carlo.py index 122f11e4..cd71c018 100644 --- a/documentation/plots/monte_carlo.py +++ b/documentation/plots/monte_carlo.py @@ -9,10 +9,12 @@ from analysis.synthesis.statistics.monte_carlo import SAMPLING_RATES from analysis.synthesis.statistics.monte_carlo import ACQUISITION_SAMPLE_SIZE + def configure(context): context.stage("analysis.reference.census.sociodemographics") context.stage("analysis.synthesis.statistics.monte_carlo") + SELECTED_MARGINAL = ("age_class", "employed") SELECTED_VALUES = (3, True) @@ -24,14 +26,13 @@ def configure(context): "studies", ] -ADDITIONAL_VALUES = [ - (3, True), (4, True), (5, True) -] +ADDITIONAL_VALUES = [(3, True), (4, True), (5, True)] from analysis.marginals import AGE_CLASS_LABELS ADDITIONAL_LABELS = AGE_CLASS_LABELS[3:6] + def select(reference, data, marginal, values): df_marginal = data[marginal] df_reference = reference[marginal] @@ -44,6 +45,7 @@ def select(reference, data, marginal, values): return df_marginal, reference_value + def execute(context): data = context.stage("analysis.synthesis.statistics.monte_carlo") @@ -55,13 +57,15 @@ def execute(context): values = np.sort(df_marginal[(marginal,)].drop_duplicates().values) for value in values: - row = { "marginal": marginal, "value": value } + row = {"marginal": marginal, "value": value} df_value = df_marginal[df_marginal[marginal] == value] df_value = df_value[df_value["samples"] == ACQUISITION_SAMPLE_SIZE] assert len(df_value) == len(SAMPLING_RATES) - probabilities = df_value.sort_values(by = ["sampling_rate", "samples"])["error_probability"].values[:,0] + probabilities = df_value.sort_values(by=["sampling_rate", "samples"])[ + "error_probability" + ].values[:, 0] for sampling_rate, probability in zip(SAMPLING_RATES, probabilities): row[sampling_rate] = probability @@ -70,7 +74,7 @@ def execute(context): df_table 
= pd.DataFrame.from_records(df_table) df_table = create_table(df_table) - df_table.to_latex("%s/monte_carlo_table.tex" % context.path(), escape = False) + df_table.to_latex("%s/monte_carlo_table.tex" % context.path(), escape=False) # Prepare data for plotting reference = context.stage("analysis.reference.census.sociodemographics")["person"] @@ -78,52 +82,100 @@ def execute(context): # Perform plotting plotting.setup() - plt.figure(figsize = plotting.WIDE_FIGSIZE) + plt.figure(figsize=plotting.WIDE_FIGSIZE) # ... subplot on nominal stratum values plt.subplot(1, 2, 1) - plt.title("(a) Monte Carlo analysis", fontsize = plotting.FONT_SIZE) + plt.title("(a) Monte Carlo analysis", fontsize=plotting.FONT_SIZE) - df_marginal, reference_value = select(reference, data, SELECTED_MARGINAL, SELECTED_VALUES) + df_marginal, reference_value = select( + reference, data, SELECTED_MARGINAL, SELECTED_VALUES + ) assert len(df_marginal) == ACQUISITION_SAMPLE_SIZE * len(SAMPLING_RATES) display_sampling_rates = [0.001, 0.01, 0.05] for index, sampling_rate in enumerate([0.001, 0.01, 0.05]): df_rate = df_marginal[df_marginal["sampling_rate"] == sampling_rate] - df_rate = df_rate.sort_values(by = "samples") - plt.fill_between(df_rate["samples"], df_rate[("weight", "q5")], df_rate[("weight", "q95")], alpha = 0.25 + index * 0.2, color = plotting.COLORSET[0], linewidth = 0.0) - - plt.plot([1, ACQUISITION_SAMPLE_SIZE], [reference_value] * 2, 'k--', label = "Ref. $y$", linewidth = 1.0) - plt.plot([1, ACQUISITION_SAMPLE_SIZE], [reference_value * 0.99] * 2, 'k:', label = "1% Err.", linewidth = 1.0) - plt.plot([1, ACQUISITION_SAMPLE_SIZE], [reference_value * 1.01] * 2, 'k:', linewidth = 1.0) + df_rate = df_rate.sort_values(by="samples") + plt.fill_between( + df_rate["samples"], + df_rate[("weight", "q5")], + df_rate[("weight", "q95")], + alpha=0.25 + index * 0.2, + color=plotting.COLORSET[0], + linewidth=0.0, + ) + + plt.plot( + [1, ACQUISITION_SAMPLE_SIZE], + [reference_value] * 2, + "k--", + label="Ref. $y$", + linewidth=1.0, + ) + plt.plot( + [1, ACQUISITION_SAMPLE_SIZE], + [reference_value * 0.99] * 2, + "k:", + label="1% Err.", + linewidth=1.0, + ) + plt.plot( + [1, ACQUISITION_SAMPLE_SIZE], [reference_value * 1.01] * 2, "k:", linewidth=1.0 + ) plt.xlabel("Sample size $N$") plt.ylabel("Stratum weight") - plt.gca().yaxis.set_major_formatter(tck.FuncFormatter(lambda x, p: "%.2fM" % (x * 1e-6,))) + plt.gca().yaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "%.2fM" % (x * 1e-6,)) + ) plt.grid() plt.gca().set_axisbelow(True) plt.xlim([1, ACQUISITION_SAMPLE_SIZE]) - plt.fill_between([np.nan], [np.nan], [np.nan], color = plotting.COLORSET[0], alpha = 0.25, label = "90% Conf.") - plt.legend(loc = "lower center", ncol = 2) + plt.fill_between( + [np.nan], + [np.nan], + [np.nan], + color=plotting.COLORSET[0], + alpha=0.25, + label="90% Conf.", + ) + plt.legend(loc="lower center", ncol=2) # ... 
subplot on nominal stratum values plt.subplot(1, 2, 2) - plt.title("(b) Error probability", fontsize = plotting.FONT_SIZE) + plt.title("(b) Error probability", fontsize=plotting.FONT_SIZE) for index, values in enumerate(ADDITIONAL_VALUES): - df_marginal, reference_value = select(reference, data, SELECTED_MARGINAL, values) + df_marginal, reference_value = select( + reference, data, SELECTED_MARGINAL, values + ) assert len(df_marginal) == ACQUISITION_SAMPLE_SIZE * len(SAMPLING_RATES) df_max = df_marginal[df_marginal["samples"] == ACQUISITION_SAMPLE_SIZE] - df_max = df_max.sort_values(by = "sampling_rate") - - plt.plot(100 * np.array(SAMPLING_RATES), df_max[("error_probability", "mean")], color = plotting.COLORSET[index], label = "Age %s" % ADDITIONAL_LABELS[index], marker = ".", markersize = 3.0, linewidth = 1.0) - - plt.plot([0, 100 * max(SAMPLING_RATES)], [0.9] * 2, 'k:', label = "90% Prob.", linewidth = 1.0) + df_max = df_max.sort_values(by="sampling_rate") + + plt.plot( + 100 * np.array(SAMPLING_RATES), + df_max[("error_probability", "mean")], + color=plotting.COLORSET[index], + label="Age %s" % ADDITIONAL_LABELS[index], + marker=".", + markersize=3.0, + linewidth=1.0, + ) + + plt.plot( + [0, 100 * max(SAMPLING_RATES)], + [0.9] * 2, + "k:", + label="90% Prob.", + linewidth=1.0, + ) plt.xlim([0, 100 * max(SAMPLING_RATES)]) plt.ylim([0, 1.0]) @@ -133,14 +185,16 @@ def execute(context): plt.grid() plt.gca().set_axisbelow(True) - plt.legend(loc = "center", ncol = 1) + plt.legend(loc="center", ncol=1) plt.tight_layout() plt.savefig("%s/monte_carlo.pdf" % context.path()) plt.close() + import analysis.marginals + def label_row(row): if row["marginal"] == "age_class": return analysis.marginals.AGE_CLASS_LABELS[row["value"]] @@ -157,28 +211,36 @@ def label_row(row): elif row["marginal"] == "socioprofessional_class": return analysis.marginals.SOCIOPROFESIONAL_CLASS_LABELS[row["value"]] + def bold_probability(x): if x >= 0.9: return "\\textbf{%.2f}" % x else: return "%.2f" % x -def create_table(df_table): - df_table["value"] = df_table.apply(label_row, axis = 1, raw = False) - df_table["marginal"] = df_table["marginal"].map({ - "age_class": "Age", - "sex": "Sex", - "employed": "Employed", - "studies": "Studies", - "socioprofessional_class": "Socioprof. Cat." - }) +def create_table(df_table): + df_table["value"] = df_table.apply(label_row, axis=1, raw=False) + + df_table["marginal"] = df_table["marginal"].map( + { + "age_class": "Age", + "sex": "Sex", + "employed": "Employed", + "studies": "Studies", + "socioprofessional_class": "Socioprof. 
Cat.", + } + ) for sampling_rate in SAMPLING_RATES: df_table[sampling_rate] = df_table[sampling_rate].apply(bold_probability) - df_table.columns = ["Variable", "Stratum"] + ["%.1f%%" % (100 * s,) for s in SAMPLING_RATES] + df_table.columns = ["Variable", "Stratum"] + [ + "%.1f%%" % (100 * s,) for s in SAMPLING_RATES + ] df_table = df_table.set_index(["Variable", "Stratum"]) - df_table.columns = pd.MultiIndex.from_tuples([("Sampling rate $s$", str(s)) for s in SAMPLING_RATES]) + df_table.columns = pd.MultiIndex.from_tuples( + [("Sampling rate $s$", str(s)) for s in SAMPLING_RATES] + ) return df_table diff --git a/documentation/plots/secondary_locations.py b/documentation/plots/secondary_locations.py index 296fb335..8b70da8d 100644 --- a/documentation/plots/secondary_locations.py +++ b/documentation/plots/secondary_locations.py @@ -3,6 +3,7 @@ import matplotlib.ticker as tck import documentation.plotting as plotting + def configure(context): context.stage("synthesis.population.spatial.secondary.distance_distributions") @@ -11,17 +12,20 @@ def configure(context): context.config("hts") + def execute(context): plotting.setup() hts_name = context.config("hts") # PLOT: Input distributions - distributions = context.stage("synthesis.population.spatial.secondary.distance_distributions") + distributions = context.stage( + "synthesis.population.spatial.secondary.distance_distributions" + ) plt.figure() modes = list(context.stage("analysis.reference.hts.mode_distances").keys()) - #modes = ["car", "car_passenger", "pt", "bike", "walk"] + # modes = ["car", "car_passenger", "pt", "bike", "walk"] for index, mode in enumerate(modes): mode_distribution = distributions[mode] @@ -36,21 +40,40 @@ def execute(context): weights = distribution["weights"] / np.sum(distribution["weights"]) means.append(np.sum(weights * distribution["values"])) - q10.append(distribution["values"][np.count_nonzero(distribution["cdf"] < 0.1)]) - q90.append(distribution["values"][np.count_nonzero(distribution["cdf"] < 0.9)]) + q10.append( + distribution["values"][np.count_nonzero(distribution["cdf"] < 0.1)] + ) + q90.append( + distribution["values"][np.count_nonzero(distribution["cdf"] < 0.9)] + ) if mode in ("car", "pt"): - plt.fill_between([0.0] + list(bounds), q10, q90, color = plotting.COLORSET5[index], alpha = 0.25, linewidth = 0.0) - - plt.plot([0.0] + list(bounds), means, label = "%s (%d)" % (plotting.MODE_LABELS[mode], len(bounds)), linewidth = 1.0, marker = ".", markersize = 3, color = plotting.COLORSET5[index]) + plt.fill_between( + [0.0] + list(bounds), + q10, + q90, + color=plotting.COLORSET5[index], + alpha=0.25, + linewidth=0.0, + ) + + plt.plot( + [0.0] + list(bounds), + means, + label="%s (%d)" % (plotting.MODE_LABELS[mode], len(bounds)), + linewidth=1.0, + marker=".", + markersize=3, + color=plotting.COLORSET5[index], + ) plt.gca().xaxis.set_major_locator(tck.FixedLocator(np.arange(100) * 60 * 20)) - plt.gca().xaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: str(x // 60))) + plt.gca().xaxis.set_major_formatter(tck.FuncFormatter(lambda x, p: str(x // 60))) plt.gca().yaxis.set_major_locator(tck.FixedLocator(np.arange(100) * 5 * 1000)) - plt.gca().yaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: str(x // 1000))) + plt.gca().yaxis.set_major_formatter(tck.FuncFormatter(lambda x, p: str(x // 1000))) - plt.legend(loc = "upper left") + plt.legend(loc="upper left") plt.xlim([0, 90 * 60 if hts_name == "egt" else 50 * 60]) plt.ylim([0, 45 * 1000 if hts_name == "egt" else 25 * 1000]) @@ -67,29 +90,49 @@ def 
execute(context): df_synthetic = context.stage("analysis.synthesis.mode_distances") reference_data = context.stage("analysis.reference.hts.mode_distances") - plt.figure(figsize = (6.0, 2.5), dpi = 100) # 2.5 * 2.5 + plt.figure(figsize=(6.0, 2.5), dpi=100) # 2.5 * 2.5 limits = dict( - car = 20 * 1e3, car_passenger = 20 * 1e3, pt = 20 * 1e3, - bike = 6 * 1e3, walk = 1 * 1e3 + car=20 * 1e3, car_passenger=20 * 1e3, pt=20 * 1e3, bike=6 * 1e3, walk=1 * 1e3 ) - modes = ["car", "bike" if "bike" in modes else "walk" ] + modes = ["car", "bike" if "bike" in modes else "walk"] for index, mode in enumerate(modes): plt.subplot(1, 2, index + 1) mode_reference = reference_data[mode] - plt.plot(mode_reference["values"] * 1e-3, mode_reference["cdf"], linestyle = '--', color = "k", linewidth = 1.0, label = "HTS") + plt.plot( + mode_reference["values"] * 1e-3, + mode_reference["cdf"], + linestyle="--", + color="k", + linewidth=1.0, + label="HTS", + ) df_mode = df_synthetic[df_synthetic["mode"] == mode] - plt.fill_betweenx(df_mode["cdf"], df_mode["min"]* 1e-3, df_mode["max"] * 1e-3, linewidth = 0.0, color = plotting.COLORS[hts_name], alpha = 0.25, label = "Range") - plt.plot(df_mode["mean"] * 1e-3, df_mode["cdf"], color = plotting.COLORS[hts_name], linewidth = 1.0, label = "Synthetic") + plt.fill_betweenx( + df_mode["cdf"], + df_mode["min"] * 1e-3, + df_mode["max"] * 1e-3, + linewidth=0.0, + color=plotting.COLORS[hts_name], + alpha=0.25, + label="Range", + ) + plt.plot( + df_mode["mean"] * 1e-3, + df_mode["cdf"], + color=plotting.COLORS[hts_name], + linewidth=1.0, + label="Synthetic", + ) plt.xlim([0, limits[mode] * 1e-3]) plt.ylim([0, 1]) - plt.title(plotting.MODE_LABELS[mode], fontsize = plotting.FONT_SIZE) + plt.title(plotting.MODE_LABELS[mode], fontsize=plotting.FONT_SIZE) plt.xlabel("Euclidean distance [km]") plt.grid() @@ -97,7 +140,7 @@ def execute(context): plt.ylabel("Cumulative density") if index % 2 == 1: - plt.legend(loc = "best") + plt.legend(loc="best") plt.tight_layout() plt.savefig("%s/distance_distributions.pdf" % context.path()) diff --git a/documentation/plots/sociodemographics/chains.py b/documentation/plots/sociodemographics/chains.py index 6632e6de..fbed851c 100644 --- a/documentation/plots/sociodemographics/chains.py +++ b/documentation/plots/sociodemographics/chains.py @@ -7,16 +7,19 @@ SAMPLING_RATE = 0.05 + def configure(context): context.stage("analysis.reference.hts.chains") context.stage( "analysis.synthesis.sociodemographics.chains", - dict(sampling_rate = SAMPLING_RATE), alias = "data" + dict(sampling_rate=SAMPLING_RATE), + alias="data", ) context.config("hts") + def execute(context): plotting.setup() @@ -26,41 +29,76 @@ def execute(context): # PLOT: Activity chains by sex marginal = ("age_range", "sex", "chain") - df = pd.merge(data[marginal], reference[marginal].rename(columns = { "weight": "reference" })) + df = pd.merge( + data[marginal], reference[marginal].rename(columns={"weight": "reference"}) + ) df = df[df["age_range"]] - df_female = df[df["sex"] == "female"].sort_values(by = "reference", ascending = False).head(10) - df_male = df[df["sex"] == "male"].sort_values(by = "reference", ascending = False).head(10) + df_female = ( + df[df["sex"] == "female"].sort_values(by="reference", ascending=False).head(10) + ) + df_male = ( + df[df["sex"] == "male"].sort_values(by="reference", ascending=False).head(10) + ) - plt.figure(figsize = plotting.WIDE_FIGSIZE) + plt.figure(figsize=plotting.WIDE_FIGSIZE) hts_name = context.config("hts") - for index, (df, title) in 
enumerate(zip([df_male, df_female], ["Male (18-40)", "Female (18-40)"])): + for index, (df, title) in enumerate( + zip([df_male, df_female], ["Male (18-40)", "Female (18-40)"]) + ): plt.subplot(1, 2, index + 1) - plt.bar(np.arange(10), df["reference"], width = 0.4, label = "HTS", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS[hts_name]) - plt.bar(np.arange(10) + 0.4, df["mean"] / SAMPLING_RATE, width = 0.4, label = "Synthetic", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["synthetic"]) + plt.bar( + np.arange(10), + df["reference"], + width=0.4, + label="HTS", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS[hts_name], + ) + plt.bar( + np.arange(10) + 0.4, + df["mean"] / SAMPLING_RATE, + width=0.4, + label="Synthetic", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["synthetic"], + ) for location, (min, max) in enumerate(zip(df["min"].values, df["max"].values)): location += 0.4 + 0.2 - plt.plot([location, location], [min / SAMPLING_RATE, max / SAMPLING_RATE], "k", linewidth = 1) + plt.plot( + [location, location], + [min / SAMPLING_RATE, max / SAMPLING_RATE], + "k", + linewidth=1, + ) plt.grid() plt.gca().set_axisbelow(True) - plt.gca().xaxis.grid(alpha = 0.0) + plt.gca().xaxis.grid(alpha=0.0) if hts_name == "egt": plt.ylim([0, 3.5e5]) else: plt.ylim([0, 5e5]) - plt.plot([np.nan], color = "k", linewidth = 1, label = "Range") + plt.plot([np.nan], color="k", linewidth=1, label="Range") plt.gca().yaxis.set_major_locator(tck.FixedLocator(np.arange(100) * 1e5)) - plt.gca().yaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "%d" % (x * 1e-3,))) + plt.gca().yaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "%d" % (x * 1e-3,)) + ) plt.gca().xaxis.set_major_locator(tck.FixedLocator(np.arange(10) + 0.4)) - plt.gca().xaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "\n".join(df["chain"].values[p]).upper())) + plt.gca().xaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "\n".join(df["chain"].values[p]).upper()) + ) if index == 1: plt.gca().yaxis.set_major_formatter(tck.FixedFormatter([""] * 1000)) @@ -69,7 +107,7 @@ def execute(context): handles, labels = plt.gca().get_legend_handles_labels() handles = [handles[-2], handles[-1], handles[-3]] labels = [labels[-2], labels[-1], labels[-3]] - plt.legend(handles = handles, labels = labels, loc = "best", title = title) + plt.legend(handles=handles, labels=labels, loc="best", title=title) if index == 0: plt.ylabel("Number of persons [x1000]") diff --git a/documentation/plots/sociodemographics/general.py b/documentation/plots/sociodemographics/general.py index 869f0604..005451ae 100644 --- a/documentation/plots/sociodemographics/general.py +++ b/documentation/plots/sociodemographics/general.py @@ -9,6 +9,7 @@ SAMPLING_RATE = 0.05 + def configure(context): context.config("hts") @@ -17,15 +18,18 @@ def configure(context): context.stage( "analysis.synthesis.sociodemographics.general", - dict(sampling_rate = SAMPLING_RATE), alias = "data" + dict(sampling_rate=SAMPLING_RATE), + alias="data", ) + def get_reference(level, marginal, census, hts): if (marginal,) in census[level]: return census[level][(marginal,)] else: return hts[level][(marginal,)] + def prepare_reference(hts_marginals, census_marginals, level, marginal): if (marginal,) in census_marginals[level]: df = census_marginals[level][(marginal,)] @@ -34,26 +38,34 @@ def prepare_reference(hts_marginals, census_marginals, level, marginal): df = 
hts_marginals[level][(marginal,)] df["reference_source"] = "hts" - df = df.copy().rename(columns = { marginal: "value", "weight": "reference" }) + df = df.copy().rename(columns={marginal: "value", "weight": "reference"}) df = df[["value", "reference", "reference_source"]] - df = df.sort_values(by = "value") + df = df.sort_values(by="value") return df -def prepare_marginal(data_marginals, hts_marginals, census_marginals, level, marginal, sampling_rate): - df = data_marginals[level][(marginal,)].copy().rename(columns = { marginal: "value" }) + +def prepare_marginal( + data_marginals, hts_marginals, census_marginals, level, marginal, sampling_rate +): + df = data_marginals[level][(marginal,)].copy().rename(columns={marginal: "value"}) df["attribute"] = marginal df = df[["attribute", "value", "mean", "min", "max"]] - df = df.sort_values(by = "value") + df = df.sort_values(by="value") df["mean"] /= sampling_rate df["min"] /= sampling_rate df["max"] /= sampling_rate - df = pd.merge(df, prepare_reference(hts_marginals, census_marginals, level, marginal), on = "value") + df = pd.merge( + df, + prepare_reference(hts_marginals, census_marginals, level, marginal), + on="value", + ) return df + def label(row): if row["attribute"] == "age_class": return "Age %s" % analysis.marginals.AGE_CLASS_LABELS[row["value"]] @@ -77,22 +89,43 @@ def label(row): return "SC %s" % analysis.marginals.SOCIOPROFESIONAL_CLASS_LABELS[row["value"]] elif row["attribute"] == "household_size_class": - return "Household size %s" % analysis.marginals.HOUSEHOLD_SIZE_LABELS[row["value"]] + return ( + "Household size %s" % analysis.marginals.HOUSEHOLD_SIZE_LABELS[row["value"]] + ) elif row["attribute"] == "number_of_vehicles_class": - return "No. vehicles %s" % analysis.marginals.NUMBER_OF_VEHICLES_LABELS[row["value"]] + return ( + "No. vehicles %s" + % analysis.marginals.NUMBER_OF_VEHICLES_LABELS[row["value"]] + ) elif row["attribute"] == "number_of_bikes_class": - return "No. bicycles %s" % analysis.marginals.NUMBER_OF_BIKES_LABELS[row["value"]] + return ( + "No. 
bicycles %s" % analysis.marginals.NUMBER_OF_BIKES_LABELS[row["value"]] + ) + def add_labels(df_figure): - df_figure["label"] = df_figure.apply(label, axis = 1, raw = False) + df_figure["label"] = df_figure.apply(label, axis=1, raw=False) + + +def prepare_data( + data_marginals, hts_marginals, census_marginals, level, marginals, sampling_rate +): + return pd.concat( + [ + prepare_marginal( + data_marginals, + hts_marginals, + census_marginals, + level, + marginal, + sampling_rate, + ) + for marginal in marginals + ] + ) -def prepare_data(data_marginals, hts_marginals, census_marginals, level, marginals, sampling_rate): - return pd.concat([ - prepare_marginal(data_marginals, hts_marginals, census_marginals, level, marginal, sampling_rate) - for marginal in marginals - ]) def reweight_hts(df_figure, hts_marginals, census_marginals, level): hts_total = hts_marginals[level][tuple()]["weight"].values[0] @@ -101,6 +134,7 @@ def reweight_hts(df_figure, hts_marginals, census_marginals, level): f = df_figure["reference_source"] == "hts" df_figure.loc[f, "reference"] *= census_total / hts_total + def execute(context): plotting.setup() @@ -110,19 +144,37 @@ def execute(context): figures = [ dict( - level = "person", label = "Number of persons", size = (6.0, 5.0), - marginals = ["age_class", "sex", "employed", "studies", "has_license", "has_pt_subscription", "socioprofessional_class"] + level="person", + label="Number of persons", + size=(6.0, 5.0), + marginals=[ + "age_class", + "sex", + "employed", + "studies", + "has_license", + "has_pt_subscription", + "socioprofessional_class", + ], ), dict( - level = "household", label = "Number of households", size = plotting.WIDE_FIGSIZE, - marginals = ["household_size_class", "number_of_vehicles_class", "number_of_bikes_class"] - ) + level="household", + label="Number of households", + size=plotting.WIDE_FIGSIZE, + marginals=[ + "household_size_class", + "number_of_vehicles_class", + "number_of_bikes_class", + ], + ), ] for figure in figures: - plt.figure(figsize = figure["size"]) + plt.figure(figsize=figure["size"]) - df_figure = prepare_data(data, hts, census, figure["level"], figure["marginals"], SAMPLING_RATE) + df_figure = prepare_data( + data, hts, census, figure["level"], figure["marginals"], SAMPLING_RATE + ) reweight_hts(df_figure, hts, census, figure["level"]) add_labels(df_figure) @@ -130,32 +182,80 @@ def execute(context): locations = np.arange(len(df_figure)) f = (df_figure["reference_source"] == "census").values - plt.barh(locations[f], df_figure["reference"].values[f], height = 0.4, label = "Census", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["census"]) - plt.barh(locations[f] + 0.4, df_figure["mean"].values[f], height = 0.4, label = "Synthetic", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["synthetic"]) + plt.barh( + locations[f], + df_figure["reference"].values[f], + height=0.4, + label="Census", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["census"], + ) + plt.barh( + locations[f] + 0.4, + df_figure["mean"].values[f], + height=0.4, + label="Synthetic", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["synthetic"], + ) f = (df_figure["reference_source"] == "hts").values hts_name = context.config("hts") - plt.barh(locations[f], df_figure["reference"].values[f], height = 0.4, label = "HTS", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS[hts_name]) - plt.barh(locations[f] + 0.4, 
df_figure["mean"].values[f], height = 0.4, label = None, align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["synthetic"]) + plt.barh( + locations[f], + df_figure["reference"].values[f], + height=0.4, + label="HTS", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS[hts_name], + ) + plt.barh( + locations[f] + 0.4, + df_figure["mean"].values[f], + height=0.4, + label=None, + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["synthetic"], + ) - for index, (min, max) in enumerate(zip(df_figure["min"].values, df_figure["max"].values)): + for index, (min, max) in enumerate( + zip(df_figure["min"].values, df_figure["max"].values) + ): location = index + 0.4 + 0.2 - plt.plot([min, max], [location, location], "k", linewidth = 1, label = "Range") + plt.plot([min, max], [location, location], "k", linewidth=1, label="Range") plt.gca().yaxis.set_major_locator(tck.FixedLocator(locations + 0.4)) - plt.gca().yaxis.set_major_formatter(tck.FixedFormatter(df_figure["label"].values)) + plt.gca().yaxis.set_major_formatter( + tck.FixedFormatter(df_figure["label"].values) + ) if figure["level"] == "person": - plt.gca().xaxis.set_major_locator(tck.FixedLocator(np.arange(1, 100) * 1e6 * 2)) - plt.gca().xaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "%dM" % (x / 1e6,))) + plt.gca().xaxis.set_major_locator( + tck.FixedLocator(np.arange(1, 100) * 1e6 * 2) + ) + plt.gca().xaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "%dM" % (x / 1e6,)) + ) if figure["level"] == "household": - plt.gca().xaxis.set_major_locator(tck.FixedLocator(np.arange(1, 100) * 1e6 * 0.5)) - plt.gca().xaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "%.1fM" % (x / 1e6,))) + plt.gca().xaxis.set_major_locator( + tck.FixedLocator(np.arange(1, 100) * 1e6 * 0.5) + ) + plt.gca().xaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "%.1fM" % (x / 1e6,)) + ) plt.grid() plt.gca().set_axisbelow(True) - plt.gca().yaxis.grid(alpha = 0.0) + plt.gca().yaxis.grid(alpha=0.0) plt.gca().invert_yaxis() plt.xlabel(figure["label"]) @@ -163,7 +263,7 @@ def execute(context): handles, labels = plt.gca().get_legend_handles_labels() handles = [handles[-2], handles[-1], handles[-3], handles[-4]] labels = [labels[-2], labels[-1], labels[-3], labels[-4]] - plt.legend(handles = handles, labels = labels, loc = "best") + plt.legend(handles=handles, labels=labels, loc="best") plt.tight_layout() plt.savefig("%s/%s.pdf" % (context.path(), figure["level"])) diff --git a/documentation/plots/sociodemographics/local.py b/documentation/plots/sociodemographics/local.py index 6d831c12..1e0e9584 100644 --- a/documentation/plots/sociodemographics/local.py +++ b/documentation/plots/sociodemographics/local.py @@ -9,15 +9,18 @@ SAMPLING_RATE = 0.05 + def configure(context): context.stage("analysis.reference.census.sociodemographics") context.stage( "analysis.synthesis.sociodemographics.spatial", - dict(sampling_rate = SAMPLING_RATE), alias = "data" + dict(sampling_rate=SAMPLING_RATE), + alias="data", ) -def filter_commune(marginals, commune_id, levels = ["person", "household"]): + +def filter_commune(marginals, commune_id, levels=["person", "household"]): result = {} for level in levels: @@ -26,7 +29,7 @@ def filter_commune(marginals, commune_id, levels = ["person", "household"]): for attributes, df_marginal in marginals[level].items(): if "commune_id" in attributes: f = df_marginal["commune_id"] == str(commune_id) - df_marginal = df_marginal[f].drop(columns = ["commune_id"]) + 
df_marginal = df_marginal[f].drop(columns=["commune_id"]) attributes = list(attributes) attributes.remove("commune_id") @@ -37,6 +40,7 @@ def filter_commune(marginals, commune_id, levels = ["person", "household"]): return result + def execute(context): plotting.setup() @@ -44,20 +48,36 @@ def execute(context): data = context.stage("data") cases = [ - dict(commune = 75113, title = "13th Arrondissement"), - dict(commune = 94028, title = "Alfortville"), + dict(commune=75113, title="13th Arrondissement"), + dict(commune=94028, title="Alfortville"), ] - plt.figure(figsize = plotting.WIDE_FIGSIZE) + plt.figure(figsize=plotting.WIDE_FIGSIZE) for case_index, case in enumerate(cases): case_census = filter_commune(census, case["commune"]) case_data = filter_commune(data, case["commune"]) - df_case = pd.concat([ - prepare_data(case_data, case_census, case_census, "household", ["household_size_class"], SAMPLING_RATE), - prepare_data(case_data, case_census, case_census, "person", ["age_class"], SAMPLING_RATE), - ]) + df_case = pd.concat( + [ + prepare_data( + case_data, + case_census, + case_census, + "household", + ["household_size_class"], + SAMPLING_RATE, + ), + prepare_data( + case_data, + case_census, + case_census, + "person", + ["age_class"], + SAMPLING_RATE, + ), + ] + ) add_labels(df_case) @@ -67,36 +87,60 @@ def execute(context): reference_values = df_case["reference"].values mean_values = df_case["mean"].values - plt.barh(locations, df_case["reference"].values, height = 0.4, label = "Census", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["census"]) - plt.barh(locations + 0.4, df_case["mean"].values, height = 0.4, label = "Synthetic", align = "edge", linewidth = 0.5, edgecolor = "white", color = plotting.COLORS["synthetic"]) - - for index, (min, max) in enumerate(zip(df_case["min"].values, df_case["max"].values)): + plt.barh( + locations, + df_case["reference"].values, + height=0.4, + label="Census", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["census"], + ) + plt.barh( + locations + 0.4, + df_case["mean"].values, + height=0.4, + label="Synthetic", + align="edge", + linewidth=0.5, + edgecolor="white", + color=plotting.COLORS["synthetic"], + ) + + for index, (min, max) in enumerate( + zip(df_case["min"].values, df_case["max"].values) + ): location = index + 0.4 + 0.2 - plt.plot([min, max], [location, location], "k", linewidth = 1, label = "Range") + plt.plot([min, max], [location, location], "k", linewidth=1, label="Range") plt.gca().yaxis.set_major_locator(tck.FixedLocator(locations + 0.4)) if case_index == 0: - plt.gca().yaxis.set_major_formatter(tck.FixedFormatter(df_case["label"].values)) + plt.gca().yaxis.set_major_formatter( + tck.FixedFormatter(df_case["label"].values) + ) else: plt.gca().yaxis.set_major_formatter(tck.FixedFormatter([""] * 100)) - plt.gca().xaxis.set_major_formatter(tck.FuncFormatter(lambda x,p: "%dk" % (x // 1000,))) + plt.gca().xaxis.set_major_formatter( + tck.FuncFormatter(lambda x, p: "%dk" % (x // 1000,)) + ) plt.grid() plt.gca().set_axisbelow(True) - plt.gca().yaxis.grid(alpha = 0.0) + plt.gca().yaxis.grid(alpha=0.0) plt.gca().invert_yaxis() plt.xlabel("Number of persons / households") plt.title(case["title"]) - #plt.ylim([len(locations) + 2.5, -0.5]) + # plt.ylim([len(locations) + 2.5, -0.5]) if case_index == 1: handles, labels = plt.gca().get_legend_handles_labels() handles = [handles[-2], handles[-1], handles[-3]] labels = [labels[-2], labels[-1], labels[-3]] - plt.legend(handles = handles, 
labels = labels, loc = (0.05, 0.32), framealpha = 1.0) + plt.legend(handles=handles, labels=labels, loc=(0.05, 0.32), framealpha=1.0) plt.tight_layout() plt.savefig("%s/comparison.pdf" % (context.path(),)) diff --git a/documentation/plots/sociodemographics/utils.py b/documentation/plots/sociodemographics/utils.py index 14bfcb45..849aacb6 100644 --- a/documentation/plots/sociodemographics/utils.py +++ b/documentation/plots/sociodemographics/utils.py @@ -1,15 +1,20 @@ def create_labels(df, marginals): - labels = df.apply(lambda x: "%s %s" % ( - marginals[x["marginal"]]["category_label"], - marginals[x["marginal"]]["label"] - ), axis = 1) + labels = df.apply( + lambda x: "%s %s" + % ( + marginals[x["marginal"]]["category_label"], + marginals[x["marginal"]]["label"], + ), + axis=1, + ) labels = labels.str.replace("Number of", "No.") labels = labels.str.replace("Socioprof. Cat.", "SC") return labels.values -def filter_marginals(df, marginal_level, marginals, blacklist = set()): + +def filter_marginals(df, marginal_level, marginals, blacklist=set()): df = df[df["marginal_level"] == marginal_level] df = df[df["marginal"].isin(marginals.keys())] df = df[~df["marginal"].isin(blacklist)] diff --git a/documentation/plots/theory/sampling_error.py b/documentation/plots/theory/sampling_error.py index 9dca3897..dc6ae222 100644 --- a/documentation/plots/theory/sampling_error.py +++ b/documentation/plots/theory/sampling_error.py @@ -4,6 +4,7 @@ import scipy.stats as stats import documentation.plotting as plotting + def get_count_distribution(ns, w, s): l, u = np.floor(w), np.ceil(w) p = w - l @@ -13,6 +14,7 @@ def get_count_distribution(ns, w, s): return p * Fu + (1 - p) * Fl + def get_error_probability(ws, s, q): probabilities = [] @@ -25,23 +27,25 @@ def get_error_probability(ws, s, q): return probabilities + def configure(context): pass + def execute(context): plotting.setup() q = 0.01 - plt.figure(figsize = plotting.WIDE_FIGSIZE) + plt.figure(figsize=plotting.WIDE_FIGSIZE) for s, color in zip([0.01, 0.1, 0.25], ["#000000", "#777777", "#cccccc"]): ws = np.linspace(0, 2000, 10000) probs = get_error_probability(ws, s, q) - plt.plot(ws, probs, ".", label = "s = %.2f" % s, color = color, markersize = 2) + plt.plot(ws, probs, ".", label="s = %.2f" % s, color=color, markersize=2) - plt.legend(loc = "best") + plt.legend(loc="best") plt.grid() plt.xlabel("Reference weight") plt.ylabel("Probability") diff --git a/documentation/plotting.py b/documentation/plotting.py index 7900d8de..a23b8102 100644 --- a/documentation/plotting.py +++ b/documentation/plotting.py @@ -10,27 +10,28 @@ DPI = 300 FONT_SIZE = 8 -COLORSET = palettable.colorbrewer.qualitative.Set2_4.mpl_colors -COLORSET5 = palettable.colorbrewer.qualitative.Set2_5.mpl_colors +COLORSET = palettable.colorbrewer.qualitative.Set2_4.mpl_colors +COLORSET5 = palettable.colorbrewer.qualitative.Set2_5.mpl_colors COLORS = { "census": COLORSET[2], "entd": COLORSET[0], "egt": COLORSET[1], - "synthetic": "#cccccc", #COLORSET[3] + "synthetic": "#cccccc", # COLORSET[3] } MODE_LABELS = dict( - car = "Car driver", - car_passenger = "Car passenger", - pt = "Public transport", - bike = "Bicycle", - walk = "Walking" + car="Car driver", + car_passenger="Car passenger", + pt="Public transport", + bike="Bicycle", + walk="Walking", ) + def setup(): - plt.rc("font", family = "serif", size = FONT_SIZE) - plt.rc("figure", dpi = DPI, figsize = SHORT_FIGSIZE) - plt.rc("legend", fontsize = FONT_SIZE, loc = "best", fancybox = False) - plt.rc("grid", linewidth = 0.5) - plt.rc("patch", 
linewidth = 0.5) - plt.rc("mathtext", fontset = "cm") + plt.rc("font", family="serif", size=FONT_SIZE) + plt.rc("figure", dpi=DPI, figsize=SHORT_FIGSIZE) + plt.rc("legend", fontsize=FONT_SIZE, loc="best", fancybox=False) + plt.rc("grid", linewidth=0.5) + plt.rc("patch", linewidth=0.5) + plt.rc("mathtext", fontset="cm") diff --git a/documentation/shapes.py b/documentation/shapes.py index dd92ff63..a57d8ec5 100644 --- a/documentation/shapes.py +++ b/documentation/shapes.py @@ -4,28 +4,34 @@ import matplotlib.ticker as tck import palettable + def configure(context): context.stage("data.income.municipality") context.stage("data.spatial.municipalities") context.stage("data.bpe.cleaned") + def execute(context): df_communes = context.stage("data.spatial.municipalities") # Spatial income distribution df_income = context.stage("data.income.municipality") - df_income = df_income[(df_income["attribute"] == "all") & (df_income["value"] == "all")] - df_income = pd.merge(df_communes, df_income, how = "inner", on = "commune_id") + df_income = df_income[ + (df_income["attribute"] == "all") & (df_income["value"] == "all") + ] + df_income = pd.merge(df_communes, df_income, how="inner", on="commune_id") df_income["is_imputed"] = df_income["is_imputed"].astype(int) df_income["commune_id"] = df_income["commune_id"].astype(str) df_income["departement_id"] = df_income["departement_id"].astype(str) - df_income.to_file("%s/income.geojson" % context.cache_path, driver = "GeoJSON") + df_income.to_file("%s/income.geojson" % context.cache_path, driver="GeoJSON") # Enterprises - df_bpe = context.stage("data.bpe.cleaned")[["enterprise_id", "geometry", "imputed", "commune_id"]].copy() + df_bpe = context.stage("data.bpe.cleaned")[ + ["enterprise_id", "geometry", "imputed", "commune_id"] + ].copy() df_bpe["imputed"] = df_bpe["imputed"].astype(int) df_bpe["commune_id"] = df_bpe["commune_id"].astype(str) - df_bpe = df_bpe.iloc[np.random.choice(len(df_bpe), size = 10000, replace = False)] + df_bpe = df_bpe.iloc[np.random.choice(len(df_bpe), size=10000, replace=False)] df_bpe.to_file("%s/bpe.shp" % context.cache_path) return context.cache_path diff --git a/matsim/output.py b/matsim/output.py index 2f616403..520a9a19 100644 --- a/matsim/output.py +++ b/matsim/output.py @@ -1,10 +1,11 @@ import shutil + def configure(context): if context.config("run_matsim", True): # allow disabling performing one run of the simulation context.stage("matsim.simulation.run") - + context.stage("matsim.simulation.prepare") context.stage("matsim.runtime.eqasim") @@ -14,14 +15,14 @@ def configure(context): need_osm = context.config("export_detailed_network", False) if need_osm: context.stage("matsim.scenario.supply.osm") - context.stage("documentation.meta_output") + def execute(context): config_path = "%s/%s" % ( context.path("matsim.simulation.prepare"), - context.stage("matsim.simulation.prepare") + context.stage("matsim.simulation.prepare"), ) file_names = [ @@ -32,23 +33,33 @@ def execute(context): "%snetwork.xml.gz" % context.config("output_prefix"), "%stransit_schedule.xml.gz" % context.config("output_prefix"), "%stransit_vehicles.xml.gz" % context.config("output_prefix"), - "%sconfig.xml" % context.config("output_prefix") + "%sconfig.xml" % context.config("output_prefix"), ] for name in file_names: shutil.copy( "%s/%s" % (context.path("matsim.simulation.prepare"), name), - "%s/%s" % (context.config("output_path"), name) + "%s/%s" % (context.config("output_path"), name), ) if context.config("export_detailed_network"): shutil.copy( - "%s/%s" % 
(context.path("matsim.scenario.supply.osm"), "detailed_network.csv"), - "%s/%s" % (context.config("output_path"), "%sdetailed_network.csv" % context.config("output_prefix")) + "%s/%s" + % (context.path("matsim.scenario.supply.osm"), "detailed_network.csv"), + "%s/%s" + % ( + context.config("output_path"), + "%sdetailed_network.csv" % context.config("output_prefix"), + ), ) - + if context.config("write_jar"): shutil.copy( - "%s/%s" % (context.path("matsim.runtime.eqasim"), context.stage("matsim.runtime.eqasim")), - "%s/%srun.jar" % (context.config("output_path"), context.config("output_prefix")) + "%s/%s" + % ( + context.path("matsim.runtime.eqasim"), + context.stage("matsim.runtime.eqasim"), + ), + "%s/%srun.jar" + % (context.config("output_path"), context.config("output_prefix")), ) diff --git a/matsim/runtime/eqasim.py b/matsim/runtime/eqasim.py index 72e4846e..6a315f67 100644 --- a/matsim/runtime/eqasim.py +++ b/matsim/runtime/eqasim.py @@ -9,6 +9,7 @@ DEFAULT_EQASIM_BRANCH = "develop" DEFAULT_EQASIM_COMMIT = "ece4932" + def configure(context): context.stage("matsim.runtime.git") context.stage("matsim.runtime.java") @@ -20,6 +21,7 @@ def configure(context): context.config("eqasim_repository", "https://github.com/eqasim-org/eqasim-java.git") context.config("eqasim_path", "") + def run(context, command, arguments): version = context.config("eqasim_version") @@ -27,10 +29,12 @@ def run(context, command, arguments): context.stage("matsim.runtime.eqasim") jar_path = "%s/eqasim-java/ile_de_france/target/ile_de_france-%s.jar" % ( - context.path("matsim.runtime.eqasim"), version + context.path("matsim.runtime.eqasim"), + version, ) java.run(context, command, arguments, jar_path) + def execute(context): version = context.config("eqasim_version") @@ -39,33 +43,61 @@ def execute(context): # Clone repository and checkout version branch = context.config("eqasim_branch") - git.run(context, [ - "clone", "--single-branch", "-b", branch, - context.config("eqasim_repository"), "eqasim-java" - ]) + git.run( + context, + [ + "clone", + "--single-branch", + "-b", + branch, + context.config("eqasim_repository"), + "eqasim-java", + ], + ) # Select the configured commit or tag commit = context.config("eqasim_commit") - git.run(context, [ - "checkout", commit - ], cwd = "{}/eqasim-java".format(context.path())) + git.run( + context, ["checkout", commit], cwd="{}/eqasim-java".format(context.path()) + ) # Build eqasim - maven.run(context, ["-Pstandalone", "--projects", "ile_de_france", "--also-make", "package", "-DskipTests=true"], cwd = "%s/eqasim-java" % context.path()) - - if not os.path.exists("{}/eqasim-java/ile_de_france/target/ile_de_france-{}.jar".format(context.path(), version)): - raise RuntimeError("The JAR was not created correctly. Wrong eqasim_version specified?") + maven.run( + context, + [ + "-Pstandalone", + "--projects", + "ile_de_france", + "--also-make", + "package", + "-DskipTests=true", + ], + cwd="%s/eqasim-java" % context.path(), + ) + + if not os.path.exists( + "{}/eqasim-java/ile_de_france/target/ile_de_france-{}.jar".format( + context.path(), version + ) + ): + raise RuntimeError( + "The JAR was not created correctly. Wrong eqasim_version specified?" + ) # Special case: We provide the jar directly. This is mainly used for # creating input to unit tests of the eqasim-java package. 
else: os.makedirs("%s/eqasim-java/ile_de_france/target" % context.path()) - shutil.copy(context.config("eqasim_path"), - "%s/eqasim-java/ile_de_france/target/ile_de_france-%s.jar" % (context.path(), version)) + shutil.copy( + context.config("eqasim_path"), + "%s/eqasim-java/ile_de_france/target/ile_de_france-%s.jar" + % (context.path(), version), + ) return "eqasim-java/ile_de_france/target/ile_de_france-%s.jar" % version + def validate(context): path = context.config("eqasim_path") @@ -74,12 +106,14 @@ def validate(context): if not os.path.exists(path): raise RuntimeError("Cannot find eqasim at: %s" % path) - + if context.config("eqasim_tag") is None: if context.config("eqasim_commit") is None: raise RuntimeError("Either eqasim commit or tag must be defined") - - if (context.config("eqasim_tag") is None) == (context.config("eqasim_commit") is None): + + if (context.config("eqasim_tag") is None) == ( + context.config("eqasim_commit") is None + ): raise RuntimeError("Eqasim commit and tag must not be defined at the same time") return os.path.getmtime(path) diff --git a/matsim/runtime/git.py b/matsim/runtime/git.py index 08585b00..27e5551f 100644 --- a/matsim/runtime/git.py +++ b/matsim/runtime/git.py @@ -1,12 +1,14 @@ import subprocess as sp import shutil + def configure(context): context.config("git_binary", "git") -def run(context, arguments = [], cwd = None, catch_output = False): + +def run(context, arguments=[], cwd=None, catch_output=False): """ - This function calls git. + This function calls git. """ # Make sure there is a dependency context.stage("matsim.runtime.git") @@ -14,28 +16,29 @@ def run(context, arguments = [], cwd = None, catch_output = False): if cwd is None: cwd = context.path() - command_line = [ - shutil.which(context.config("git_binary")) - ] + arguments + command_line = [shutil.which(context.config("git_binary"))] + arguments if catch_output: - return sp.check_output(command_line, cwd = cwd).decode("utf-8").strip() + return sp.check_output(command_line, cwd=cwd).decode("utf-8").strip() else: - return_code = sp.check_call(command_line, cwd = cwd) + return_code = sp.check_call(command_line, cwd=cwd) if not return_code == 0: raise RuntimeError("Git return code: %d" % return_code) + def validate(context): if shutil.which(context.config("git_binary")) in ["", None]: - raise RuntimeError("Cannot find git binary at: %s" % context.config("git_binary")) + raise RuntimeError( + "Cannot find git binary at: %s" % context.config("git_binary") + ) - if not b"2." in sp.check_output([ - shutil.which(context.config("git_binary")), - "--version" - ], stderr = sp.STDOUT): + if not b"2." in sp.check_output( + [shutil.which(context.config("git_binary")), "--version"], stderr=sp.STDOUT + ): print("WARNING! Git of at least version 2.x.x is recommended!") + def execute(context): pass diff --git a/matsim/runtime/java.py b/matsim/runtime/java.py index 1cfe5702..94c0f1a5 100644 --- a/matsim/runtime/java.py +++ b/matsim/runtime/java.py @@ -1,16 +1,27 @@ import subprocess as sp import os, shutil + def configure(context): context.config("java_binary", "java") context.config("java_memory", "50G") -def run(context, entry_point, arguments = [], class_path = None, vm_arguments = [], cwd = None, memory = None, mode = "raise"): + +def run( + context, + entry_point, + arguments=[], + class_path=None, + vm_arguments=[], + cwd=None, + memory=None, + mode="raise", +): """ - This function calls java code. 
There are three modes: - - return_code: Returns the return code of the Java call - - output: Returns the output of the Java call - - raise (default): Raises an exception if the return code is not zero + This function calls java code. There are three modes: + - return_code: Returns the return code of the Java call + - output: Returns the output of the Java call + - raise (default): Raises an exception if the return code is not zero """ # Make sure there is a dependency context.stage("matsim.runtime.java") @@ -25,7 +36,7 @@ def run(context, entry_point, arguments = [], class_path = None, vm_arguments = vm_arguments = [ "-Xmx" + memory, "-Djava.io.tmpdir=%s" % temp_path, - "-Dmatsim.useLocalDtds=true" + "-Dmatsim.useLocalDtds=true", ] + vm_arguments # Prepare classpath @@ -37,38 +48,43 @@ def run(context, entry_point, arguments = [], class_path = None, vm_arguments = cwd = context.path() # Prepare command line - command_line = [ - shutil.which(context.config("java_binary")), - "-cp", class_path - ] + vm_arguments + [ - entry_point - ] + arguments + command_line = ( + [shutil.which(context.config("java_binary")), "-cp", class_path] + + vm_arguments + + [entry_point] + + arguments + ) command_line = list(map(str, command_line)) print("Executing java:", " ".join(command_line)) if mode == "raise" or mode == "return_code": - return_code = sp.check_call(command_line, cwd = cwd) + return_code = sp.check_call(command_line, cwd=cwd) if not return_code == 0: raise RuntimeError("Java return code: %d" % return_code) return return_code elif mode == "output": - return sp.check_output(command_line, cwd = cwd) + return sp.check_output(command_line, cwd=cwd) else: - raise RuntimeError("Mode is expected to be one of 'raise', 'return_code' or 'output'") + raise RuntimeError( + "Mode is expected to be one of 'raise', 'return_code' or 'output'" + ) + def validate(context): if shutil.which(context.config("java_binary")) in ["", None]: - raise RuntimeError("Cannot find Java binary at: %s" % context.config("java_binary")) + raise RuntimeError( + "Cannot find Java binary at: %s" % context.config("java_binary") + ) - if not b"11" in sp.check_output([ - shutil.which(context.config("java_binary")), - "-version" - ], stderr = sp.STDOUT): + if not b"11" in sp.check_output( + [shutil.which(context.config("java_binary")), "-version"], stderr=sp.STDOUT + ): print("WARNING! A Java JDK of at least version 11 is recommended.") + def execute(context): pass diff --git a/matsim/runtime/maven.py b/matsim/runtime/maven.py index a4832617..587dec63 100644 --- a/matsim/runtime/maven.py +++ b/matsim/runtime/maven.py @@ -1,13 +1,15 @@ import subprocess as sp import os, shutil + def configure(context): context.config("maven_binary", "mvn") context.config("maven_skip_tests", False) -def run(context, arguments = [], cwd = None): + +def run(context, arguments=[], cwd=None): """ - This function calls Maven. + This function calls Maven. 
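+
+    A typical invocation (mirroring the pt2matsim build step further below) is
+    maven.run(context, ["package", "-DskipTests=true"], cwd=...). When the
+    maven_skip_tests option is enabled, -DskipTests=true is additionally appended
+    to every call.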
""" # Make sure there is a dependency context.stage("matsim.runtime.maven") @@ -20,31 +22,32 @@ def run(context, arguments = [], cwd = None): if not os.path.exists(temp_path): os.mkdir(temp_path) - vm_arguments = [ - "-Djava.io.tmpdir=%s" % temp_path - ] + vm_arguments = ["-Djava.io.tmpdir=%s" % temp_path] if context.config("maven_skip_tests"): vm_arguments.append("-DskipTests=true") - command_line = [ - shutil.which(context.config("maven_binary")) - ] + vm_arguments + arguments + command_line = ( + [shutil.which(context.config("maven_binary"))] + vm_arguments + arguments + ) - return_code = sp.check_call(command_line, cwd = cwd) + return_code = sp.check_call(command_line, cwd=cwd) if not return_code == 0: raise RuntimeError("Maven return code: %d" % return_code) + def validate(context): if shutil.which(context.config("maven_binary")) in ["", None]: - raise RuntimeError("Cannot find Maven binary at: %s" % context.config("maven_binary")) + raise RuntimeError( + "Cannot find Maven binary at: %s" % context.config("maven_binary") + ) - if not b"3." in sp.check_output([ - shutil.which(context.config("maven_binary")), - "-version" - ], stderr = sp.STDOUT): + if not b"3." in sp.check_output( + [shutil.which(context.config("maven_binary")), "-version"], stderr=sp.STDOUT + ): print("WARNING! Maven of at least version 3.x.x is recommended!") + def execute(context): pass diff --git a/matsim/runtime/pt2matsim.py b/matsim/runtime/pt2matsim.py index ef837fd6..62573a33 100644 --- a/matsim/runtime/pt2matsim.py +++ b/matsim/runtime/pt2matsim.py @@ -5,6 +5,7 @@ import matsim.runtime.java as java import matsim.runtime.maven as maven + def configure(context): context.stage("matsim.runtime.git") context.stage("matsim.runtime.java") @@ -13,6 +14,7 @@ def configure(context): context.config("pt2matsim_version", "22.3") context.config("pt2matsim_branch", "v22.3") + def run(context, command, arguments, vm_arguments=[]): version = context.config("pt2matsim_version") @@ -20,29 +22,43 @@ def run(context, command, arguments, vm_arguments=[]): context.stage("matsim.runtime.pt2matsim") jar_path = "%s/pt2matsim/target/pt2matsim-%s-shaded.jar" % ( - context.path("matsim.runtime.pt2matsim"), version + context.path("matsim.runtime.pt2matsim"), + version, ) java.run(context, command, arguments, jar_path, vm_arguments) + def execute(context): version = context.config("pt2matsim_version") branch = context.config("pt2matsim_branch") # Clone repository and checkout version - git.run(context, [ - "clone", "https://github.com/matsim-org/pt2matsim.git", - "--branch", branch, - "--single-branch", "pt2matsim", - "--depth", "1" - ]) + git.run( + context, + [ + "clone", + "https://github.com/matsim-org/pt2matsim.git", + "--branch", + branch, + "--single-branch", + "pt2matsim", + "--depth", + "1", + ], + ) # Build pt2matsim - maven.run(context, ["package", "-DskipTests=true"], cwd = "%s/pt2matsim" % context.path()) + maven.run( + context, ["package", "-DskipTests=true"], cwd="%s/pt2matsim" % context.path() + ) jar_path = "%s/pt2matsim/target/pt2matsim-%s-shaded.jar" % (context.path(), version) # Test pt2matsim - java.run(context, "org.matsim.pt2matsim.run.CreateDefaultOsmConfig", [ - "test_config.xml" - ], jar_path) + java.run( + context, + "org.matsim.pt2matsim.run.CreateDefaultOsmConfig", + ["test_config.xml"], + jar_path, + ) assert os.path.exists("%s/test_config.xml" % context.path()) diff --git a/matsim/scenario/facilities.py b/matsim/scenario/facilities.py index 4bc223a9..f721fa8c 100644 --- a/matsim/scenario/facilities.py +++ 
b/matsim/scenario/facilities.py @@ -5,28 +5,31 @@ import matsim.writers as writers + def configure(context): context.stage("synthesis.locations.secondary") context.stage("synthesis.population.spatial.home.locations") context.stage("synthesis.population.spatial.primary.locations") -HOME_FIELDS = [ - "household_id", "geometry" -] -PRIMARY_FIELDS = [ - "location_id", "geometry", "is_work" -] +HOME_FIELDS = ["household_id", "geometry"] + +PRIMARY_FIELDS = ["location_id", "geometry", "is_work"] SECONDARY_FIELDS = [ - "location_id", "geometry", "offers_leisure", "offers_shop", "offers_other" + "location_id", + "geometry", + "offers_leisure", + "offers_shop", + "offers_other", ] + def execute(context): output_path = "%s/facilities.xml.gz" % context.path() - with gzip.open(output_path, 'wb+') as writer: - with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer: + with gzip.open(output_path, "wb+") as writer: + with io.BufferedWriter(writer, buffer_size=2 * 1024**3) as writer: writer = writers.FacilitiesWriter(writer) writer.start_facilities() @@ -35,13 +38,16 @@ def execute(context): df_homes = context.stage("synthesis.population.spatial.home.locations") df_homes = df_homes[HOME_FIELDS] - with context.progress(total = len(df_homes), label = "Writing home facilities ...") as progress: - for item in df_homes.itertuples(index = False): + with context.progress( + total=len(df_homes), label="Writing home facilities ..." + ) as progress: + for item in df_homes.itertuples(index=False): geometry = item[HOME_FIELDS.index("geometry")] writer.start_facility( "home_%s" % item[HOME_FIELDS.index("household_id")], - geometry.x, geometry.y + geometry.x, + geometry.y, ) writer.add_activity("home") @@ -49,7 +55,9 @@ def execute(context): # Write primary - df_work, df_education = context.stage("synthesis.population.spatial.primary.locations") + df_work, df_education = context.stage( + "synthesis.population.spatial.primary.locations" + ) df_work = df_work.drop_duplicates("location_id").copy() df_education = df_education.drop_duplicates("location_id").copy() @@ -60,16 +68,21 @@ def execute(context): df_locations = pd.concat([df_work, df_education]) df_locations = df_locations[PRIMARY_FIELDS] - with context.progress(total = len(df_locations), label = "Writing primary facilities ...") as progress: - for item in df_locations.itertuples(index = False): + with context.progress( + total=len(df_locations), label="Writing primary facilities ..." + ) as progress: + for item in df_locations.itertuples(index=False): geometry = item[PRIMARY_FIELDS.index("geometry")] writer.start_facility( str(item[PRIMARY_FIELDS.index("location_id")]), - geometry.x, geometry.y + geometry.x, + geometry.y, ) - writer.add_activity("work" if item[PRIMARY_FIELDS.index("is_work")] else "education") + writer.add_activity( + "work" if item[PRIMARY_FIELDS.index("is_work")] else "education" + ) writer.end_facility() # Write secondary @@ -77,13 +90,16 @@ def execute(context): df_locations = context.stage("synthesis.locations.secondary") df_locations = df_locations[SECONDARY_FIELDS] - with context.progress(total = len(df_locations), label = "Writing secondary facilities ...") as progress: - for item in df_locations.itertuples(index = False): + with context.progress( + total=len(df_locations), label="Writing secondary facilities ..." 
+ ) as progress: + for item in df_locations.itertuples(index=False): geometry = item[SECONDARY_FIELDS.index("geometry")] writer.start_facility( item[SECONDARY_FIELDS.index("location_id")], - geometry.x, geometry.y + geometry.x, + geometry.y, ) for purpose in ("shop", "leisure", "other"): diff --git a/matsim/scenario/households.py b/matsim/scenario/households.py index 2f47cfee..0d33bd48 100644 --- a/matsim/scenario/households.py +++ b/matsim/scenario/households.py @@ -5,42 +5,69 @@ import matsim.writers as writers + def configure(context): context.stage("synthesis.population.enriched") -FIELDS = ["household_id", "person_id", "household_income", "car_availability", "bike_availability", "census_household_id"] + +FIELDS = [ + "household_id", + "person_id", + "household_income", + "car_availability", + "bike_availability", + "census_household_id", +] + def add_household(writer, household, member_ids): writer.start_household(household[FIELDS.index("household_id")]) writer.add_members(member_ids) writer.start_attributes() - writer.add_attribute("carAvailability", "java.lang.String", household[FIELDS.index("car_availability")]) - writer.add_attribute("bikeAvailability", "java.lang.String", household[FIELDS.index("bike_availability")]) - writer.add_attribute("household_income", "java.lang.Double", household[FIELDS.index("household_income")]) - writer.add_attribute("censusId", "java.lang.Long", household[FIELDS.index("census_household_id")]) + writer.add_attribute( + "carAvailability", + "java.lang.String", + household[FIELDS.index("car_availability")], + ) + writer.add_attribute( + "bikeAvailability", + "java.lang.String", + household[FIELDS.index("bike_availability")], + ) + writer.add_attribute( + "household_income", + "java.lang.Double", + household[FIELDS.index("household_income")], + ) + writer.add_attribute( + "censusId", "java.lang.Long", household[FIELDS.index("census_household_id")] + ) writer.end_attributes() writer.end_household() + def execute(context): output_path = "%s/households.xml.gz" % context.path() df_persons = context.stage("synthesis.population.enriched") - df_persons = df_persons.sort_values(by = ["household_id", "person_id"]) + df_persons = df_persons.sort_values(by=["household_id", "person_id"]) df_persons = df_persons[FIELDS] current_members = [] current_household_id = None current_household = None - with gzip.open(output_path, 'wb+') as writer: - with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer: + with gzip.open(output_path, "wb+") as writer: + with io.BufferedWriter(writer, buffer_size=2 * 1024**3) as writer: writer = writers.HouseholdsWriter(writer) writer.start_households() - with context.progress(total = len(df_persons), label = "Writing households ...") as progress: - for item in df_persons.itertuples(index = False): + with context.progress( + total=len(df_persons), label="Writing households ..." 
+ ) as progress: + for item in df_persons.itertuples(index=False): if current_household_id != item[FIELDS.index("household_id")]: if not current_household_id is None: add_household(writer, current_household, current_members) diff --git a/matsim/scenario/population.py b/matsim/scenario/population.py index 2fc0fa4d..2f7e52eb 100644 --- a/matsim/scenario/population.py +++ b/matsim/scenario/population.py @@ -7,6 +7,7 @@ import matsim.writers as writers from matsim.writers import backlog_iterator + def configure(context): context.stage("synthesis.population.enriched") @@ -16,59 +17,125 @@ def configure(context): context.stage("synthesis.population.trips") context.stage("synthesis.vehicles.vehicles") + PERSON_FIELDS = [ - "person_id", "household_income", "car_availability", "bike_availability", - "census_household_id", "census_person_id", "household_id", - "has_license", "has_pt_subscription", "is_passenger", - "hts_id", "hts_household_id", - "age", "employed", "sex" + "person_id", + "household_income", + "car_availability", + "bike_availability", + "census_household_id", + "census_person_id", + "household_id", + "has_license", + "has_pt_subscription", + "is_passenger", + "hts_id", + "hts_household_id", + "age", + "employed", + "sex", ] ACTIVITY_FIELDS = [ - "person_id", "start_time", "end_time", "purpose", "geometry", "location_id" + "person_id", + "start_time", + "end_time", + "purpose", + "geometry", + "location_id", ] -TRIP_FIELDS = [ - "person_id", "mode", "departure_time", "travel_time" -] +TRIP_FIELDS = ["person_id", "mode", "departure_time", "travel_time"] + +VEHICLE_FIELDS = ["owner_id", "vehicle_id", "mode"] -VEHICLE_FIELDS = [ - "owner_id", "vehicle_id", "mode" -] def add_person(writer, person, activities, trips, vehicles): writer.start_person(person[PERSON_FIELDS.index("person_id")]) writer.start_attributes() - writer.add_attribute("householdId", "java.lang.Integer", person[PERSON_FIELDS.index("household_id")]) - writer.add_attribute("householdIncome", "java.lang.Double", person[PERSON_FIELDS.index("household_income")]) - - writer.add_attribute("carAvailability", "java.lang.String", person[PERSON_FIELDS.index("car_availability")]) - writer.add_attribute("bikeAvailability", "java.lang.String", person[PERSON_FIELDS.index("bike_availability")]) - - writer.add_attribute("censusHouseholdId", "java.lang.Long", person[PERSON_FIELDS.index("census_household_id")]) - writer.add_attribute("censusPersonId", "java.lang.Long", person[PERSON_FIELDS.index("census_person_id")]) - - writer.add_attribute("htsHouseholdId", "java.lang.Long", person[PERSON_FIELDS.index("hts_household_id")]) - writer.add_attribute("htsPersonId", "java.lang.Long", person[PERSON_FIELDS.index("hts_id")]) - - writer.add_attribute("hasPtSubscription", "java.lang.Boolean", person[PERSON_FIELDS.index("has_pt_subscription")]) - writer.add_attribute("hasLicense", "java.lang.String", writer.yes_no(person[PERSON_FIELDS.index("has_license")])) - - writer.add_attribute("isPassenger", "java.lang.Boolean", person[PERSON_FIELDS.index("is_passenger")]) + writer.add_attribute( + "householdId", "java.lang.Integer", person[PERSON_FIELDS.index("household_id")] + ) + writer.add_attribute( + "householdIncome", + "java.lang.Double", + person[PERSON_FIELDS.index("household_income")], + ) + + writer.add_attribute( + "carAvailability", + "java.lang.String", + person[PERSON_FIELDS.index("car_availability")], + ) + writer.add_attribute( + "bikeAvailability", + "java.lang.String", + person[PERSON_FIELDS.index("bike_availability")], + ) + + 
writer.add_attribute( + "censusHouseholdId", + "java.lang.Long", + person[PERSON_FIELDS.index("census_household_id")], + ) + writer.add_attribute( + "censusPersonId", + "java.lang.Long", + person[PERSON_FIELDS.index("census_person_id")], + ) + + writer.add_attribute( + "htsHouseholdId", + "java.lang.Long", + person[PERSON_FIELDS.index("hts_household_id")], + ) + writer.add_attribute( + "htsPersonId", "java.lang.Long", person[PERSON_FIELDS.index("hts_id")] + ) + + writer.add_attribute( + "hasPtSubscription", + "java.lang.Boolean", + person[PERSON_FIELDS.index("has_pt_subscription")], + ) + writer.add_attribute( + "hasLicense", + "java.lang.String", + writer.yes_no(person[PERSON_FIELDS.index("has_license")]), + ) + + writer.add_attribute( + "isPassenger", "java.lang.Boolean", person[PERSON_FIELDS.index("is_passenger")] + ) writer.add_attribute("age", "java.lang.Integer", person[PERSON_FIELDS.index("age")]) - writer.add_attribute("employed", "java.lang.String", person[PERSON_FIELDS.index("employed")]) - writer.add_attribute("sex", "java.lang.String", person[PERSON_FIELDS.index("sex")][0]) - - writer.add_attribute("vehicles", "org.matsim.vehicles.PersonVehicles", "{{{content}}}".format(content = ",".join([ - "\"{mode}\":\"{id}\"".format(mode = v[VEHICLE_FIELDS.index("mode")], id = v[VEHICLE_FIELDS.index("vehicle_id")]) - for v in vehicles - ]))) + writer.add_attribute( + "employed", "java.lang.String", person[PERSON_FIELDS.index("employed")] + ) + writer.add_attribute( + "sex", "java.lang.String", person[PERSON_FIELDS.index("sex")][0] + ) + + writer.add_attribute( + "vehicles", + "org.matsim.vehicles.PersonVehicles", + "{{{content}}}".format( + content=",".join( + [ + '"{mode}":"{id}"'.format( + mode=v[VEHICLE_FIELDS.index("mode")], + id=v[VEHICLE_FIELDS.index("vehicle_id")], + ) + for v in vehicles + ] + ) + ), + ) writer.end_attributes() - writer.start_plan(selected = True) + writer.start_plan(selected=True) for activity, trip in itertools.zip_longest(activities, trips): start_time = activity[ACTIVITY_FIELDS.index("start_time")] @@ -80,58 +147,71 @@ def add_person(writer, person, activities, trips, vehicles): location_id = "home_%s" % person[PERSON_FIELDS.index("household_id")] location = writer.location( - geometry.x, geometry.y, - None if location_id == -1 else location_id + geometry.x, geometry.y, None if location_id == -1 else location_id ) writer.add_activity( - type = activity[ACTIVITY_FIELDS.index("purpose")], - location = location, - start_time = None if np.isnan(start_time) else start_time, - end_time = None if np.isnan(end_time) else end_time + type=activity[ACTIVITY_FIELDS.index("purpose")], + location=location, + start_time=None if np.isnan(start_time) else start_time, + end_time=None if np.isnan(end_time) else end_time, ) if not trip is None: writer.add_leg( - mode = trip[TRIP_FIELDS.index("mode")], - departure_time = trip[TRIP_FIELDS.index("departure_time")], - travel_time = trip[TRIP_FIELDS.index("travel_time")] + mode=trip[TRIP_FIELDS.index("mode")], + departure_time=trip[TRIP_FIELDS.index("departure_time")], + travel_time=trip[TRIP_FIELDS.index("travel_time")], ) writer.end_plan() writer.end_person() + def execute(context): output_path = "%s/population.xml.gz" % context.path() df_persons = context.stage("synthesis.population.enriched") - df_persons = df_persons.sort_values(by = ["household_id", "person_id"]) + df_persons = df_persons.sort_values(by=["household_id", "person_id"]) df_persons = df_persons[PERSON_FIELDS] - df_activities = 
context.stage("synthesis.population.activities").sort_values(by = ["person_id", "activity_index"]) - df_locations = context.stage("synthesis.population.spatial.locations")[[ - "person_id", "activity_index", "geometry", "location_id"]].sort_values(by = ["person_id", "activity_index"]) + df_activities = context.stage("synthesis.population.activities").sort_values( + by=["person_id", "activity_index"] + ) + df_locations = context.stage("synthesis.population.spatial.locations")[ + ["person_id", "activity_index", "geometry", "location_id"] + ].sort_values(by=["person_id", "activity_index"]) - df_activities = pd.merge(df_activities, df_locations, how = "left", on = ["person_id", "activity_index"]) - #df_activities["location_id"] = df_activities["location_id"].fillna(-1).astype(int) + df_activities = pd.merge( + df_activities, df_locations, how="left", on=["person_id", "activity_index"] + ) + # df_activities["location_id"] = df_activities["location_id"].fillna(-1).astype(int) df_trips = context.stage("synthesis.population.trips") df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"] df_vehicles = context.stage("synthesis.vehicles.vehicles")[1] - df_vehicles = df_vehicles.sort_values(by = ["owner_id"]) + df_vehicles = df_vehicles.sort_values(by=["owner_id"]) - with gzip.open(output_path, 'wb+') as writer: - with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer: + with gzip.open(output_path, "wb+") as writer: + with io.BufferedWriter(writer, buffer_size=2 * 1024**3) as writer: writer = writers.PopulationWriter(writer) writer.start_population() - activity_iterator = backlog_iterator(iter(df_activities[ACTIVITY_FIELDS].itertuples(index = False))) - trip_iterator = backlog_iterator(iter(df_trips[TRIP_FIELDS].itertuples(index = False))) - vehicle_iterator = backlog_iterator(iter(df_vehicles[VEHICLE_FIELDS].itertuples(index = False))) + activity_iterator = backlog_iterator( + iter(df_activities[ACTIVITY_FIELDS].itertuples(index=False)) + ) + trip_iterator = backlog_iterator( + iter(df_trips[TRIP_FIELDS].itertuples(index=False)) + ) + vehicle_iterator = backlog_iterator( + iter(df_vehicles[VEHICLE_FIELDS].itertuples(index=False)) + ) - with context.progress(total = len(df_persons), label = "Writing population ...") as progress: - for person in df_persons.itertuples(index = False): + with context.progress( + total=len(df_persons), label="Writing population ..." 
+ ) as progress: + for person in df_persons.itertuples(index=False): person_id = person[PERSON_FIELDS.index("person_id")] activities = [] @@ -142,7 +222,10 @@ def execute(context): while activity_iterator.has_next(): activity = activity_iterator.next() - if not activity[ACTIVITY_FIELDS.index("person_id")] == person_id: + if ( + not activity[ACTIVITY_FIELDS.index("person_id")] + == person_id + ): activity_iterator.previous() break else: diff --git a/matsim/scenario/supply/gtfs.py b/matsim/scenario/supply/gtfs.py index 0635cc0f..83991cdb 100644 --- a/matsim/scenario/supply/gtfs.py +++ b/matsim/scenario/supply/gtfs.py @@ -2,6 +2,7 @@ import matsim.runtime.pt2matsim as pt2matsim + def configure(context): context.stage("matsim.runtime.java") context.stage("matsim.runtime.pt2matsim") @@ -10,21 +11,26 @@ def configure(context): context.config("gtfs_date", "dayWithMostServices") + def execute(context): gtfs_path = "%s/output" % context.path("data.gtfs.cleaned") crs = context.stage("synthesis.population.spatial.home.locations").crs - pt2matsim.run(context, "org.matsim.pt2matsim.run.Gtfs2TransitSchedule", [ - gtfs_path, - context.config("gtfs_date"), crs, - "%s/transit_schedule.xml.gz" % context.path(), - "%s/transit_vehicles.xml.gz" % context.path() - ]) + pt2matsim.run( + context, + "org.matsim.pt2matsim.run.Gtfs2TransitSchedule", + [ + gtfs_path, + context.config("gtfs_date"), + crs, + "%s/transit_schedule.xml.gz" % context.path(), + "%s/transit_vehicles.xml.gz" % context.path(), + ], + ) - assert(os.path.exists("%s/transit_schedule.xml.gz" % context.path())) - assert(os.path.exists("%s/transit_vehicles.xml.gz" % context.path())) + assert os.path.exists("%s/transit_schedule.xml.gz" % context.path()) + assert os.path.exists("%s/transit_vehicles.xml.gz" % context.path()) return dict( - schedule_path = "transit_schedule.xml.gz", - vehicles_path = "transit_vehicles.xml.gz" + schedule_path="transit_schedule.xml.gz", vehicles_path="transit_vehicles.xml.gz" ) diff --git a/matsim/scenario/supply/osm.py b/matsim/scenario/supply/osm.py index f723104e..f9ea0485 100644 --- a/matsim/scenario/supply/osm.py +++ b/matsim/scenario/supply/osm.py @@ -2,6 +2,7 @@ import matsim.runtime.pt2matsim as pt2matsim + def configure(context): context.stage("matsim.runtime.java") context.stage("matsim.runtime.pt2matsim") @@ -10,12 +11,15 @@ def configure(context): context.config("export_detailed_network", False) + def execute(context): osm_path = "%s/output.osm.gz" % context.path("data.osm.cleaned") crs = context.stage("data.spatial.iris").crs - pt2matsim.run(context, "org.matsim.pt2matsim.run.CreateDefaultOsmConfig", - arguments=["config_template.xml"] + pt2matsim.run( + context, + "org.matsim.pt2matsim.run.CreateDefaultOsmConfig", + arguments=["config_template.xml"], ) with open("%s/config_template.xml" % context.path()) as f_read: @@ -23,17 +27,17 @@ def execute(context): content = content.replace( '', - '' % osm_path + '' % osm_path, ) content = content.replace( '', - ''.format(crs) + ''.format(crs), ) content = content.replace( '', - '' + '', ) if context.config("export_detailed_network"): @@ -43,22 +47,24 @@ def execute(context): ) content = content.replace( - '', + "", """ - """ + """, ) with open("%s/config.xml" % context.path(), "w+") as f_write: f_write.write(content) - pt2matsim.run(context, "org.matsim.pt2matsim.run.Osm2MultimodalNetwork", - arguments=["config.xml"] + pt2matsim.run( + context, + "org.matsim.pt2matsim.run.Osm2MultimodalNetwork", + arguments=["config.xml"], ) - 
assert(os.path.exists("%s/network.xml.gz" % context.path())) + assert os.path.exists("%s/network.xml.gz" % context.path()) return "network.xml.gz" diff --git a/matsim/scenario/supply/processed.py b/matsim/scenario/supply/processed.py index f75fc130..448df94f 100644 --- a/matsim/scenario/supply/processed.py +++ b/matsim/scenario/supply/processed.py @@ -2,6 +2,7 @@ import matsim.runtime.pt2matsim as pt2matsim + def configure(context): context.stage("matsim.runtime.java") context.stage("matsim.runtime.pt2matsim") @@ -12,68 +13,71 @@ def configure(context): context.config("data_path") context.config("processes") + def execute(context): # Prepare input paths network_path = "%s/%s" % ( context.path("matsim.scenario.supply.osm"), - context.stage("matsim.scenario.supply.osm") + context.stage("matsim.scenario.supply.osm"), ) schedule_path = "%s/%s" % ( context.path("matsim.scenario.supply.gtfs"), - context.stage("matsim.scenario.supply.gtfs")["schedule_path"] + context.stage("matsim.scenario.supply.gtfs")["schedule_path"], ) # Create and modify config file - pt2matsim.run(context, "org.matsim.pt2matsim.run.CreateDefaultPTMapperConfig", [ - "config_template.xml" - ]) + pt2matsim.run( + context, + "org.matsim.pt2matsim.run.CreateDefaultPTMapperConfig", + ["config_template.xml"], + ) with open("%s/config_template.xml" % context.path()) as f_read: content = f_read.read() content = content.replace( '', - '' % network_path + '' % network_path, ) content = content.replace( '', - '' % schedule_path + '' % schedule_path, ) content = content.replace( '', - '' % context.config("processes") + '' % context.config("processes"), ) content = content.replace( '', - '' + '', ) content = content.replace( '', - '' + '', ) content = content.replace( '', - '' + '', ) content = content.replace( '', - '' + '', ) with open("%s/config.xml" % context.path(), "w+") as f_write: f_write.write(content) # Run mapping process - pt2matsim.run(context, "org.matsim.pt2matsim.run.PublicTransitMapper", [ - "config.xml" - ]) + pt2matsim.run( + context, "org.matsim.pt2matsim.run.PublicTransitMapper", ["config.xml"] + ) - assert(os.path.exists("%s/network.xml.gz" % context.path())) - assert(os.path.exists("%s/schedule.xml.gz" % context.path())) + assert os.path.exists("%s/network.xml.gz" % context.path()) + assert os.path.exists("%s/schedule.xml.gz" % context.path()) return dict( - network_path = "network.xml.gz", - schedule_path = "schedule.xml.gz", - #plausibility_path = "allPlausibilityWarnings.xml.gz" + network_path="network.xml.gz", + schedule_path="schedule.xml.gz", + # plausibility_path = "allPlausibilityWarnings.xml.gz" ) diff --git a/matsim/scenario/vehicles.py b/matsim/scenario/vehicles.py index 63205fc3..9530bbdc 100644 --- a/matsim/scenario/vehicles.py +++ b/matsim/scenario/vehicles.py @@ -5,52 +5,59 @@ import matsim.writers as writers + def configure(context): context.stage("synthesis.vehicles.vehicles") + TYPE_FIELDS = ["type_id", "nb_seats", "length", "width", "pce", "mode"] VEHICLE_FIELDS = ["vehicle_id", "type_id", "critair", "technology", "age", "euro"] + def execute(context): output_path = "%s/vehicles.xml.gz" % context.path() df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.vehicles") - with gzip.open(output_path, 'wb+') as writer: - with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer: + with gzip.open(output_path, "wb+") as writer: + with io.BufferedWriter(writer, buffer_size=2 * 1024**3) as writer: writer = writers.VehiclesWriter(writer) writer.start_vehicles() - with 
context.progress(total = len(df_vehicle_types), label = "Writing vehicles types ...") as progress: + with context.progress( + total=len(df_vehicle_types), label="Writing vehicles types ..." + ) as progress: for type in df_vehicle_types.to_dict(orient="records"): writer.add_type( type["type_id"], length=type["length"], width=type["width"], - engine_attributes = { + engine_attributes={ "HbefaVehicleCategory": type["hbefa_cat"], "HbefaTechnology": type["hbefa_tech"], "HbefaSizeClass": type["hbefa_size"], - "HbefaEmissionsConcept": type["hbefa_emission"] - } + "HbefaEmissionsConcept": type["hbefa_emission"], + }, ) progress.update() - with context.progress(total = len(df_vehicles), label = "Writing vehicles ...") as progress: + with context.progress( + total=len(df_vehicles), label="Writing vehicles ..." + ) as progress: for vehicle in df_vehicles.to_dict(orient="records"): writer.add_vehicle( vehicle["vehicle_id"], vehicle["type_id"], - attributes = { + attributes={ "critair": vehicle["critair"], "technology": vehicle["technology"], "age": vehicle["age"], - "euro": vehicle["euro"] - } + "euro": vehicle["euro"], + }, ) progress.update() writer.end_vehicles() - return "vehicles.xml.gz" \ No newline at end of file + return "vehicles.xml.gz" diff --git a/matsim/simulation/prepare.py b/matsim/simulation/prepare.py index 7a73e6d8..39176e31 100644 --- a/matsim/simulation/prepare.py +++ b/matsim/simulation/prepare.py @@ -3,9 +3,10 @@ import matsim.runtime.eqasim as eqasim + def configure(context): context.config("mode_choice", False) - + context.stage("matsim.scenario.population") context.stage("matsim.scenario.households") context.stage("matsim.scenario.vehicles") @@ -26,148 +27,245 @@ def configure(context): context.config("output_prefix", "ile_de_france_") + def execute(context): # Prepare input files facilities_path = "%s/%s" % ( context.path("matsim.scenario.facilities"), - context.stage("matsim.scenario.facilities") + context.stage("matsim.scenario.facilities"), ) population_path = "%s/%s" % ( context.path("matsim.scenario.population"), - context.stage("matsim.scenario.population") + context.stage("matsim.scenario.population"), ) network_path = "%s/%s" % ( context.path("matsim.scenario.supply.processed"), - context.stage("matsim.scenario.supply.processed")["network_path"] + context.stage("matsim.scenario.supply.processed")["network_path"], ) - eqasim.run(context, "org.eqasim.core.scenario.preparation.RunPreparation", [ - "--input-facilities-path", facilities_path, - "--output-facilities-path", "%sfacilities.xml.gz" % context.config("output_prefix"), - "--input-population-path", population_path, - "--output-population-path", "prepared_population.xml.gz", - "--input-network-path", network_path, - "--output-network-path", "%snetwork.xml.gz" % context.config("output_prefix"), - "--threads", context.config("processes") - ]) + eqasim.run( + context, + "org.eqasim.core.scenario.preparation.RunPreparation", + [ + "--input-facilities-path", + facilities_path, + "--output-facilities-path", + "%sfacilities.xml.gz" % context.config("output_prefix"), + "--input-population-path", + population_path, + "--output-population-path", + "prepared_population.xml.gz", + "--input-network-path", + network_path, + "--output-network-path", + "%snetwork.xml.gz" % context.config("output_prefix"), + "--threads", + context.config("processes"), + ], + ) - assert os.path.exists("%s/%sfacilities.xml.gz" % (context.path(), context.config("output_prefix"))) + assert os.path.exists( + "%s/%sfacilities.xml.gz" % (context.path(), 
context.config("output_prefix")) + ) assert os.path.exists("%s/prepared_population.xml.gz" % context.path()) - assert os.path.exists("%s/%snetwork.xml.gz" % (context.path(), context.config("output_prefix"))) + assert os.path.exists( + "%s/%snetwork.xml.gz" % (context.path(), context.config("output_prefix")) + ) # Copy remaining input files households_path = "%s/%s" % ( context.path("matsim.scenario.households"), - context.stage("matsim.scenario.households") + context.stage("matsim.scenario.households"), + ) + shutil.copy( + households_path, + "%s/%shouseholds.xml.gz" + % (context.cache_path, context.config("output_prefix")), ) - shutil.copy(households_path, "%s/%shouseholds.xml.gz" % (context.cache_path, context.config("output_prefix"))) transit_schedule_path = "%s/%s" % ( context.path("matsim.scenario.supply.processed"), - context.stage("matsim.scenario.supply.processed")["schedule_path"] + context.stage("matsim.scenario.supply.processed")["schedule_path"], + ) + shutil.copy( + transit_schedule_path, + "%s/%stransit_schedule.xml.gz" + % (context.cache_path, context.config("output_prefix")), ) - shutil.copy(transit_schedule_path, "%s/%stransit_schedule.xml.gz" % (context.cache_path, context.config("output_prefix"))) transit_vehicles_path = "%s/%s" % ( context.path("matsim.scenario.supply.gtfs"), - context.stage("matsim.scenario.supply.gtfs")["vehicles_path"] + context.stage("matsim.scenario.supply.gtfs")["vehicles_path"], + ) + shutil.copy( + transit_vehicles_path, + "%s/%stransit_vehicles.xml.gz" + % (context.cache_path, context.config("output_prefix")), ) - shutil.copy(transit_vehicles_path, "%s/%stransit_vehicles.xml.gz" % (context.cache_path, context.config("output_prefix"))) vehicles_path = "%s/%s" % ( context.path("matsim.scenario.vehicles"), - context.stage("matsim.scenario.vehicles") + context.stage("matsim.scenario.vehicles"), + ) + shutil.copy( + vehicles_path, + "%s/%svehicles.xml.gz" % (context.cache_path, context.config("output_prefix")), ) - shutil.copy(vehicles_path, "%s/%svehicles.xml.gz" % (context.cache_path, context.config("output_prefix"))) # Generate base configuration - eqasim.run(context, "org.eqasim.core.scenario.config.RunGenerateConfig", [ - "--sample-size", context.config("sampling_rate"), - "--threads", context.config("processes"), - "--prefix", context.config("output_prefix"), - "--random-seed", context.config("random_seed"), - "--output-path", "generic_config.xml" - ]) + eqasim.run( + context, + "org.eqasim.core.scenario.config.RunGenerateConfig", + [ + "--sample-size", + context.config("sampling_rate"), + "--threads", + context.config("processes"), + "--prefix", + context.config("output_prefix"), + "--random-seed", + context.config("random_seed"), + "--output-path", + "generic_config.xml", + ], + ) assert os.path.exists("%s/generic_config.xml" % context.path()) # Adapt config for Île-de-France - eqasim.run(context, "org.eqasim.ile_de_france.scenario.RunAdaptConfig", [ - "--input-path", "generic_config.xml", - "--output-path", "%sconfig.xml" % context.config("output_prefix"), - "--prefix", context.config("output_prefix") - ]) - assert os.path.exists("%s/%sconfig.xml" % (context.path(), context.config("output_prefix"))) + eqasim.run( + context, + "org.eqasim.ile_de_france.scenario.RunAdaptConfig", + [ + "--input-path", + "generic_config.xml", + "--output-path", + "%sconfig.xml" % context.config("output_prefix"), + "--prefix", + context.config("output_prefix"), + ], + ) + assert os.path.exists( + "%s/%sconfig.xml" % (context.path(), 
context.config("output_prefix")) + ) # Add urban attributes to population and network # (but only if Paris is included in the scenario!) df_codes = context.stage("data.spatial.codes") if "75" in df_codes["departement_id"].unique().astype(str): - df_shape = context.stage("data.spatial.departments")[["departement_id", "geometry"]].rename( - columns = dict(departement_id = "id") - ) + df_shape = context.stage("data.spatial.departments")[ + ["departement_id", "geometry"] + ].rename(columns=dict(departement_id="id")) df_shape["id"] = df_shape["id"].astype(str) if "75" in df_shape["id"].unique(): df_shape.to_file("%s/departments.shp" % context.path()) - eqasim.run(context, "org.eqasim.core.scenario.spatial.RunImputeSpatialAttribute", [ - "--input-population-path", "prepared_population.xml.gz", - "--output-population-path", "prepared_population.xml.gz", - "--input-network-path", "%snetwork.xml.gz" % context.config("output_prefix"), - "--output-network-path", "%snetwork.xml.gz" % context.config("output_prefix"), - "--shape-path", "departments.shp", - "--shape-attribute", "id", - "--shape-value", "75", - "--attribute", "isUrban" - ]) - - eqasim.run(context, "org.eqasim.core.scenario.spatial.RunAdjustCapacity", [ - "--input-path", "%snetwork.xml.gz" % context.config("output_prefix"), - "--output-path", "%snetwork.xml.gz" % context.config("output_prefix"), - "--shape-path", "departments.shp", - "--shape-attribute", "id", - "--shape-value", "75", - "--factor", str(0.8) - ]) - - + eqasim.run( + context, + "org.eqasim.core.scenario.spatial.RunImputeSpatialAttribute", + [ + "--input-population-path", + "prepared_population.xml.gz", + "--output-population-path", + "prepared_population.xml.gz", + "--input-network-path", + "%snetwork.xml.gz" % context.config("output_prefix"), + "--output-network-path", + "%snetwork.xml.gz" % context.config("output_prefix"), + "--shape-path", + "departments.shp", + "--shape-attribute", + "id", + "--shape-value", + "75", + "--attribute", + "isUrban", + ], + ) + + eqasim.run( + context, + "org.eqasim.core.scenario.spatial.RunAdjustCapacity", + [ + "--input-path", + "%snetwork.xml.gz" % context.config("output_prefix"), + "--output-path", + "%snetwork.xml.gz" % context.config("output_prefix"), + "--shape-path", + "departments.shp", + "--shape-attribute", + "id", + "--shape-value", + "75", + "--factor", + str(0.8), + ], + ) + # Optionally, perform mode choice if context.config("mode_choice"): - eqasim.run(context, "org.eqasim.core.standalone_mode_choice.RunStandaloneModeChoice", [ - "--config-path", "%sconfig.xml" % context.config("output_prefix"), - "--config:standaloneModeChoice.outputDirectory", "mode_choice", - "--config:global.numberOfThreads", context.config("processes"), - "--write-output-csv-trips", "true", - "--skip-scenario-check", "true", - "--config:plans.inputPlansFile", "prepared_population.xml.gz", - "--eqasim-configurator-class", "org.eqasim.ile_de_france.IDFConfigurator", - "--mode-choice-configurator-class", "org.eqasim.ile_de_france.IDFStandaloneModeChoiceConfigurator" - ]) + eqasim.run( + context, + "org.eqasim.core.standalone_mode_choice.RunStandaloneModeChoice", + [ + "--config-path", + "%sconfig.xml" % context.config("output_prefix"), + "--config:standaloneModeChoice.outputDirectory", + "mode_choice", + "--config:global.numberOfThreads", + context.config("processes"), + "--write-output-csv-trips", + "true", + "--skip-scenario-check", + "true", + "--config:plans.inputPlansFile", + "prepared_population.xml.gz", + "--eqasim-configurator-class", + 
"org.eqasim.ile_de_france.IDFConfigurator", + "--mode-choice-configurator-class", + "org.eqasim.ile_de_france.IDFStandaloneModeChoiceConfigurator", + ], + ) assert os.path.exists("%s/mode_choice/output_plans.xml.gz" % context.path()) assert os.path.exists("%s/mode_choice/output_trips.csv" % context.path()) assert os.path.exists("%s/mode_choice/output_pt_legs.csv" % context.path()) - shutil.copy("%s/mode_choice/output_plans.xml.gz" % context.path(), - "%s/%spopulation.xml.gz" % (context.path(), context.config("output_prefix"))) + shutil.copy( + "%s/mode_choice/output_plans.xml.gz" % context.path(), + "%s/%spopulation.xml.gz" + % (context.path(), context.config("output_prefix")), + ) else: # Route population - eqasim.run(context, "org.eqasim.core.scenario.routing.RunPopulationRouting", [ - "--config-path", "%sconfig.xml" % context.config("output_prefix"), - "--output-path", "%spopulation.xml.gz" % context.config("output_prefix"), - "--threads", context.config("processes"), - "--config:plans.inputPlansFile", "prepared_population.xml.gz" - ]) + eqasim.run( + context, + "org.eqasim.core.scenario.routing.RunPopulationRouting", + [ + "--config-path", + "%sconfig.xml" % context.config("output_prefix"), + "--output-path", + "%spopulation.xml.gz" % context.config("output_prefix"), + "--threads", + context.config("processes"), + "--config:plans.inputPlansFile", + "prepared_population.xml.gz", + ], + ) - assert os.path.exists("%s/%spopulation.xml.gz" % (context.path(), context.config("output_prefix"))) + assert os.path.exists( + "%s/%spopulation.xml.gz" % (context.path(), context.config("output_prefix")) + ) # Validate scenario - eqasim.run(context, "org.eqasim.core.scenario.validation.RunScenarioValidator", [ - "--config-path", "%sconfig.xml" % context.config("output_prefix") - ]) + eqasim.run( + context, + "org.eqasim.core.scenario.validation.RunScenarioValidator", + ["--config-path", "%sconfig.xml" % context.config("output_prefix")], + ) # Cleanup os.remove("%s/prepared_population.xml.gz" % context.path()) diff --git a/matsim/simulation/run.py b/matsim/simulation/run.py index fb6773b2..69da3376 100644 --- a/matsim/simulation/run.py +++ b/matsim/simulation/run.py @@ -3,23 +3,33 @@ import matsim.runtime.eqasim as eqasim + def configure(context): context.stage("matsim.simulation.prepare") context.stage("matsim.runtime.java") context.stage("matsim.runtime.eqasim") + def execute(context): config_path = "%s/%s" % ( context.path("matsim.simulation.prepare"), - context.stage("matsim.simulation.prepare") + context.stage("matsim.simulation.prepare"), ) # Run routing - eqasim.run(context, "org.eqasim.ile_de_france.RunSimulation", [ - "--config-path", config_path, - "--config:controler.lastIteration", str(1), - "--config:controler.writeEventsInterval", str(1), - "--config:controler.writePlansInterval", str(1), - ]) + eqasim.run( + context, + "org.eqasim.ile_de_france.RunSimulation", + [ + "--config-path", + config_path, + "--config:controler.lastIteration", + str(1), + "--config:controler.writeEventsInterval", + str(1), + "--config:controler.writePlansInterval", + str(1), + ], + ) assert os.path.exists("%s/simulation_output/output_events.xml.gz" % context.path()) diff --git a/matsim/writers.py b/matsim/writers.py index da99084f..94a5ced9 100644 --- a/matsim/writers.py +++ b/matsim/writers.py @@ -1,6 +1,7 @@ import numpy as np from xml.sax.saxutils import escape + class XmlWriter: def __init__(self, writer): self.writer = writer @@ -37,22 +38,26 @@ def time(self, time): time = int(time) hours = time // 3600 
minutes = (time % 3600) // 60 - seconds = (time % 60) + seconds = time % 60 return "%02d:%02d:%02d" % (hours, minutes, seconds) - def location(self, x, y, facility_id = None): + def location(self, x, y, facility_id=None): return (x, y, None if facility_id is None else facility_id) + def _write_preface_attributes(writer, attributes): if len(attributes) > 0: - writer._write_line('') + writer._write_line("") writer.indent += 1 for item in attributes.items(): - writer._write_line('%s' % item) + writer._write_line( + '%s' % item + ) writer.indent -= 1 - writer._write_line('') + writer._write_line("") + class PopulationWriter(XmlWriter): POPULATION_SCOPE = 0 @@ -64,11 +69,13 @@ class PopulationWriter(XmlWriter): def __init__(self, writer): XmlWriter.__init__(self, writer) - def start_population(self, attributes = {}): + def start_population(self, attributes={}): self._require_scope(None) self._write_line('') - self._write_line('') - self._write_line('') + self._write_line( + '' + ) + self._write_line("") self.scope = self.POPULATION_SCOPE self.indent += 1 @@ -78,7 +85,7 @@ def start_population(self, attributes = {}): def end_population(self): self._require_scope(self.POPULATION_SCOPE) self.indent -= 1 - self._write_line('') + self._write_line("") self.scope = self.FINISHED_SCOPE def start_person(self, person_id): @@ -91,11 +98,11 @@ def end_person(self): self._require_scope(self.PERSON_SCOPE) self.indent -= 1 self.scope = self.POPULATION_SCOPE - self._write_line('') + self._write_line("") def start_attributes(self): # We don't require any scope here because attributes can be almost anywhere - self._write_line('') + self._write_line("") self.indent += 1 # And we need to remember which scope we were in before starting the attributes self._pre_attributes_scope = self.scope @@ -106,13 +113,13 @@ def end_attributes(self): self.indent -= 1 # Resetting the scope that we were in before starting the attributes self.scope = self._pre_attributes_scope - self._write_line('') + self._write_line("") def add_attribute(self, name, type, value): self._require_scope(self.ATTRIBUTES_SCOPE) - self._write_line('%s' % ( - name, type, value - )) + self._write_line( + '%s' % (name, type, value) + ) def start_plan(self, selected): self._require_scope(self.PERSON_SCOPE) @@ -124,33 +131,37 @@ def end_plan(self): self._require_scope(self.PLAN_SCOPE) self.indent -= 1 self.scope = self.PERSON_SCOPE - self._write_line('') + self._write_line("") - def add_activity(self, type, location, start_time = None, end_time = None): + def add_activity(self, type, location, start_time=None, end_time=None): self._require_scope(self.PLAN_SCOPE) self._write_indent() - self._write('\n') + if location[2] is not None: + self._write('facility="%s" ' % str(location[2])) + if start_time is not None: + self._write('start_time="%s" ' % self.time(start_time)) + if end_time is not None: + self._write('end_time="%s" ' % self.time(end_time)) + self._write("/>\n") def add_leg(self, mode, departure_time, travel_time): self._require_scope(self.PLAN_SCOPE) self._write_indent() - self._write('\n') + self._write(">\n") self.start_attributes() - self.add_attribute('routingMode', 'java.lang.String', mode) + self.add_attribute("routingMode", "java.lang.String", mode) self.end_attributes() - self._write_line('') + self._write_line("") + class HouseholdsWriter(XmlWriter): HOUSEHOLDS_SCOPE = 0 @@ -161,10 +172,12 @@ class HouseholdsWriter(XmlWriter): def __init__(self, writer): XmlWriter.__init__(self, writer) - def start_households(self, attributes = {}): + def 
start_households(self, attributes={}): self._require_scope(None) self._write_line('') - self._write_line('') + self._write_line( + '' + ) self.scope = self.HOUSEHOLDS_SCOPE self.indent += 1 @@ -173,7 +186,7 @@ def start_households(self, attributes = {}): def end_households(self): self._require_scope(self.HOUSEHOLDS_SCOPE) - self._write_line('') + self._write_line("") self.scope = self.FINISHED_SCOPE def start_household(self, household_id): @@ -186,11 +199,11 @@ def end_household(self): self._require_scope(self.HOUSEHOLD_SCOPE) self.indent -= 1 self.scope = self.HOUSEHOLDS_SCOPE - self._write_line('') + self._write_line("") def start_attributes(self): self._require_scope(self.HOUSEHOLD_SCOPE) - self._write_line('') + self._write_line("") self.indent += 1 self.scope = self.ATTRIBUTES_SCOPE @@ -198,26 +211,28 @@ def end_attributes(self): self._require_scope(self.ATTRIBUTES_SCOPE) self.indent -= 1 self.scope = self.HOUSEHOLD_SCOPE - self._write_line('') + self._write_line("") def add_attribute(self, name, type, value): self._require_scope(self.ATTRIBUTES_SCOPE) - self._write_line('%s' % ( - name, type, value - )) + self._write_line( + '%s' % (name, type, value) + ) def add_members(self, person_ids): self._require_scope(self.HOUSEHOLD_SCOPE) - self._write_line('') + self._write_line("") self.indent += 1 - for person_id in person_ids: self._write_line('' % person_id) + for person_id in person_ids: + self._write_line('' % person_id) self.indent -= 1 - self._write_line('') + self._write_line("") def add_income(self, income): self._require_scope(self.HOUSEHOLD_SCOPE) self._write_line('%f' % income) + class FacilitiesWriter(XmlWriter): FACILITIES_SCOPE = 0 FINISHED_SCOPE = 1 @@ -226,11 +241,13 @@ class FacilitiesWriter(XmlWriter): def __init__(self, writer): XmlWriter.__init__(self, writer) - def start_facilities(self, attributes = {}): + def start_facilities(self, attributes={}): self._require_scope(None) self._write_line('') - self._write_line('') - self._write_line('') + self._write_line( + '' + ) + self._write_line("") self.scope = self.FACILITIES_SCOPE self.indent += 1 @@ -240,14 +257,12 @@ def start_facilities(self, attributes = {}): def end_facilities(self): self._require_scope(self.FACILITIES_SCOPE) self.indent -= 1 - self._write_line('') + self._write_line("") self.scope = self.FINISHED_SCOPE def start_facility(self, facility_id, x, y): self._require_scope(self.FACILITIES_SCOPE) - self._write_line('' % ( - str(facility_id), x, y - )) + self._write_line('' % (str(facility_id), x, y)) self.indent += 1 self.scope = self.FACILITY_SCOPE @@ -256,7 +271,7 @@ def end_facility(self): self._require_scope(self.FACILITY_SCOPE) self.indent -= 1 self.scope = self.FACILITIES_SCOPE - self._write_line('') + self._write_line("") def add_activity(self, purpose): self._require_scope(self.FACILITY_SCOPE) @@ -270,10 +285,12 @@ class VehiclesWriter(XmlWriter): def __init__(self, writer): XmlWriter.__init__(self, writer) - def start_vehicles(self, attributes = {}): + def start_vehicles(self, attributes={}): self._require_scope(None) self._write_line('') - self._write_line('') + self._write_line( + '' + ) self.scope = self.VEHICLES_SCOPE self.indent += 1 @@ -283,40 +300,58 @@ def start_vehicles(self, attributes = {}): def end_vehicles(self): self._require_scope(self.VEHICLES_SCOPE) self.indent -= 1 - self._write_line('') + self._write_line("") self.scope = self.FINISHED_SCOPE - def add_type(self, vehicle_type_id, nb_seats = 4, length = 5.0, width = 1.0, pce = 1.0, mode = "car", attributes = {}, engine_attributes = 
{}): + def add_type( + self, + vehicle_type_id, + nb_seats=4, + length=5.0, + width=1.0, + pce=1.0, + mode="car", + attributes={}, + engine_attributes={}, + ): self._require_scope(self.VEHICLES_SCOPE) self._write_line('' % str(vehicle_type_id)) self.indent += 1 if len(attributes) > 0: - self._write_line('') + self._write_line("") self.indent += 1 for key, item in attributes.items(): - self._write_line('%s' % (key, escape(item))) + self._write_line( + '%s' + % (key, escape(item)) + ) self.indent -= 1 - self._write_line('') + self._write_line("") if not np.isnan(nb_seats): - self._write_line('' % nb_seats) + self._write_line( + '' % nb_seats + ) self._write_line('' % length) self._write_line('' % width) if len(engine_attributes) > 0: - self._write_line('') + self._write_line("") self.indent += 1 - self._write_line('') + self._write_line("") self.indent += 1 for key, item in engine_attributes.items(): - self._write_line('%s' % (key, escape(item))) + self._write_line( + '%s' + % (key, escape(item)) + ) self.indent -= 1 - self._write_line('') + self._write_line("") self.indent -= 1 - self._write_line('') + self._write_line("") if not np.isnan(pce): self._write_line('' % pce) @@ -324,29 +359,35 @@ def add_type(self, vehicle_type_id, nb_seats = 4, length = 5.0, width = 1.0, pce self._write_line('' % mode) self.indent -= 1 - self._write_line('') - + self._write_line("") - def add_vehicle(self, vehicle_id, type_id, attributes = {}): + def add_vehicle(self, vehicle_id, type_id, attributes={}): self._require_scope(self.VEHICLES_SCOPE) if len(attributes) > 0: - self._write_line('' % (str(vehicle_id), str(type_id))) + self._write_line( + '' % (str(vehicle_id), str(type_id)) + ) self.indent += 1 - self._write_line('') + self._write_line("") self.indent += 1 for key, item in attributes.items(): - self._write_line('%s' % (str(key), str(item))) + self._write_line( + '%s' + % (str(key), str(item)) + ) self.indent -= 1 - self._write_line('') + self._write_line("") self.indent -= 1 - self._write_line('') + self._write_line("") else: - self._write_line('' % (str(vehicle_id), str(type_id))) + self._write_line( + '' % (str(vehicle_id), str(type_id)) + ) class backlog_iterator: - def __init__(self, iterable, backlog = 1): + def __init__(self, iterable, backlog=1): self.iterable = iterable self.forward_log = [] self.backward_log = [None] * (backlog + 1) diff --git a/scripts/verify_data.py b/scripts/verify_data.py index 93b77d4f..55a37b2f 100644 --- a/scripts/verify_data.py +++ b/scripts/verify_data.py @@ -1,34 +1,43 @@ import requests import time -# The goal of this script is to verify the availability of the data +# The goal of this script is to verify the availability of the data # that is needed to set up the pipeline -sleep_time = 5 # seconds -timeout = 30 # seconds +sleep_time = 5 # seconds +timeout = 30 # seconds retries = 3 + class Report: def __init__(self): self.sources = [] def register(self, name, url): - self.sources.append({ "name": name, "url": url }) + self.sources.append({"name": name, "url": url}) def validate(self): failed = [] with requests.Session() as session: - session.headers.update({ "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0" }) + session.headers.update( + { + "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0" + } + ) for index, source in enumerate(self.sources): - print("[{}/{}] Checking {} ...".format(index + 1, len(self.sources), source["name"])) - + print( + "[{}/{}] Checking {} ...".format( 
+ index + 1, len(self.sources), source["name"] + ) + ) + retry = 0 success = False while not success and retry < retries: try: - response = session.head(source["url"], timeout = timeout) + response = session.head(source["url"], timeout=timeout) source["status"] = response.status_code success = True except TimeoutError: @@ -38,54 +47,59 @@ def validate(self): print(e) retry += 1 - print(" Status {} (retry {}/{})".format(source["status"], retry, retries)) - + print( + " Status {} (retry {}/{})".format( + source["status"], retry, retries + ) + ) + time.sleep(sleep_time) if source["status"] != 200: failed.append(source["name"]) - + print("Done.") print("Missing: ", len(failed)) print(failed) return len(failed) == 0 + report = Report() report.register( "Census data (RP 2019)", - "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVI_csv.zip" + "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVI_csv.zip", ) report.register( "Population totals (RP 2019)", - "https://www.insee.fr/fr/statistiques/fichier/6543200/base-ic-evol-struct-pop-2019.zip" + "https://www.insee.fr/fr/statistiques/fichier/6543200/base-ic-evol-struct-pop-2019.zip", ) report.register( "Origin-destination data (RP-MOBPRO 2019)", - "https://www.insee.fr/fr/statistiques/fichier/6456056/RP2019_mobpro_csv.zip" + "https://www.insee.fr/fr/statistiques/fichier/6456056/RP2019_mobpro_csv.zip", ) report.register( "Origin-destination data (RP-MOBSCO 2019)", - "https://www.insee.fr/fr/statistiques/fichier/6456052/RP2019_mobsco_csv.zip" + "https://www.insee.fr/fr/statistiques/fichier/6456052/RP2019_mobsco_csv.zip", ) report.register( "Income tax data (Filosofi 2019), municipalities", - "https://www.insee.fr/fr/statistiques/fichier/6036907/indic-struct-distrib-revenu-2019-COMMUNES.zip" + "https://www.insee.fr/fr/statistiques/fichier/6036907/indic-struct-distrib-revenu-2019-COMMUNES.zip", ) report.register( "Income tax data (Filosofi 2019), administrative", - "https://www.insee.fr/fr/statistiques/fichier/6036907/indic-struct-distrib-revenu-2019-SUPRA.zip" + "https://www.insee.fr/fr/statistiques/fichier/6036907/indic-struct-distrib-revenu-2019-SUPRA.zip", ) report.register( "Service and facility census (BPE 2021)", - "https://www.insee.fr/fr/statistiques/fichier/3568638/bpe21_ensemble_xy_csv.zip" + "https://www.insee.fr/fr/statistiques/fichier/3568638/bpe21_ensemble_xy_csv.zip", ) entd_sources = [ @@ -94,60 +108,66 @@ def validate(self): (2556, "Q_menage"), (2565, "Q_individu"), (2566, "Q_ind_lieu_teg"), - (2568, "K_deploc") + (2568, "K_deploc"), ] for identifier, name in entd_sources: report.register( "National household travel survey (ENTD 2008), {}".format(name), - "https://www.statistiques.developpement-durable.gouv.fr/media/{}/download?inline".format(identifier) + "https://www.statistiques.developpement-durable.gouv.fr/media/{}/download?inline".format( + identifier + ), ) report.register( "IRIS zoning system (2021)", - "https://data.geopf.fr/telechargement/download/CONTOURS-IRIS/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01.7z" + "https://data.geopf.fr/telechargement/download/CONTOURS-IRIS/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01.7z", ) report.register( "Zoning registry (2021)", - "https://www.insee.fr/fr/statistiques/fichier/7708995/reference_IRIS_geo2021.zip" + "https://www.insee.fr/fr/statistiques/fichier/7708995/reference_IRIS_geo2021.zip", ) report.register( "Enterprise census (SIRENE), Etablissement", - 
"https://files.data.gouv.fr/insee-sirene/StockEtablissement_utf8.zip" + "https://files.data.gouv.fr/insee-sirene/StockEtablissement_utf8.zip", ) report.register( "Enterprise census (SIRENE), Unité Legale", - "https://files.data.gouv.fr/insee-sirene/StockUniteLegale_utf8.zip" + "https://files.data.gouv.fr/insee-sirene/StockUniteLegale_utf8.zip", ) report.register( "Enterprise census (SIRENE), Géolocalisé", - "https://files.data.gouv.fr/insee-sirene-geo/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.zip" + "https://files.data.gouv.fr/insee-sirene-geo/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.zip", ) for department in (75, 77, 78, 91, 92, 93, 94, 95): report.register( "Buildings database (BD TOPO), {}".format(department), - "https://data.geopf.fr/telechargement/download/BDTOPO/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D0{}_2022-03-15/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D0{}_2022-03-15.7z".format(department, department) + "https://data.geopf.fr/telechargement/download/BDTOPO/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D0{}_2022-03-15/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D0{}_2022-03-15.7z".format( + department, department + ), ) for department in (75, 77, 78, 91, 92, 93, 94, 95): report.register( "Adresses database (BAN), {}".format(department), - "https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-{}.csv.gz".format(department) + "https://adresse.data.gouv.fr/data/ban/adresses/latest/csv/adresses-{}.csv.gz".format( + department + ), ) report.register( "Population projections", - "https://www.insee.fr/fr/statistiques/fichier/5894093/00_central.xlsx" + "https://www.insee.fr/fr/statistiques/fichier/5894093/00_central.xlsx", ) report.register( "Urban type", - "https://www.insee.fr/fr/statistiques/fichier/4802589/UU2020_au_01-01-2023.zip" + "https://www.insee.fr/fr/statistiques/fichier/4802589/UU2020_au_01-01-2023.zip", ) exit(0 if report.validate() else 1) diff --git a/synthesis/locations/education.py b/synthesis/locations/education.py index 45a32a70..3a0998b6 100644 --- a/synthesis/locations/education.py +++ b/synthesis/locations/education.py @@ -3,13 +3,15 @@ import pandas as pd import geopandas as gpd + def configure(context): context.stage("data.spatial.municipalities") - if context.config("education_location_source","bpe") == "addresses": - context.stage("data.external.education", alias = "location_source") + if context.config("education_location_source", "bpe") == "addresses": + context.stage("data.external.education", alias="location_source") else: - context.stage("data.bpe.cleaned", alias = "location_source") + context.stage("data.bpe.cleaned", alias="location_source") + EDUCATION_WEIGHT_MAP = [ ("C101", 100), # Preschools @@ -25,6 +27,7 @@ def configure(context): ("C501", 2000), # University ] + def fake_education(missing_communes, c, df_locations, df_zones): # Fake education destinations as the centroid of zones that have no other destinations print( @@ -50,49 +53,80 @@ def fake_education(missing_communes, c, df_locations, df_zones): return df_added + def execute(context): df_locations = context.stage("location_source") df_locations = df_locations[df_locations["activity_type"] == "education"] - df_locations = df_locations[["education_type", "commune_id","weight", "geometry"]].copy() + df_locations = df_locations[ + ["education_type", "commune_id", "weight", "geometry"] + ].copy() df_locations["fake"] = False # Add education destinations to the centroid of zones that have no other destinations df_zones = context.stage("data.spatial.municipalities") - 
required_communes = set(df_zones["commune_id"].unique()) - - if context.config("education_location_source") != 'bpe': # either weighted or addresses + required_communes = set(df_zones["commune_id"].unique()) + + if ( + context.config("education_location_source") != "bpe" + ): # either weighted or addresses for prefix, weight in EDUCATION_WEIGHT_MAP: - df_locations.loc[df_locations["education_type"]==prefix, "weight"] = ( + df_locations.loc[df_locations["education_type"] == prefix, "weight"] = ( weight - ) - if context.config("education_location_source") != 'bpe' : + ) + if context.config("education_location_source") != "bpe": - # Add education destinations in function of level education for c in ["C1", "C2", "C3"]: - missing_communes = required_communes - set(df_locations[df_locations["education_type"].str.startswith(c)]["commune_id"].unique()) + missing_communes = required_communes - set( + df_locations[df_locations["education_type"].str.startswith(c)][ + "commune_id" + ].unique() + ) if len(missing_communes) > 0: - df_locations = pd.concat([df_locations,fake_education(missing_communes, c, df_locations, df_zones)]) - + df_locations = pd.concat( + [ + df_locations, + fake_education(missing_communes, c, df_locations, df_zones), + ] + ) + # Add education destinations for last level education - missing_communes = required_communes - set(df_locations[~(df_locations["education_type"].str.startswith(("C1", "C2", "C3")))]["commune_id"].unique()) + missing_communes = required_communes - set( + df_locations[ + ~(df_locations["education_type"].str.startswith(("C1", "C2", "C3"))) + ]["commune_id"].unique() + ) if len(missing_communes) > 0: - df_locations = pd.concat([df_locations,fake_education(missing_communes, "C4", df_locations, df_zones)]) - else : + df_locations = pd.concat( + [ + df_locations, + fake_education(missing_communes, "C4", df_locations, df_zones), + ] + ) + else: missing_communes = required_communes - set(df_locations["commune_id"].unique()) if len(missing_communes) > 0: - df_locations = pd.concat([df_locations,fake_education(missing_communes, "C0", df_locations, df_zones)]) - df_locations["education_type"] = df_locations["education_type"].str[:2].astype("category") + df_locations = pd.concat( + [ + df_locations, + fake_education(missing_communes, "C0", df_locations, df_zones), + ] + ) + df_locations["education_type"] = ( + df_locations["education_type"].str[:2].astype("category") + ) # Define identifiers - df_locations["location_id"]= np.arange(len(df_locations)) + df_locations["location_id"] = np.arange(len(df_locations)) df_locations["location_id"] = "edu_" + df_locations["location_id"].astype(str) - - return df_locations[["location_id","education_type", "commune_id","weight","fake", "geometry"]] + + return df_locations[ + ["location_id", "education_type", "commune_id", "weight", "fake", "geometry"] + ] diff --git a/synthesis/locations/home/addresses.py b/synthesis/locations/home/addresses.py index 01410a13..34a29455 100644 --- a/synthesis/locations/home/addresses.py +++ b/synthesis/locations/home/addresses.py @@ -18,57 +18,65 @@ If no adresses matches a building, its centroid is taken as the unique address. 
""" + def configure(context): context.stage("data.bdtopo.raw") - + context.config("home_address_buffer", 5.0) context.config("home_location_weight", "housing") if context.config("home_location_source", "addresses") == "addresses": context.stage("data.ban.raw") + def execute(context): # Load buildings df_buildings = context.stage("data.bdtopo.raw") - print("Number of buildings:", + len(df_buildings)) + print("Number of buildings:", +len(df_buildings)) if context.config("home_location_source") == "buildings": - df_addresses = pd.DataFrame({ - "building_id": [], "housing": [], "geometry": [] - }) + df_addresses = pd.DataFrame({"building_id": [], "housing": [], "geometry": []}) - else: # addresses + else: # addresses # Load addresses df_addresses = context.stage("data.ban.raw")[["geometry"]].copy() - print("Number of addresses:", + len(df_addresses)) + print("Number of addresses:", +len(df_addresses)) # Buffer buildings to capture adresses in their vicinity df_buffer = df_buildings[["building_id", "housing", "geometry"]].copy() df_buffer["geometry"] = df_buffer.buffer(context.config("home_address_buffer")) # Find close-by addresses - df_addresses = gpd.sjoin(df_addresses, df_buffer, predicate = "within")[[ - "building_id", "housing", "geometry"]] - + df_addresses = gpd.sjoin(df_addresses, df_buffer, predicate="within")[ + ["building_id", "housing", "geometry"] + ] + # Create missing addresses by using centroids - df_missing = df_buildings[~df_buildings["building_id"].isin(df_addresses["building_id"])].copy() + df_missing = df_buildings[ + ~df_buildings["building_id"].isin(df_addresses["building_id"]) + ].copy() df_missing["geometry"] = df_missing["geometry"].centroid df_missing = df_missing[["building_id", "housing", "geometry"]] # Put together matched and missing addresses df_addresses = pd.concat([df_addresses, df_missing]) - df_addresses = gpd.GeoDataFrame(df_addresses, crs = df_buildings.crs).rename(columns={"building_id":"home_location_id"}) + df_addresses = gpd.GeoDataFrame(df_addresses, crs=df_buildings.crs).rename( + columns={"building_id": "home_location_id"} + ) # Obtain weights for all addresses if context.config("home_location_weight") == "housing": - df_count = df_addresses.groupby("home_location_id").size().reset_index(name = "count") - df_addresses = pd.merge(df_addresses, df_count, on = "home_location_id") + df_count = ( + df_addresses.groupby("home_location_id").size().reset_index(name="count") + ) + df_addresses = pd.merge(df_addresses, df_count, on="home_location_id") df_addresses["weight"] = df_addresses["housing"] / df_addresses["count"] else: df_addresses["weight"] = 1.0 - + return df_addresses[["home_location_id", "weight", "geometry"]] + def validate(context): - assert context.config("home_location_source") in ("addresses", "buildings","tiles") + assert context.config("home_location_source") in ("addresses", "buildings", "tiles") assert context.config("home_location_weight") in ("uniform", "housing") diff --git a/synthesis/locations/home/locations.py b/synthesis/locations/home/locations.py index 391748ec..40b012d2 100644 --- a/synthesis/locations/home/locations.py +++ b/synthesis/locations/home/locations.py @@ -7,27 +7,30 @@ home activities. 
""" + def configure(context): context.stage("data.spatial.iris") if context.config("home_location_source", "addresses") == "tiles": - context.stage("data.tiles.raw", alias = "location_source") + context.stage("data.tiles.raw", alias="location_source") else: - context.stage("synthesis.locations.home.addresses", alias = "location_source") + context.stage("synthesis.locations.home.addresses", alias="location_source") + def execute(context): # Find required IRIS df_iris = context.stage("data.spatial.iris") required_iris = set(df_iris["iris_id"].unique()) - + # Load all addresses and add IRIS information df_addresses = context.stage("location_source") print("Imputing IRIS into addresses ...") - - df_addresses = gpd.sjoin(df_addresses, - df_iris[["iris_id", "commune_id", "geometry"]], predicate = "within") + + df_addresses = gpd.sjoin( + df_addresses, df_iris[["iris_id", "commune_id", "geometry"]], predicate="within" + ) del df_addresses["index_right"] - + df_addresses.loc[df_addresses["iris_id"].isna(), "iris_id"] = "unknown" df_addresses["iris_id"] = df_addresses["iris_id"].astype("category") @@ -37,21 +40,30 @@ def execute(context): missing_iris = required_iris - set(df_addresses["iris_id"].unique()) if len(missing_iris) > 0: - print("Adding homes at the centroid of %d/%d IRIS without BDTOPO observations" % ( - len(missing_iris), len(required_iris))) + print( + "Adding homes at the centroid of %d/%d IRIS without BDTOPO observations" + % (len(missing_iris), len(required_iris)) + ) df_added = [] for iris_id in sorted(missing_iris): - centroid = df_iris[df_iris["iris_id"] == iris_id]["geometry"].centroid.iloc[0] + centroid = df_iris[df_iris["iris_id"] == iris_id]["geometry"].centroid.iloc[ + 0 + ] - df_added.append({ - "iris_id": iris_id, "geometry": centroid, - "commune_id": iris_id[:5], - "weight" : 1, - "home_location_id": -1 - }) + df_added.append( + { + "iris_id": iris_id, + "geometry": centroid, + "commune_id": iris_id[:5], + "weight": 1, + "home_location_id": -1, + } + ) - df_added = gpd.GeoDataFrame(pd.DataFrame.from_records(df_added), crs = df_addresses.crs) + df_added = gpd.GeoDataFrame( + pd.DataFrame.from_records(df_added), crs=df_addresses.crs + ) df_added["fake"] = True df_addresses = pd.concat([df_addresses, df_added]) diff --git a/synthesis/locations/home/output.py b/synthesis/locations/home/output.py index 54c33ec6..926170bd 100644 --- a/synthesis/locations/home/output.py +++ b/synthesis/locations/home/output.py @@ -1,5 +1,6 @@ import geopandas as gpd + def configure(context): context.config("output_path") context.config("output_prefix", "ile_de_france_") @@ -7,19 +8,26 @@ def configure(context): context.stage("data.bdtopo.raw") context.stage("synthesis.locations.home.locations") + def execute(context): # Load data - df_buildings = context.stage("data.bdtopo.raw")[[ - "building_id", "housing", "geometry"]] - - df_locations = context.stage("synthesis.locations.home.locations")[[ - "location_id", "weight", "building_id", "geometry"]] + df_buildings = context.stage("data.bdtopo.raw")[ + ["building_id", "housing", "geometry"] + ] + + df_locations = context.stage("synthesis.locations.home.locations")[ + ["location_id", "weight", "building_id", "geometry"] + ] # Write into same file with multiple layers - df_buildings.to_file("%s/%shousing.gpkg" % ( - context.config("output_path"), context.config("output_prefix") - ), layer = "buildings") + df_buildings.to_file( + "%s/%shousing.gpkg" + % (context.config("output_path"), context.config("output_prefix")), + layer="buildings", + ) - 
df_locations.to_file("%s/%shousing.gpkg" % ( - context.config("output_path"), context.config("output_prefix") - ), layer = "addresses") + df_locations.to_file( + "%s/%shousing.gpkg" + % (context.config("output_path"), context.config("output_prefix")), + layer="addresses", + ) diff --git a/synthesis/locations/secondary.py b/synthesis/locations/secondary.py index c5446359..40bef2c1 100644 --- a/synthesis/locations/secondary.py +++ b/synthesis/locations/secondary.py @@ -3,20 +3,24 @@ import pandas as pd import geopandas as gpd + def configure(context): context.stage("data.bpe.cleaned") context.stage("data.spatial.municipalities") + def execute(context): - df_locations = context.stage("data.bpe.cleaned")[[ - "enterprise_id", "activity_type", "commune_id", "geometry" - ]].copy() + df_locations = context.stage("data.bpe.cleaned")[ + ["enterprise_id", "activity_type", "commune_id", "geometry"] + ].copy() df_locations["destination_id"] = np.arange(len(df_locations)) # Attach attributes for activity types df_locations["offers_leisure"] = df_locations["activity_type"] == "leisure" df_locations["offers_shop"] = df_locations["activity_type"] == "shop" - df_locations["offers_other"] = ~(df_locations["offers_leisure"] | df_locations["offers_shop"]) + df_locations["offers_other"] = ~( + df_locations["offers_leisure"] | df_locations["offers_shop"] + ) # Define new IDs df_locations["location_id"] = np.arange(len(df_locations)) diff --git a/synthesis/locations/work.py b/synthesis/locations/work.py index 0fc9bcee..c4178244 100644 --- a/synthesis/locations/work.py +++ b/synthesis/locations/work.py @@ -11,14 +11,16 @@ place at their centroid to be in line with INSEE OD data. """ + def configure(context): context.stage("data.sirene.localized") context.stage("data.spatial.municipalities") + def execute(context): - df_workplaces = context.stage("data.sirene.localized")[[ - "commune_id", "minimum_employees", "maximum_employees", "geometry" - ]].copy() + df_workplaces = context.stage("data.sirene.localized")[ + ["commune_id", "minimum_employees", "maximum_employees", "geometry"] + ].copy() # Use minimum number of employees as weight df_workplaces["employees"] = df_workplaces["minimum_employees"] @@ -30,19 +32,29 @@ def execute(context): missing_communes = required_communes - set(df_workplaces["commune_id"].unique()) if len(missing_communes) > 0: - print("Adding work places at the centroid of %d/%d communes without SIRENE observations" % ( - len(missing_communes), len(required_communes))) + print( + "Adding work places at the centroid of %d/%d communes without SIRENE observations" + % (len(missing_communes), len(required_communes)) + ) df_added = [] for commune_id in missing_communes: - centroid = df_zones[df_zones["commune_id"] == commune_id]["geometry"].centroid.iloc[0] + centroid = df_zones[df_zones["commune_id"] == commune_id][ + "geometry" + ].centroid.iloc[0] - df_added.append({ - "commune_id": commune_id, "employees": 1.0, "geometry": centroid, - }) + df_added.append( + { + "commune_id": commune_id, + "employees": 1.0, + "geometry": centroid, + } + ) - df_added = gpd.GeoDataFrame(pd.DataFrame.from_records(df_added), crs = df_workplaces.crs) + df_added = gpd.GeoDataFrame( + pd.DataFrame.from_records(df_added), crs=df_workplaces.crs + ) df_added["fake"] = True df_workplaces = pd.concat([df_workplaces, df_added]) diff --git a/synthesis/output.py b/synthesis/output.py index 84c52a36..eeea93fc 100644 --- a/synthesis/output.py +++ b/synthesis/output.py @@ -7,6 +7,7 @@ import math import numpy as np + def 
configure(context): context.stage("synthesis.population.enriched") @@ -22,7 +23,7 @@ def configure(context): context.config("output_path") context.config("output_prefix", "ile_de_france_") context.config("output_formats", ["csv", "gpkg"]) - + if context.config("mode_choice", False): context.stage("matsim.simulation.prepare") @@ -33,8 +34,9 @@ def validate(context): if not os.path.isdir(output_path): raise RuntimeError("Output directory must exist: %s" % output_path) + def clean_gpkg(path): - ''' + """ Make GPKG files time and OS independent. In GeoPackage metadata: @@ -42,21 +44,28 @@ def clean_gpkg(path): - round coordinates. This allow for comparison of output digests between runs and between OS. - ''' + """ conn = sqlite3.connect(path) cur = conn.cursor() for table_name, min_x, min_y, max_x, max_y in cur.execute( "SELECT table_name, min_x, min_y, max_x, max_y FROM gpkg_contents" ): cur.execute( - "UPDATE gpkg_contents " + - "SET last_change='2000-01-01T00:00:00Z', min_x=?, min_y=?, max_x=?, max_y=? " + - "WHERE table_name=?", - (math.floor(min_x), math.floor(min_y), math.ceil(max_x), math.ceil(max_y), table_name) + "UPDATE gpkg_contents " + + "SET last_change='2000-01-01T00:00:00Z', min_x=?, min_y=?, max_x=?, max_y=? " + + "WHERE table_name=?", + ( + math.floor(min_x), + math.floor(min_y), + math.ceil(max_x), + math.ceil(max_y), + table_name, + ), ) conn.commit() conn.close() + def execute(context): output_path = context.config("output_path") output_prefix = context.config("output_prefix") @@ -64,121 +73,237 @@ def execute(context): # Prepare persons df_persons = context.stage("synthesis.population.enriched").rename( - columns = { "has_license": "has_driving_license" } + columns={"has_license": "has_driving_license"} ) - df_persons = df_persons[[ - "person_id", "household_id", - "age", "employed", "sex", "socioprofessional_class", - "has_driving_license", "has_pt_subscription", - "census_person_id", "hts_id" - ]] + df_persons = df_persons[ + [ + "person_id", + "household_id", + "age", + "employed", + "sex", + "socioprofessional_class", + "has_driving_license", + "has_pt_subscription", + "census_person_id", + "hts_id", + ] + ] if "csv" in output_formats: - df_persons.to_csv("%s/%spersons.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + df_persons.to_csv( + "%s/%spersons.csv" % (output_path, output_prefix), + sep=";", + index=None, + lineterminator="\n", + ) if "parquet" in output_formats: df_persons.to_parquet("%s/%spersons.parquet" % (output_path, output_prefix)) # Prepare activities df_activities = context.stage("synthesis.population.activities").rename( - columns = { "trip_index": "following_trip_index" } + columns={"trip_index": "following_trip_index"} ) df_activities = pd.merge( - df_activities, df_persons[["person_id", "household_id"]], on = "person_id") + df_activities, df_persons[["person_id", "household_id"]], on="person_id" + ) - df_activities["preceding_trip_index"] = df_activities["following_trip_index"].shift(1) + df_activities["preceding_trip_index"] = df_activities["following_trip_index"].shift( + 1 + ) df_activities.loc[df_activities["is_first"], "preceding_trip_index"] = -1 - df_activities["preceding_trip_index"] = df_activities["preceding_trip_index"].astype(int) + df_activities["preceding_trip_index"] = df_activities[ + "preceding_trip_index" + ].astype(int) # Prepare spatial data sets - df_locations = context.stage("synthesis.population.spatial.locations")[[ - "person_id", "iris_id", 
"commune_id","departement_id","region_id","activity_index", "geometry" - ]] + df_locations = context.stage("synthesis.population.spatial.locations")[ + [ + "person_id", + "iris_id", + "commune_id", + "departement_id", + "region_id", + "activity_index", + "geometry", + ] + ] - df_activities = pd.merge(df_activities, df_locations[[ - "person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry" - ]], how = "left", on = ["person_id", "activity_index"]) + df_activities = pd.merge( + df_activities, + df_locations[ + [ + "person_id", + "iris_id", + "commune_id", + "departement_id", + "region_id", + "activity_index", + "geometry", + ] + ], + how="left", + on=["person_id", "activity_index"], + ) # Prepare spatial activities - df_spatial = gpd.GeoDataFrame(df_activities[[ - "person_id", "household_id", "activity_index", - "iris_id", "commune_id","departement_id","region_id", - "preceding_trip_index", "following_trip_index", - "purpose", "start_time", "end_time", - "is_first", "is_last", "geometry" - ]], crs = df_locations.crs) - df_spatial = df_spatial.astype({'purpose': 'str', "departement_id": 'str'}) + df_spatial = gpd.GeoDataFrame( + df_activities[ + [ + "person_id", + "household_id", + "activity_index", + "iris_id", + "commune_id", + "departement_id", + "region_id", + "preceding_trip_index", + "following_trip_index", + "purpose", + "start_time", + "end_time", + "is_first", + "is_last", + "geometry", + ] + ], + crs=df_locations.crs, + ) + df_spatial = df_spatial.astype({"purpose": "str", "departement_id": "str"}) # Write activities - df_activities = df_activities[[ - "person_id", "household_id", "activity_index", - "iris_id", "commune_id","departement_id","region_id", - "preceding_trip_index", "following_trip_index", - "purpose", "start_time", "end_time", - "is_first", "is_last" - ]] + df_activities = df_activities[ + [ + "person_id", + "household_id", + "activity_index", + "iris_id", + "commune_id", + "departement_id", + "region_id", + "preceding_trip_index", + "following_trip_index", + "purpose", + "start_time", + "end_time", + "is_first", + "is_last", + ] + ] if "csv" in output_formats: - df_activities.to_csv("%s/%sactivities.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + df_activities.to_csv( + "%s/%sactivities.csv" % (output_path, output_prefix), + sep=";", + index=None, + lineterminator="\n", + ) if "parquet" in output_formats: - df_activities.to_parquet("%s/%sactivities.parquet" % (output_path, output_prefix)) + df_activities.to_parquet( + "%s/%sactivities.parquet" % (output_path, output_prefix) + ) # Prepare households - df_households = context.stage("synthesis.population.enriched").rename( - columns = { "household_income": "income" } - ).drop_duplicates("household_id") - - df_households = pd.merge(df_households,df_activities[df_activities["purpose"] == "home"][["household_id", - "iris_id", "commune_id","departement_id","region_id"]].drop_duplicates("household_id"),how="left") - df_households = df_households[[ - "household_id","iris_id", "commune_id", "departement_id","region_id", - "car_availability", "bike_availability", - "number_of_vehicles", "number_of_bikes", - "income", - "census_household_id" - ]] + df_households = ( + context.stage("synthesis.population.enriched") + .rename(columns={"household_income": "income"}) + .drop_duplicates("household_id") + ) + + df_households = pd.merge( + df_households, + df_activities[df_activities["purpose"] == "home"][ + ["household_id", "iris_id", "commune_id", 
"departement_id", "region_id"] + ].drop_duplicates("household_id"), + how="left", + ) + df_households = df_households[ + [ + "household_id", + "iris_id", + "commune_id", + "departement_id", + "region_id", + "car_availability", + "bike_availability", + "number_of_vehicles", + "number_of_bikes", + "income", + "census_household_id", + ] + ] if "csv" in output_formats: - df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + df_households.to_csv( + "%s/%shouseholds.csv" % (output_path, output_prefix), + sep=";", + index=None, + lineterminator="\n", + ) if "parquet" in output_formats: - df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix)) + df_households.to_parquet( + "%s/%shouseholds.parquet" % (output_path, output_prefix) + ) # Prepare trips df_trips = context.stage("synthesis.population.trips").rename( - columns = { - "is_first_trip": "is_first", - "is_last_trip": "is_last" - } + columns={"is_first_trip": "is_first", "is_last_trip": "is_last"} ) df_trips["preceding_activity_index"] = df_trips["trip_index"] df_trips["following_activity_index"] = df_trips["trip_index"] + 1 - df_trips = df_trips[[ - "person_id", "trip_index", - "preceding_activity_index", "following_activity_index", - "departure_time", "arrival_time", - "preceding_purpose", "following_purpose", - "is_first", "is_last" - ]] + df_trips = df_trips[ + [ + "person_id", + "trip_index", + "preceding_activity_index", + "following_activity_index", + "departure_time", + "arrival_time", + "preceding_purpose", + "following_purpose", + "is_first", + "is_last", + ] + ] if context.config("mode_choice"): df_mode_choice = pd.read_csv( - "{}/mode_choice/output_trips.csv".format(context.path("matsim.simulation.prepare"), output_prefix), - delimiter = ";") + "{}/mode_choice/output_trips.csv".format( + context.path("matsim.simulation.prepare"), output_prefix + ), + delimiter=";", + ) df_mode_choice = df_mode_choice.rename(columns={"person_trip_id": "trip_index"}) columns_to_keep = ["person_id", "trip_index"] - columns_to_keep.extend([c for c in df_trips.columns if c not in df_mode_choice.columns]) + columns_to_keep.extend( + [c for c in df_trips.columns if c not in df_mode_choice.columns] + ) df_trips = df_trips[columns_to_keep] - df_trips = pd.merge(df_trips, df_mode_choice, on = [ - "person_id", "trip_index"], how="left", validate = "one_to_one") + df_trips = pd.merge( + df_trips, + df_mode_choice, + on=["person_id", "trip_index"], + how="left", + validate="one_to_one", + ) - shutil.copy("%s/mode_choice/output_pt_legs.csv" % (context.path("matsim.simulation.prepare")), - "%s/%spt_legs.csv" % (output_path, output_prefix)) + shutil.copy( + "%s/mode_choice/output_pt_legs.csv" + % (context.path("matsim.simulation.prepare")), + "%s/%spt_legs.csv" % (output_path, output_prefix), + ) - assert not np.any(df_trips["mode"].isna()) + assert not np.any(df_trips["mode"].isna()) if "csv" in output_formats: - df_trips.to_csv("%s/%strips.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + df_trips.to_csv( + "%s/%strips.csv" % (output_path, output_prefix), + sep=";", + index=None, + lineterminator="\n", + ) if "parquet" in output_formats: df_trips.to_csv("%s/%strips.parquet" % (output_path, output_prefix)) @@ -186,30 +311,48 @@ def execute(context): df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.vehicles") if "csv" in output_formats: - df_vehicle_types.to_csv("%s/%svehicle_types.csv" % (output_path, 
output_prefix), sep = ";", index = None, lineterminator = "\n") - df_vehicles.to_csv("%s/%svehicles.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + df_vehicle_types.to_csv( + "%s/%svehicle_types.csv" % (output_path, output_prefix), + sep=";", + index=None, + lineterminator="\n", + ) + df_vehicles.to_csv( + "%s/%svehicles.csv" % (output_path, output_prefix), + sep=";", + index=None, + lineterminator="\n", + ) if "parquet" in output_formats: - df_vehicle_types.to_parquet("%s/%svehicle_types.parquet" % (output_path, output_prefix)) + df_vehicle_types.to_parquet( + "%s/%svehicle_types.parquet" % (output_path, output_prefix) + ) df_vehicles.to_parquet("%s/%svehicles.parquet" % (output_path, output_prefix)) - if "gpkg" in output_formats: path = "%s/%sactivities.gpkg" % (output_path, output_prefix) - df_spatial.to_file(path, driver = "GPKG") + df_spatial.to_file(path, driver="GPKG") clean_gpkg(path) if "geoparquet" in output_formats: path = "%s/%sactivities.geoparquet" % (output_path, output_prefix) df_spatial.to_parquet(path) # Write spatial homes - df_spatial_homes = df_spatial[ - df_spatial["purpose"] == "home" - ].drop_duplicates("household_id")[[ - "household_id","iris_id", "commune_id","departement_id","region_id", "geometry" - ]] + df_spatial_homes = df_spatial[df_spatial["purpose"] == "home"].drop_duplicates( + "household_id" + )[ + [ + "household_id", + "iris_id", + "commune_id", + "departement_id", + "region_id", + "geometry", + ] + ] if "gpkg" in output_formats: path = "%s/%shomes.gpkg" % (output_path, output_prefix) - df_spatial_homes.to_file(path, driver = "GPKG") + df_spatial_homes.to_file(path, driver="GPKG") clean_gpkg(path) if "geoparquet" in output_formats: path = "%s/%shomes.geoparquet" % (output_path, output_prefix) @@ -217,8 +360,12 @@ def execute(context): # Write spatial commutes df_spatial = pd.merge( - df_spatial[df_spatial["purpose"] == "home"].drop_duplicates("person_id")[["person_id", "geometry"]].rename(columns = { "geometry": "home_geometry" }), - df_spatial[df_spatial["purpose"] == "work"].drop_duplicates("person_id")[["person_id", "geometry"]].rename(columns = { "geometry": "work_geometry" }) + df_spatial[df_spatial["purpose"] == "home"] + .drop_duplicates("person_id")[["person_id", "geometry"]] + .rename(columns={"geometry": "home_geometry"}), + df_spatial[df_spatial["purpose"] == "work"] + .drop_duplicates("person_id")[["person_id", "geometry"]] + .rename(columns={"geometry": "work_geometry"}), ) df_spatial["geometry"] = [ @@ -226,38 +373,50 @@ def execute(context): for od in zip(df_spatial["home_geometry"], df_spatial["work_geometry"]) ] - df_spatial = df_spatial.drop(columns = ["home_geometry", "work_geometry"]) + df_spatial = df_spatial.drop(columns=["home_geometry", "work_geometry"]) if "gpkg" in output_formats: path = "%s/%scommutes.gpkg" % (output_path, output_prefix) - df_spatial.to_file(path, driver = "GPKG") + df_spatial.to_file(path, driver="GPKG") clean_gpkg(path) if "geoparquet" in output_formats: path = "%s/%scommutes.geoparquet" % (output_path, output_prefix) df_spatial.to_parquet(path) # Write spatial trips - df_spatial = pd.merge(df_trips, df_locations[[ - "person_id", "activity_index", "geometry" - ]].rename(columns = { - "activity_index": "preceding_activity_index", - "geometry": "preceding_geometry" - }), how = "left", on = ["person_id", "preceding_activity_index"]) - - df_spatial = pd.merge(df_spatial, df_locations[[ - "person_id", "activity_index", "geometry" - ]].rename(columns = { - "activity_index": 
"following_activity_index", - "geometry": "following_geometry" - }), how = "left", on = ["person_id", "following_activity_index"]) + df_spatial = pd.merge( + df_trips, + df_locations[["person_id", "activity_index", "geometry"]].rename( + columns={ + "activity_index": "preceding_activity_index", + "geometry": "preceding_geometry", + } + ), + how="left", + on=["person_id", "preceding_activity_index"], + ) + + df_spatial = pd.merge( + df_spatial, + df_locations[["person_id", "activity_index", "geometry"]].rename( + columns={ + "activity_index": "following_activity_index", + "geometry": "following_geometry", + } + ), + how="left", + on=["person_id", "following_activity_index"], + ) df_spatial["geometry"] = [ geo.LineString(od) - for od in zip(df_spatial["preceding_geometry"], df_spatial["following_geometry"]) + for od in zip( + df_spatial["preceding_geometry"], df_spatial["following_geometry"] + ) ] - df_spatial = df_spatial.drop(columns = ["preceding_geometry", "following_geometry"]) + df_spatial = df_spatial.drop(columns=["preceding_geometry", "following_geometry"]) - df_spatial = gpd.GeoDataFrame(df_spatial, crs = df_locations.crs) + df_spatial = gpd.GeoDataFrame(df_spatial, crs=df_locations.crs) df_spatial["following_purpose"] = df_spatial["following_purpose"].astype(str) df_spatial["preceding_purpose"] = df_spatial["preceding_purpose"].astype(str) @@ -266,7 +425,7 @@ def execute(context): if "gpkg" in output_formats: path = "%s/%strips.gpkg" % (output_path, output_prefix) - df_spatial.to_file(path, driver = "GPKG") + df_spatial.to_file(path, driver="GPKG") clean_gpkg(path) if "geoparquet" in output_formats: path = "%s/%strips.geoparquet" % (output_path, output_prefix) diff --git a/synthesis/population/activities.py b/synthesis/population/activities.py index 27d3367a..e0c9590f 100644 --- a/synthesis/population/activities.py +++ b/synthesis/population/activities.py @@ -7,15 +7,22 @@ Transforms the synthetic trip table into a synthetic activity table. 
""" + def configure(context): context.stage("synthesis.population.enriched") context.stage("synthesis.population.trips") + def execute(context): df_activities = context.stage("synthesis.population.trips") # Add trip count - counts = df_activities.groupby("person_id").size().reset_index(name = "trip_count")["trip_count"].values + counts = ( + df_activities.groupby("person_id") + .size() + .reset_index(name="trip_count")["trip_count"] + .values + ) df_activities["trip_count"] = np.hstack([[count] * count for count in counts]) # Shift times and types of trips to arrive at activities @@ -43,14 +50,40 @@ def execute(context): df_last["activity_index"] = df_last["trip_count"] df_last["trip_index"] = -1 - df_activities = pd.concat([ - df_activities[["person_id", "activity_index", "trip_index", "purpose", "start_time", "end_time", "is_first", "is_last"]], - df_last[["person_id", "activity_index", "trip_index", "purpose", "start_time", "end_time", "is_first", "is_last"]] - ]).sort_values(by = ["person_id", "activity_index"]) + df_activities = pd.concat( + [ + df_activities[ + [ + "person_id", + "activity_index", + "trip_index", + "purpose", + "start_time", + "end_time", + "is_first", + "is_last", + ] + ], + df_last[ + [ + "person_id", + "activity_index", + "trip_index", + "purpose", + "start_time", + "end_time", + "is_first", + "is_last", + ] + ], + ] + ).sort_values(by=["person_id", "activity_index"]) # Add activities for people without trips df_missing = context.stage("synthesis.population.enriched") - df_missing = df_missing[~df_missing["person_id"].isin(df_activities["person_id"])][["person_id"]] + df_missing = df_missing[~df_missing["person_id"].isin(df_activities["person_id"])][ + ["person_id"] + ] df_missing["activity_index"] = 0 df_missing["trip_index"] = -1 diff --git a/synthesis/population/enriched.py b/synthesis/population/enriched.py index 15fc5649..22d83427 100644 --- a/synthesis/population/enriched.py +++ b/synthesis/population/enriched.py @@ -13,27 +13,38 @@ This stage fuses census data with HTS data. 
""" + def configure(context): context.stage("synthesis.population.matched") context.stage("synthesis.population.sampled") context.stage("synthesis.population.income.selected") hts = context.config("hts") - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") + def execute(context): # Select population columns - df_population = context.stage("synthesis.population.sampled")[[ - "person_id", "household_id", - "census_person_id", "census_household_id", - "age", "sex", "employed", "studies", - "number_of_vehicles", "household_size", "consumption_units", - "socioprofessional_class" - ]] + df_population = context.stage("synthesis.population.sampled")[ + [ + "person_id", + "household_id", + "census_person_id", + "census_household_id", + "age", + "sex", + "employed", + "studies", + "number_of_vehicles", + "household_size", + "consumption_units", + "socioprofessional_class", + ] + ] # Attach matching information df_matching = context.stage("synthesis.population.matched") - df_population = pd.merge(df_population, df_matching, on = "person_id") + df_population = pd.merge(df_population, df_matching, on="person_id") initial_size = len(df_population) initial_person_ids = len(df_population["person_id"].unique()) @@ -41,22 +52,40 @@ def execute(context): # Attach person and household attributes from HTS df_hts_households, df_hts_persons, _ = context.stage("hts") - df_hts_persons = df_hts_persons.rename(columns = { "person_id": "hts_id", "household_id": "hts_household_id" }) - df_hts_households = df_hts_households.rename(columns = { "household_id": "hts_household_id" }) - - df_population = pd.merge(df_population, df_hts_persons[[ - "hts_id", "hts_household_id", "has_license", "has_pt_subscription", "is_passenger" - ]], on = "hts_id") - - df_population = pd.merge(df_population, df_hts_households[[ - "hts_household_id", "number_of_bikes" - ]], on = "hts_household_id") + df_hts_persons = df_hts_persons.rename( + columns={"person_id": "hts_id", "household_id": "hts_household_id"} + ) + df_hts_households = df_hts_households.rename( + columns={"household_id": "hts_household_id"} + ) + + df_population = pd.merge( + df_population, + df_hts_persons[ + [ + "hts_id", + "hts_household_id", + "has_license", + "has_pt_subscription", + "is_passenger", + ] + ], + on="hts_id", + ) + + df_population = pd.merge( + df_population, + df_hts_households[["hts_household_id", "number_of_bikes"]], + on="hts_household_id", + ) # Attach income df_income = context.stage("synthesis.population.income.selected") - df_population = pd.merge(df_population, df_income[[ - "household_id", "household_income" - ]], on = "household_id") + df_population = pd.merge( + df_population, + df_income[["household_id", "household_income"]], + on="household_id", + ) # Check consistency final_size = len(df_population) @@ -68,28 +97,55 @@ def execute(context): assert initial_household_ids == final_household_ids # Add car availability - df_number_of_cars = df_population[["household_id", "number_of_vehicles"]].drop_duplicates("household_id") - df_number_of_licenses = df_population[["household_id", "has_license"]].groupby("household_id").sum().reset_index().rename(columns = { "has_license": "number_of_licenses" }) + df_number_of_cars = df_population[ + ["household_id", "number_of_vehicles"] + ].drop_duplicates("household_id") + df_number_of_licenses = ( + df_population[["household_id", "has_license"]] + .groupby("household_id") + .sum() + .reset_index() + .rename(columns={"has_license": "number_of_licenses"}) + ) 
df_car_availability = pd.merge(df_number_of_cars, df_number_of_licenses) df_car_availability["car_availability"] = "all" - df_car_availability.loc[df_car_availability["number_of_vehicles"] < df_car_availability["number_of_licenses"], "car_availability"] = "some" - df_car_availability.loc[df_car_availability["number_of_vehicles"] == 0, "car_availability"] = "none" - df_car_availability["car_availability"] = df_car_availability["car_availability"].astype("category") - - df_population = pd.merge(df_population, df_car_availability[["household_id", "car_availability"]]) + df_car_availability.loc[ + df_car_availability["number_of_vehicles"] + < df_car_availability["number_of_licenses"], + "car_availability", + ] = "some" + df_car_availability.loc[ + df_car_availability["number_of_vehicles"] == 0, "car_availability" + ] = "none" + df_car_availability["car_availability"] = df_car_availability[ + "car_availability" + ].astype("category") + + df_population = pd.merge( + df_population, df_car_availability[["household_id", "car_availability"]] + ) # Add bike availability df_population["bike_availability"] = "all" - df_population.loc[df_population["number_of_bikes"] < df_population["household_size"], "bike_availability"] = "some" - df_population.loc[df_population["number_of_bikes"] == 0, "bike_availability"] = "none" - df_population["bike_availability"] = df_population["bike_availability"].astype("category") - + df_population.loc[ + df_population["number_of_bikes"] < df_population["household_size"], + "bike_availability", + ] = "some" + df_population.loc[df_population["number_of_bikes"] == 0, "bike_availability"] = ( + "none" + ) + df_population["bike_availability"] = df_population["bike_availability"].astype( + "category" + ) + # Add age range for education df_population["age_range"] = "higher_education" - df_population.loc[df_population["age"]<=10,"age_range"] = "primary_school" - df_population.loc[df_population["age"].between(11,14),"age_range"] = "middle_school" - df_population.loc[df_population["age"].between(15,17),"age_range"] = "high_school" + df_population.loc[df_population["age"] <= 10, "age_range"] = "primary_school" + df_population.loc[df_population["age"].between(11, 14), "age_range"] = ( + "middle_school" + ) + df_population.loc[df_population["age"].between(15, 17), "age_range"] = "high_school" df_population["age_range"] = df_population["age_range"].astype("category") - + return df_population diff --git a/synthesis/population/income/bhepop2.py b/synthesis/population/income/bhepop2.py index 6aa6b7fb..17f3ae28 100644 --- a/synthesis/population/income/bhepop2.py +++ b/synthesis/population/income/bhepop2.py @@ -1,6 +1,9 @@ import numpy as np import pandas as pd -from synthesis.population.income.utils import income_uniform_sample, MAXIMUM_INCOME_FACTOR +from synthesis.population.income.utils import ( + income_uniform_sample, + MAXIMUM_INCOME_FACTOR, +) from bhepop2.tools import add_household_size_attribute, add_household_type_attribute from bhepop2.sources.marginal_distributions import QuantitativeMarginalDistributions from bhepop2.enrichment.bhepop2 import Bhepop2Enrichment @@ -55,15 +58,17 @@ def _sample_income(context, args): "Filosofi", attribute_selection=[ "size", # modalities: ["1_pers", "2_pers", "3_pers", "4_pers", "5_pers_or_more"] - "family_comp" # modalities: ["Single_man", "Single_wom", "Couple_without_child", "Couple_with_child", "Single_parent", "complex_hh"] + "family_comp", # modalities: ["Single_man", "Single_wom", "Couple_without_child", "Couple_with_child", "Single_parent", 
"complex_hh"] ], abs_minimum=0, relative_maximum=MAXIMUM_INCOME_FACTOR, - delta_min=1000 + delta_min=1000, ) # create enrichment class - enrich_class = Bhepop2Enrichment(df_selected, source, feature_name=INCOME_COLUMN, seed=random_seed) + enrich_class = Bhepop2Enrichment( + df_selected, source, feature_name=INCOME_COLUMN, seed=random_seed + ) # evaluate feature values on the population pop = enrich_class.assign_feature_values() @@ -84,7 +89,12 @@ def _sample_income(context, args): # get global distribution of the commune distrib_all = distribs[distribs["modality"] == "all"] assert len(distrib_all) == 1 - centiles = list(distrib_all[["D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9"]].iloc[0].values / 12) + centiles = list( + distrib_all[["D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9"]] + .iloc[0] + .values + / 12 + ) incomes = income_uniform_sample(random, centiles, len(df_selected)) @@ -102,29 +112,39 @@ def execute(context): df_population = add_household_size_attribute(df_population) df_population = add_household_type_attribute(df_population) - df_households = df_population[[ - "household_id", "consumption_units", "size", "family_comp" - ]].drop_duplicates("household_id") + df_households = df_population[ + ["household_id", "consumption_units", "size", "family_comp"] + ].drop_duplicates("household_id") - df_homes = context.stage("synthesis.population.spatial.home.zones")[[ - "household_id", "commune_id" - ]] + df_homes = context.stage("synthesis.population.spatial.home.zones")[ + ["household_id", "commune_id"] + ] df_households = pd.merge(df_households, df_homes) commune_ids = df_households["commune_id"].unique() - random_seeds = random.randint(10000, size = len(commune_ids)) + random_seeds = random.randint(10000, size=len(commune_ids)) # Perform sampling per commune - with context.progress(label = "Imputing income ...", total = len(commune_ids)) as progress: - with context.parallel(dict(households = df_households, income = df_income)) as parallel: - - for f, incomes, method in parallel.imap(_sample_income, zip(commune_ids, random_seeds)): - df_households.loc[f, "household_income"] = incomes * df_households.loc[f, "consumption_units"] + with context.progress( + label="Imputing income ...", total=len(commune_ids) + ) as progress: + with context.parallel( + dict(households=df_households, income=df_income) + ) as parallel: + + for f, incomes, method in parallel.imap( + _sample_income, zip(commune_ids, random_seeds) + ): + df_households.loc[f, "household_income"] = ( + incomes * df_households.loc[f, "consumption_units"] + ) df_households.loc[f, "method"] = method # Cleanup - df_households = df_households[["household_id", "household_income", "consumption_units"]] + df_households = df_households[ + ["household_id", "household_income", "consumption_units"] + ] assert len(df_households) == len(df_households["household_id"].unique()) return df_households diff --git a/synthesis/population/income/selected.py b/synthesis/population/income/selected.py index 24d9abc5..43395d57 100644 --- a/synthesis/population/income/selected.py +++ b/synthesis/population/income/selected.py @@ -1,14 +1,13 @@ - def configure(context): method = context.config("income_assignation_method", "uniform") if method == "uniform": - context.stage("synthesis.population.income.uniform", alias = "income") + context.stage("synthesis.population.income.uniform", alias="income") elif method == "bhepop2": - context.stage("synthesis.population.income.bhepop2", alias = "income") + 
context.stage("synthesis.population.income.bhepop2", alias="income") else: raise RuntimeError("Unknown income assignation method : %s" % method) + def execute(context): return context.stage("income") - diff --git a/synthesis/population/income/uniform.py b/synthesis/population/income/uniform.py index f3fdd758..918f2aaf 100644 --- a/synthesis/population/income/uniform.py +++ b/synthesis/population/income/uniform.py @@ -12,6 +12,7 @@ income distribution and a random income within the selected stratum is chosen. """ + def configure(context): context.stage("data.income.municipality") context.stage("synthesis.population.sampled") @@ -29,38 +30,56 @@ def _sample_income(context, args): f = df_households["commune_id"] == commune_id df_selected = df_households[f] - centiles = list(df_income[df_income["commune_id"] == commune_id][["q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9"]].iloc[0].values / 12) + centiles = list( + df_income[df_income["commune_id"] == commune_id][ + ["q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9"] + ] + .iloc[0] + .values + / 12 + ) incomes = income_uniform_sample(random, centiles, len(df_selected)) return f, incomes + def execute(context): random = np.random.RandomState(context.config("random_seed")) # Load data df_income = context.stage("data.income.municipality") - df_income = df_income[(df_income["attribute"] == "all") & (df_income["value"] == "all")] + df_income = df_income[ + (df_income["attribute"] == "all") & (df_income["value"] == "all") + ] - df_households = context.stage("synthesis.population.sampled")[[ - "household_id", "consumption_units" - ]].drop_duplicates("household_id") + df_households = context.stage("synthesis.population.sampled")[ + ["household_id", "consumption_units"] + ].drop_duplicates("household_id") - df_homes = context.stage("synthesis.population.spatial.home.zones")[[ - "household_id", "commune_id" - ]] + df_homes = context.stage("synthesis.population.spatial.home.zones")[ + ["household_id", "commune_id"] + ] df_households = pd.merge(df_households, df_homes) # Perform sampling per commune - with context.parallel(dict(households = df_households, income = df_income)) as parallel: + with context.parallel(dict(households=df_households, income=df_income)) as parallel: commune_ids = df_households["commune_id"].unique() - random_seeds = random.randint(10000, size = len(commune_ids)) + random_seeds = random.randint(10000, size=len(commune_ids)) - for f, incomes in context.progress(parallel.imap(_sample_income, zip(commune_ids, random_seeds)), label = "Imputing income ...", total = len(commune_ids)): - df_households.loc[f, "household_income"] = incomes * df_households.loc[f, "consumption_units"] + for f, incomes in context.progress( + parallel.imap(_sample_income, zip(commune_ids, random_seeds)), + label="Imputing income ...", + total=len(commune_ids), + ): + df_households.loc[f, "household_income"] = ( + incomes * df_households.loc[f, "consumption_units"] + ) # Cleanup - df_households = df_households[["household_id", "household_income", "consumption_units"]] + df_households = df_households[ + ["household_id", "household_income", "consumption_units"] + ] assert len(df_households) == len(df_households["household_id"].unique()) return df_households diff --git a/synthesis/population/income/utils.py b/synthesis/population/income/utils.py index b937417b..22a0ea98 100644 --- a/synthesis/population/income/utils.py +++ b/synthesis/population/income/utils.py @@ -23,6 +23,8 @@ def income_uniform_sample(random_state, deciles, size): indices = 
random_state.randint(10, size=size) lower_bounds, upper_bounds = deciles[indices], deciles[indices + 1] - incomes = lower_bounds + random_state.random_sample(size=size) * (upper_bounds - lower_bounds) + incomes = lower_bounds + random_state.random_sample(size=size) * ( + upper_bounds - lower_bounds + ) return incomes diff --git a/synthesis/population/matched.py b/synthesis/population/matched.py index 5ab5bed0..09022a6a 100644 --- a/synthesis/population/matched.py +++ b/synthesis/population/matched.py @@ -20,10 +20,14 @@ } DEFAULT_MATCHING_ATTRIBUTES = [ - "sex", "any_cars", "age_class", "socioprofessional_class", - "departement_id" + "sex", + "any_cars", + "age_class", + "socioprofessional_class", + "departement_id", ] + def configure(context): context.config("processes") context.config("random_seed") @@ -34,9 +38,10 @@ def configure(context): context.stage("synthesis.population.income.selected") hts = context.config("hts") - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") + -@numba.jit(nopython = True) # Already parallelized parallel = True) +@numba.jit(nopython=True) # Already parallelized parallel = True) def sample_indices(uniform, cdf, selected_indices): indices = np.arange(len(uniform)) @@ -45,7 +50,18 @@ def sample_indices(uniform, cdf, selected_indices): return selected_indices[indices] -def statistical_matching(progress, df_source, source_identifier, weight, df_target, target_identifier, columns, random_seed = 0, minimum_observations = 0): + +def statistical_matching( + progress, + df_source, + source_identifier, + weight, + df_target, + target_identifier, + columns, + random_seed=0, + minimum_observations=0, +): random = np.random.RandomState(random_seed) # Reduce data frames @@ -53,21 +69,27 @@ def statistical_matching(progress, df_source, source_identifier, weight, df_targ df_target = df_target[[target_identifier] + columns].copy() # Sort data frames - df_source = df_source.sort_values(by = columns) - df_target = df_target.sort_values(by = columns) + df_source = df_source.sort_values(by=columns) + df_target = df_target.sort_values(by=columns) # Find unique values for all columns unique_values = {} for column in columns: - unique_values[column] = list(sorted(set(df_source[column].unique()) | set(df_target[column].unique()))) + unique_values[column] = list( + sorted(set(df_source[column].unique()) | set(df_target[column].unique())) + ) # Generate filters for all columns and values source_filters, target_filters = {}, {} for column, column_unique_values in unique_values.items(): - source_filters[column] = [df_source[column].values == value for value in column_unique_values] - target_filters[column] = [df_target[column].values == value for value in column_unique_values] + source_filters[column] = [ + df_source[column].values == value for value in column_unique_values + ] + target_filters[column] = [ + df_target[column].values == value for value in column_unique_values + ] # Define search order source_filters = [source_filters[column] for column in columns] @@ -75,10 +97,10 @@ def statistical_matching(progress, df_source, source_identifier, weight, df_targ # Perform matching weights = df_source[weight].values - assigned_indices = np.ones((len(df_target),), dtype = int) * -1 - unassigned_mask = np.ones((len(df_target),), dtype = bool) - assigned_levels = np.ones((len(df_target),), dtype = int) * -1 - uniform = random.random_sample(size = (len(df_target),)) + assigned_indices = np.ones((len(df_target),), dtype=int) * -1 + 
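# Illustrative, standalone sketch (toy weights, not pipeline data) of the
# weight-proportional draw that sample_indices performs further below on the
# normalized cumulative weights: np.searchsorted on the cdf is equivalent (up to
# ties) to counting how many cdf entries lie below each uniform draw.
import numpy as np

random_example = np.random.RandomState(0)
weights_example = np.array([4.0, 1.0, 5.0])    # weights of the source observations
cdf_example = np.cumsum(weights_example)
cdf_example /= cdf_example[-1]                 # -> [0.4, 0.5, 1.0]

uniform_example = random_example.random_sample(size=10)   # one draw per target row
indices_example = np.searchsorted(cdf_example, uniform_example)
# each source index is drawn with probability proportional to its weight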
unassigned_mask = np.ones((len(df_target),), dtype=bool) + assigned_levels = np.ones((len(df_target),), dtype=int) * -1 + uniform = random.random_sample(size=(len(df_target),)) column_indices = [np.arange(len(unique_values[column])) for column in columns] @@ -87,8 +109,13 @@ def statistical_matching(progress, df_source, source_identifier, weight, df_targ if np.count_nonzero(unassigned_mask) > 0: for column_index in itertools.product(*level_column_indices): - f_source = np.logical_and.reduce([source_filters[i][k] for i, k in enumerate(column_index)]) - f_target = np.logical_and.reduce([target_filters[i][k] for i, k in enumerate(column_index)] + [unassigned_mask]) + f_source = np.logical_and.reduce( + [source_filters[i][k] for i, k in enumerate(column_index)] + ) + f_target = np.logical_and.reduce( + [target_filters[i][k] for i, k in enumerate(column_index)] + + [unassigned_mask] + ) selected_indices = np.nonzero(f_source)[0] requested_samples = np.count_nonzero(f_target) @@ -103,7 +130,9 @@ def statistical_matching(progress, df_source, source_identifier, weight, df_targ cdf = np.cumsum(selected_weights) cdf /= cdf[-1] - assigned_indices[f_target] = sample_indices(uniform[f_target], cdf, selected_indices) + assigned_indices[f_target] = sample_indices( + uniform[f_target], cdf, selected_indices + ) assigned_levels[f_target] = level unassigned_mask[f_target] = False @@ -113,13 +142,17 @@ def statistical_matching(progress, df_source, source_identifier, weight, df_targ cdf = np.cumsum(weights) cdf /= cdf[-1] - assigned_indices[unassigned_mask] = sample_indices(uniform[unassigned_mask], cdf, np.arange(len(weights))) + assigned_indices[unassigned_mask] = sample_indices( + uniform[unassigned_mask], cdf, np.arange(len(weights)) + ) assigned_levels[unassigned_mask] = 0 progress.update(np.count_nonzero(unassigned_mask)) if np.count_nonzero(unassigned_mask) > 0: - raise RuntimeError("Some target observations could not be matched. Minimum observations configured too high?") + raise RuntimeError( + "Some target observations could not be matched. Minimum observations configured too high?" 
+ ) assert np.count_nonzero(unassigned_mask) == 0 assert np.count_nonzero(assigned_indices == -1) == 0 @@ -130,6 +163,7 @@ def statistical_matching(progress, df_source, source_identifier, weight, df_targ return df_target, assigned_levels + def _run_parallel_statistical_matching(context, args): # Pass arguments df_target, random_seed = args @@ -142,28 +176,56 @@ def _run_parallel_statistical_matching(context, args): columns = context.data("columns") minimum_observations = context.data("minimum_observations") - return statistical_matching(context.progress, df_source, source_identifier, weight, df_target, target_identifier, columns, random_seed, minimum_observations) - -def parallel_statistical_matching(context, df_source, source_identifier, weight, df_target, target_identifier, columns, minimum_observations = 0): + return statistical_matching( + context.progress, + df_source, + source_identifier, + weight, + df_target, + target_identifier, + columns, + random_seed, + minimum_observations, + ) + + +def parallel_statistical_matching( + context, + df_source, + source_identifier, + weight, + df_target, + target_identifier, + columns, + minimum_observations=0, +): random_seed = context.config("random_seed") processes = context.config("processes") random = np.random.RandomState(random_seed) chunks = np.array_split(df_target, processes) - with context.progress(label = "Statistical matching ...", total = len(df_target)): - with context.parallel({ - "df_source": df_source, "source_identifier": source_identifier, "weight": weight, - "target_identifier": target_identifier, "columns": columns, - "minimum_observations": minimum_observations - }) as parallel: - random_seeds = random.randint(10000, size = len(chunks)) - results = parallel.map(_run_parallel_statistical_matching, zip(chunks, random_seeds)) + with context.progress(label="Statistical matching ...", total=len(df_target)): + with context.parallel( + { + "df_source": df_source, + "source_identifier": source_identifier, + "weight": weight, + "target_identifier": target_identifier, + "columns": columns, + "minimum_observations": minimum_observations, + } + ) as parallel: + random_seeds = random.randint(10000, size=len(chunks)) + results = parallel.map( + _run_parallel_statistical_matching, zip(chunks, random_seeds) + ) + + levels = np.hstack([r[1] for r in results]) + df_target = pd.concat([r[0] for r in results]) - levels = np.hstack([r[1] for r in results]) - df_target = pd.concat([r[0] for r in results]) + return df_target, levels - return df_target, levels def execute(context): hts = context.config("hts") @@ -178,18 +240,25 @@ def execute(context): try: default_index = columns.index("*default*") - columns[default_index:default_index + 1] = DEFAULT_MATCHING_ATTRIBUTES - except ValueError: pass + columns[default_index : default_index + 1] = DEFAULT_MATCHING_ATTRIBUTES + except ValueError: + pass # Define matching attributes AGE_BOUNDARIES = [14, 29, 44, 59, 74, 1000] if "age_class" in columns: - df_target["age_class"] = np.digitize(df_target["age"], AGE_BOUNDARIES, right = True) - df_source["age_class"] = np.digitize(df_source["age"], AGE_BOUNDARIES, right = True) + df_target["age_class"] = np.digitize( + df_target["age"], AGE_BOUNDARIES, right=True + ) + df_source["age_class"] = np.digitize( + df_source["age"], AGE_BOUNDARIES, right=True + ) if "income_class" in columns: - df_income = context.stage("synthesis.population.income.selected")[["household_id", "household_income"]] + df_income = context.stage("synthesis.population.income.selected")[ + 
["household_id", "household_income"] + ] df_target = pd.merge(df_target, df_income) df_target["income_class"] = INCOME_CLASS[hts](df_target) @@ -199,30 +268,47 @@ def execute(context): df_source["any_cars"] = df_source["number_of_vehicles"] > 0 # Perform statistical matching - df_source = df_source.rename(columns = { "person_id": "hts_id" }) + df_source = df_source.rename(columns={"person_id": "hts_id"}) for column in columns: if not column in df_source: - raise RuntimeError("Attribute not available in source (HTS) for matching: {}".format(column)) + raise RuntimeError( + "Attribute not available in source (HTS) for matching: {}".format( + column + ) + ) if not column in df_target: - raise RuntimeError("Attribute not available in target (census) for matching: {}".format(column)) + raise RuntimeError( + "Attribute not available in target (census) for matching: {}".format( + column + ) + ) df_assignment, levels = parallel_statistical_matching( context, - df_source, "hts_id", "person_weight", - df_target, "person_id", + df_source, + "hts_id", + "person_weight", + df_target, + "person_id", columns, - minimum_observations = context.config("matching_minimum_observations")) + minimum_observations=context.config("matching_minimum_observations"), + ) - df_target = pd.merge(df_target, df_assignment, on = "person_id") + df_target = pd.merge(df_target, df_assignment, on="person_id") assert len(df_target) == len(df_assignment) - context.set_info("matched_counts", { - count: np.count_nonzero(levels >= count) for count in range(len(columns) + 1) - }) + context.set_info( + "matched_counts", + {count: np.count_nonzero(levels >= count) for count in range(len(columns) + 1)}, + ) for count in range(len(columns) + 1): - print("%d matched levels:" % count, np.count_nonzero(levels >= count), "%.2f%%" % (100 * np.count_nonzero(levels >= count) / len(df_target),)) + print( + "%d matched levels:" % count, + np.count_nonzero(levels >= count), + "%.2f%%" % (100 * np.count_nonzero(levels >= count) / len(df_target),), + ) return df_target[["person_id", "hts_id"]] diff --git a/synthesis/population/projection/ipu.py b/synthesis/population/projection/ipu.py index 580ce007..e19bc82a 100644 --- a/synthesis/population/projection/ipu.py +++ b/synthesis/population/projection/ipu.py @@ -5,10 +5,12 @@ This stage reweights the census data set according to the projection data for a different year. """ + def configure(context): context.stage("data.census.cleaned") context.stage("data.census.projection") + def execute(context): df_census = context.stage("data.census.cleaned") projection = context.stage("data.census.projection") @@ -17,7 +19,9 @@ def execute(context): adjust_projection(projection) # Prepare indexing - df_households = df_census[["household_id", "household_size", "weight"]].drop_duplicates("household_id") + df_households = df_census[ + ["household_id", "household_size", "weight"] + ].drop_duplicates("household_id") df_households["household_index"] = np.arange(len(df_households)) df_census = pd.merge(df_census, df_households[["household_id", "household_index"]]) @@ -33,7 +37,11 @@ def execute(context): # Proccesing age ... 
df_marginal = projection["age"] - for index, row in context.progress(df_marginal.iterrows(), label = "Processing attribute: age", total = len(df_marginal)): + for index, row in context.progress( + df_marginal.iterrows(), + label="Processing attribute: age", + total=len(df_marginal), + ): f = df_census["age"] == row["age"] assert np.count_nonzero(f) > 0 @@ -42,10 +50,14 @@ def execute(context): attribute_membership.append(df_counts.index.values) attribute_counts.append(df_counts.values) attributes.append("age={}".format(row["age"])) - + # Processing sex ... df_marginal = projection["sex"] - for index, row in context.progress(df_marginal.iterrows(), label = "Processing attribute: sex", total = len(df_marginal)): + for index, row in context.progress( + df_marginal.iterrows(), + label="Processing attribute: sex", + total=len(df_marginal), + ): f = df_census["sex"] == row["sex"] f &= (df_census["age"] > 0) & (df_census["age"] <= 104) assert np.count_nonzero(f) > 0 @@ -58,7 +70,11 @@ def execute(context): # Processing age x sex ... df_marginal = projection["cross"] - for index, row in context.progress(df_marginal.iterrows(), label = "Processing attributes: sex x age", total = len(df_marginal)): + for index, row in context.progress( + df_marginal.iterrows(), + label="Processing attributes: sex x age", + total=len(df_marginal), + ): f = (df_census["sex"] == row["sex"]) & (df_census["age"] == row["age"]) assert np.count_nonzero(f) > 0 @@ -71,7 +87,7 @@ def execute(context): # Processing total ... f = (df_census["age"] > 0) & (df_census["age"] <= 104) assert np.count_nonzero(f) > 0 - + df_counts = df_census.loc[f, "household_index"].value_counts() attribute_targets.append(projection["total"]["projection"].values[0]) attribute_membership.append(df_counts.index.values) @@ -86,37 +102,49 @@ def execute(context): maximum_iterations = 100 for iteration in range(maximum_iterations): - factors = [] + factors = [] for k in np.arange(len(attributes)): selection = attribute_membership[k] - + target = attribute_targets[k] - current = np.sum(update[selection] * household_weights[selection] * attribute_counts[k]) - + current = np.sum( + update[selection] * household_weights[selection] * attribute_counts[k] + ) + factor = target / current factors.append(factor) - + update[selection] *= factor - print("IPU it={} min={} max={}".format(iteration, np.min(factors), np.max(factors))) + print( + "IPU it={} min={} max={}".format( + iteration, np.min(factors), np.max(factors) + ) + ) converged = np.abs(1 - np.max(factors)) < convergence_threshold converged &= np.abs(1 - np.min(factors)) < convergence_threshold - if converged: break + if converged: + break # Check that the applied factors in the last iteration are sufficiently small assert converged - print("IPF updates min={} max={} mean={}".format(np.min(update), np.max(update), np.mean(update))) + print( + "IPF updates min={} max={} mean={}".format( + np.min(update), np.max(update), np.mean(update) + ) + ) # Update the weights df_households["weight"] *= update - + return df_households[["household_id", "weight"]] + def adjust_projection(projection): # The projection data contains information on zero-year old persons. However, there is a big difference between the - # RP data and the projection, probably because RP is fixed to a certain reference date and not all of them are + # RP data and the projection, probably because RP is fixed to a certain reference date and not all of them are # registered. 
We, in particular, see that there is a large jump between 0 years and 1 years. # Therefore, we exclude the zero-year persons from the projection. This, however, means adapting all the marginals. # Also, exclude everything that is 105+ @@ -131,18 +159,16 @@ def adjust_projection(projection): if row["age"] == 0 or row["age"] == "105+": f_sex = df_sex["sex"] == row["sex"] - df_sex.loc[f_sex, "projection"] = df_sex.loc[f_sex, "projection"] - row["projection"] + df_sex.loc[f_sex, "projection"] = ( + df_sex.loc[f_sex, "projection"] - row["projection"] + ) df_total["projection"] = df_total["projection"] - row["projection"] - + projection["sex"] = df_sex projection["total"] = df_total # Remove zero old years from cross distribution - projection["cross"] = df_cross[ - (df_cross["age"] != 0) & (df_cross["age"] != "105+") - ] + projection["cross"] = df_cross[(df_cross["age"] != 0) & (df_cross["age"] != "105+")] # Remove zero old years from age distribution - projection["age"] = df_age[ - (df_age["age"] != 0) & (df_age["age"] != "105+") - ] + projection["age"] = df_age[(df_age["age"] != 0) & (df_age["age"] != "105+")] diff --git a/synthesis/population/projection/reweighted.py b/synthesis/population/projection/reweighted.py index 9863e6a3..5450c4e3 100644 --- a/synthesis/population/projection/reweighted.py +++ b/synthesis/population/projection/reweighted.py @@ -5,18 +5,20 @@ This stage reweights the census data set according to the projection data for a different year. """ + def configure(context): context.stage("data.census.filtered") context.stage("synthesis.population.projection.ipu") + def execute(context): df_census = context.stage("data.census.filtered") df_weights = context.stage("synthesis.population.projection.ipu") initial_size = len(df_census) - df_census = df_census.drop(columns = "weight") - df_census = pd.merge(df_census, df_weights, on = "household_id") + df_census = df_census.drop(columns="weight") + df_census = pd.merge(df_census, df_weights, on="household_id") final_size = len(df_census) assert initial_size == final_size diff --git a/synthesis/population/sampled.py b/synthesis/population/sampled.py index c4a33592..a2a7ae55 100644 --- a/synthesis/population/sampled.py +++ b/synthesis/population/sampled.py @@ -8,25 +8,31 @@ through the 'sampling_rate' configuration option. 
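For illustration (made-up weight, not additional functionality): a census
household with weight 2.3 is replicated twice with probability 0.7 and three
times with probability 0.3, so the expected number of copies equals its weight:

    import numpy as np

    random = np.random.RandomState(0)
    weight = 2.3
    base = int(np.floor(weight))
    copies = base + int(random.random_sample() <= weight - base)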
""" + def configure(context): if context.config("projection_year", None) is None: - context.stage("data.census.filtered", alias = "source") + context.stage("data.census.filtered", alias="source") else: - context.stage("synthesis.population.projection.reweighted", alias = "source") + context.stage("synthesis.population.projection.reweighted", alias="source") context.config("random_seed") context.config("sampling_rate") + def execute(context): - df_census = context.stage("source").sort_values(by = "household_id").copy() + df_census = context.stage("source").sort_values(by="household_id").copy() sampling_rate = context.config("sampling_rate") random = np.random.RandomState(context.config("random_seed")) # Perform stochastic rounding for the population (and scale weights) - df_rounding = df_census[["household_id", "weight", "household_size"]].drop_duplicates("household_id") + df_rounding = df_census[ + ["household_id", "weight", "household_size"] + ].drop_duplicates("household_id") df_rounding["multiplicator"] = np.floor(df_rounding["weight"]) - df_rounding["multiplicator"] += random.random_sample(len(df_rounding)) <= (df_rounding["weight"] - df_rounding["multiplicator"]) + df_rounding["multiplicator"] += random.random_sample(len(df_rounding)) <= ( + df_rounding["weight"] - df_rounding["multiplicator"] + ) df_rounding["multiplicator"] = df_rounding["multiplicator"].astype(int) # Multiply households (use same multiplicator for all household members) @@ -50,7 +56,9 @@ def execute(context): household_sizes = np.repeat(household_sizes, household_multiplicators) household_count = np.sum(household_multiplicators) - df_census.loc[:, "household_id"] = np.repeat(np.arange(household_count), household_sizes) + df_census.loc[:, "household_id"] = np.repeat( + np.arange(household_count), household_sizes + ) # Select sample from 100% population selector = random.random_sample(household_count) < sampling_rate diff --git a/synthesis/population/spatial/commute_distance.py b/synthesis/population/spatial/commute_distance.py index 49064d89..c9f0c2e6 100644 --- a/synthesis/population/spatial/commute_distance.py +++ b/synthesis/population/spatial/commute_distance.py @@ -1,28 +1,32 @@ import pandas as pd + def configure(context): context.stage("synthesis.population.enriched") context.stage("data.hts.commute_distance") + def execute(context): df_matching = context.stage("synthesis.population.enriched") df_commute_distance = context.stage("data.hts.commute_distance") df_work = pd.merge( df_matching[["person_id", "hts_id"]], - df_commute_distance["work"][["person_id", "commute_distance"]].rename(columns = dict(person_id = "hts_id")), - how = "left" + df_commute_distance["work"][["person_id", "commute_distance"]].rename( + columns=dict(person_id="hts_id") + ), + how="left", ) df_education = pd.merge( df_matching[["person_id", "hts_id"]], - df_commute_distance["education"][["person_id", "commute_distance"]].rename(columns = dict(person_id = "hts_id")), - how = "left" + df_commute_distance["education"][["person_id", "commute_distance"]].rename( + columns=dict(person_id="hts_id") + ), + how="left", ) assert len(df_work) == len(df_matching) assert len(df_education) == len(df_matching) - return dict( - work = df_work, education = df_education - ) + return dict(work=df_work, education=df_education) diff --git a/synthesis/population/spatial/home/locations.py b/synthesis/population/spatial/home/locations.py index 9347e5ec..0604941b 100644 --- a/synthesis/population/spatial/home/locations.py +++ 
b/synthesis/population/spatial/home/locations.py @@ -3,13 +3,15 @@ import pandas as pd import geopandas as gpd + def configure(context): context.stage("synthesis.population.spatial.home.zones") context.stage("synthesis.locations.home.locations") context.config("home_location_source", "addresses") - + context.config("random_seed") + def _sample_locations(context, args): # Extract data sets df_locations = context.data("df_locations") @@ -35,33 +37,39 @@ def _sample_locations(context, args): cdf = np.cumsum(df_locations["weight"].values) cdf /= cdf[-1] - indices = np.array([np.count_nonzero(cdf < u) - for u in random.random_sample(size = home_count)]) - + indices = np.array( + [np.count_nonzero(cdf < u) for u in random.random_sample(size=home_count)] + ) + # Apply selection df_homes["geometry"] = df_locations.iloc[indices]["geometry"].values df_homes["home_location_id"] = df_locations.iloc[indices]["home_location_id"].values - + # Update progress context.progress.update() - return gpd.GeoDataFrame(df_homes, crs = df_locations.crs) + return gpd.GeoDataFrame(df_homes, crs=df_locations.crs) + def execute(context): random = np.random.RandomState(context.config("random_seed")) df_homes = context.stage("synthesis.population.spatial.home.zones") df_locations = context.stage("synthesis.locations.home.locations") - + # Sample locations for home unique_iris_ids = sorted(set(df_homes["iris_id"].unique())) - with context.progress(label = "Sampling home locations ...", total = len(unique_iris_ids)): - with context.parallel(dict( - df_locations = df_locations, df_homes = df_homes - )) as parallel: - seeds = random.randint(10000, size = len(unique_iris_ids)) - df_homes = pd.concat(parallel.map(_sample_locations, zip(unique_iris_ids, seeds))) + with context.progress( + label="Sampling home locations ...", total=len(unique_iris_ids) + ): + with context.parallel( + dict(df_locations=df_locations, df_homes=df_homes) + ) as parallel: + seeds = random.randint(10000, size=len(unique_iris_ids)) + df_homes = pd.concat( + parallel.map(_sample_locations, zip(unique_iris_ids, seeds)) + ) out = ["household_id", "commune_id", "home_location_id", "geometry"] - + return df_homes[out] diff --git a/synthesis/population/spatial/home/zones.py b/synthesis/population/spatial/home/zones.py index 2964fdc5..4ae6da82 100644 --- a/synthesis/population/spatial/home/zones.py +++ b/synthesis/population/spatial/home/zones.py @@ -12,6 +12,7 @@ has less than 200 inhabitants to the second case. 
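As an illustrative sketch (made-up candidate zones and counts, not pipeline
data), the imputation below draws target zones proportionally to their
population with a single multinomial draw and repeats the candidate indices
accordingly:

    import numpy as np

    random = np.random.RandomState(0)
    population = np.array([100.0, 50.0, 850.0])   # candidate zone populations
    weights = population / population.sum()
    counts = random.multinomial(20, weights)      # 20 households to place
    indices = np.repeat(np.arange(len(weights)), counts)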
""" + def configure(context): context.stage("synthesis.population.sampled") @@ -21,66 +22,100 @@ def configure(context): context.config("random_seed") + def execute(context): random = np.random.RandomState(context.config("random_seed")) - df_households = context.stage("synthesis.population.sampled").drop_duplicates("household_id")[[ - "household_id", "commune_id", "iris_id", "departement_id" - ]].copy().set_index("household_id") + df_households = ( + context.stage("synthesis.population.sampled") + .drop_duplicates("household_id")[ + ["household_id", "commune_id", "iris_id", "departement_id"] + ] + .copy() + .set_index("household_id") + ) f_has_commune = df_households["commune_id"] != "undefined" f_has_iris = df_households["iris_id"] != "undefined" # Fix missing communes (we select from those without IRIS) - df_municipalities = context.stage("data.spatial.municipalities").set_index("commune_id") - df_municipalities["population"] = context.stage("data.spatial.population").groupby("commune_id")["population"].sum() + df_municipalities = context.stage("data.spatial.municipalities").set_index( + "commune_id" + ) + df_municipalities["population"] = ( + context.stage("data.spatial.population") + .groupby("commune_id")["population"] + .sum() + ) df_households["commune_id"] = df_households["commune_id"].cat.add_categories( - sorted(set(df_municipalities.index.unique()) - set(df_households["commune_id"].cat.categories))) + sorted( + set(df_municipalities.index.unique()) + - set(df_households["commune_id"].cat.categories) + ) + ) departements = df_households[~f_has_commune]["departement_id"].unique() - for departement_id in context.progress(departements, label = "Fixing missing communes ..."): + for departement_id in context.progress( + departements, label="Fixing missing communes ..." 
+ ): df_candidates = df_municipalities[ - ~df_municipalities["has_iris"] & - (df_municipalities["departement_id"].astype(str) == departement_id)] + ~df_municipalities["has_iris"] + & (df_municipalities["departement_id"].astype(str) == departement_id) + ] df_target = df_households[ - ~f_has_commune & - (df_households["departement_id"] == departement_id)].copy() + ~f_has_commune & (df_households["departement_id"] == departement_id) + ].copy() weights = df_candidates["population"].values.astype(float) weights /= np.sum(weights) - indices = np.repeat(np.arange(weights.shape[0]), random.multinomial(len(df_target), weights)) - df_target["commune_id"] = df_candidates.reset_index()["commune_id"].iloc[indices].values + indices = np.repeat( + np.arange(weights.shape[0]), random.multinomial(len(df_target), weights) + ) + df_target["commune_id"] = ( + df_candidates.reset_index()["commune_id"].iloc[indices].values + ) df_households.loc[df_target.index, "commune_id"] = df_target["commune_id"] # Fix missing IRIS (we select from those with <200 inhabitants) df_iris = context.stage("data.spatial.iris").set_index("iris_id") - df_iris["population"] = context.stage("data.spatial.population").set_index("iris_id")["population"] + df_iris["population"] = context.stage("data.spatial.population").set_index( + "iris_id" + )["population"] df_households["iris_id"] = df_households["iris_id"].cat.add_categories( - sorted(set(df_iris.index.unique()) - set(df_households["iris_id"].cat.categories))) + sorted( + set(df_iris.index.unique()) - set(df_households["iris_id"].cat.categories) + ) + ) communes = df_households[~f_has_iris & f_has_commune]["commune_id"].unique() - for commune_id in context.progress(communes, label = "Fixing missing IRIS ..."): + for commune_id in context.progress(communes, label="Fixing missing IRIS ..."): df_candidates = df_iris[ - (df_iris["population"] <= 200) & - (df_iris["commune_id"].astype(str) == commune_id)] + (df_iris["population"] <= 200) + & (df_iris["commune_id"].astype(str) == commune_id) + ] df_target = df_households[ - f_has_commune & ~f_has_iris & - (df_households["commune_id"] == commune_id)].copy() + f_has_commune & ~f_has_iris & (df_households["commune_id"] == commune_id) + ].copy() weights = df_candidates["population"].values.astype(float) - if (weights == 0.0).all(): weights += 1.0 + if (weights == 0.0).all(): + weights += 1.0 weights /= np.sum(weights) - indices = np.repeat(np.arange(weights.shape[0]), random.multinomial(len(df_target), weights)) - df_target["iris_id"] = df_candidates.reset_index()["iris_id"].iloc[indices].values + indices = np.repeat( + np.arange(weights.shape[0]), random.multinomial(len(df_target), weights) + ) + df_target["iris_id"] = ( + df_candidates.reset_index()["iris_id"].iloc[indices].values + ) df_households.loc[df_target.index, "iris_id"] = df_target["iris_id"] @@ -90,14 +125,20 @@ def execute(context): # Now there are some people left who don't have an IRIS, because the commune # is not covered in IRIS. Hence, we drive the commune-based IRIS for them. 
f = df_households["iris_id"] == "undefined" - df_households.loc[f, "iris_id"] = df_households.loc[f, "commune_id"].astype(str) + "0000" + df_households.loc[f, "iris_id"] = ( + df_households.loc[f, "commune_id"].astype(str) + "0000" + ) # Finally, make sure that we have no invalid codes - invalid_communes = set(df_households["commune_id"].unique()) - set(df_municipalities.index.unique()) + invalid_communes = set(df_households["commune_id"].unique()) - set( + df_municipalities.index.unique() + ) invalid_iris = set(df_households["iris_id"].unique()) - set(df_iris.index.unique()) assert len(invalid_communes) == 0 assert len(invalid_iris) == 0 assert np.count_nonzero(df_households["iris_id"] == "undefined") == 0 - return df_households.reset_index()[["household_id", "departement_id", "commune_id", "iris_id"]] + return df_households.reset_index()[ + ["household_id", "departement_id", "commune_id", "iris_id"] + ] diff --git a/synthesis/population/spatial/locations.py b/synthesis/population/spatial/locations.py index 2397e095..adc9bb2d 100644 --- a/synthesis/population/spatial/locations.py +++ b/synthesis/population/spatial/locations.py @@ -2,6 +2,7 @@ import geopandas as gpd import numpy as np + def configure(context): context.stage("synthesis.population.spatial.home.locations") context.stage("synthesis.population.spatial.primary.locations") @@ -11,57 +12,94 @@ def configure(context): context.stage("synthesis.population.sampled") context.stage("data.spatial.iris") + def execute(context): df_home = context.stage("synthesis.population.spatial.home.locations") - df_work, df_education = context.stage("synthesis.population.spatial.primary.locations") + df_work, df_education = context.stage( + "synthesis.population.spatial.primary.locations" + ) df_secondary = context.stage("synthesis.population.spatial.secondary.locations")[0] - df_persons = context.stage("synthesis.population.sampled")[["person_id", "household_id"]] - df_locations = context.stage("synthesis.population.activities")[["person_id", "activity_index", "purpose"]] + df_persons = context.stage("synthesis.population.sampled")[ + ["person_id", "household_id"] + ] + df_locations = context.stage("synthesis.population.activities")[ + ["person_id", "activity_index", "purpose"] + ] # Home locations df_home_locations = df_locations[df_locations["purpose"] == "home"] - df_home_locations = pd.merge(df_home_locations, df_persons, on = "person_id") - df_home_locations = pd.merge(df_home_locations, df_home[["household_id", "geometry"]], on = "household_id") + df_home_locations = pd.merge(df_home_locations, df_persons, on="person_id") + df_home_locations = pd.merge( + df_home_locations, df_home[["household_id", "geometry"]], on="household_id" + ) df_home_locations["location_id"] = -1 - df_home_locations = df_home_locations[["person_id", "activity_index", "location_id", "geometry"]] + df_home_locations = df_home_locations[ + ["person_id", "activity_index", "location_id", "geometry"] + ] # Work locations df_work_locations = df_locations[df_locations["purpose"] == "work"] - df_work_locations = pd.merge(df_work_locations, df_work[["person_id", "location_id", "geometry"]], on = "person_id") - df_work_locations = df_work_locations[["person_id", "activity_index", "location_id", "geometry"]] + df_work_locations = pd.merge( + df_work_locations, + df_work[["person_id", "location_id", "geometry"]], + on="person_id", + ) + df_work_locations = df_work_locations[ + ["person_id", "activity_index", "location_id", "geometry"] + ] assert not 
df_work_locations["geometry"].isna().any() # Education locations df_education_locations = df_locations[df_locations["purpose"] == "education"] - df_education_locations = pd.merge(df_education_locations, df_education[["person_id", "location_id", "geometry"]], on = "person_id") - df_education_locations = df_education_locations[["person_id", "activity_index", "location_id", "geometry"]] + df_education_locations = pd.merge( + df_education_locations, + df_education[["person_id", "location_id", "geometry"]], + on="person_id", + ) + df_education_locations = df_education_locations[ + ["person_id", "activity_index", "location_id", "geometry"] + ] assert not df_education_locations["geometry"].isna().any() # Secondary locations - df_secondary_locations = df_locations[~df_locations["purpose"].isin(("home", "work", "education"))].copy() - df_secondary_locations = pd.merge(df_secondary_locations, df_secondary[[ - "person_id", "activity_index", "location_id", "geometry" - ]], on = ["person_id", "activity_index"], how = "left") - df_secondary_locations = df_secondary_locations[["person_id", "activity_index", "location_id", "geometry"]] + df_secondary_locations = df_locations[ + ~df_locations["purpose"].isin(("home", "work", "education")) + ].copy() + df_secondary_locations = pd.merge( + df_secondary_locations, + df_secondary[["person_id", "activity_index", "location_id", "geometry"]], + on=["person_id", "activity_index"], + how="left", + ) + df_secondary_locations = df_secondary_locations[ + ["person_id", "activity_index", "location_id", "geometry"] + ] assert not df_secondary_locations["geometry"].isna().any() # Validation initial_count = len(df_locations) - df_locations = pd.concat([df_home_locations, df_work_locations, df_education_locations, df_secondary_locations]) + df_locations = pd.concat( + [ + df_home_locations, + df_work_locations, + df_education_locations, + df_secondary_locations, + ] + ) - df_locations = df_locations.sort_values(by = ["person_id", "activity_index"]) + df_locations = df_locations.sort_values(by=["person_id", "activity_index"]) final_count = len(df_locations) assert initial_count == final_count assert not df_locations["geometry"].isna().any() - df_locations = gpd.GeoDataFrame(df_locations, crs = df_home.crs) + df_locations = gpd.GeoDataFrame(df_locations, crs=df_home.crs) # add municipalities df_iris = context.stage("data.spatial.iris") - df_iris = gpd.GeoDataFrame(df_iris, crs = df_home.crs) + df_iris = gpd.GeoDataFrame(df_iris, crs=df_home.crs) - df_locations = gpd.sjoin(df_locations,df_iris,how="left") + df_locations = gpd.sjoin(df_locations, df_iris, how="left") return df_locations diff --git a/synthesis/population/spatial/primary/candidates.py b/synthesis/population/spatial/primary/candidates.py index 7af9963c..811bb5be 100644 --- a/synthesis/population/spatial/primary/candidates.py +++ b/synthesis/population/spatial/primary/candidates.py @@ -1,6 +1,7 @@ import pandas as pd import numpy as np + def configure(context): context.stage("data.od.weighted") @@ -15,11 +16,14 @@ def configure(context): context.config("random_seed") context.config("education_location_source", "bpe") + EDUCATION_MAPPING = { "primary_school": ["C1"], "middle_school": ["C2"], "high_school": ["C3"], - "higher_education": ["C4", "C5", "C6"]} + "higher_education": ["C4", "C5", "C6"], +} + def sample_destination_municipalities(context, arguments): # Load data @@ -37,6 +41,7 @@ def sample_destination_municipalities(context, arguments): context.progress.update() return df_od[["origin_id", "destination_id", 
"count"]] + def sample_locations(context, arguments): # Load data destination_id, random_seed = arguments @@ -45,7 +50,7 @@ def sample_locations(context, arguments): # Prepare state random = np.random.RandomState(random_seed) df_locations = df_locations[df_locations["commune_id"] == destination_id] - + # Determine demand df_flow = df_flow[df_flow["destination_id"] == destination_id] count = df_flow["count"].sum() @@ -55,40 +60,45 @@ def sample_locations(context, arguments): if "weight" in df_locations: weight = df_locations["weight"].values / df_locations["weight"].sum() - + location_counts = random.multinomial(count, weight) location_ids = df_locations["location_id"].values location_ids = np.repeat(location_ids, location_counts) - # Shuffle, as otherwise it is likely that *all* copies + # Shuffle, as otherwise it is likely that *all* copies # of the first location id go to the first origin, and so on random.shuffle(location_ids) # Construct a data set for all commutes to this zone origin_id = np.repeat(df_flow["origin_id"].values, df_flow["count"].values) - df_result = pd.DataFrame.from_records(dict( - origin_id = origin_id, - location_id = location_ids - )) + df_result = pd.DataFrame.from_records( + dict(origin_id=origin_id, location_id=location_ids) + ) df_result["destination_id"] = destination_id return df_result -def process(context, purpose, random, df_persons, df_od, df_locations,step_name): + +def process(context, purpose, random, df_persons, df_od, df_locations, step_name): df_persons = df_persons[df_persons["has_%s_trip" % purpose]] # Sample commute flows based on population - df_demand = df_persons.groupby("commune_id").size().reset_index(name = "count") + df_demand = df_persons.groupby("commune_id").size().reset_index(name="count") df_demand["random_seed"] = random.randint(0, int(1e6), len(df_demand)) df_demand = df_demand[["commune_id", "count", "random_seed"]] df_demand = df_demand[df_demand["count"] > 0] df_flow = [] - with context.progress(label = "Sampling %s municipalities" % step_name, total = len(df_demand)) as progress: - with context.parallel(dict(df_od = df_od)) as parallel: - for df_partial in parallel.imap_unordered(sample_destination_municipalities, df_demand.itertuples(index = False, name = None)): + with context.progress( + label="Sampling %s municipalities" % step_name, total=len(df_demand) + ) as progress: + with context.parallel(dict(df_od=df_od)) as parallel: + for df_partial in parallel.imap_unordered( + sample_destination_municipalities, + df_demand.itertuples(index=False, name=None), + ): df_flow.append(df_partial) df_flow = pd.concat(df_flow).sort_values(["origin_id", "destination_id"]) @@ -99,30 +109,45 @@ def process(context, purpose, random, df_persons, df_od, df_locations,step_name) df_result = [] - with context.progress(label = "Sampling %s destinations" % purpose, total = len(df_demand)) as progress: - with context.parallel(dict(df_locations = df_locations, df_flow = df_flow)) as parallel: - for df_partial in parallel.imap_unordered(sample_locations, zip(unique_ids, random_seeds)): + with context.progress( + label="Sampling %s destinations" % purpose, total=len(df_demand) + ) as progress: + with context.parallel( + dict(df_locations=df_locations, df_flow=df_flow) + ) as parallel: + for df_partial in parallel.imap_unordered( + sample_locations, zip(unique_ids, random_seeds) + ): df_result.append(df_partial) df_result = pd.concat(df_result).sort_values(["origin_id", "destination_id"]) return df_result[["origin_id", "destination_id", "location_id"]] + 
def execute(context): # Prepare population data - df_persons = context.stage("synthesis.population.enriched")[["person_id", "household_id", "age_range"]].copy() + df_persons = context.stage("synthesis.population.enriched")[ + ["person_id", "household_id", "age_range"] + ].copy() df_trips = context.stage("synthesis.population.trips") - df_persons["has_work_trip"] = df_persons["person_id"].isin(df_trips[ - (df_trips["following_purpose"] == "work") | (df_trips["preceding_purpose"] == "work") - ]["person_id"]) - - df_persons["has_education_trip"] = df_persons["person_id"].isin(df_trips[ - (df_trips["following_purpose"] == "education") | (df_trips["preceding_purpose"] == "education") - ]["person_id"]) + df_persons["has_work_trip"] = df_persons["person_id"].isin( + df_trips[ + (df_trips["following_purpose"] == "work") + | (df_trips["preceding_purpose"] == "work") + ]["person_id"] + ) + + df_persons["has_education_trip"] = df_persons["person_id"].isin( + df_trips[ + (df_trips["following_purpose"] == "education") + | (df_trips["preceding_purpose"] == "education") + ]["person_id"] + ) df_homes = context.stage("synthesis.population.spatial.home.zones") - df_persons = pd.merge(df_persons, df_homes, on = "household_id") + df_persons = pd.merge(df_persons, df_homes, on="household_id") # Prepare spatial data df_work_od, df_education_od = context.stage("data.od.weighted") @@ -132,27 +157,50 @@ def execute(context): df_locations = context.stage("synthesis.locations.work") df_locations["weight"] = df_locations["employees"] - df_work = process(context, "work", random, df_persons, - df_work_od, df_locations, "work" + df_work = process( + context, "work", random, df_persons, df_work_od, df_locations, "work" ) df_locations = context.stage("synthesis.locations.education") - if context.config("education_location_source") == 'bpe': - df_education = process(context, "education", random, df_persons, df_education_od, df_locations,"education") - else : + if context.config("education_location_source") == "bpe": + df_education = process( + context, + "education", + random, + df_persons, + df_education_od, + df_locations, + "education", + ) + else: df_education = [] for prefix, education_type in EDUCATION_MAPPING.items(): df_education.append( - process(context, "education", random, - df_persons[df_persons["age_range"]==prefix], - df_education_od[df_education_od["age_range"]==prefix],df_locations[df_locations["education_type"].isin(education_type)],prefix) + process( + context, + "education", + random, + df_persons[df_persons["age_range"] == prefix], + df_education_od[df_education_od["age_range"] == prefix], + df_locations[df_locations["education_type"].isin(education_type)], + prefix, + ) ) df_education = pd.concat(df_education) return dict( - work_candidates = df_work, - education_candidates = df_education, - persons = df_persons[df_persons["has_work_trip"] | df_persons["has_education_trip"]][[ - "person_id", "household_id", "age_range", "commune_id", "has_work_trip", "has_education_trip" - ]] + work_candidates=df_work, + education_candidates=df_education, + persons=df_persons[ + df_persons["has_work_trip"] | df_persons["has_education_trip"] + ][ + [ + "person_id", + "household_id", + "age_range", + "commune_id", + "has_work_trip", + "has_education_trip", + ] + ], ) diff --git a/synthesis/population/spatial/primary/locations.py b/synthesis/population/spatial/primary/locations.py index 136e18ac..92c1c5f1 100644 --- a/synthesis/population/spatial/primary/locations.py +++ 
b/synthesis/population/spatial/primary/locations.py @@ -3,6 +3,7 @@ import geopandas as gpd from .candidates import EDUCATION_MAPPING + def configure(context): context.stage("synthesis.population.spatial.primary.candidates") context.stage("synthesis.population.spatial.commute_distance") @@ -16,17 +17,20 @@ def configure(context): def define_distance_ordering(df_persons, df_candidates, progress): indices = [] - f_available = np.ones((len(df_candidates),), dtype = bool) + f_available = np.ones((len(df_candidates),), dtype=bool) costs = np.ones((len(df_candidates),)) * np.inf - commute_coordinates = np.vstack([ - df_candidates["geometry"].x.values, - df_candidates["geometry"].y.values - ]).T + commute_coordinates = np.vstack( + [df_candidates["geometry"].x.values, df_candidates["geometry"].y.values] + ).T - for home_coordinate, commute_distance in zip(df_persons["home_location"], df_persons["commute_distance"]): + for home_coordinate, commute_distance in zip( + df_persons["home_location"], df_persons["commute_distance"] + ): home_coordinate = np.array([home_coordinate.x, home_coordinate.y]) - distances = np.sqrt(np.sum((commute_coordinates[f_available] - home_coordinate)**2, axis = 1)) + distances = np.sqrt( + np.sum((commute_coordinates[f_available] - home_coordinate) ** 2, axis=1) + ) costs[f_available] = np.abs(distances - commute_distance) selected_index = np.argmin(costs) @@ -40,20 +44,25 @@ def define_distance_ordering(df_persons, df_candidates, progress): return indices + def define_random_ordering(df_persons, df_candidates, progress): progress.update(len(df_candidates)) return np.arange(len(df_candidates)) + define_ordering = define_distance_ordering + def process_municipality(context, origin_id): # Load data - df_candidates, df_persons = context.data("df_candidates"), context.data("df_persons") + df_candidates, df_persons = context.data("df_candidates"), context.data( + "df_persons" + ) # Find relevant records - df_persons = df_persons[df_persons["commune_id"] == origin_id][[ - "person_id", "home_location", "commute_distance" - ]].copy() + df_persons = df_persons[df_persons["commune_id"] == origin_id][ + ["person_id", "home_location", "commute_distance"] + ].copy() df_candidates = df_candidates[df_candidates["origin_id"] == origin_id] # From previous step, this should be equal! 
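# Illustration (not part of the patch): define_distance_ordering above performs a
# greedy matching. For each person in turn it measures the Euclidean distance from
# the home coordinate to every still-available candidate and keeps the candidate
# whose distance deviates least from the person's target commute distance, then
# removes it from the pool. A compact sketch of the same idea with made-up inputs:
import numpy as np

homes = np.array([[0.0, 0.0], [1.0, 0.0]])       # home coordinates (x, y)
targets = np.array([5.0, 2.0])                    # desired commute distances
candidates = np.array([[4.0, 3.0], [3.0, 0.0]])   # candidate destination coordinates

available = np.ones(len(candidates), dtype=bool)
order = []
for home, target in zip(homes, targets):
    distances = np.linalg.norm(candidates - home, axis=1)
    costs = np.where(available, np.abs(distances - target), np.inf)
    chosen = int(np.argmin(costs))   # best remaining match for this person
    order.append(chosen)
    available[chosen] = False        # every candidate is assigned exactly once
# order == [0, 1]: person 1 gets the location 5 units away, person 2 the one 2 units away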
@@ -63,22 +72,28 @@ def process_municipality(context, origin_id): df_candidates = df_candidates.iloc[indices] df_candidates["person_id"] = df_persons["person_id"].values - df_candidates = df_candidates.rename(columns = dict(destination_id = "commune_id")) + df_candidates = df_candidates.rename(columns=dict(destination_id="commune_id")) return df_candidates[["person_id", "commune_id", "location_id", "geometry"]] + def process(context, purpose, df_persons, df_candidates): unique_ids = df_candidates["origin_id"].unique() df_result = [] - with context.progress(label = "Distributing %s destinations" % purpose, total = len(df_persons)) as progress: - with context.parallel(dict(df_persons = df_persons, df_candidates = df_candidates)) as parallel: + with context.progress( + label="Distributing %s destinations" % purpose, total=len(df_persons) + ) as progress: + with context.parallel( + dict(df_persons=df_persons, df_candidates=df_candidates) + ) as parallel: for df_partial in parallel.imap_unordered(process_municipality, unique_ids): df_result.append(df_partial) return pd.concat(df_result).sort_index() + def execute(context): data = context.stage("synthesis.population.spatial.primary.candidates") df_persons = data["persons"] @@ -90,38 +105,69 @@ def execute(context): # Attach home locations df_home = context.stage("synthesis.population.spatial.home.locations") - df_work = pd.merge(df_work, df_home[["household_id", "geometry"]].rename(columns = { - "geometry": "home_location" - }), how = "left", on = "household_id") - - df_education = pd.merge(df_education, df_home[["household_id", "geometry"]].rename(columns = { - "geometry": "home_location" - }), how = "left", on = "household_id") + df_work = pd.merge( + df_work, + df_home[["household_id", "geometry"]].rename( + columns={"geometry": "home_location"} + ), + how="left", + on="household_id", + ) + + df_education = pd.merge( + df_education, + df_home[["household_id", "geometry"]].rename( + columns={"geometry": "home_location"} + ), + how="left", + on="household_id", + ) # Attach commute distances df_commute_distance = context.stage("synthesis.population.spatial.commute_distance") - df_work = pd.merge(df_work, df_commute_distance["work"], how = "left", on = "person_id") - df_education = pd.merge(df_education, df_commute_distance["education"], how = "left", on = "person_id") + df_work = pd.merge(df_work, df_commute_distance["work"], how="left", on="person_id") + df_education = pd.merge( + df_education, df_commute_distance["education"], how="left", on="person_id" + ) # Attach geometry - df_locations = context.stage("synthesis.locations.work")[["location_id", "geometry"]] + df_locations = context.stage("synthesis.locations.work")[ + ["location_id", "geometry"] + ] df_work_candidates = data["work_candidates"] - df_work_candidates = pd.merge(df_work_candidates, df_locations, how = "left", on = "location_id") + df_work_candidates = pd.merge( + df_work_candidates, df_locations, how="left", on="location_id" + ) df_work_candidates = gpd.GeoDataFrame(df_work_candidates) - df_locations = context.stage("synthesis.locations.education")[["education_type", "location_id", "geometry"]] + df_locations = context.stage("synthesis.locations.education")[ + ["education_type", "location_id", "geometry"] + ] df_education_candidates = data["education_candidates"] - df_education_candidates = pd.merge(df_education_candidates, df_locations, how = "left", on = "location_id") + df_education_candidates = pd.merge( + df_education_candidates, df_locations, how="left", on="location_id" 
+ ) df_education_candidates = gpd.GeoDataFrame(df_education_candidates) # Assign destinations df_work = process(context, "work", df_work, df_work_candidates) - if context.config("education_location_source") == 'bpe': - df_education = process(context, "education", df_education, df_education_candidates) - else : + if context.config("education_location_source") == "bpe": + df_education = process( + context, "education", df_education, df_education_candidates + ) + else: education = [] for prefix, education_type in EDUCATION_MAPPING.items(): - education.append(process(context, prefix,df_education[df_education["age_range"]==prefix],df_education_candidates[df_education_candidates["education_type"].isin(education_type)])) + education.append( + process( + context, + prefix, + df_education[df_education["age_range"] == prefix], + df_education_candidates[ + df_education_candidates["education_type"].isin(education_type) + ], + ) + ) df_education = pd.concat(education).sort_index() return df_work, df_education diff --git a/synthesis/population/spatial/secondary/components.py b/synthesis/population/spatial/secondary/components.py index d16a8877..49627650 100644 --- a/synthesis/population/spatial/secondary/components.py +++ b/synthesis/population/spatial/secondary/components.py @@ -2,9 +2,12 @@ import sklearn.neighbors import numpy as np + class CustomDistanceSampler(rda.FeasibleDistanceSampler): - def __init__(self, random, distributions, maximum_iterations = 1000): - rda.FeasibleDistanceSampler.__init__(self, random = random, maximum_iterations = maximum_iterations) + def __init__(self, random, distributions, maximum_iterations=1000): + rda.FeasibleDistanceSampler.__init__( + self, random=random, maximum_iterations=maximum_iterations + ) self.random = random self.distributions = distributions @@ -12,7 +15,9 @@ def __init__(self, random, distributions, maximum_iterations = 1000): def sample_distances(self, problem): distances = np.zeros((len(problem["modes"]))) - for index, (mode, travel_time) in enumerate(zip(problem["modes"], problem["travel_times"])): + for index, (mode, travel_time) in enumerate( + zip(problem["modes"], problem["travel_times"]) + ): mode_distribution = self.distributions[mode] bound_index = np.count_nonzero(travel_time > mode_distribution["bounds"]) @@ -24,6 +29,7 @@ def sample_distances(self, problem): return distances + class CandidateIndex: def __init__(self, data): self.data = data @@ -34,7 +40,9 @@ def __init__(self, data): self.indices[purpose] = sklearn.neighbors.KDTree(data["locations"]) def query(self, purpose, location): - index = self.indices[purpose].query(location.reshape(1, -1), return_distance = False)[0][0] + index = self.indices[purpose].query( + location.reshape(1, -1), return_distance=False + )[0][0] identifier = self.data[purpose]["identifiers"][index] location = self.data[purpose]["locations"][index] return identifier, location @@ -45,6 +53,7 @@ def sample(self, purpose, random): location = self.data[purpose]["locations"][index] return identifier, location + class CustomDiscretizationSolver(rda.DiscretizationSolver): def __init__(self, index): self.index = index @@ -62,9 +71,12 @@ def solve(self, problem, locations): assert len(discretized_locations) == problem["size"] return dict( - valid = True, locations = np.vstack(discretized_locations), identifiers = discretized_identifiers + valid=True, + locations=np.vstack(discretized_locations), + identifiers=discretized_identifiers, ) + class CustomFreeChainSolver(rda.RelaxationSolver): def __init__(self, random, 
index): self.random = random @@ -76,4 +88,4 @@ def solve(self, problem, distances): locations = np.vstack((anchor, locations)) assert len(locations) == len(distances) + 1 - return dict(valid = True, locations = locations) + return dict(valid=True, locations=locations) diff --git a/synthesis/population/spatial/secondary/distance_distributions.py b/synthesis/population/spatial/secondary/distance_distributions.py index 7fb7273b..fbf31424 100644 --- a/synthesis/population/spatial/secondary/distance_distributions.py +++ b/synthesis/population/spatial/secondary/distance_distributions.py @@ -1,8 +1,10 @@ import numpy as np import pandas as pd + def configure(context): - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") + def calculate_bounds(values, bin_size): values = np.sort(values) @@ -26,25 +28,44 @@ def calculate_bounds(values, bin_size): bounds[-1] = np.inf else: bounds.append(np.inf) - + return bounds + def execute(context): # Prepare data df_households, df_persons, df_trips = context.stage("hts") - df_trips = pd.merge(df_trips, df_persons[["person_id", "person_weight"]].rename(columns = { "person_weight": "weight" })) + df_trips = pd.merge( + df_trips, + df_persons[["person_id", "person_weight"]].rename( + columns={"person_weight": "weight"} + ), + ) df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"] - distance_column = "euclidean_distance" if "euclidean_distance" in df_trips else "routed_distance" - df = df_trips[["mode", "travel_time", distance_column, "weight", "preceding_purpose", "following_purpose"]].rename(columns = { distance_column: "distance" }) + distance_column = ( + "euclidean_distance" if "euclidean_distance" in df_trips else "routed_distance" + ) + df = df_trips[ + [ + "mode", + "travel_time", + distance_column, + "weight", + "preceding_purpose", + "following_purpose", + ] + ].rename(columns={distance_column: "distance"}) # Filtering primary_activities = ["home", "work", "education"] - df = df[~( - df["preceding_purpose"].isin(primary_activities) & - df["following_purpose"].isin(primary_activities) - )] + df = df[ + ~( + df["preceding_purpose"].isin(primary_activities) + & df["following_purpose"].isin(primary_activities) + ) + ] # Calculate distributions modes = df["mode"].unique() @@ -57,11 +78,13 @@ def execute(context): f_mode = df["mode"] == mode bounds = calculate_bounds(df[f_mode]["travel_time"].values, bin_size) - distributions[mode] = dict(bounds = np.array(bounds), distributions = []) + distributions[mode] = dict(bounds=np.array(bounds), distributions=[]) # Second, calculate distribution per band for lower_bound, upper_bound in zip([-np.inf] + bounds[:-1], bounds): - f_bound = (df["travel_time"] > lower_bound) & (df["travel_time"] <= upper_bound) + f_bound = (df["travel_time"] > lower_bound) & ( + df["travel_time"] <= upper_bound + ) # Set up distribution values = df[f_mode & f_bound]["distance"].values @@ -75,6 +98,8 @@ def execute(context): cdf /= cdf[-1] # Write distribution - distributions[mode]["distributions"].append(dict(cdf = cdf, values = values, weights = weights)) + distributions[mode]["distributions"].append( + dict(cdf=cdf, values=values, weights=weights) + ) return distributions diff --git a/synthesis/population/spatial/secondary/locations.py b/synthesis/population/spatial/secondary/locations.py index b36fb214..f6871e79 100644 --- a/synthesis/population/spatial/secondary/locations.py +++ b/synthesis/population/spatial/secondary/locations.py @@ -6,6 +6,7 @@ from 
synthesis.population.spatial.secondary.problems import find_assignment_problems + def configure(context): context.stage("synthesis.population.trips") @@ -21,72 +22,113 @@ def configure(context): context.config("secloc_maximum_iterations", np.inf) + def prepare_locations(context): # Load persons and their primary locations df_home = context.stage("synthesis.population.spatial.home.locations") - df_work, df_education = context.stage("synthesis.population.spatial.primary.locations") + df_work, df_education = context.stage( + "synthesis.population.spatial.primary.locations" + ) crs = df_home.crs - df_home = df_home.rename(columns = { "geometry": "home" }) - df_work = df_work.rename(columns = { "geometry": "work" }) - df_education = df_education.rename(columns = { "geometry": "education" }) + df_home = df_home.rename(columns={"geometry": "home"}) + df_work = df_work.rename(columns={"geometry": "work"}) + df_education = df_education.rename(columns={"geometry": "education"}) + + df_locations = context.stage("synthesis.population.sampled")[ + ["person_id", "household_id"] + ] + df_locations = pd.merge( + df_locations, df_home[["household_id", "home"]], how="left", on="household_id" + ) + df_locations = pd.merge( + df_locations, df_work[["person_id", "work"]], how="left", on="person_id" + ) + df_locations = pd.merge( + df_locations, + df_education[["person_id", "education"]], + how="left", + on="person_id", + ) - df_locations = context.stage("synthesis.population.sampled")[["person_id", "household_id"]] - df_locations = pd.merge(df_locations, df_home[["household_id", "home"]], how = "left", on = "household_id") - df_locations = pd.merge(df_locations, df_work[["person_id", "work"]], how = "left", on = "person_id") - df_locations = pd.merge(df_locations, df_education[["person_id", "education"]], how = "left", on = "person_id") + return ( + df_locations[["person_id", "home", "work", "education"]].sort_values( + by="person_id" + ), + crs, + ) - return df_locations[["person_id", "home", "work", "education"]].sort_values(by = "person_id"), crs def prepare_destinations(context): df_locations = context.stage("synthesis.locations.secondary") identifiers = df_locations["location_id"].values - locations = np.vstack(df_locations["geometry"].apply(lambda x: np.array([x.x, x.y])).values) + locations = np.vstack( + df_locations["geometry"].apply(lambda x: np.array([x.x, x.y])).values + ) data = {} for purpose in ("shop", "leisure", "other"): f = df_locations["offers_%s" % purpose].values - data[purpose] = dict( - identifiers = identifiers[f], - locations = locations[f] - ) + data[purpose] = dict(identifiers=identifiers[f], locations=locations[f]) return data + def resample_cdf(cdf, factor): if factor >= 0.0: cdf = cdf * (1.0 + factor * np.arange(1, len(cdf) + 1) / len(cdf)) else: - cdf = cdf * (1.0 + abs(factor) - abs(factor) * np.arange(1, len(cdf) + 1) / len(cdf)) + cdf = cdf * ( + 1.0 + abs(factor) - abs(factor) * np.arange(1, len(cdf) + 1) / len(cdf) + ) cdf /= cdf[-1] return cdf + def resample_distributions(distributions, factors): for mode, mode_distributions in distributions.items(): for distribution in mode_distributions["distributions"]: distribution["cdf"] = resample_cdf(distribution["cdf"], factors[mode]) -from synthesis.population.spatial.secondary.rda import AssignmentSolver, DiscretizationErrorObjective, GravityChainSolver, AngularTailSolver, GeneralRelaxationSolver -from synthesis.population.spatial.secondary.components import CustomDistanceSampler, CustomDiscretizationSolver, CandidateIndex, 
CustomFreeChainSolver + +from synthesis.population.spatial.secondary.rda import ( + AssignmentSolver, + DiscretizationErrorObjective, + GravityChainSolver, + AngularTailSolver, + GeneralRelaxationSolver, +) +from synthesis.population.spatial.secondary.components import ( + CustomDistanceSampler, + CustomDiscretizationSolver, + CandidateIndex, + CustomFreeChainSolver, +) + def execute(context): # Load trips and primary locations - df_trips = context.stage("synthesis.population.trips").sort_values(by = ["person_id", "trip_index"]) + df_trips = context.stage("synthesis.population.trips").sort_values( + by=["person_id", "trip_index"] + ) df_trips["travel_time"] = df_trips["arrival_time"] - df_trips["departure_time"] df_primary, crs = prepare_locations(context) # Prepare data - distance_distributions = context.stage("synthesis.population.spatial.secondary.distance_distributions") + distance_distributions = context.stage( + "synthesis.population.spatial.secondary.distance_distributions" + ) destinations = prepare_destinations(context) # Resampling for calibration - resample_distributions(distance_distributions, dict( - car = 0.0, car_passenger = 0.1, pt = 0.5, bike = 0.0, walk = -0.5 - )) + resample_distributions( + distance_distributions, + dict(car=0.0, car_passenger=0.1, pt=0.5, bike=0.0, walk=-0.5), + ) # Segment into subsamples processes = context.config("processes") @@ -96,108 +138,133 @@ def execute(context): unique_person_ids = np.array_split(unique_person_ids, processes) random = np.random.RandomState(context.config("random_seed")) - random_seeds = random.randint(10000, size = processes) + random_seeds = random.randint(10000, size=processes) # Create batch problems for parallelization batches = [] for index in range(processes): - batches.append(( - df_trips[df_trips["person_id"].isin(unique_person_ids[index])], - df_primary[df_primary["person_id"].isin(unique_person_ids[index])], - random_seeds[index], crs - )) + batches.append( + ( + df_trips[df_trips["person_id"].isin(unique_person_ids[index])], + df_primary[df_primary["person_id"].isin(unique_person_ids[index])], + random_seeds[index], + crs, + ) + ) # Run algorithm in parallel - with context.progress(label = "Assigning secondary locations to persons", total = number_of_persons): - with context.parallel(processes = processes, data = dict( - distance_distributions = distance_distributions, - destinations = destinations - )) as parallel: + with context.progress( + label="Assigning secondary locations to persons", total=number_of_persons + ): + with context.parallel( + processes=processes, + data=dict( + distance_distributions=distance_distributions, destinations=destinations + ), + ) as parallel: df_locations, df_convergence = [], [] - for df_locations_item, df_convergence_item in parallel.imap_unordered(process, batches): + for df_locations_item, df_convergence_item in parallel.imap_unordered( + process, batches + ): df_locations.append(df_locations_item) df_convergence.append(df_convergence_item) - df_locations = pd.concat(df_locations).sort_values(by = ["person_id", "activity_index"]) + df_locations = pd.concat(df_locations).sort_values( + by=["person_id", "activity_index"] + ) df_convergence = pd.concat(df_convergence) print("Success rate:", df_convergence["valid"].mean()) return df_locations, df_convergence + def process(context, arguments): - df_trips, df_primary, random_seed, crs = arguments - - # Set up RNG - random = np.random.RandomState(random_seed) - maximum_iterations = context.config("secloc_maximum_iterations") - - # Set 
up discretization solver - destinations = context.data("destinations") - candidate_index = CandidateIndex(destinations) - discretization_solver = CustomDiscretizationSolver(candidate_index) - - # Set up distance sampler - distance_distributions = context.data("distance_distributions") - distance_sampler = CustomDistanceSampler( - maximum_iterations = min(1000, maximum_iterations), - random = random, - distributions = distance_distributions) - - # Set up relaxation solver; currently, we do not consider tail problems. - chain_solver = GravityChainSolver( - random = random, eps = 10.0, lateral_deviation = 10.0, alpha = 0.1, - maximum_iterations = min(1000, maximum_iterations) + df_trips, df_primary, random_seed, crs = arguments + + # Set up RNG + random = np.random.RandomState(random_seed) + maximum_iterations = context.config("secloc_maximum_iterations") + + # Set up discretization solver + destinations = context.data("destinations") + candidate_index = CandidateIndex(destinations) + discretization_solver = CustomDiscretizationSolver(candidate_index) + + # Set up distance sampler + distance_distributions = context.data("distance_distributions") + distance_sampler = CustomDistanceSampler( + maximum_iterations=min(1000, maximum_iterations), + random=random, + distributions=distance_distributions, ) - tail_solver = AngularTailSolver(random = random) - free_solver = CustomFreeChainSolver(random, candidate_index) + # Set up relaxation solver; currently, we do not consider tail problems. + chain_solver = GravityChainSolver( + random=random, + eps=10.0, + lateral_deviation=10.0, + alpha=0.1, + maximum_iterations=min(1000, maximum_iterations), + ) + + tail_solver = AngularTailSolver(random=random) + free_solver = CustomFreeChainSolver(random, candidate_index) - relaxation_solver = GeneralRelaxationSolver(chain_solver, tail_solver, free_solver) + relaxation_solver = GeneralRelaxationSolver(chain_solver, tail_solver, free_solver) - # Set up assignment solver - thresholds = dict( - car = 200.0, car_passenger = 200.0, pt = 200.0, - bike = 100.0, walk = 100.0 - ) + # Set up assignment solver + thresholds = dict(car=200.0, car_passenger=200.0, pt=200.0, bike=100.0, walk=100.0) - assignment_objective = DiscretizationErrorObjective(thresholds = thresholds) - assignment_solver = AssignmentSolver( - distance_sampler = distance_sampler, - relaxation_solver = relaxation_solver, - discretization_solver = discretization_solver, - objective = assignment_objective, - maximum_iterations = min(20, maximum_iterations) - ) + assignment_objective = DiscretizationErrorObjective(thresholds=thresholds) + assignment_solver = AssignmentSolver( + distance_sampler=distance_sampler, + relaxation_solver=relaxation_solver, + discretization_solver=discretization_solver, + objective=assignment_objective, + maximum_iterations=min(20, maximum_iterations), + ) - df_locations = [] - df_convergence = [] + df_locations = [] + df_convergence = [] - last_person_id = None + last_person_id = None - for problem in find_assignment_problems(df_trips, df_primary): - result = assignment_solver.solve(problem) + for problem in find_assignment_problems(df_trips, df_primary): + result = assignment_solver.solve(problem) - starting_activity_index = problem["activity_index"] + starting_activity_index = problem["activity_index"] - for index, (identifier, location) in enumerate(zip(result["discretization"]["identifiers"], result["discretization"]["locations"])): - df_locations.append(( - problem["person_id"], starting_activity_index + index, identifier, 
geo.Point(location) - )) + for index, (identifier, location) in enumerate( + zip( + result["discretization"]["identifiers"], + result["discretization"]["locations"], + ) + ): + df_locations.append( + ( + problem["person_id"], + starting_activity_index + index, + identifier, + geo.Point(location), + ) + ) - df_convergence.append(( - result["valid"], problem["size"] - )) + df_convergence.append((result["valid"], problem["size"])) - if problem["person_id"] != last_person_id: - last_person_id = problem["person_id"] - context.progress.update() + if problem["person_id"] != last_person_id: + last_person_id = problem["person_id"] + context.progress.update() - df_locations = pd.DataFrame.from_records(df_locations, columns = ["person_id", "activity_index", "location_id", "geometry"]) - df_locations = gpd.GeoDataFrame(df_locations, crs = crs) - assert not df_locations["geometry"].isna().any() + df_locations = pd.DataFrame.from_records( + df_locations, columns=["person_id", "activity_index", "location_id", "geometry"] + ) + df_locations = gpd.GeoDataFrame(df_locations, crs=crs) + assert not df_locations["geometry"].isna().any() - df_convergence = pd.DataFrame.from_records(df_convergence, columns = ["valid", "size"]) - return df_locations, df_convergence + df_convergence = pd.DataFrame.from_records( + df_convergence, columns=["valid", "size"] + ) + return df_locations, df_convergence diff --git a/synthesis/population/spatial/secondary/problems.py b/synthesis/population/spatial/secondary/problems.py index b4f7295e..ae60266c 100644 --- a/synthesis/population/spatial/secondary/problems.py +++ b/synthesis/population/spatial/secondary/problems.py @@ -1,14 +1,29 @@ import numpy as np import pandas as pd -FIELDS = ["person_id", "trip_index", "preceding_purpose", "following_purpose", "mode", "travel_time"] +FIELDS = [ + "person_id", + "trip_index", + "preceding_purpose", + "following_purpose", + "mode", + "travel_time", +] FIXED_PURPOSES = ["home", "work", "education"] + def find_bare_assignment_problems(df): problem = None - for row in df[FIELDS].itertuples(index = False): - person_id, trip_index, preceding_purpose, following_purpose, mode, travel_time = row + for row in df[FIELDS].itertuples(index=False): + ( + person_id, + trip_index, + preceding_purpose, + following_purpose, + mode, + travel_time, + ) = row if not problem is None and person_id != problem["person_id"]: # We switch person, but we're still tracking a problem. This is a tail! 
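# Illustration (not part of the patch): for one person with the trip sequence
# home -> shop -> leisure -> work, the generator above yields a single "problem"
# spanning the two secondary activities between the fixed anchors. After the
# enrichment in find_assignment_problems below it would look roughly as follows
# (all values are made up; in the real data origin and destination are 1x2
# coordinate arrays taken from the person's home and work locations):
example_problem = {
    "person_id": 42,
    "trip_index": 0,                   # index of the first trip of the chain
    "purposes": ["shop", "leisure"],   # only the variable activities remain
    "modes": ["car", "car", "car"],    # one mode per trip of the chain
    "travel_times": [600.0, 300.0, 900.0],
    "size": 2,                         # number of secondary locations to place
    "origin": [[0.0, 0.0]],            # home coordinate (fixed anchor)
    "destination": [[1200.0, 800.0]],  # work coordinate (fixed anchor)
}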
@@ -18,8 +33,11 @@ def find_bare_assignment_problems(df): if problem is None: # Start a new problem problem = dict( - person_id = person_id, trip_index = trip_index, purposes = [preceding_purpose], - modes = [], travel_times = [] + person_id=person_id, + trip_index=trip_index, + purposes=[preceding_purpose], + modes=[], + travel_times=[], ) problem["purposes"].append(following_purpose) @@ -34,16 +52,18 @@ def find_bare_assignment_problems(df): if not problem is None: yield problem + LOCATION_FIELDS = ["person_id", "home", "work", "education"] + def find_assignment_problems(df, df_locations): """ - Enriches assignment problems with: - - Locations of the fixed activities - - Size of the problem - - Reduces purposes to the variable ones + Enriches assignment problems with: + - Locations of the fixed activities + - Size of the problem + - Reduces purposes to the variable ones """ - location_iterator = df_locations[LOCATION_FIELDS].itertuples(index = False) + location_iterator = df_locations[LOCATION_FIELDS].itertuples(index=False) current_location = None for problem in find_bare_assignment_problems(df): @@ -61,13 +81,13 @@ def find_assignment_problems(df, df_locations): problem["purposes"] = problem["purposes"][:-1] else: - pass # Neither chain nor tail + pass # Neither chain nor tail # Define size problem["size"] = len(problem["purposes"]) if problem["size"] == 0: - continue # We can skip if there are no variable activities + continue # We can skip if there are no variable activities # Advance location iterator until we arrive at the current problem's person while current_location is None or current_location[0] != problem["person_id"]: @@ -78,12 +98,18 @@ def find_assignment_problems(df, df_locations): problem["destination"] = None if origin_purpose in FIXED_PURPOSES: - problem["origin"] = current_location[LOCATION_FIELDS.index(origin_purpose)] # Shapely POINT + problem["origin"] = current_location[ + LOCATION_FIELDS.index(origin_purpose) + ] # Shapely POINT problem["origin"] = np.array([[problem["origin"].x, problem["origin"].y]]) if destination_purpose in FIXED_PURPOSES: - problem["destination"] = current_location[LOCATION_FIELDS.index(destination_purpose)] # Shapely POINT - problem["destination"] = np.array([[problem["destination"].x, problem["destination"].y]]) + problem["destination"] = current_location[ + LOCATION_FIELDS.index(destination_purpose) + ] # Shapely POINT + problem["destination"] = np.array( + [[problem["destination"].x, problem["destination"].y]] + ) if problem["origin"] is None: problem["activity_index"] = problem["trip_index"] diff --git a/synthesis/population/spatial/secondary/rda.py b/synthesis/population/spatial/secondary/rda.py index 232d1c86..3eaddc1c 100644 --- a/synthesis/population/spatial/secondary/rda.py +++ b/synthesis/population/spatial/secondary/rda.py @@ -1,10 +1,15 @@ import numpy as np import numpy.linalg as la -def check_feasibility(distances, direct_distance, consider_total_distance = True): - return calculate_feasibility(distances, direct_distance, consider_total_distance) == 0.0 -def calculate_feasibility(distances, direct_distance, consider_total_distance = True): +def check_feasibility(distances, direct_distance, consider_total_distance=True): + return ( + calculate_feasibility(distances, direct_distance, consider_total_distance) + == 0.0 + ) + + +def calculate_feasibility(distances, direct_distance, consider_total_distance=True): total_distance = np.sum(distances) delta_distance = 0.0 @@ -16,24 +21,38 @@ def calculate_feasibility(distances, 
direct_distance, consider_total_distance = return float(max(delta, 0)) + class DiscretizationSolver: def solve(self, problem, locations): raise NotImplementedError() + class RelaxationSolver: def solve(self, problem, distances): raise NotImplementedError() + class DistanceSampler: def sample(self, problem): raise NotImplementedError() + class AssignmentObjective: - def evaluate(self, problem, distance_result, relaxation_result, discretization_result): + def evaluate( + self, problem, distance_result, relaxation_result, discretization_result + ): raise NotImplementedError() + class AssignmentSolver: - def __init__(self, distance_sampler, relaxation_solver, discretization_solver, objective, maximum_iterations = 1000): + def __init__( + self, + distance_sampler, + relaxation_solver, + discretization_solver, + objective, + maximum_iterations=1000, + ): self.maximum_iterations = maximum_iterations self.relaxation_solver = relaxation_solver @@ -47,12 +66,21 @@ def solve(self, problem): for assignment_iteration in range(self.maximum_iterations): distance_result = self.distance_sampler.sample(problem) - relaxation_result = self.relaxation_solver.solve(problem, distance_result["distances"]) - discretization_result = self.discretization_solver.solve(problem, relaxation_result["locations"]) + relaxation_result = self.relaxation_solver.solve( + problem, distance_result["distances"] + ) + discretization_result = self.discretization_solver.solve( + problem, relaxation_result["locations"] + ) - assignment_result = self.objective.evaluate(problem, distance_result, relaxation_result, discretization_result) + assignment_result = self.objective.evaluate( + problem, distance_result, relaxation_result, discretization_result + ) - if best_result is None or assignment_result["objective"] < best_result["objective"]: + if ( + best_result is None + or assignment_result["objective"] < best_result["objective"] + ): best_result = assignment_result assignment_result["distance"] = distance_result @@ -65,8 +93,9 @@ def solve(self, problem): return best_result + class GeneralRelaxationSolver(RelaxationSolver): - def __init__(self, chain_solver, tail_solver = None, free_solver = None): + def __init__(self, chain_solver, tail_solver=None, free_solver=None): self.chain_solver = chain_solver self.tail_solver = tail_solver self.free_solver = free_solver @@ -81,6 +110,7 @@ def solve(self, problem, distances): else: return self.chain_solver.solve(problem, distances) + def sample_tail(random, anchor, distances): angles = random.random_sample(len(distances)) * 2.0 * np.pi offsets = np.vstack([np.cos(angles), np.sin(angles)]).T * distances[:, np.newaxis] @@ -92,6 +122,7 @@ def sample_tail(random, anchor, distances): return np.vstack(locations[1:]) + class AngularTailSolver(RelaxationSolver): def __init__(self, random): self.random = random @@ -111,26 +142,38 @@ def solve(self, problem, distances): raise RuntimeError("Invalid chain for AngularTailSolver") locations = sample_tail(self.random, anchor, distances) - if reverse: locations = locations[::-1,:] + if reverse: + locations = locations[::-1, :] assert len(locations) == len(distances) - return dict(valid = True, locations = locations) + return dict(valid=True, locations=locations) + class GravityChainSolver: - def __init__(self, random, alpha = 0.3, eps = 1.0, maximum_iterations = 1000, lateral_deviation = None): + def __init__( + self, + random, + alpha=0.3, + eps=1.0, + maximum_iterations=1000, + lateral_deviation=None, + ): self.alpha = 0.3 self.eps = 1e-2 
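# Note: the alpha and eps constructor arguments are shadowed by the hard-coded
# assignments above, so the solver always runs with alpha = 0.3 and eps = 1e-2;
# the values passed in secondary/locations.py (alpha=0.1, eps=10.0) currently
# have no effect.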
self.maximum_iterations = maximum_iterations self.random = random self.lateral_deviation = lateral_deviation - def solve_two_points(self, problem, origin, destination, distances, direction, direct_distance): + def solve_two_points( + self, problem, origin, destination, distances, direction, direct_distance + ): if direct_distance == 0.0: location = origin + direction * distances[0] return dict( - valid = distances[0] == distances[1], - locations = location.reshape(-1, 2), iterations = None + valid=distances[0] == distances[1], + locations=location.reshape(-1, 2), + iterations=None, ) elif direct_distance > np.sum(distances): @@ -141,9 +184,7 @@ def solve_two_points(self, problem, origin, destination, distances, direction, d location = origin + direction * ratio * direct_distance - return dict( - valid = False, locations = location.reshape(-1, 2), iterations = None - ) + return dict(valid=False, locations=location.reshape(-1, 2), iterations=None) elif direct_distance < np.abs(distances[0] - distances[1]): ratio = 1.0 @@ -154,24 +195,24 @@ def solve_two_points(self, problem, origin, destination, distances, direction, d maximum_distance = max(distances) location = origin + direction * ratio * maximum_distance - return dict( - valid = False, locations = location.reshape(-1, 2), iterations = None - ) + return dict(valid=False, locations=location.reshape(-1, 2), iterations=None) else: - A = 0.5 * ( distances[0]**2 - distances[1]**2 + direct_distance**2 ) / direct_distance - H = np.sqrt(max(0, distances[0]**2 - A**2)) + A = ( + 0.5 + * (distances[0] ** 2 - distances[1] ** 2 + direct_distance**2) + / direct_distance + ) + H = np.sqrt(max(0, distances[0] ** 2 - A**2)) r = self.random.random_sample() center = origin + direction * A offset = direction * H - offset = np.array([offset[0,1], -offset[0,0]]) + offset = np.array([offset[0, 1], -offset[0, 0]]) location = center + (1.0 if r < 0.5 else -1.0) * offset - return dict( - valid = True, locations = location.reshape(-1, 2), iterations = None - ) + return dict(valid=True, locations=location.reshape(-1, 2), iterations=None) def solve(self, problem, distances): origin, destination = problem["origin"], problem["destination"] @@ -182,21 +223,23 @@ def solve(self, problem, distances): # Prepare direction and normal direction direct_distance = la.norm(destination - origin) - if direct_distance < 1e-12: # We have a zero direct distance, choose a direction randomly + if ( + direct_distance < 1e-12 + ): # We have a zero direct distance, choose a direction randomly angle = self.random.random() * np.pi * 2.0 - direction = np.array([ - np.cos(angle), np.sin(angle) - ]).reshape((1, 2)) + direction = np.array([np.cos(angle), np.sin(angle)]).reshape((1, 2)) else: direction = (destination - origin) / direct_distance - normal = np.array([direction[0,1], -direction[0,0]]) + normal = np.array([direction[0, 1], -direction[0, 0]]) # If we have only one variable point, take a short cut if problem["size"] == 1: - return self.solve_two_points(problem, origin, destination, distances, direction, direct_distance) + return self.solve_two_points( + problem, origin, destination, distances, direction, direct_distance + ) # Prepare initial locations if np.sum(distances) < 1e-12: @@ -208,52 +251,76 @@ def solve(self, problem, distances): locations = np.vstack([origin, locations, destination]) if not check_feasibility(distances, direct_distance): - return dict( # We still return some locations although they may not be perfect - valid = False, locations = locations[1:-1], iterations = 
None + return ( + dict( # We still return some locations although they may not be perfect + valid=False, locations=locations[1:-1], iterations=None + ) ) # Add lateral devations - lateral_deviation = self.lateral_deviation if not self.lateral_deviation is None else max(direct_distance, 1.0) - locations[1:-1] += normal * 2.0 * (self.random.normal(size = len(distances) - 1)[:, np.newaxis] - 0.5) * lateral_deviation + lateral_deviation = ( + self.lateral_deviation + if not self.lateral_deviation is None + else max(direct_distance, 1.0) + ) + locations[1:-1] += ( + normal + * 2.0 + * (self.random.normal(size=len(distances) - 1)[:, np.newaxis] - 0.5) + * lateral_deviation + ) # Prepare gravity simulation valid = False origin_weights = np.ones((len(distances) - 1, 2)) - origin_weights[0,:] = 2.0 + origin_weights[0, :] = 2.0 destination_weights = np.ones((len(distances) - 1, 2)) - destination_weights[-1,:] = 2.0 + destination_weights[-1, :] = 2.0 # Run gravity simulation for k in range(self.maximum_iterations): directions = locations[:-1] - locations[1:] - lengths = la.norm(directions, axis = 1) + lengths = la.norm(directions, axis=1) offset = distances - lengths lengths[lengths < 1.0] = 1.0 directions /= lengths[:, np.newaxis] - if np.all(np.abs(offset) < self.eps): # Check if we have converged + if np.all(np.abs(offset) < self.eps): # Check if we have converged valid = True break # Apply adjustment to locations adjustment = np.zeros((len(distances) - 1, 2)) - adjustment -= 0.5 * self.alpha * offset[:-1, np.newaxis] * directions[:-1] * origin_weights - adjustment += 0.5 * self.alpha * offset[1:, np.newaxis] * directions[1:] * destination_weights + adjustment -= ( + 0.5 + * self.alpha + * offset[:-1, np.newaxis] + * directions[:-1] + * origin_weights + ) + adjustment += ( + 0.5 + * self.alpha + * offset[1:, np.newaxis] + * directions[1:] + * destination_weights + ) locations[1:-1] += adjustment if np.isnan(locations).any() or np.isinf(locations).any(): - raise RuntimeError("NaN/Inf value encountered during gravity simulation") + raise RuntimeError( + "NaN/Inf value encountered during gravity simulation" + ) + + return dict(valid=valid, locations=locations[1:-1], iterations=k) - return dict( - valid = valid, locations = locations[1:-1], iterations = k - ) class FeasibleDistanceSampler(DistanceSampler): - def __init__(self, random, maximum_iterations = 1000): + def __init__(self, random, maximum_iterations=1000): self.maximum_iterations = maximum_iterations self.random = random @@ -264,26 +331,26 @@ def sample_distances(self, problem): def sample(self, problem): origin, destination = problem["origin"], problem["destination"] - if origin is None and destination is None: # This is a free chain + if origin is None and destination is None: # This is a free chain distances = self.sample_distances(problem) - return dict(valid = True, distances = distances, iterations = None) + return dict(valid=True, distances=distances, iterations=None) - elif origin is None: # This is a left tail + elif origin is None: # This is a left tail distances = self.sample_distances(problem) - return dict(valid = True, distances = distances, iterations = None) + return dict(valid=True, distances=distances, iterations=None) - elif destination is None: # This is a right tail + elif destination is None: # This is a right tail distances = self.sample_distances(problem) - return dict(valid = True, distances = distances, iterations = None) + return dict(valid=True, distances=distances, iterations=None) - direct_distance = 
la.norm(destination - origin, axis = 1) + direct_distance = la.norm(destination - origin, axis=1) # One point and two trips if direct_distance < 1e-3 and problem["size"] == 1: distances = self.sample_distances(problem) distances = np.array([distances[0], distances[0]]) - return dict(valid = True, distances = distances, iterations = None) + return dict(valid=True, distances=distances, iterations=None) # This is the general case best_distances = None @@ -300,32 +367,35 @@ def sample(self, problem): if delta == 0.0: break - return dict( - valid = best_delta == 0.0, - distances = best_distances, - iterations = k - ) + return dict(valid=best_delta == 0.0, distances=best_distances, iterations=k) + class DiscretizationErrorObjective(AssignmentObjective): def __init__(self, thresholds): self.thresholds = thresholds - def evaluate(self, problem, distance_result, relaxation_result, discretization_result): + def evaluate( + self, problem, distance_result, relaxation_result, discretization_result + ): sampled_distances = distance_result["distances"] discretized_locations = [] - if not problem["origin"] is None: discretized_locations.append(problem["origin"]) + if not problem["origin"] is None: + discretized_locations.append(problem["origin"]) discretized_locations.append(discretization_result["locations"]) - if not problem["destination"] is None: discretized_locations.append(problem["destination"]) + if not problem["destination"] is None: + discretized_locations.append(problem["destination"]) discretized_locations = np.vstack(discretized_locations) - discretized_distances = la.norm(discretized_locations[:-1] - discretized_locations[1:], axis = 1) + discretized_distances = la.norm( + discretized_locations[:-1] - discretized_locations[1:], axis=1 + ) discretization_error = np.abs(sampled_distances - discretized_distances) objective = 0.0 for error, mode in zip(discretization_error, problem["modes"]): target_error = self.thresholds[mode] - excess_error = max(0.0, error - target_error ) + excess_error = max(0.0, error - target_error) objective = max(objective, excess_error) valid = objective == 0.0 @@ -333,4 +403,4 @@ def evaluate(self, problem, distance_result, relaxation_result, discretization_r valid &= relaxation_result["valid"] valid &= discretization_result["valid"] - return dict(valid = valid, objective = objective) + return dict(valid=valid, objective=objective) diff --git a/synthesis/population/trips.py b/synthesis/population/trips.py index 7a76af96..c4483743 100644 --- a/synthesis/population/trips.py +++ b/synthesis/population/trips.py @@ -7,12 +7,14 @@ This stage duplicates trips and attaches them to the synthetic population. 
""" + def configure(context): context.stage("synthesis.population.matched") context.config("random_seed") hts = context.config("hts") - context.stage("data.hts.selected", alias = "hts") + context.stage("data.hts.selected", alias="hts") + def execute(context): # Load data @@ -20,23 +22,39 @@ def execute(context): # Duplicate with synthetic persons df_matching = context.stage("synthesis.population.matched") - df_trips = df_trips.rename(columns = { "person_id": "hts_id" }) - df_trips = pd.merge(df_matching, df_trips, on = "hts_id") - df_trips = df_trips.sort_values(by = ["person_id", "trip_id"]) + df_trips = df_trips.rename(columns={"person_id": "hts_id"}) + df_trips = pd.merge(df_matching, df_trips, on="hts_id") + df_trips = df_trips.sort_values(by=["person_id", "trip_id"]) # Define trip index - df_count = df_trips.groupby("person_id").size().reset_index(name = "count") - df_trips["trip_index"] = np.hstack([np.arange(count) for count in df_count["count"].values]) - df_trips = df_trips.sort_values(by = ["person_id", "trip_index"]) + df_count = df_trips.groupby("person_id").size().reset_index(name="count") + df_trips["trip_index"] = np.hstack( + [np.arange(count) for count in df_count["count"].values] + ) + df_trips = df_trips.sort_values(by=["person_id", "trip_index"]) # Diversify departure times random = np.random.RandomState(context.config("random_seed")) - counts = df_trips[["person_id"]].groupby("person_id").size().reset_index(name = "count")["count"].values + counts = ( + df_trips[["person_id"]] + .groupby("person_id") + .size() + .reset_index(name="count")["count"] + .values + ) - interval = df_trips[["person_id", "departure_time"]].groupby("person_id").min().reset_index()["departure_time"].values - interval = np.minimum(1800.0, interval) # If first departure time is just 5min after midnight, we only add a deviation of 5min + interval = ( + df_trips[["person_id", "departure_time"]] + .groupby("person_id") + .min() + .reset_index()["departure_time"] + .values + ) + interval = np.minimum( + 1800.0, interval + ) # If first departure time is just 5min after midnight, we only add a deviation of 5min - offset = random.random_sample(size = (len(counts), )) * interval * 2.0 - interval + offset = random.random_sample(size=(len(counts),)) * interval * 2.0 - interval offset = np.repeat(offset, counts) df_trips["departure_time"] += offset @@ -47,11 +65,18 @@ def execute(context): assert (df_trips["departure_time"] >= 0.0).all() assert (df_trips["arrival_time"] >= 0.0).all() - return df_trips[[ - "person_id", "trip_index", - "departure_time", "arrival_time", - "preceding_purpose", "following_purpose", - "is_first_trip", "is_last_trip", - "trip_duration", "activity_duration", - "mode" - ]] + return df_trips[ + [ + "person_id", + "trip_index", + "departure_time", + "arrival_time", + "preceding_purpose", + "following_purpose", + "is_first_trip", + "is_last_trip", + "trip_duration", + "activity_duration", + "mode", + ] + ] diff --git a/synthesis/vehicles/cars/default.py b/synthesis/vehicles/cars/default.py index 1bf32836..0ebf92e4 100644 --- a/synthesis/vehicles/cars/default.py +++ b/synthesis/vehicles/cars/default.py @@ -5,20 +5,34 @@ Creates a vehicle fleet based on a default vehicle type """ + def configure(context): context.stage("synthesis.population.enriched") + def execute(context): df_persons = context.stage("synthesis.population.enriched") - df_vehicle_types = pd.DataFrame.from_records([{ - "type_id": "default_car", "nb_seats": 4, "length": 5.0, "width": 1.0, "pce": 1.0, "mode": "car", - 
"hbefa_cat": "PASSENGER_CAR", "hbefa_tech": "average", "hbefa_size": "average", "hbefa_emission": "average", - }]) + df_vehicle_types = pd.DataFrame.from_records( + [ + { + "type_id": "default_car", + "nb_seats": 4, + "length": 5.0, + "width": 1.0, + "pce": 1.0, + "mode": "car", + "hbefa_cat": "PASSENGER_CAR", + "hbefa_tech": "average", + "hbefa_size": "average", + "hbefa_emission": "average", + } + ] + ) df_vehicles = df_persons[["person_id"]].copy() - df_vehicles = df_vehicles.rename(columns = { "person_id": "owner_id" }) - + df_vehicles = df_vehicles.rename(columns={"person_id": "owner_id"}) + df_vehicles["mode"] = "car" df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car" @@ -28,4 +42,4 @@ def execute(context): df_vehicles["age"] = 0 df_vehicles["euro"] = 6 - return df_vehicle_types, df_vehicles \ No newline at end of file + return df_vehicle_types, df_vehicles diff --git a/synthesis/vehicles/cars/fleet_sampling.py b/synthesis/vehicles/cars/fleet_sampling.py index dcd20a5a..269a2404 100644 --- a/synthesis/vehicles/cars/fleet_sampling.py +++ b/synthesis/vehicles/cars/fleet_sampling.py @@ -7,6 +7,7 @@ Creates the synthetic vehicle fleet """ + def configure(context): context.stage("synthesis.population.enriched") context.stage("synthesis.population.spatial.home.zones") @@ -15,21 +16,30 @@ def configure(context): context.config("vehicles_year", 2021) + def _sample_vehicle(context, args): vehicle = args year = context.config("vehicles_year") - df_vehicle_fleet_counts, df_vehicle_age_counts = context.data("fleet"), context.data("age") + df_vehicle_fleet_counts, df_vehicle_age_counts = context.data( + "fleet" + ), context.data("age") commune_id = vehicle["commune_id"] - if commune_id in df_vehicle_fleet_counts["commune_id"].unique(): - fleet = df_vehicle_fleet_counts.loc[df_vehicle_fleet_counts["commune_id"] == commune_id] + if commune_id in df_vehicle_fleet_counts["commune_id"].unique(): + fleet = df_vehicle_fleet_counts.loc[ + df_vehicle_fleet_counts["commune_id"] == commune_id + ] choice = fleet.sample(weights="fleet") critair = choice["critair"].values[0] technology = choice["technology"].values[0] - age_mask = (df_vehicle_age_counts["critair"] == critair) & (df_vehicle_age_counts["technology"] == technology) - age = df_vehicle_age_counts.loc[age_mask].sample(weights="fleet")["age"].values[0] + age_mask = (df_vehicle_age_counts["critair"] == critair) & ( + df_vehicle_age_counts["technology"] == technology + ) + age = ( + df_vehicle_age_counts.loc[age_mask].sample(weights="fleet")["age"].values[0] + ) else: choice = df_vehicle_age_counts.sample(weights="fleet") critair = choice["critair"].values[0] @@ -53,16 +63,24 @@ def _sample_vehicle(context, args): context.progress.update() return vehicle + def _get_euro_from_critair(vehicle, year): - critair = vehicle["critair"] # Crit'air 1, Crit'air 2, ..., Crit'air 5, Crit'air E, Non classée - technology = vehicle["technology"] # Gazole, Essence, Electrique et hydrogène, Essence hybride rechargeable, Gaz, Gazole hybride rechargeable - age = vehicle["age"] # 0 ans, 1 ans, ..., 19 ans, >20 ans + critair = vehicle[ + "critair" + ] # Crit'air 1, Crit'air 2, ..., Crit'air 5, Crit'air E, Non classée + technology = vehicle[ + "technology" + ] # Gazole, Essence, Electrique et hydrogène, Essence hybride rechargeable, Gaz, Gazole hybride rechargeable + age = vehicle["age"] # 0 ans, 1 ans, ..., 19 ans, >20 ans # we are using the following table : https://www.ecologie.gouv.fr/sites/default/files/Tableau_classification_des_vehicules.pdf - 
age_num = re.findall(r'\d+', age) + age_num = re.findall(r"\d+", age) if len(age_num) == 0: - raise RuntimeError("Badly formatted 'age' variable found for vehicle (id: %s) : %s" % (age, vehicle["vehicle_id"])) + raise RuntimeError( + "Badly formatted 'age' variable found for vehicle (id: %s) : %s" + % (age, vehicle["vehicle_id"]) + ) birthday = int(year) - int(age_num[0]) @@ -92,7 +110,7 @@ def _get_euro_from_critair(vehicle, year): if critair == "Crit'air 2" and technology == "Gazole": euro = max(euro, 5) # or 6 in table if critair == "Crit'air 3" and technology == "Essence": - euro = max(euro, 2) # or 3 in table + euro = max(euro, 2) # or 3 in table if critair == "Crit'air 3" and technology == "Gazole": euro = max(euro, 4) if critair == "Crit'air 4" and technology == "Gazole": @@ -103,14 +121,15 @@ def _get_euro_from_critair(vehicle, year): euro = max(euro, 1) euro = str(euro) - if euro == '6': + if euro == "6": if 2016 <= birthday < 2019: - euro = '6ab' + euro = "6ab" else: - euro = '6c' + euro = "6c" return euro + def execute(context): df_vehicle_types = context.stage("data.vehicles.types") @@ -118,11 +137,15 @@ def execute(context): df_persons = context.stage("synthesis.population.enriched") df_homes = context.stage("synthesis.population.spatial.home.zones") - df_vehicles = pd.merge(df_persons[["household_id", "person_id"]], df_homes[["household_id", "commune_id"]], on = "household_id") + df_vehicles = pd.merge( + df_persons[["household_id", "person_id"]], + df_homes[["household_id", "commune_id"]], + on="household_id", + ) - df_vehicles = df_vehicles.rename(columns = { "person_id": "owner_id" }) + df_vehicles = df_vehicles.rename(columns={"person_id": "owner_id"}) df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car" - df_vehicles = df_vehicles.drop_duplicates("vehicle_id") # is this needed? + df_vehicles = df_vehicles.drop_duplicates("vehicle_id") # is this needed? 
df_vehicles["type_id"] = "default_car" df_vehicles["mode"] = "car" @@ -130,11 +153,17 @@ def execute(context): res = [] - with context.progress(label = "Processing vehicles data ...", total = len(df_vehicles)) as progress: - with context.parallel(dict(fleet = df_vehicle_fleet_counts, age = df_vehicle_age_counts)) as parallel: - for df_partial in parallel.imap(_sample_vehicle, df_vehicles.to_dict(orient="records")): + with context.progress( + label="Processing vehicles data ...", total=len(df_vehicles) + ) as progress: + with context.parallel( + dict(fleet=df_vehicle_fleet_counts, age=df_vehicle_age_counts) + ) as parallel: + for df_partial in parallel.imap( + _sample_vehicle, df_vehicles.to_dict(orient="records") + ): res.append(df_partial) df_vehicles = pd.DataFrame.from_dict(res) - return df_vehicle_types, df_vehicles \ No newline at end of file + return df_vehicle_types, df_vehicles diff --git a/synthesis/vehicles/passengers/default.py b/synthesis/vehicles/passengers/default.py index 6916f5bb..4ed9249f 100644 --- a/synthesis/vehicles/passengers/default.py +++ b/synthesis/vehicles/passengers/default.py @@ -5,20 +5,34 @@ Creates a vehicle fleet based on a default vehicle type for the dummy passenger mode """ + def configure(context): context.stage("synthesis.population.enriched") + def execute(context): df_persons = context.stage("synthesis.population.enriched") - df_vehicle_types = pd.DataFrame.from_records([{ - "type_id": "default_car_passenger", "nb_seats": 4, "length": 5.0, "width": 1.0, "pce": 1.0, "mode": "car_passenger", - "hbefa_cat": "PASSENGER_CAR", "hbefa_tech": "average", "hbefa_size": "average", "hbefa_emission": "average", - }]) + df_vehicle_types = pd.DataFrame.from_records( + [ + { + "type_id": "default_car_passenger", + "nb_seats": 4, + "length": 5.0, + "width": 1.0, + "pce": 1.0, + "mode": "car_passenger", + "hbefa_cat": "PASSENGER_CAR", + "hbefa_tech": "average", + "hbefa_size": "average", + "hbefa_emission": "average", + } + ] + ) df_vehicles = df_persons[["person_id"]].copy() - df_vehicles = df_vehicles.rename(columns = { "person_id": "owner_id" }) - + df_vehicles = df_vehicles.rename(columns={"person_id": "owner_id"}) + df_vehicles["mode"] = "car_passenger" df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car_passenger" @@ -28,4 +42,4 @@ def execute(context): df_vehicles["age"] = 0 df_vehicles["euro"] = 6 - return df_vehicle_types, df_vehicles \ No newline at end of file + return df_vehicle_types, df_vehicles diff --git a/synthesis/vehicles/vehicles.py b/synthesis/vehicles/vehicles.py index 922cd36c..14cf552b 100644 --- a/synthesis/vehicles/vehicles.py +++ b/synthesis/vehicles/vehicles.py @@ -1,20 +1,24 @@ import pandas as pd + def configure(context): method = context.config("vehicles_method", "default") if method == "default": - context.stage("synthesis.vehicles.cars.default", alias = "cars") + context.stage("synthesis.vehicles.cars.default", alias="cars") elif method == "fleet_sample": - context.stage("synthesis.vehicles.cars.fleet_sampling", alias = "cars") + context.stage("synthesis.vehicles.cars.fleet_sampling", alias="cars") else: raise RuntimeError("Unknown vehicles generation method : %s" % method) - + context.stage("synthesis.vehicles.passengers.default") + def execute(context): df_car_types, df_cars = context.stage("cars") - df_passenger_types, df_passengers = context.stage("synthesis.vehicles.passengers.default") + df_passenger_types, df_passengers = context.stage( + "synthesis.vehicles.passengers.default" + ) df_vehicles = 
pd.concat([df_cars, df_passengers]) df_types = pd.concat([df_car_types, df_passenger_types]) diff --git a/tests/test_determinism.py b/tests/test_determinism.py index e2755d7a..233c7934 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -4,6 +4,7 @@ from . import testdata import sqlite3 + def hash_sqlite_db(path): """ Hash SQLite database file from its dump. @@ -37,6 +38,7 @@ def hash_file(file): f.close() return hash.hexdigest() + def test_determinism(tmpdir): data_path = str(tmpdir.mkdir("data")) testdata.create(data_path) @@ -44,51 +46,62 @@ def test_determinism(tmpdir): for index in range(2): _test_determinism(index, data_path, tmpdir) + def _test_determinism(index, data_path, tmpdir): print("Running index %d" % index) cache_path = str(tmpdir.mkdir("cache_%d" % index)) output_path = str(tmpdir.mkdir("output_%d" % index)) config = dict( - data_path = data_path, output_path = output_path, - regions = [10, 11], sampling_rate = 1.0, hts = "entd", - random_seed = 1000, processes = 1, - secloc_maximum_iterations = 10, - maven_skip_tests = True, - matching_attributes = [ - "sex", "any_cars", "age_class", "socioprofessional_class", - "income_class", "departement_id" - ] + data_path=data_path, + output_path=output_path, + regions=[10, 11], + sampling_rate=1.0, + hts="entd", + random_seed=1000, + processes=1, + secloc_maximum_iterations=10, + maven_skip_tests=True, + matching_attributes=[ + "sex", + "any_cars", + "age_class", + "socioprofessional_class", + "income_class", + "departement_id", + ], ) stages = [ - dict(descriptor = "synthesis.output"), + dict(descriptor="synthesis.output"), ] - synpp.run(stages, config, working_directory = cache_path) + synpp.run(stages, config, working_directory=cache_path) REFERENCE_CSV_HASHES = { - "ile_de_france_activities.csv": "53c44fb4026d2037729ee8ff1c8fb93f", - "ile_de_france_households.csv": "ca2a29ef13467326f937638f1ff8be1a", - "ile_de_france_persons.csv": "ddbe9b418c915b14e888b54efbdf9b1e", - "ile_de_france_trips.csv": "6c5f3427e41e683da768eeb53796a806", - "ile_de_france_vehicle_types.csv": "00bee1ea6d7bc9af43ae6c7101dd75da", - "ile_de_france_vehicles.csv": "3567b0f29e51d521b13d91c82c77cecb", + "ile_de_france_activities.csv": "53c44fb4026d2037729ee8ff1c8fb93f", + "ile_de_france_households.csv": "ca2a29ef13467326f937638f1ff8be1a", + "ile_de_france_persons.csv": "ddbe9b418c915b14e888b54efbdf9b1e", + "ile_de_france_trips.csv": "6c5f3427e41e683da768eeb53796a806", + "ile_de_france_vehicle_types.csv": "00bee1ea6d7bc9af43ae6c7101dd75da", + "ile_de_france_vehicles.csv": "3567b0f29e51d521b13d91c82c77cecb", } REFERENCE_GPKG_HASHES = { - "ile_de_france_activities.gpkg": "884eec1fd0c29904284eb4362ff89be1", - "ile_de_france_commutes.gpkg": "5a4180390a69349cc655c07c5671e8d3", - "ile_de_france_homes.gpkg": "a85e973f0e2f51031cd60170d351845e", - "ile_de_france_trips.gpkg": "d0aec4033cfc184bf1b91ae13a537ef8", + "ile_de_france_activities.gpkg": "884eec1fd0c29904284eb4362ff89be1", + "ile_de_france_commutes.gpkg": "5a4180390a69349cc655c07c5671e8d3", + "ile_de_france_homes.gpkg": "a85e973f0e2f51031cd60170d351845e", + "ile_de_france_trips.gpkg": "d0aec4033cfc184bf1b91ae13a537ef8", } generated_csv_hashes = { - file: hash_file("%s/%s" % (output_path, file)) for file in REFERENCE_CSV_HASHES.keys() + file: hash_file("%s/%s" % (output_path, file)) + for file in REFERENCE_CSV_HASHES.keys() } generated_gpkg_hashes = { - file: hash_sqlite_db("%s/%s" % (output_path, file)) for file in REFERENCE_GPKG_HASHES.keys() + file: hash_sqlite_db("%s/%s" % (output_path, 
file)) + for file in REFERENCE_GPKG_HASHES.keys() } print("Generated CSV hashes: ", generated_csv_hashes) @@ -100,6 +113,7 @@ def _test_determinism(index, data_path, tmpdir): for file in REFERENCE_GPKG_HASHES.keys(): assert REFERENCE_GPKG_HASHES[file] == generated_gpkg_hashes[file] + def test_determinism_matsim(tmpdir): data_path = str(tmpdir.mkdir("data")) testdata.create(data_path) @@ -107,36 +121,45 @@ def test_determinism_matsim(tmpdir): for index in range(2): _test_determinism_matsim(index, data_path, tmpdir) + def _test_determinism_matsim(index, data_path, tmpdir): print("Running index %d" % index) cache_path = str(tmpdir.mkdir("cache_%d" % index)) output_path = str(tmpdir.mkdir("output_%d" % index)) config = dict( - data_path = data_path, output_path = output_path, - regions = [10, 11], sampling_rate = 1.0, hts = "entd", - random_seed = 1000, processes = 1, - secloc_maximum_iterations = 10, - maven_skip_tests = True, - matching_attributes = [ - "sex", "any_cars", "age_class", "socioprofessional_class", - "income_class", "departement_id" - ] + data_path=data_path, + output_path=output_path, + regions=[10, 11], + sampling_rate=1.0, + hts="entd", + random_seed=1000, + processes=1, + secloc_maximum_iterations=10, + maven_skip_tests=True, + matching_attributes=[ + "sex", + "any_cars", + "age_class", + "socioprofessional_class", + "income_class", + "departement_id", + ], ) stages = [ - dict(descriptor = "matsim.output"), + dict(descriptor="matsim.output"), ] - synpp.run(stages, config, working_directory = cache_path) + synpp.run(stages, config, working_directory=cache_path) REFERENCE_HASHES = { - #"ile_de_france_population.xml.gz": "e1407f918cb92166ebf46ad769d8d085", - #"ile_de_france_network.xml.gz": "5f10ec295b49d2bb768451c812955794", - "ile_de_france_households.xml.gz": "64a0c9fab72aad51bc6adb926a1c9d44", - #"ile_de_france_facilities.xml.gz": "5ad41afff9ae5c470082510b943e6778", - "ile_de_france_config.xml": "30871dfbbd2b5bf6922be1dfe20ffe73", - "ile_de_france_vehicles.xml.gz": "d7c8d0dba531a21dc83355b2f82778c2" + # "ile_de_france_population.xml.gz": "e1407f918cb92166ebf46ad769d8d085", + # "ile_de_france_network.xml.gz": "5f10ec295b49d2bb768451c812955794", + "ile_de_france_households.xml.gz": "64a0c9fab72aad51bc6adb926a1c9d44", + # "ile_de_france_facilities.xml.gz": "5ad41afff9ae5c470082510b943e6778", + "ile_de_france_config.xml": "30871dfbbd2b5bf6922be1dfe20ffe73", + "ile_de_france_vehicles.xml.gz": "d7c8d0dba531a21dc83355b2f82778c2", } # activities.gpkg, trips.gpkg, meta.json, @@ -147,7 +170,8 @@ def _test_determinism_matsim(index, data_path, tmpdir): # detailed inspection of meta.json would make sense! generated_hashes = { - file: hash_file("%s/%s" % (output_path, file)) for file in REFERENCE_HASHES.keys() + file: hash_file("%s/%s" % (output_path, file)) + for file in REFERENCE_HASHES.keys() } print("Generated hashes: ", generated_hashes) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index d9856f52..384f8242 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -4,6 +4,7 @@ from . 
import testdata import pandas as pd + def test_data(tmpdir): data_path = str(tmpdir.mkdir("data")) testdata.create(data_path) @@ -11,50 +12,55 @@ def test_data(tmpdir): cache_path = str(tmpdir.mkdir("cache")) output_path = str(tmpdir.mkdir("output")) config = dict( - data_path = data_path, output_path = output_path, - regions = [10, 11], hts = "entd") + data_path=data_path, output_path=output_path, regions=[10, 11], hts="entd" + ) stages = [ - dict(descriptor = "data.spatial.iris"), - dict(descriptor = "data.spatial.codes"), - dict(descriptor = "data.spatial.population"), - dict(descriptor = "data.bpe.cleaned"), - dict(descriptor = "data.income.municipality"), - dict(descriptor = "data.hts.entd.cleaned"), - dict(descriptor = "data.hts.egt.cleaned"), - dict(descriptor = "data.census.cleaned"), - dict(descriptor = "data.od.cleaned"), - dict(descriptor = "data.hts.output"), - dict(descriptor = "data.sirene.output"), + dict(descriptor="data.spatial.iris"), + dict(descriptor="data.spatial.codes"), + dict(descriptor="data.spatial.population"), + dict(descriptor="data.bpe.cleaned"), + dict(descriptor="data.income.municipality"), + dict(descriptor="data.hts.entd.cleaned"), + dict(descriptor="data.hts.egt.cleaned"), + dict(descriptor="data.census.cleaned"), + dict(descriptor="data.od.cleaned"), + dict(descriptor="data.hts.output"), + dict(descriptor="data.sirene.output"), ] - synpp.run(stages, config, working_directory = cache_path) + synpp.run(stages, config, working_directory=cache_path) assert os.path.isfile("%s/ile_de_france_hts_households.csv" % output_path) assert os.path.isfile("%s/ile_de_france_hts_persons.csv" % output_path) assert os.path.isfile("%s/ile_de_france_hts_trips.csv" % output_path) assert os.path.isfile("%s/ile_de_france_sirene.gpkg" % output_path) -def run_population(tmpdir, hts, update = {}): + +def run_population(tmpdir, hts, update={}): data_path = str(tmpdir.mkdir("data")) testdata.create(data_path) cache_path = str(tmpdir.mkdir("cache")) output_path = str(tmpdir.mkdir("output")) config = dict( - data_path = data_path, output_path = output_path, - regions = [10, 11], sampling_rate = 1.0, hts = hts, - random_seed = 1000, processes = 1, - secloc_maximum_iterations = 10, - maven_skip_tests = True + data_path=data_path, + output_path=output_path, + regions=[10, 11], + sampling_rate=1.0, + hts=hts, + random_seed=1000, + processes=1, + secloc_maximum_iterations=10, + maven_skip_tests=True, ) config.update(update) stages = [ - dict(descriptor = "synthesis.output"), + dict(descriptor="synthesis.output"), ] - synpp.run(stages, config, working_directory = cache_path) + synpp.run(stages, config, working_directory=cache_path) assert os.path.isfile("%s/ile_de_france_activities.csv" % output_path) assert os.path.isfile("%s/ile_de_france_persons.csv" % output_path) @@ -63,50 +69,94 @@ def run_population(tmpdir, hts, update = {}): assert os.path.isfile("%s/ile_de_france_trips.gpkg" % output_path) assert os.path.isfile("%s/ile_de_france_meta.json" % output_path) - assert 2235 == len(pd.read_csv("%s/ile_de_france_activities.csv" % output_path, usecols = ["household_id"], sep = ";")) - assert 447 == len(pd.read_csv("%s/ile_de_france_persons.csv" % output_path, usecols = ["household_id"], sep = ";")) - assert 149 == len(pd.read_csv("%s/ile_de_france_households.csv" % output_path, usecols = ["household_id"], sep = ";")) - - assert 447 * 2 == len(pd.read_csv("%s/ile_de_france_vehicles.csv" % output_path, usecols = ["vehicle_id"], sep = ";")) + assert 2235 == len( + pd.read_csv( + 
"%s/ile_de_france_activities.csv" % output_path, + usecols=["household_id"], + sep=";", + ) + ) + assert 447 == len( + pd.read_csv( + "%s/ile_de_france_persons.csv" % output_path, + usecols=["household_id"], + sep=";", + ) + ) + assert 149 == len( + pd.read_csv( + "%s/ile_de_france_households.csv" % output_path, + usecols=["household_id"], + sep=";", + ) + ) + + assert 447 * 2 == len( + pd.read_csv( + "%s/ile_de_france_vehicles.csv" % output_path, + usecols=["vehicle_id"], + sep=";", + ) + ) if "vehicles_method" in update and update["vehicles_method"] == "fleet_sample": - assert 17 + 1 == len(pd.read_csv("%s/ile_de_france_vehicle_types.csv" % output_path, usecols = ["type_id"], sep = ";")) + assert 17 + 1 == len( + pd.read_csv( + "%s/ile_de_france_vehicle_types.csv" % output_path, + usecols=["type_id"], + sep=";", + ) + ) else: - assert 2 == len(pd.read_csv("%s/ile_de_france_vehicle_types.csv" % output_path, usecols = ["type_id"], sep = ";")) + assert 2 == len( + pd.read_csv( + "%s/ile_de_france_vehicle_types.csv" % output_path, + usecols=["type_id"], + sep=";", + ) + ) + def test_population_with_entd(tmpdir): run_population(tmpdir, "entd") + def test_population_with_egt(tmpdir): run_population(tmpdir, "egt") + def test_population_with_mode_choice(tmpdir): - run_population(tmpdir, "entd", { "mode_choice": True }) + run_population(tmpdir, "entd", {"mode_choice": True}) + def test_population_with_fleet_sample(tmpdir): - run_population(tmpdir, "entd", { - "vehicles_method": "fleet_sample", - "vehicles_year": 2021 - }) + run_population( + tmpdir, "entd", {"vehicles_method": "fleet_sample", "vehicles_year": 2021} + ) + def test_population_with_bhepop2_income(tmpdir): - run_population(tmpdir, "egt", { - "income_assignation_method": "bhepop2" - }) + run_population(tmpdir, "egt", {"income_assignation_method": "bhepop2"}) + def test_population_with_urban_type(tmpdir): - run_population(tmpdir, "entd", { - "use_urban_type": True, - "matching_attributes": [ - "urban_type", "*default*" - ], - "matching_minimum_observations": 5 - }) + run_population( + tmpdir, + "entd", + { + "use_urban_type": True, + "matching_attributes": ["urban_type", "*default*"], + "matching_minimum_observations": 5, + }, + ) + def test_population_with_urban_type_and_egt(tmpdir): - run_population(tmpdir, "egt", { - "use_urban_type": True, - "matching_attributes": [ - "urban_type", "*default*" - ], - "matching_minimum_observations": 5 - }) + run_population( + tmpdir, + "egt", + { + "use_urban_type": True, + "matching_attributes": ["urban_type", "*default*"], + "matching_minimum_observations": 5, + }, + ) diff --git a/tests/test_simulation.py b/tests/test_simulation.py index e31d6be9..baf1a2bc 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -3,6 +3,7 @@ import hashlib from . 
import testdata + def test_simulation(tmpdir): data_path = str(tmpdir.mkdir("data")) testdata.create(data_path) @@ -11,18 +12,20 @@ def test_simulation(tmpdir): output_path = str(tmpdir.mkdir("output")) config = dict( - data_path = data_path, output_path = output_path, - regions = [10, 11], sampling_rate = 1.0, hts = "entd", - random_seed = 1000, processes = 1, - secloc_maximum_iterations = 10, - maven_skip_tests = True + data_path=data_path, + output_path=output_path, + regions=[10, 11], + sampling_rate=1.0, + hts="entd", + random_seed=1000, + processes=1, + secloc_maximum_iterations=10, + maven_skip_tests=True, ) - stages = [ - dict(descriptor = "matsim.output") - ] + stages = [dict(descriptor="matsim.output")] - synpp.run(stages, config, working_directory = cache_path) + synpp.run(stages, config, working_directory=cache_path) assert os.path.isfile("%s/ile_de_france_population.xml.gz" % output_path) assert os.path.isfile("%s/ile_de_france_network.xml.gz" % output_path) diff --git a/tests/testdata.py b/tests/testdata.py index 6e75f71d..1ef1fae5 100644 --- a/tests/testdata.py +++ b/tests/testdata.py @@ -7,6 +7,7 @@ import glob import subprocess + def create(output_path): """ This script creates test fixtures for the Île-de-France / France pipeline. @@ -76,23 +77,27 @@ def create(output_path): print("Creating zoning system ...") df = [] - WITH_IRIS = set([ - "1B013", "1B014", "1B018", "1B019", - "2D007", "2D008", "2D012", "2D013" - ]) + WITH_IRIS = set( + ["1B013", "1B014", "1B018", "1B019", "2D007", "2D008", "2D012", "2D013"] + ) for region_column in np.arange(2): region_prefix = region_column + 1 - region_number = region_prefix * 10 # TODO: This means we will have 10 and 20, but the unit tests define 10 and 11 (so only 10 is used -> TODO) + region_number = ( + region_prefix * 10 + ) # TODO: This means we will have 10 and 20, but the unit tests define 10 and 11 (so only 10 is used -> TODO) region_x = anchor_x + region_column * REGION_LENGTH region_y = anchor_y + 0 for department_row in np.arange(2): for department_column in np.arange(2): - department_letter = { (0, 0): "A", (0, 1): "B", (1, 0): "C", (1, 1): "D" }[( - department_row, department_column - )] + department_letter = { + (0, 0): "A", + (0, 1): "B", + (1, 0): "C", + (1, 1): "D", + }[(department_row, department_column)] department_name = "%d%s" % (region_prefix, department_letter) @@ -100,13 +105,20 @@ def create(output_path): department_y = region_y - department_row * DEPARTMENT_LENGTH for municipality_index in np.arange(25): - municipality_name = "%s%03d" % (department_name, municipality_index + 1) + municipality_name = "%s%03d" % ( + department_name, + municipality_index + 1, + ) municipality_row = municipality_index // 5 municipality_column = municipality_index % 5 - municipality_x = department_x + municipality_column * MUNICIPALITY_LENGTH - municipality_y = department_y - municipality_row * MUNICIPALITY_LENGTH + municipality_x = ( + department_x + municipality_column * MUNICIPALITY_LENGTH + ) + municipality_y = ( + department_y - municipality_row * MUNICIPALITY_LENGTH + ) if municipality_name in WITH_IRIS: for iris_index in np.arange(100): @@ -118,48 +130,61 @@ def create(output_path): iris_x = municipality_x + iris_column * IRIS_LENGTH iris_y = municipality_y - iris_row * IRIS_LENGTH - iris_polygon = geo.Polygon([ - (iris_x, iris_y), (iris_x + IRIS_LENGTH, iris_y), - (iris_x + IRIS_LENGTH, iris_y - IRIS_LENGTH), - (iris_x, iris_y - IRIS_LENGTH) - ]) - - df.append(dict( - region = region_number, - department = department_name, 
- municipality = municipality_name, - iris = iris_name, - geometry = iris_polygon - )) + iris_polygon = geo.Polygon( + [ + (iris_x, iris_y), + (iris_x + IRIS_LENGTH, iris_y), + (iris_x + IRIS_LENGTH, iris_y - IRIS_LENGTH), + (iris_x, iris_y - IRIS_LENGTH), + ] + ) + + df.append( + dict( + region=region_number, + department=department_name, + municipality=municipality_name, + iris=iris_name, + geometry=iris_polygon, + ) + ) else: - municipality_polygon = geo.Polygon([ - (municipality_x, municipality_y), (municipality_x + MUNICIPALITY_LENGTH, municipality_y), - (municipality_x + MUNICIPALITY_LENGTH, municipality_y - MUNICIPALITY_LENGTH), - (municipality_x, municipality_y - MUNICIPALITY_LENGTH) - ]) + municipality_polygon = geo.Polygon( + [ + (municipality_x, municipality_y), + (municipality_x + MUNICIPALITY_LENGTH, municipality_y), + ( + municipality_x + MUNICIPALITY_LENGTH, + municipality_y - MUNICIPALITY_LENGTH, + ), + (municipality_x, municipality_y - MUNICIPALITY_LENGTH), + ] + ) iris_name = "%s0000" % municipality_name - df.append(dict( - region = region_number, - department = department_name, - municipality = municipality_name, - iris = iris_name, - geometry = municipality_polygon - )) + df.append( + dict( + region=region_number, + department=department_name, + municipality=municipality_name, + iris=iris_name, + geometry=municipality_polygon, + ) + ) df = pd.DataFrame.from_records(df) - df = gpd.GeoDataFrame(df, crs = "EPSG:2154") - + df = gpd.GeoDataFrame(df, crs="EPSG:2154") + # Dataset: IRIS zones # Required attributes: CODE_IRIS, INSEE_COM, geometry print("Creating IRIS zones ...") df_iris = df.copy() - df_iris = df_iris[["iris", "municipality", "geometry"]].rename(columns = dict( - iris = "CODE_IRIS", municipality = "INSEE_COM" - )) + df_iris = df_iris[["iris", "municipality", "geometry"]].rename( + columns=dict(iris="CODE_IRIS", municipality="INSEE_COM") + ) os.mkdir("%s/iris_2021" % output_path) df_iris.to_file("%s/iris_2021/CONTOURS-IRIS.shp" % output_path) @@ -174,17 +199,20 @@ def create(output_path): print("Creating codes ...") df_codes = df.copy() - df_codes = df_codes[["iris", "municipality", "department", "region"]].rename(columns = dict( - iris = "CODE_IRIS", municipality = "DEPCOM", department = "DEP", region = "REG" - )) + df_codes = df_codes[["iris", "municipality", "department", "region"]].rename( + columns=dict( + iris="CODE_IRIS", municipality="DEPCOM", department="DEP", region="REG" + ) + ) os.mkdir("%s/codes_2021" % output_path) - with zipfile.ZipFile("%s/codes_2021/reference_IRIS_geo2021.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/codes_2021/reference_IRIS_geo2021.zip" % output_path, "w" + ) as archive: with archive.open("reference_IRIS_geo2021.xlsx", "w") as f: df_codes.to_excel( - f, sheet_name = "Emboitements_IRIS", - startrow = 5, index = False + f, sheet_name="Emboitements_IRIS", startrow=5, index=False ) # Dataset: Aggregate census @@ -192,20 +220,22 @@ def create(output_path): print("Creating aggregate census ...") df_population = df.copy() - df_population = df_population[["iris", "municipality", "department", "region"]].rename(columns = dict( - iris = "IRIS", municipality = "COM", department = "DEP", region = "REG" - )) + df_population = df_population[ + ["iris", "municipality", "department", "region"] + ].rename( + columns=dict(iris="IRIS", municipality="COM", department="DEP", region="REG") + ) # Set all population to fixed number df_population["P19_POP"] = 120.0 os.mkdir("%s/rp_2019" % output_path) - with 
zipfile.ZipFile("%s/rp_2019/base-ic-evol-struct-pop-2019.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/rp_2019/base-ic-evol-struct-pop-2019.zip" % output_path, "w" + ) as archive: with archive.open("base-ic-evol-struct-pop-2019.xlsx", "w") as f: - df_population.to_excel( - f, sheet_name = "IRIS", startrow = 5, index = False - ) + df_population.to_excel(f, sheet_name="IRIS", startrow=5, index=False) # Dataset: BPE # Required attributes: DCIRIS, LAMBERT_X, LAMBERT_Y, TYPEQU, DEPCOM, DEP @@ -215,13 +245,15 @@ def create(output_path): observations = BPE_OBSERVATIONS categories = np.array(["A", "B", "C", "D", "E", "F", "G"]) - df_selection = df.iloc[random.randint(0, len(df), size = observations)].copy() + df_selection = df.iloc[random.randint(0, len(df), size=observations)].copy() df_selection["DCIRIS"] = df_selection["iris"] df_selection["DEPCOM"] = df_selection["municipality"] df_selection["DEP"] = df_selection["department"] df_selection["LAMBERT_X"] = df_selection["geometry"].centroid.x df_selection["LAMBERT_Y"] = df_selection["geometry"].centroid.y - df_selection["TYPEQU"] = categories[random.randint(0, len(categories), size = len(df_selection))] + df_selection["TYPEQU"] = categories[ + random.randint(0, len(categories), size=len(df_selection)) + ] # Deliberately set coordinates for some to NaN df_selection.iloc[-10:, df_selection.columns.get_loc("LAMBERT_X")] = np.nan @@ -231,10 +263,11 @@ def create(output_path): os.mkdir("%s/bpe_2021" % output_path) - with zipfile.ZipFile("%s/bpe_2021/bpe21_ensemble_xy_csv.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/bpe_2021/bpe21_ensemble_xy_csv.zip" % output_path, "w" + ) as archive: with archive.open("bpe21_ensemble_xy.csv", "w") as f: - df_selection[columns].to_csv(f, - sep = ";", index = False) + df_selection[columns].to_csv(f, sep=";", index=False) # Dataset: Tax data # Required attributes: CODGEO, D115, ..., D915 @@ -244,23 +277,80 @@ def create(output_path): filosofi_year = "19" income_data = { "househod_size": [ - {"name": "1_pers", "sheet": "TAILLEM_1", "col_pattern": "TME1", "data": [9820,13380,15730,18140,20060,22050,24710,28120,34150]}, - {"name": "2_pers", "sheet": "TAILLEM_2", "col_pattern": "TME2", "data": [12950,16840,19920,22660,25390,28500,32080,37030,45910]}, - {"name": "3_pers", "sheet": "TAILLEM_3", "col_pattern": "TME3", "data": [11440,14850,18070,21040,23960,27190,30930,36130,45680]}, - {"name": "4_pers", "sheet": "TAILLEM_4", "col_pattern": "TME4", "data": [11920,15720,19130,22440,25540,28750,32400,37520,46870]}, - {"name": "5_pers_or_more", "sheet": "TAILLEM_5", "col_pattern": "TME5", "data": [9320,11510,13580,16180,19920,24570,29180,35460,46370]}, + { + "name": "1_pers", + "sheet": "TAILLEM_1", + "col_pattern": "TME1", + "data": [9820, 13380, 15730, 18140, 20060, 22050, 24710, 28120, 34150], + }, + { + "name": "2_pers", + "sheet": "TAILLEM_2", + "col_pattern": "TME2", + "data": [12950, 16840, 19920, 22660, 25390, 28500, 32080, 37030, 45910], + }, + { + "name": "3_pers", + "sheet": "TAILLEM_3", + "col_pattern": "TME3", + "data": [11440, 14850, 18070, 21040, 23960, 27190, 30930, 36130, 45680], + }, + { + "name": "4_pers", + "sheet": "TAILLEM_4", + "col_pattern": "TME4", + "data": [11920, 15720, 19130, 22440, 25540, 28750, 32400, 37520, 46870], + }, + { + "name": "5_pers_or_more", + "sheet": "TAILLEM_5", + "col_pattern": "TME5", + "data": [9320, 11510, 13580, 16180, 19920, 24570, 29180, 35460, 46370], + }, ], "family_comp": [ - {"name": "Single_man", "sheet": "TYPMENR_1", "col_pattern": 
"TYM1", "data": [9180,12830,15100,17740,19800,21890,24780,28290,34850]}, - {"name": "Single_wom", "sheet": "TYPMENR_2", "col_pattern": "TYM2", "data": [10730,13730,16220,18420,20260,22160,24680,27990,33570]}, - {"name": "Couple_without_child", "sheet": "TYPMENR_3", "col_pattern": "TYM3", "data": [15360,19560,22600,25260,27990,30980,34710,39640,49110]}, - {"name": "Couple_with_child", "sheet": "TYPMENR_4", "col_pattern": "TYM4", "data": [11790,15540,19240,22670,25850,29180,33090,38570,48700]}, - {"name": "Single_parent", "sheet": "TYPMENR_5", "col_pattern": "TYM5", "data": [9350,11150,12830,14660,16640,18760,21230,24700,31170]}, - {"name": "complex_hh", "sheet": "TYPMENR_6", "col_pattern": "TYM6", "data": [9280,11850,14100,16740,19510,22480,26100,30640,38970]}, - ] + { + "name": "Single_man", + "sheet": "TYPMENR_1", + "col_pattern": "TYM1", + "data": [9180, 12830, 15100, 17740, 19800, 21890, 24780, 28290, 34850], + }, + { + "name": "Single_wom", + "sheet": "TYPMENR_2", + "col_pattern": "TYM2", + "data": [10730, 13730, 16220, 18420, 20260, 22160, 24680, 27990, 33570], + }, + { + "name": "Couple_without_child", + "sheet": "TYPMENR_3", + "col_pattern": "TYM3", + "data": [15360, 19560, 22600, 25260, 27990, 30980, 34710, 39640, 49110], + }, + { + "name": "Couple_with_child", + "sheet": "TYPMENR_4", + "col_pattern": "TYM4", + "data": [11790, 15540, 19240, 22670, 25850, 29180, 33090, 38570, 48700], + }, + { + "name": "Single_parent", + "sheet": "TYPMENR_5", + "col_pattern": "TYM5", + "data": [9350, 11150, 12830, 14660, 16640, 18760, 21230, 24700, 31170], + }, + { + "name": "complex_hh", + "sheet": "TYPMENR_6", + "col_pattern": "TYM6", + "data": [9280, 11850, 14100, 16740, 19510, 22480, 26100, 30640, 38970], + }, + ], } - df_income = df.drop_duplicates("municipality")[["municipality"]].rename(columns = dict(municipality = "CODGEO")) + df_income = df.drop_duplicates("municipality")[["municipality"]].rename( + columns=dict(municipality="CODGEO") + ) df_income_ensemble = df_income.copy() @@ -276,9 +366,9 @@ def create(output_path): df_income_ensemble["D919"] = 32303.0 # Deliberately remove some of them - df_income_ensemble = df_income_ensemble[~df_income_ensemble["CODGEO"].isin([ - "1A015", "1A016" - ])] + df_income_ensemble = df_income_ensemble[ + ~df_income_ensemble["CODGEO"].isin(["1A015", "1A016"]) + ] # Deliberately only provide median for some f = df_income_ensemble["CODGEO"].isin(["1D002", "1D005"]) @@ -288,17 +378,25 @@ def create(output_path): value["df"] = df_income.copy() col_pattern = value["col_pattern"] columns = [ - "%sD%d" % (col_pattern, q) + filosofi_year if q != 5 else col_pattern + "Q2" + filosofi_year + ( + "%sD%d" % (col_pattern, q) + filosofi_year + if q != 5 + else col_pattern + "Q2" + filosofi_year + ) for q in range(1, 10) ] for i, column in enumerate(columns): value["df"][column] = value["data"][i] - + for value in income_data["family_comp"]: value["df"] = df_income.copy() col_pattern = value["col_pattern"] columns = [ - "%sD%d" % (col_pattern, q) + filosofi_year if q != 5 else col_pattern + "Q2" + filosofi_year + ( + "%sD%d" % (col_pattern, q) + filosofi_year + if q != 5 + else col_pattern + "Q2" + filosofi_year + ) for q in range(1, 10) ] for i, column in enumerate(columns): @@ -306,30 +404,33 @@ def create(output_path): os.mkdir("%s/filosofi_2019" % output_path) - with zipfile.ZipFile("%s/filosofi_2019/indic-struct-distrib-revenu-2019-COMMUNES.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/filosofi_2019/indic-struct-distrib-revenu-2019-COMMUNES.zip" % 
output_path, + "w", + ) as archive: with archive.open("FILO2019_DISP_COM.xlsx", "w") as f: - with pd.ExcelWriter(f) as writer: + with pd.ExcelWriter(f) as writer: df_income_ensemble.to_excel( - writer, sheet_name = "ENSEMBLE", startrow = 5, index = False + writer, sheet_name="ENSEMBLE", startrow=5, index=False ) for value in income_data["househod_size"]: value["df"].to_excel( - writer, sheet_name = value["sheet"], startrow = 5, index = False + writer, sheet_name=value["sheet"], startrow=5, index=False ) for value in income_data["family_comp"]: value["df"].to_excel( - writer, sheet_name = value["sheet"], startrow = 5, index = False + writer, sheet_name=value["sheet"], startrow=5, index=False ) # Data set: ENTD print("Creating ENTD ...") data = dict( - Q_MENAGE = [], - Q_TCM_MENAGE = [], - Q_INDIVIDU = [], - Q_TCM_INDIVIDU = [], - K_DEPLOC = [], + Q_MENAGE=[], + Q_TCM_MENAGE=[], + Q_INDIVIDU=[], + Q_TCM_INDIVIDU=[], + K_DEPLOC=[], ) for household_index in range(HTS_HOUSEHOLDS): @@ -338,107 +439,182 @@ def create(output_path): region = random.choice([10, 20]) department = "%d%s" % (region // 10, random.choice(["A", "B", "C", "D"])) - data["Q_MENAGE"].append(dict( - DEP = department, idENT_MEN = household_id, PONDV1 = 1.0, - RG = region, V1_JNBVELOADT = random.randint(4), - V1_JNBVEH = random.randint(3), V1_JNBMOTO = random.randint(2), - V1_JNBCYCLO = 0 - )) - - data["Q_TCM_MENAGE"].append(dict( - NPERS = 3, PONDV1 = 1.0, DEP = department, - idENT_MEN = household_id, RG = region, - TrancheRevenuMensuel = random.choice([ - "Moins de 400", "De 400", "De 600", "De 800", - "De 1 000", "De 1 200", "De 1 500", "De 1800", - "De 2 000", "De 2 500", "De 3 000", "De 4 000", - "De 6 000", "10 000" - ]), numcom_UU2010 = ["B", "C", "I", "R"][household_index % 4] - )) + data["Q_MENAGE"].append( + dict( + DEP=department, + idENT_MEN=household_id, + PONDV1=1.0, + RG=region, + V1_JNBVELOADT=random.randint(4), + V1_JNBVEH=random.randint(3), + V1_JNBMOTO=random.randint(2), + V1_JNBCYCLO=0, + ) + ) + + data["Q_TCM_MENAGE"].append( + dict( + NPERS=3, + PONDV1=1.0, + DEP=department, + idENT_MEN=household_id, + RG=region, + TrancheRevenuMensuel=random.choice( + [ + "Moins de 400", + "De 400", + "De 600", + "De 800", + "De 1 000", + "De 1 200", + "De 1 500", + "De 1800", + "De 2 000", + "De 2 500", + "De 3 000", + "De 4 000", + "De 6 000", + "10 000", + ] + ), + numcom_UU2010=["B", "C", "I", "R"][household_index % 4], + ) + ) for person_index in range(HTS_HOUSEHOLD_MEMBERS): person_id = household_id * 1000 + person_index studies = random.random_sample() < 0.3 - data["Q_INDIVIDU"].append(dict( - IDENT_IND = person_id, idENT_MEN = household_id, - RG = region, - V1_GPERMIS = random.choice([1, 2]), V1_GPERMIS2R = random.choice([1, 2]), - V1_ICARTABON = random.choice([1, 2]), - )) - - data["Q_TCM_INDIVIDU"].append(dict( - AGE = random.randint(90), SEXE = random.choice([1, 2]), - CS24 = random.randint(8) * 10, DEP = department, - ETUDES = 1 if studies else 2, IDENT_IND = person_id, - IDENT_MEN = household_id, PONDV1 = 1.0, - SITUA = random.choice([1, 2]) - )) - - if person_index == 0: # Only one person per household has activity chain + data["Q_INDIVIDU"].append( + dict( + IDENT_IND=person_id, + idENT_MEN=household_id, + RG=region, + V1_GPERMIS=random.choice([1, 2]), + V1_GPERMIS2R=random.choice([1, 2]), + V1_ICARTABON=random.choice([1, 2]), + ) + ) + + data["Q_TCM_INDIVIDU"].append( + dict( + AGE=random.randint(90), + SEXE=random.choice([1, 2]), + CS24=random.randint(8) * 10, + DEP=department, + ETUDES=1 if studies else 
2, + IDENT_IND=person_id, + IDENT_MEN=household_id, + PONDV1=1.0, + SITUA=random.choice([1, 2]), + ) + ) + + if person_index == 0: # Only one person per household has activity chain home_department = department work_department = random.choice(df["department"].unique()) purpose = "1.11" if studies else "9" mode = random.choice(["1", "2", "2.20", "2.23", "4"]) - data["K_DEPLOC"].append(dict( - IDENT_IND = person_id, V2_MMOTIFDES = purpose, V2_MMOTIFORI = 1, - V2_TYPJOUR = 1, V2_MORIHDEP = "08:00:00", V2_MDESHARR = "09:00:00", - V2_MDISTTOT = 3, # km - IDENT_JOUR = 1, V2_MTP = mode, - V2_MDESDEP = work_department, - V2_MORIDEP = home_department, - NDEP = 4, V2_MOBILREF = 1, PONDKI = 3.0 - )) - - data["K_DEPLOC"].append(dict( - IDENT_IND = person_id, V2_MMOTIFDES = 2, V2_MMOTIFORI = purpose, - V2_TYPJOUR = 1, V2_MORIHDEP = "17:00:00", V2_MDESHARR = "17:30:00", - V2_MDISTTOT = 3, # km - IDENT_JOUR = 1, V2_MTP = mode, - V2_MDESDEP = home_department, - V2_MORIDEP = work_department, - NDEP = 4, V2_MOBILREF = 1, PONDKI = 3.0 - )) - - data["K_DEPLOC"].append(dict( - IDENT_IND = person_id, V2_MMOTIFDES = 1, V2_MMOTIFORI = 2, - V2_TYPJOUR = 1, V2_MORIHDEP = "18:00:00", V2_MDESHARR = "19:00:00", - V2_MDISTTOT = 3, # km - IDENT_JOUR = 1, V2_MTP = mode, - V2_MDESDEP = home_department, - V2_MORIDEP = home_department, - NDEP = 4, V2_MOBILREF = 1, PONDKI = 3.0 - )) + data["K_DEPLOC"].append( + dict( + IDENT_IND=person_id, + V2_MMOTIFDES=purpose, + V2_MMOTIFORI=1, + V2_TYPJOUR=1, + V2_MORIHDEP="08:00:00", + V2_MDESHARR="09:00:00", + V2_MDISTTOT=3, # km + IDENT_JOUR=1, + V2_MTP=mode, + V2_MDESDEP=work_department, + V2_MORIDEP=home_department, + NDEP=4, + V2_MOBILREF=1, + PONDKI=3.0, + ) + ) + + data["K_DEPLOC"].append( + dict( + IDENT_IND=person_id, + V2_MMOTIFDES=2, + V2_MMOTIFORI=purpose, + V2_TYPJOUR=1, + V2_MORIHDEP="17:00:00", + V2_MDESHARR="17:30:00", + V2_MDISTTOT=3, # km + IDENT_JOUR=1, + V2_MTP=mode, + V2_MDESDEP=home_department, + V2_MORIDEP=work_department, + NDEP=4, + V2_MOBILREF=1, + PONDKI=3.0, + ) + ) + + data["K_DEPLOC"].append( + dict( + IDENT_IND=person_id, + V2_MMOTIFDES=1, + V2_MMOTIFORI=2, + V2_TYPJOUR=1, + V2_MORIHDEP="18:00:00", + V2_MDESHARR="19:00:00", + V2_MDISTTOT=3, # km + IDENT_JOUR=1, + V2_MTP=mode, + V2_MDESDEP=home_department, + V2_MORIDEP=home_department, + NDEP=4, + V2_MOBILREF=1, + PONDKI=3.0, + ) + ) # Add a tail - data["K_DEPLOC"].append(dict( - IDENT_IND = person_id, V2_MMOTIFDES = 2, V2_MMOTIFORI = 1, - V2_TYPJOUR = 1, V2_MORIHDEP = "21:00:00", V2_MDESHARR = "22:00:00", - V2_MDISTTOT = 3, # km - IDENT_JOUR = 1, V2_MTP = mode, - V2_MDESDEP = home_department, - V2_MORIDEP = home_department, - NDEP = 4, V2_MOBILREF = 1, PONDKI = 3.0 - )) + data["K_DEPLOC"].append( + dict( + IDENT_IND=person_id, + V2_MMOTIFDES=2, + V2_MMOTIFORI=1, + V2_TYPJOUR=1, + V2_MORIHDEP="21:00:00", + V2_MDESHARR="22:00:00", + V2_MDISTTOT=3, # km + IDENT_JOUR=1, + V2_MTP=mode, + V2_MDESDEP=home_department, + V2_MORIDEP=home_department, + NDEP=4, + V2_MOBILREF=1, + PONDKI=3.0, + ) + ) os.mkdir("%s/entd_2008" % output_path) - pd.DataFrame.from_records(data["Q_MENAGE"]).to_csv("%s/entd_2008/Q_menage.csv" % output_path, index = False, sep = ";") - pd.DataFrame.from_records(data["Q_TCM_MENAGE"]).to_csv("%s/entd_2008/Q_tcm_menage_0.csv" % output_path, index = False, sep = ";") - pd.DataFrame.from_records(data["Q_INDIVIDU"]).to_csv("%s/entd_2008/Q_individu.csv" % output_path, index = False, sep = ";") - pd.DataFrame.from_records(data["Q_TCM_INDIVIDU"]).to_csv("%s/entd_2008/Q_tcm_individu.csv" % output_path, 
index = False, sep = ";") - pd.DataFrame.from_records(data["K_DEPLOC"]).to_csv("%s/entd_2008/K_deploc.csv" % output_path, index = False, sep = ";") - + pd.DataFrame.from_records(data["Q_MENAGE"]).to_csv( + "%s/entd_2008/Q_menage.csv" % output_path, index=False, sep=";" + ) + pd.DataFrame.from_records(data["Q_TCM_MENAGE"]).to_csv( + "%s/entd_2008/Q_tcm_menage_0.csv" % output_path, index=False, sep=";" + ) + pd.DataFrame.from_records(data["Q_INDIVIDU"]).to_csv( + "%s/entd_2008/Q_individu.csv" % output_path, index=False, sep=";" + ) + pd.DataFrame.from_records(data["Q_TCM_INDIVIDU"]).to_csv( + "%s/entd_2008/Q_tcm_individu.csv" % output_path, index=False, sep=";" + ) + pd.DataFrame.from_records(data["K_DEPLOC"]).to_csv( + "%s/entd_2008/K_deploc.csv" % output_path, index=False, sep=";" + ) # Data set: EGT print("Creating EGT ...") - data = dict( - households = [], - persons = [], - trips = [] - ) + data = dict(households=[], persons=[], trips=[]) person_index = 0 for household_index in range(HTS_HOUSEHOLDS): @@ -448,30 +624,50 @@ def create(output_path): region = df[df["municipality"] == municipality]["region"].values[0] department = df[df["municipality"] == municipality]["department"].values[0] - data["households"].append(dict( - RESDEP = department, NQUEST = household_id, POIDSM = 1.0, - NB_VELO = random.randint(3), NB_VD = random.randint(2), - RESCOMM = municipality, NB_2RM = 0, - MNP = 3, REVENU = random.randint(12) - )) + data["households"].append( + dict( + RESDEP=department, + NQUEST=household_id, + POIDSM=1.0, + NB_VELO=random.randint(3), + NB_VD=random.randint(2), + RESCOMM=municipality, + NB_2RM=0, + MNP=3, + REVENU=random.randint(12), + ) + ) for person_id in range(1, HTS_HOUSEHOLD_MEMBERS + 1): studies = random.random_sample() < 0.3 - data["persons"].append(dict( - RESDEP = department, NP = person_id, POIDSP = 1.0, - NQUEST = household_id, SEXE = random.choice([1, 2]), - AGE = random.randint(90), PERMVP = random.choice([1, 2]), - ABONTC = random.choice([1, 2]), OCCP = 3 if studies else 2, - PERM2RM = random.choice([1, 2]), NBDEPL = 2, CS8 = random.randint(9) - )) + data["persons"].append( + dict( + RESDEP=department, + NP=person_id, + POIDSP=1.0, + NQUEST=household_id, + SEXE=random.choice([1, 2]), + AGE=random.randint(90), + PERMVP=random.choice([1, 2]), + ABONTC=random.choice([1, 2]), + OCCP=3 if studies else 2, + PERM2RM=random.choice([1, 2]), + NBDEPL=2, + CS8=random.randint(9), + ) + ) home_department = department home_municipality = municipality work_municipality = random.choice(df["municipality"].unique()) - work_region = df[df["municipality"] == work_municipality]["region"].values[0] - work_department = df[df["municipality"] == work_municipality]["department"].values[0] + work_region = df[df["municipality"] == work_municipality]["region"].values[ + 0 + ] + work_department = df[df["municipality"] == work_municipality][ + "department" + ].values[0] purpose = 4 if studies else 2 mode = random.choice([1, 2, 3, 5, 7]) @@ -484,43 +680,97 @@ def create(output_path): origin_hour = 0 origin_minute = 12 - data["trips"].append(dict( - NQUEST = household_id, NP = person_id, - ND = 1, ORDEP = home_department, DESTDEP = work_department, - ORH = origin_hour, ORM = origin_minute, DESTH = 9, DESTM = 0, ORCOMM = home_municipality, - DESTCOMM = work_municipality, DPORTEE = 3, MODP_H7 = 2, - DESTMOT_H9 = purpose, ORMOT_H9 = 1 - )) - - data["trips"].append(dict( - NQUEST = household_id, NP = person_id, - ND = 2, ORDEP = work_department, DESTDEP = home_department, - ORH = 8, ORM = 0, DESTH = 9, 
DESTM = 0, ORCOMM = work_municipality, - DESTCOMM = home_municipality, DPORTEE = 3, MODP_H7 = 2, - DESTMOT_H9 = 5, ORMOT_H9 = purpose - )) - - data["trips"].append(dict( - NQUEST = household_id, NP = person_id, - ND = 3, ORDEP = home_department, DESTDEP = home_department, - ORH = 17, ORM = 0, DESTH = 18, DESTM = 0, ORCOMM = home_municipality, - DESTCOMM = home_municipality, DPORTEE = 3, MODP_H7 = 2, - DESTMOT_H9 = 1, ORMOT_H9 = 5 - )) + data["trips"].append( + dict( + NQUEST=household_id, + NP=person_id, + ND=1, + ORDEP=home_department, + DESTDEP=work_department, + ORH=origin_hour, + ORM=origin_minute, + DESTH=9, + DESTM=0, + ORCOMM=home_municipality, + DESTCOMM=work_municipality, + DPORTEE=3, + MODP_H7=2, + DESTMOT_H9=purpose, + ORMOT_H9=1, + ) + ) + + data["trips"].append( + dict( + NQUEST=household_id, + NP=person_id, + ND=2, + ORDEP=work_department, + DESTDEP=home_department, + ORH=8, + ORM=0, + DESTH=9, + DESTM=0, + ORCOMM=work_municipality, + DESTCOMM=home_municipality, + DPORTEE=3, + MODP_H7=2, + DESTMOT_H9=5, + ORMOT_H9=purpose, + ) + ) + + data["trips"].append( + dict( + NQUEST=household_id, + NP=person_id, + ND=3, + ORDEP=home_department, + DESTDEP=home_department, + ORH=17, + ORM=0, + DESTH=18, + DESTM=0, + ORCOMM=home_municipality, + DESTCOMM=home_municipality, + DPORTEE=3, + MODP_H7=2, + DESTMOT_H9=1, + ORMOT_H9=5, + ) + ) # Tail - data["trips"].append(dict( - NQUEST = household_id, NP = person_id, - ND = 4, ORDEP = home_department, DESTDEP = home_department, - ORH = 22, ORM = 0, DESTH = 21, DESTM = 0, ORCOMM = home_municipality, - DESTCOMM = home_municipality, DPORTEE = 3, MODP_H7 = 2, - DESTMOT_H9 = 5, ORMOT_H9 = 1 - )) + data["trips"].append( + dict( + NQUEST=household_id, + NP=person_id, + ND=4, + ORDEP=home_department, + DESTDEP=home_department, + ORH=22, + ORM=0, + DESTH=21, + DESTM=0, + ORCOMM=home_municipality, + DESTCOMM=home_municipality, + DPORTEE=3, + MODP_H7=2, + DESTMOT_H9=5, + ORMOT_H9=1, + ) + ) os.mkdir("%s/egt_2010" % output_path) - pd.DataFrame.from_records(data["households"]).to_csv("%s/egt_2010/Menages_semaine.csv" % output_path, index = False, sep = ",") - pd.DataFrame.from_records(data["persons"]).to_csv("%s/egt_2010/Personnes_semaine.csv" % output_path, index = False, sep = ",") - pd.DataFrame.from_records(data["trips"]).to_csv("%s/egt_2010/Deplacements_semaine.csv" % output_path, index = False, sep = ",") + pd.DataFrame.from_records(data["households"]).to_csv( + "%s/egt_2010/Menages_semaine.csv" % output_path, index=False, sep="," + ) + pd.DataFrame.from_records(data["persons"]).to_csv( + "%s/egt_2010/Personnes_semaine.csv" % output_path, index=False, sep="," + ) + pd.DataFrame.from_records(data["trips"]).to_csv( + "%s/egt_2010/Deplacements_semaine.csv" % output_path, index=False, sep="," + ) # Data set: Census print("Creating census ...") @@ -532,40 +782,68 @@ def create(output_path): iris = df["iris"].iloc[random.randint(len(df))] department = iris[:2] - if iris.endswith("0000"): iris = iris[:-4] + "XXXX" + if iris.endswith("0000"): + iris = iris[:-4] + "XXXX" - if random.random_sample() < 0.1: # For some, commune is not known + if random.random_sample() < 0.1: # For some, commune is not known iris = "ZZZZZZZZZ" destination_municipality = random.choice(df["municipality"].unique()) - destination_department = df[df["municipality"] == destination_municipality]["department"].values[0] + destination_department = df[df["municipality"] == destination_municipality][ + "department" + ].values[0] for person_index in range(CENSUS_HOUSEHOLD_MEMBERS): - 
persons.append(dict( - CANTVILLE = "ABCE", NUMMI = household_id, - AGED = "%03d" % random.randint(90), COUPLE = random.choice([1, 2]), - CS1 = random.randint(9), - DEPT = department, IRIS = iris, REGION = region, ETUD = random.choice([1, 2]), - ILETUD = 4 if department != destination_department else 0, - ILT = 4 if department != destination_department else 0, - IPONDI = float(1.0), - SEXE = random.choice([1, 2]), - TACT = random.choice([1, 2]), - TRANS = 4, VOIT = random.randint(3), DEROU = random.randint(2) - )) + persons.append( + dict( + CANTVILLE="ABCE", + NUMMI=household_id, + AGED="%03d" % random.randint(90), + COUPLE=random.choice([1, 2]), + CS1=random.randint(9), + DEPT=department, + IRIS=iris, + REGION=region, + ETUD=random.choice([1, 2]), + ILETUD=4 if department != destination_department else 0, + ILT=4 if department != destination_department else 0, + IPONDI=float(1.0), + SEXE=random.choice([1, 2]), + TACT=random.choice([1, 2]), + TRANS=4, + VOIT=random.randint(3), + DEROU=random.randint(2), + ) + ) columns = [ - "CANTVILLE", "NUMMI", "AGED", "COUPLE", "CS1", "DEPT", "IRIS", "REGION", - "ETUD", "ILETUD", "ILT", "IPONDI", - "SEXE", "TACT", "TRANS", "VOIT", "DEROU" + "CANTVILLE", + "NUMMI", + "AGED", + "COUPLE", + "CS1", + "DEPT", + "IRIS", + "REGION", + "ETUD", + "ILETUD", + "ILT", + "IPONDI", + "SEXE", + "TACT", + "TRANS", + "VOIT", + "DEROU", ] df_persons = pd.DataFrame.from_records(persons)[columns] df_persons.columns = columns - with zipfile.ZipFile("%s/rp_2019/RP2019_INDCVI_csv.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/rp_2019/RP2019_INDCVI_csv.zip" % output_path, "w" + ) as archive: with archive.open("FD_INDCVI_2019.csv", "w") as f: - df_persons.to_csv(f, sep = ";") + df_persons.to_csv(f, sep=";") # Data set: commute flows print("Creating commute flows ...") @@ -574,11 +852,15 @@ def create(output_path): observations = COMMUTE_FLOW_OBSERVATIONS # ... work - df_work = pd.DataFrame(dict( - COMMUNE = municipalities[random.randint(0, len(municipalities), observations)], - DCLT = municipalities[random.randint(0, len(municipalities), observations)], - TRANS = random.randint(1, 6, size = (observations,)) - )) + df_work = pd.DataFrame( + dict( + COMMUNE=municipalities[ + random.randint(0, len(municipalities), observations) + ], + DCLT=municipalities[random.randint(0, len(municipalities), observations)], + TRANS=random.randint(1, 6, size=(observations,)), + ) + ) df_work["ARM"] = "Z" df_work["IPONDI"] = 1.0 @@ -586,25 +868,33 @@ def create(output_path): columns = ["COMMUNE", "DCLT", "TRANS", "ARM", "IPONDI"] df_work.columns = columns - with zipfile.ZipFile("%s/rp_2019/RP2019_MOBPRO_csv.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/rp_2019/RP2019_MOBPRO_csv.zip" % output_path, "w" + ) as archive: with archive.open("FD_MOBPRO_2019.csv", "w") as f: - df_work.to_csv(f, sep = ";") + df_work.to_csv(f, sep=";") # ... 
education - df_education = pd.DataFrame(dict( - COMMUNE = municipalities[random.randint(0, len(municipalities), observations)], - DCETUF = municipalities[random.randint(0, len(municipalities), observations)] - )) + df_education = pd.DataFrame( + dict( + COMMUNE=municipalities[ + random.randint(0, len(municipalities), observations) + ], + DCETUF=municipalities[random.randint(0, len(municipalities), observations)], + ) + ) df_education["ARM"] = "Z" df_education["IPONDI"] = 1.0 df_education["AGEREV10"] = 1 - columns = ["COMMUNE", "DCETUF", "ARM", "IPONDI","AGEREV10"] + columns = ["COMMUNE", "DCETUF", "ARM", "IPONDI", "AGEREV10"] df_education.columns = columns - with zipfile.ZipFile("%s/rp_2019/RP2019_MOBSCO_csv.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/rp_2019/RP2019_MOBSCO_csv.zip" % output_path, "w" + ) as archive: with archive.open("FD_MOBSCO_2019.csv", "w") as f: - df_education.to_csv(f, sep = ";") + df_education.to_csv(f, sep=";") # Data set: BD-TOPO print("Creating BD-TOPO ...") @@ -615,43 +905,56 @@ def create(output_path): x = df_selection["geometry"].centroid.x.values y = df_selection["geometry"].centroid.y.values - z = random.randint(100, 400, observations) # Not used but keeping unit test hashes constant + z = random.randint( + 100, 400, observations + ) # Not used but keeping unit test hashes constant ids = [ - "BATIMENT{:016d}".format(n) for n in random.randint(1000, 1000000, observations) + "BATIMENT{:016d}".format(n) for n in random.randint(1000, 1000000, observations) ] - - ids[0] = ids[1] # setting multiple adresses for 1 building usecase - - df_bdtopo = gpd.GeoDataFrame({ - "nombre_de_logements": random.randint(0, 10, observations), - "cleabs": ids, - "geometry": [ - geo.Point(x, y) for x, y in zip(x, y) - ] - }, crs = "EPSG:2154") + + ids[0] = ids[1] # setting multiple adresses for 1 building usecase + + df_bdtopo = gpd.GeoDataFrame( + { + "nombre_de_logements": random.randint(0, 10, observations), + "cleabs": ids, + "geometry": [geo.Point(x, y) for x, y in zip(x, y)], + }, + crs="EPSG:2154", + ) # polygons as buildings from iris centroid points - df_bdtopo.set_geometry(df_bdtopo.buffer(40),inplace=True,drop=True,crs="EPSG:2154") + df_bdtopo.set_geometry( + df_bdtopo.buffer(40), inplace=True, drop=True, crs="EPSG:2154" + ) os.mkdir("{}/bdtopo_idf".format(output_path)) - df_bdtopo.to_file("{}/bdtopo_idf/content.gpkg".format(output_path), layer = "batiment") + df_bdtopo.to_file( + "{}/bdtopo_idf/content.gpkg".format(output_path), layer="batiment" + ) bdtopo_date = "2022-03-15" bdtopo_departments = ["1A", "1B", "1C", "1D", "2A", "2B", "2C", "2D"] - with py7zr.SevenZipFile("{}/bdtopo_idf/bdtopo.7z".format(output_path), "w") as archive: - archive.write("{}/bdtopo_idf/content.gpkg".format(output_path), "content/content.gpkg") + with py7zr.SevenZipFile( + "{}/bdtopo_idf/bdtopo.7z".format(output_path), "w" + ) as archive: + archive.write( + "{}/bdtopo_idf/content.gpkg".format(output_path), "content/content.gpkg" + ) os.remove("{}/bdtopo_idf/content.gpkg".format(output_path)) - + for department in bdtopo_departments: shutil.copyfile( - "{}/bdtopo_idf/bdtopo.7z".format(output_path), + "{}/bdtopo_idf/bdtopo.7z".format(output_path), "{}/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D0{}_{}.7z".format( - output_path, department, bdtopo_date)) - + output_path, department, bdtopo_date + ), + ) + os.remove("{}/bdtopo_idf/bdtopo.7z".format(output_path)) - + # Data set: BAN print("Creating BAN ...") @@ -663,16 +966,26 @@ def create(output_path): y = 
df_selection["geometry"].centroid.y.values municipality = df["municipality"].unique() - df_ban = pd.DataFrame({ - "code_insee": municipality[random.randint(0, len(municipality), observations)], - "x": x, - "y": y}) + df_ban = pd.DataFrame( + { + "code_insee": municipality[ + random.randint(0, len(municipality), observations) + ], + "x": x, + "y": y, + } + ) - df_ban = df_ban[:round(len(x)*.8)] + df_ban = df_ban[: round(len(x) * 0.8)] os.mkdir("%s/ban_idf" % output_path) for dep in df["department"].unique(): - df_ban.to_csv("%s/ban_idf/adresses-%s.csv.gz" % (output_path, dep), compression='gzip', sep=";", index=False) + df_ban.to_csv( + "%s/ban_idf/adresses-%s.csv.gz" % (output_path, dep), + compression="gzip", + sep=";", + index=False, + ) # Data set: SIRENE print("Creating SIRENE ...") @@ -681,25 +994,35 @@ def create(output_path): identifiers = random.randint(0, 99999999, observations) - df_sirene = pd.DataFrame({ - "siren": identifiers, - "siret": identifiers, - "codeCommuneEtablissement": municipalities[random.randint(0, len(municipalities), observations)], - "etatAdministratifEtablissement": "A" - }) + df_sirene = pd.DataFrame( + { + "siren": identifiers, + "siret": identifiers, + "codeCommuneEtablissement": municipalities[ + random.randint(0, len(municipalities), observations) + ], + "etatAdministratifEtablissement": "A", + } + ) df_sirene["activitePrincipaleEtablissement"] = "52.1" df_sirene["trancheEffectifsEtablissement"] = "03" - os.mkdir("%s/sirene" % output_path) - df_sirene.to_csv(output_path + "/sirene/StockEtablissement_utf8.zip", index = False, compression={'method': 'zip', 'archive_name': 'StockEtablissement_utf8.csv'}) - + df_sirene.to_csv( + output_path + "/sirene/StockEtablissement_utf8.zip", + index=False, + compression={"method": "zip", "archive_name": "StockEtablissement_utf8.csv"}, + ) df_sirene = df_sirene[["siren"]].copy() df_sirene["categorieJuridiqueUniteLegale"] = "1000" - df_sirene.to_csv(output_path + "/sirene/StockUniteLegale_utf8.zip", index = False, compression={'method': 'zip', 'archive_name': 'StockUniteLegale_utf8.csv'}) + df_sirene.to_csv( + output_path + "/sirene/StockUniteLegale_utf8.zip", + index=False, + compression={"method": "zip", "archive_name": "StockUniteLegale_utf8.csv"}, + ) # Data set: SIRENE GEOLOCATION print("Creating SIRENE GEOLOCATION...") @@ -708,32 +1031,53 @@ def create(output_path): x = df_selection["geometry"].centroid.x.values y = df_selection["geometry"].centroid.y.values - codes_com = df_codes["DEPCOM"].iloc[random.randint(0, len(df_iris), observations)] + codes_com = df_codes["DEPCOM"].iloc[random.randint(0, len(df_iris), observations)] + + df_sirene_geoloc = pd.DataFrame( + { + "siret": identifiers, + "x": x, + "y": y, + "plg_code_commune": codes_com, + } + ) - df_sirene_geoloc = pd.DataFrame({ - "siret": identifiers, - "x": x, - "y": y, - "plg_code_commune":codes_com, - }) - - df_sirene_geoloc.to_csv("%s/sirene/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.zip" % output_path, index = False, sep=";", compression={'method': 'zip', 'archive_name': 'GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.csv'}) + df_sirene_geoloc.to_csv( + "%s/sirene/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.zip" + % output_path, + index=False, + sep=";", + compression={ + "method": "zip", + "archive_name": "GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.csv", + }, + ) # Data set: Urban type print("Creating urban type ...") - df_urban_type = 
df_codes[["DEPCOM"]].copy().rename(columns = { "DEPCOM": "CODGEO" }) + df_urban_type = df_codes[["DEPCOM"]].copy().rename(columns={"DEPCOM": "CODGEO"}) df_urban_type = df_urban_type.drop_duplicates() - df_urban_type["STATUT_2017"] = [["B", "C", "I", "H"][k % 4] for k in range(len(df_urban_type))] + df_urban_type["STATUT_2017"] = [ + ["B", "C", "I", "H"][k % 4] for k in range(len(df_urban_type)) + ] - df_urban_type = pd.concat([df_urban_type, pd.DataFrame({ - "CODGEO": ["75056", "69123", "13055"], - "STATUT_2017": ["C", "C", "C"] - })]) + df_urban_type = pd.concat( + [ + df_urban_type, + pd.DataFrame( + {"CODGEO": ["75056", "69123", "13055"], "STATUT_2017": ["C", "C", "C"]} + ), + ] + ) os.mkdir("%s/urban_type" % output_path) - with zipfile.ZipFile("%s/urban_type/UU2020_au_01-01-2023.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/urban_type/UU2020_au_01-01-2023.zip" % output_path, "w" + ) as archive: with archive.open("UU2020_au_01-01-2023.xlsx", "w") as f: - df_urban_type.to_excel(f, startrow = 5, sheet_name = "Composition_communale", index = False) + df_urban_type.to_excel( + f, startrow=5, sheet_name="Composition_communale", index=False + ) # Data set: OSM # We add add a road grid of 500m @@ -754,10 +1098,14 @@ def create(output_path): for i in range(lengthx): for j in range(lengthy): - df_nodes.append(dict( - id = node_index, - geometry = geo.Point(anchor_x + 500 * i + 250, anchor_y - 500 * j - 250) - )) + df_nodes.append( + dict( + id=node_index, + geometry=geo.Point( + anchor_x + 500 * i + 250, anchor_y - 500 * j - 250 + ), + ) + ) if j < lengthy - 1: links.append([node_index, node_index + 1]) @@ -767,75 +1115,117 @@ def create(output_path): node_index += 1 - df_nodes = gpd.GeoDataFrame(df_nodes, crs = "EPSG:2154") + df_nodes = gpd.GeoDataFrame(df_nodes, crs="EPSG:2154") df_nodes = df_nodes.to_crs("EPSG:4326") for row in df_nodes.itertuples(): - osm.append('' % ( - row[1], row[2].y, row[2].x - )) + osm.append( + '' + % (row[1], row[2].y, row[2].x) + ) for index, link in enumerate(links): - osm.append('' % (index + 1)) + osm.append( + '' % (index + 1) + ) osm.append('' % link[0]) osm.append('' % link[1]) osm.append('') - osm.append('') + osm.append("") - osm.append('') + osm.append("") import gzip + os.mkdir("%s/osm_idf" % output_path) with gzip.open("%s/osm_idf/ile-de-france-220101.osm.gz" % output_path, "wb+") as f: f.write(bytes("\n".join(osm), "utf-8")) - import subprocess - subprocess.check_call([ - shutil.which("osmosis"), "--read-xml", "%s/osm_idf/ile-de-france-220101.osm.gz" % output_path, - "--write-pbf", "%s/osm_idf/ile-de-france-220101.osm.pbf" % output_path - ]) - + subprocess.check_call( + [ + shutil.which("osmosis"), + "--read-xml", + "%s/osm_idf/ile-de-france-220101.osm.gz" % output_path, + "--write-pbf", + "%s/osm_idf/ile-de-france-220101.osm.pbf" % output_path, + ] + ) # Data set: GTFS print("Creating GTFS ...") feed = {} - feed["agency"] = pd.DataFrame.from_records([dict( - agency_id = 1, agency_name = "eqasim", agency_timezone = "Europe/Paris", - agency_url = "https://eqasim.org" - )]) + feed["agency"] = pd.DataFrame.from_records( + [ + dict( + agency_id=1, + agency_name="eqasim", + agency_timezone="Europe/Paris", + agency_url="https://eqasim.org", + ) + ] + ) - feed["calendar"] = pd.DataFrame.from_records([dict( - service_id = 1, monday = 1, tuesday = 1, wednesday = 1, - thursday = 1, friday = 1, saturday = 1, sunday = 1, start_date = "20100101", - end_date = "20500101" - )]) + feed["calendar"] = pd.DataFrame.from_records( + [ + dict( + 
service_id=1, + monday=1, + tuesday=1, + wednesday=1, + thursday=1, + friday=1, + saturday=1, + sunday=1, + start_date="20100101", + end_date="20500101", + ) + ] + ) - feed["routes"] = pd.DataFrame.from_records([dict( - route_id = 1, agency_id = 1, route_short_name = "EQ", - route_long_name = "The eqasim train", route_desc = "", - route_type = 2 - )]) + feed["routes"] = pd.DataFrame.from_records( + [ + dict( + route_id=1, + agency_id=1, + route_short_name="EQ", + route_long_name="The eqasim train", + route_desc="", + route_type=2, + ) + ] + ) df_stops = df[df["municipality"].isin(["1B019", "2D007"])].copy() df_stops = df_stops.to_crs("EPSG:4326") - feed["stops"] = pd.DataFrame.from_records([dict( - stop_id = "A", stop_code = "A", stop_name = "A", - stop_desc = "", - stop_lat = df_stops["geometry"].iloc[0].centroid.y, - stop_lon = df_stops["geometry"].iloc[0].centroid.x, - location_type = 1, parent_station = None - ), dict( - stop_id = "B", stop_code = "B", stop_name = "B", - stop_desc = "", - stop_lat = df_stops["geometry"].iloc[1].centroid.y, - stop_lon = df_stops["geometry"].iloc[1].centroid.x, - location_type = 1, parent_station = None - )]) + feed["stops"] = pd.DataFrame.from_records( + [ + dict( + stop_id="A", + stop_code="A", + stop_name="A", + stop_desc="", + stop_lat=df_stops["geometry"].iloc[0].centroid.y, + stop_lon=df_stops["geometry"].iloc[0].centroid.x, + location_type=1, + parent_station=None, + ), + dict( + stop_id="B", + stop_code="B", + stop_name="B", + stop_desc="", + stop_lat=df_stops["geometry"].iloc[1].centroid.y, + stop_lon=df_stops["geometry"].iloc[1].centroid.x, + location_type=1, + parent_station=None, + ), + ] + ) trips = [] times = [] @@ -844,19 +1234,27 @@ def create(output_path): for origin, destination in [("A", "B"), ("B", "A")]: for hour in np.arange(1, 24): - trips.append(dict( - route_id = 1, service_id = 1, trip_id = trip_id - )) - - times.append(dict( - trip_id = trip_id, arrival_time = "%02d:00:00" % hour, - departure_time = "%02d:00:00" % hour, stop_id = origin, stop_sequence = 1 - )) + trips.append(dict(route_id=1, service_id=1, trip_id=trip_id)) + + times.append( + dict( + trip_id=trip_id, + arrival_time="%02d:00:00" % hour, + departure_time="%02d:00:00" % hour, + stop_id=origin, + stop_sequence=1, + ) + ) - times.append(dict( - trip_id = trip_id, arrival_time = "%02d:00:00" % (hour + 1), - departure_time = "%02d:00:00" % (hour + 1), stop_id = destination, stop_sequence = 2 - )) + times.append( + dict( + trip_id=trip_id, + arrival_time="%02d:00:00" % (hour + 1), + departure_time="%02d:00:00" % (hour + 1), + stop_id=destination, + stop_sequence=2, + ) + ) trip_id += 1 @@ -864,28 +1262,39 @@ def create(output_path): feed["stop_times"] = pd.DataFrame.from_records(times) # Transfers - feed["transfers"] = pd.DataFrame(dict( - from_stop_id = [], to_stop_id = [], transfer_type = [] - )) + feed["transfers"] = pd.DataFrame( + dict(from_stop_id=[], to_stop_id=[], transfer_type=[]) + ) os.mkdir("%s/gtfs_idf" % output_path) import data.gtfs.utils + data.gtfs.utils.write_feed(feed, "%s/gtfs_idf/IDFM-gtfs.zip" % output_path) # Dataset: Parc automobile - df_vehicles_region = pd.DataFrame(index = pd.MultiIndex.from_product([ - df["region"].unique(), - np.arange(20), - ], names = [ - "Code région", "Age au 01/01/2021" - ])).reset_index() + df_vehicles_region = pd.DataFrame( + index=pd.MultiIndex.from_product( + [ + df["region"].unique(), + np.arange(20), + ], + names=["Code région", "Age au 01/01/2021"], + ) + ).reset_index() # to enforce string - df_vehicles_region 
= pd.concat([df_vehicles_region, pd.DataFrame({ - "Code région": ["AB"], - "Age au 01/01/2021": [0], - })]) + df_vehicles_region = pd.concat( + [ + df_vehicles_region, + pd.DataFrame( + { + "Code région": ["AB"], + "Age au 01/01/2021": [0], + } + ), + ] + ) df_vehicles_region["Code région"] = df_vehicles_region["Code région"].astype(str) @@ -893,44 +1302,58 @@ def create(output_path): df_vehicles_region["Energie"] = "Gazole" df_vehicles_region["Vignette crit'air"] = "Crit'air 1" - df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"].astype(str) - df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"].replace("20", ">20") - df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"] + " ans" + df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region[ + "Age au 01/01/2021" + ].astype(str) + df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region[ + "Age au 01/01/2021" + ].replace("20", ">20") + df_vehicles_region["Age au 01/01/2021"] = ( + df_vehicles_region["Age au 01/01/2021"] + " ans" + ) - df_vehicles_commune = pd.DataFrame({ - "municipality": df["municipality"].unique() - }) + df_vehicles_commune = pd.DataFrame({"municipality": df["municipality"].unique()}) df_vehicles_commune["Parc au 01/01/2021"] = 100 df_vehicles_commune["Energie"] = "Gazole" df_vehicles_commune["Vignette Crit'air"] = "Crit'air 1" - df_vehicles_commune = pd.merge(df_vehicles_commune, df[[ - "municipality", "region", "department" - ]], on = "municipality") + df_vehicles_commune = pd.merge( + df_vehicles_commune, + df[["municipality", "region", "department"]], + on="municipality", + ) - df_vehicles_commune = df_vehicles_commune.rename(columns = { - "municipality": "Code commune", - "department": "Code départment", - "region": "Code région", - }) + df_vehicles_commune = df_vehicles_commune.rename( + columns={ + "municipality": "Code commune", + "department": "Code départment", + "region": "Code région", + } + ) os.mkdir("%s/vehicles" % output_path) - - with zipfile.ZipFile("%s/vehicles/parc_vp_regions.zip" % output_path, "w") as archive: + + with zipfile.ZipFile( + "%s/vehicles/parc_vp_regions.zip" % output_path, "w" + ) as archive: with archive.open("Parc_VP_Regions_2021.xlsx", "w") as f: df_vehicles_region.to_excel(f) - with zipfile.ZipFile("%s/vehicles/parc_vp_communes.zip" % output_path, "w") as archive: + with zipfile.ZipFile( + "%s/vehicles/parc_vp_communes.zip" % output_path, "w" + ) as archive: with archive.open("Parc_VP_Communes_2021.xlsx", "w") as f: df_vehicles_commune.to_excel(f) + if __name__ == "__main__": import shutil import sys import os + folder = sys.argv[1] os.makedirs(folder, exist_ok=True) for dir in os.listdir(folder): - shutil.rmtree(os.path.join(folder,dir)) + shutil.rmtree(os.path.join(folder, dir)) create(sys.argv[1])
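
Usage note: the script reformatted above generates mock input data sets (EGT 2010, census, MOBPRO/MOBSCO commute flows, BD-TOPO, BAN, SIRENE, urban type, OSM, GTFS and vehicle fleet files) into a target folder, and its __main__ block clears any existing subfolders before regenerating them. Below is a minimal usage sketch, assuming the file is importable as tests.testdata (an assumption; the real path appears in the diff header for this file) and that the osmosis binary is available on the PATH, since create() shells out to it for the OSM .pbf conversion.

    # Minimal sketch: regenerate the test fixtures into a scratch folder.
    # Assumptions: the module path tests.testdata and the folder name are
    # illustrative only; "osmosis" must be on the PATH for the OSM step.
    import os
    import shutil

    from tests.testdata import create  # assumed module path

    folder = "cache/test_data"  # hypothetical output location
    os.makedirs(folder, exist_ok=True)

    # Mirror the __main__ block: remove leftover subdirectories so the
    # os.mkdir() calls inside create() do not fail on a second run.
    for entry in os.listdir(folder):
        path = os.path.join(folder, entry)
        if os.path.isdir(path):
            shutil.rmtree(path)

    create(folder)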