diff --git a/asf_heat_pump_suitability/pipeline/prepare_features/epc.py b/asf_heat_pump_suitability/pipeline/prepare_features/epc.py index c9b386e..2425c2f 100644 --- a/asf_heat_pump_suitability/pipeline/prepare_features/epc.py +++ b/asf_heat_pump_suitability/pipeline/prepare_features/epc.py @@ -15,12 +15,14 @@ def add_col_msoa_avg_outdoor_space_property_type( Returns: pl.DataFrame: EPC dataset with secondary property type mapped from ONS garden size dataset """ + houses = ["Detached", "Semi-Detached", "Terraced (including end-terrace)"] + df = df.with_columns( - pl.when(pl.col(ptype_col).str.to_lowercase().str.contains("house")) + pl.when(pl.col(ptype_col).is_in(houses)) .then(pl.lit("Houses")) .when(pl.col(ptype_col).str.to_lowercase().str.contains("flat")) .then(pl.lit("Flats")) - .otherwise(pl.lit(ptype_col)) + .otherwise(pl.lit("unknown")) .alias("msoa_avg_outdoor_space_property_type") )