Skip to content

Commit

Permalink
Chore: consistent naming for geography codes and areas
Browse files (browse the repository at this point in the history)
  • Loading branch information
luke-strange committed Aug 6, 2024
1 parent 3b74b3f commit 092858e
Show file tree
Hide file tree
Showing 19 changed files with 48,784 additions and 48,601 deletions.
19,594 changes: 9,797 additions & 9,797 deletions data/affordable-homes/by_tenure.csv

Large diffs are not rendered by default.

Binary file modified data/affordable-homes/by_tenure.parquet
Binary file not shown.
77,222 changes: 38,611 additions & 38,611 deletions data/house-prices/median_house_prices.csv

Large diffs are not rendered by default.

Binary file modified data/house-prices/median_house_prices.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion data/vacant-homes/AllCombined_Cleaned_2024.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AreaCode,AreaName,Measure,Year,Value
geography_code,geography_name,Measure,Year,Value
E06000001,Hartlepool,AllVacants,2004,1925
E06000001,Hartlepool,AllVacants,2005,1846
E06000001,Hartlepool,AllVacants,2006,1925
Expand Down
Binary file modified data/vacant-homes/AllCombined_Cleaned_2024.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion data/vacant-homes/absolute.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AreaCode,AreaName,Year,AllVacants,Dwellings,HouseholdProjection,LongTermVacants
geography_code,geography_name,Year,AllVacants,Dwellings,HouseholdProjection,LongTermVacants
E06000001,Hartlepool,2004,1925.0,40407.0,38156.0,1138.0
E06000001,Hartlepool,2005,1846.0,40626.0,38386.0,1051.0
E06000001,Hartlepool,2006,1925.0,40869.0,38621.0,664.0
Expand Down
Binary file modified data/vacant-homes/absolute.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion data/vacant-homes/headlines.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AreaCode,title,value,footnote
geography_code,title,value,footnote
E06000001,Long term vacants,1.4,of homes are long term vacant compared to 1.03% for all England
E06000002,Long term vacants,1.98,of homes are long term vacant compared to 1.03% for all England
E06000003,Long term vacants,1.34,of homes are long term vacant compared to 1.03% for all England
Expand Down
Binary file modified data/vacant-homes/headlines.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion data/vacant-homes/percentages.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AreaCode,AreaName,Year,LongTermVacants_pct,AllVacants_pct
geography_code,geography_name,Year,LongTermVacants_pct,AllVacants_pct
E06000001,Hartlepool,2004,2.82,4.76
E06000001,Hartlepool,2005,2.59,4.54
E06000001,Hartlepool,2006,1.62,4.71
Expand Down
Binary file modified data/vacant-homes/percentages.parquet
Binary file not shown.
20 changes: 14 additions & 6 deletions pipelines/affordable-housing/extract.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -13,16 +13,16 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/media/data/code/oi/housing/raw/affordable-homes')"
"PosixPath('/Users/lukestrange/Code/housing/pipelines/affordable-housing/raw/affordable-homes')"
]
},
"execution_count": 8,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -59,7 +59,7 @@
"# Check if the request was successful\n",
"if response.status_code == 200:\n",
" # Write the content of the response to a local file\n",
" with open(OUT / 'affordable_homes_open_data_202223', 'wb') as file:\n",
" with open(OUT / 'affordable_homes_open_data_202223.csv', 'wb') as file:\n",
" file.write(response.content)\n",
" print('File downloaded successfully')\n",
"else:\n",
Expand All @@ -81,7 +81,15 @@
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
Expand Down
148 changes: 91 additions & 57 deletions pipelines/affordable-housing/open-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
"outputs": [],
"source": [
"# Rename column\n",
"data.rename(columns={'LA code 202223': 'geography_code'}, inplace=True)"
"data.rename(columns={'LA code 202223': 'geography_code', 'LA name 202223': 'geography_name'}, inplace=True)"
]
},
{
Expand All @@ -69,9 +69,9 @@
"outputs": [],
"source": [
"# Sum values per year, per local authority, per tenure type\n",
"local_authority = data.groupby(['geography_code', 'Year', 'Tenure']).sum(numeric_only=True).reset_index()\n",
"local_authority = data.groupby(['geography_code', 'geography_name', 'Year', 'Tenure']).sum(numeric_only=True).reset_index()\n",
"\n",
"local_authority_wide = local_authority.pivot(index=['geography_code', 'Year'], columns='Tenure', values='Units')\n",
"local_authority_wide = local_authority.pivot(index=['geography_code', 'geography_name', 'Year'], columns='Tenure', values='Units')\n",
"\n",
"# Add a column for all affordable homes, which is the sum of all the tenure types.\n",
"local_authority_wide['All afforable'] = local_authority_wide.sum(axis=1)"
Expand All @@ -94,8 +94,9 @@
"\n",
"# Add the geo code for England and append it to the index.\n",
"all_england_wide['geography_code'] = 'E92000001'\n",
"all_england_wide.set_index('geography_code', append=True, inplace=True)\n",
"all_england_wide = all_england_wide.reorder_levels(['geography_code', 'Year'])"
"all_england_wide['geography_name'] = 'England'\n",
"all_england_wide.set_index(['geography_code', 'geography_name'], append=True, inplace=True)\n",
"all_england_wide = all_england_wide.reorder_levels(['geography_code', 'geography_name', 'Year'])"
]
},
{
Expand Down Expand Up @@ -146,6 +147,7 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Tenure</th>\n",
" <th>Affordable Home Ownership</th>\n",
" <th>Affordable Rent</th>\n",
Expand All @@ -159,6 +161,7 @@
" </tr>\n",
" <tr>\n",
" <th>geography_code</th>\n",
" <th>geography_name</th>\n",
" <th>Year</th>\n",
" <th></th>\n",
" <th></th>\n",
Expand All @@ -174,6 +177,7 @@
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">E06000001</th>\n",
" <th rowspan=\"5\" valign=\"top\">Hartlepool</th>\n",
" <th>1991-92</th>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
Expand Down Expand Up @@ -236,6 +240,7 @@
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
Expand All @@ -248,6 +253,7 @@
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">E92000001</th>\n",
" <th rowspan=\"5\" valign=\"top\">England</th>\n",
" <th>2018-19</th>\n",
" <td>2460.0</td>\n",
" <td>28957.0</td>\n",
Expand Down Expand Up @@ -313,61 +319,89 @@
"</div>"
],
"text/plain": [
"Tenure Affordable Home Ownership Affordable Rent \\\n",
"geography_code Year \n",
"E06000001 1991-92 13.0 NaN \n",
" 1992-93 29.0 NaN \n",
" 1993-94 20.0 NaN \n",
" 1994-95 22.0 NaN \n",
" 1995-96 23.0 NaN \n",
"... ... ... \n",
"E92000001 2018-19 2460.0 28957.0 \n",
" 2019-20 2108.0 28259.0 \n",
" 2020-21 1134.0 23786.0 \n",
" 2021-22 1047.0 26398.0 \n",
" 2022-23 1172.0 24547.0 \n",
"Tenure Affordable Home Ownership \\\n",
"geography_code geography_name Year \n",
"E06000001 Hartlepool 1991-92 13.0 \n",
" 1992-93 29.0 \n",
" 1993-94 20.0 \n",
" 1994-95 22.0 \n",
" 1995-96 23.0 \n",
"... ... \n",
"E92000001 England 2018-19 2460.0 \n",
" 2019-20 2108.0 \n",
" 2020-21 1134.0 \n",
" 2021-22 1047.0 \n",
" 2022-23 1172.0 \n",
"\n",
"Tenure Affordable Rent First Homes \\\n",
"geography_code geography_name Year \n",
"E06000001 Hartlepool 1991-92 NaN NaN \n",
" 1992-93 NaN NaN \n",
" 1993-94 NaN NaN \n",
" 1994-95 NaN NaN \n",
" 1995-96 NaN NaN \n",
"... ... ... \n",
"E92000001 England 2018-19 28957.0 NaN \n",
" 2019-20 28259.0 NaN \n",
" 2020-21 23786.0 NaN \n",
" 2021-22 26398.0 35.0 \n",
" 2022-23 24547.0 1021.0 \n",
"\n",
"Tenure Intermediate Rent \\\n",
"geography_code geography_name Year \n",
"E06000001 Hartlepool 1991-92 NaN \n",
" 1992-93 NaN \n",
" 1993-94 NaN \n",
" 1994-95 NaN \n",
" 1995-96 NaN \n",
"... ... \n",
"E92000001 England 2018-19 1383.0 \n",
" 2019-20 1748.0 \n",
" 2020-21 2026.0 \n",
" 2021-22 1477.0 \n",
" 2022-23 2701.0 \n",
"\n",
"Tenure First Homes Intermediate Rent \\\n",
"geography_code Year \n",
"E06000001 1991-92 NaN NaN \n",
" 1992-93 NaN NaN \n",
" 1993-94 NaN NaN \n",
" 1994-95 NaN NaN \n",
" 1995-96 NaN NaN \n",
"... ... ... \n",
"E92000001 2018-19 NaN 1383.0 \n",
" 2019-20 NaN 1748.0 \n",
" 2020-21 NaN 2026.0 \n",
" 2021-22 35.0 1477.0 \n",
" 2022-23 1021.0 2701.0 \n",
"Tenure London Affordable Rent \\\n",
"geography_code geography_name Year \n",
"E06000001 Hartlepool 1991-92 NaN \n",
" 1992-93 NaN \n",
" 1993-94 NaN \n",
" 1994-95 NaN \n",
" 1995-96 NaN \n",
"... ... \n",
"E92000001 England 2018-19 1002.0 \n",
" 2019-20 1797.0 \n",
" 2020-21 2102.0 \n",
" 2021-22 3101.0 \n",
" 2022-23 4296.0 \n",
"\n",
"Tenure London Affordable Rent Shared Ownership Social Rent \\\n",
"geography_code Year \n",
"E06000001 1991-92 NaN NaN 134.0 \n",
" 1992-93 NaN NaN 204.0 \n",
" 1993-94 NaN NaN 123.0 \n",
" 1994-95 NaN NaN 149.0 \n",
" 1995-96 NaN NaN 134.0 \n",
"... ... ... ... \n",
"E92000001 2018-19 1002.0 17028.0 6363.0 \n",
" 2019-20 1797.0 18239.0 6766.0 \n",
" 2020-21 2102.0 16796.0 6051.0 \n",
" 2021-22 3101.0 19338.0 7659.0 \n",
" 2022-23 4296.0 20517.0 9535.0 \n",
"Tenure Shared Ownership Social Rent Unknown \\\n",
"geography_code geography_name Year \n",
"E06000001 Hartlepool 1991-92 NaN 134.0 NaN \n",
" 1992-93 NaN 204.0 NaN \n",
" 1993-94 NaN 123.0 NaN \n",
" 1994-95 NaN 149.0 NaN \n",
" 1995-96 NaN 134.0 NaN \n",
"... ... ... ... \n",
"E92000001 England 2018-19 17028.0 6363.0 33.0 \n",
" 2019-20 18239.0 6766.0 43.0 \n",
" 2020-21 16796.0 6051.0 28.0 \n",
" 2021-22 19338.0 7659.0 42.0 \n",
" 2022-23 20517.0 9535.0 120.0 \n",
"\n",
"Tenure Unknown All afforable \n",
"geography_code Year \n",
"E06000001 1991-92 NaN 147.0 \n",
" 1992-93 NaN 233.0 \n",
" 1993-94 NaN 143.0 \n",
" 1994-95 NaN 171.0 \n",
" 1995-96 NaN 157.0 \n",
"... ... ... \n",
"E92000001 2018-19 33.0 57226.0 \n",
" 2019-20 43.0 58960.0 \n",
" 2020-21 28.0 51923.0 \n",
" 2021-22 42.0 59097.0 \n",
" 2022-23 120.0 63909.0 \n",
"Tenure All afforable \n",
"geography_code geography_name Year \n",
"E06000001 Hartlepool 1991-92 147.0 \n",
" 1992-93 233.0 \n",
" 1993-94 143.0 \n",
" 1994-95 171.0 \n",
" 1995-96 157.0 \n",
"... ... \n",
"E92000001 England 2018-19 57226.0 \n",
" 2019-20 58960.0 \n",
" 2020-21 51923.0 \n",
" 2021-22 59097.0 \n",
" 2022-23 63909.0 \n",
"\n",
"[9796 rows x 9 columns]"
]
Expand Down
Loading

0 comments on commit 092858e

Please sign in to comment.