Skip to content

Commit

Permalink
Add parquet files for all data
Browse files Browse the repository at this point in the history
  • Loading branch information
luke-strange committed Aug 2, 2024
1 parent ace7e4e commit a0f40ed
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 22 deletions.
Binary file added data/house-prices/median_house_prices.parquet
Binary file not shown.
Binary file not shown.
Binary file added data/vacant-homes/absolute.parquet
Binary file not shown.
Binary file added data/vacant-homes/percentages.parquet
Binary file not shown.
4 changes: 4 additions & 0 deletions pipelines/affordable-housing/extract.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.12.4"
}
},
"nbformat": 4,
Expand Down
5 changes: 2 additions & 3 deletions pipelines/affordable-housing/open-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
}
],
"source": [
"import os\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"ROOT = Path('.')\n",
Expand Down Expand Up @@ -117,7 +116,7 @@
"source": [
"# Write the files to CSV\n",
"combined.to_csv(ROOT / 'data/affordable-homes/by_tenure.csv')\n",
"combined.to_parquet(ROOT / 'data/affordable-homes/by_tenure.parquet');\n",
"combined.to_parquet(ROOT / 'data/affordable-homes/by_tenure.parquet')\n",
"# all_england.to_csv('data/affordable-homes/by_tenure_england.csv')"
]
},
Expand Down Expand Up @@ -176,7 +175,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
23 changes: 18 additions & 5 deletions pipelines/house-prices/transform.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,23 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/Users/lukestrange/Code/housing')"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import os \n",
"os.chdir('../..')"
"from pathlib import Path\n",
"ROOT = Path('../..')\n",
"ROOT.resolve()"
]
},
{
Expand Down Expand Up @@ -46,7 +58,7 @@
"metadata": {},
"outputs": [],
"source": [
"fpath = 'raw/house-prices/hpssadataset9medianpricepaidforadministrativegeographies.xls'"
"fpath = ROOT / 'raw/house-prices/hpssadataset9medianpricepaidforadministrativegeographies.xls'"
]
},
{
Expand Down Expand Up @@ -75,7 +87,8 @@
"metadata": {},
"outputs": [],
"source": [
"combined.to_csv('data/house-prices/median_house_prices.csv')"
"combined.to_csv(ROOT / 'data/house-prices/median_house_prices.csv')\n",
"combined.to_parquet(ROOT / 'data/house-prices/median_house_prices.parquet')"
]
}
],
Expand Down
19 changes: 6 additions & 13 deletions src/data/areas/place-page/_data/headlines.sql
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
-- Vacancy percentages for the requested area alongside the 'England' row,
-- restricted to a single year.
-- Positional bind parameters: 1) area name, 2) year.
SELECT
    "AreaCode",
    "AreaName",
    "Year",
    "LongTermVacants_pct",
    "AllVacants_pct"
FROM read_csv(
    -- File path is a string literal, so it takes single quotes
    -- (double quotes denote identifiers in SQL).
    'data/vacant-homes/percentages.csv',
    columns = {
        'AreaCode': 'VARCHAR',
        'AreaName': 'VARCHAR',
        'Year': 'INTEGER',
        'LongTermVacants_pct': 'DOUBLE',
        'AllVacants_pct': 'DOUBLE'
    },
    -- NOTE(review): per the reader's documentation this skips rows that fail
    -- to parse instead of raising -- confirm silent row loss is acceptable.
    ignore_errors = true
)
WHERE "AreaName" IN (?, 'England')
    AND "Year" = ?;
-- Headline rows (title, value, footnote) for a single area.
-- Positional bind parameter: 1) area code.
SELECT
    "AreaCode",
    "title",
    "value",
    "footnote"
-- File path is a string literal, so it takes single quotes
-- (double quotes denote identifiers in SQL).
FROM read_parquet('data/vacant-homes/headlines.parquet')
WHERE "AreaCode" = ?;
3 changes: 2 additions & 1 deletion src/data/areas/place-page/_data/simple.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
SELECT "AreaCode", "AreaName", "Year", "AllVacants", "Dwellings", "HouseholdProjection", "LongTermVacants"
SELECT
"AreaCode", "AreaName", "Year", "AllVacants", "Dwellings", "HouseholdProjection", "LongTermVacants"
FROM read_csv("data/vacant-homes/absolute.csv",
columns = {
'AreaCode': 'VARCHAR',
Expand Down

0 comments on commit a0f40ed

Please sign in to comment.