Skip to content

Commit

Permalink
Experiment converting data to parquet format
Browse files Browse the repository at this point in the history
  • Loading branch information
gilesdring committed Aug 2, 2024
1 parent 1de90fa commit f875c15
Show file tree
Hide file tree
Showing 7 changed files with 490 additions and 50 deletions.
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ bs4 = "*"
dvc = "*"
petl = "*"
xlrd = "*"
pyarrow = "*"

[dev-packages]
ipykernel = "*"

[requires]
python_version = "3.12"
409 changes: 398 additions & 11 deletions Pipfile.lock

Large diffs are not rendered by default.

Binary file added data/affordable-homes/by_tenure.parquet
Binary file not shown.
51 changes: 29 additions & 22 deletions pipelines/affordable-housing/extract.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,43 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/lukestrange/Code/housing'"
"PosixPath('/media/data/code/oi/housing/raw/affordable-homes')"
]
},
"execution_count": 1,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import requests\n",
"ROOT = Path(\".\").resolve()\n",
"\n",
"os.chdir(\"../..\")\n",
"os.getcwd()"
"OUT = ROOT / \"raw/affordable-homes/\"\n",
"OUT.mkdir(parents=True, exist_ok=True)\n",
"\n",
"OUT"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand All @@ -47,31 +59,26 @@
"# Check if the request was successful\n",
"if response.status_code == 200:\n",
" # Write the content of the response to a local file\n",
" with open('raw/affordable-homes/affordable_homes_open_data_202223', 'wb') as file:\n",
" with open(OUT / 'affordable_homes_open_data_202223', 'wb') as file:\n",
" file.write(response.content)\n",
" print('File downloaded successfully')\n",
"else:\n",
" print('Failed to download file. Status code:', response.status_code)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "housing-2Roxq_cV",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
Expand Down
72 changes: 58 additions & 14 deletions pipelines/affordable-housing/open-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,38 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/media/data/code/oi/housing')"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"os.chdir(\"../..\")"
"ROOT = Path('.')\n",
"ROOT.resolve()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('raw/affordable-homes/affordable_homes_open_data_202223')"
"data = pd.read_csv(ROOT / 'raw/affordable-homes/affordable_homes_open_data_202223')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -32,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -42,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -52,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -67,7 +80,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -88,7 +101,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -98,15 +111,46 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Write the files to CSV and Parquet\n",
"combined.to_csv('data/affordable-homes/by_tenure.csv')\n",
"combined.to_csv(ROOT / 'data/affordable-homes/by_tenure.csv')\n",
"combined.to_parquet(ROOT / 'data/affordable-homes/by_tenure.parquet');\n",
"# all_england.to_csv('data/affordable-homes/by_tenure_england.csv')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Tenure\n",
"Affordable Home Ownership float64\n",
"Affordable Rent float64\n",
"First Homes float64\n",
"Intermediate Rent float64\n",
"London Affordable Rent float64\n",
"Shared Ownership float64\n",
"Social Rent float64\n",
"Unknown float64\n",
"All afforable float64\n",
"dtype: object"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"combined.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -118,7 +162,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "housing-2Roxq_cV",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -132,7 +176,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
"version": "3.12.4"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions src/_includes/template/areas.vto
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,10 @@ vispage: Areas
{{ /if }}

<h3>Dwellings and household projections in {{ title }} <a id='dwellings-and-projections' class='anchor' href='#dwellings-and-projections' data-dependencies='/assets/js/section-links.js'>§</a></h3>
{{ console.time('My code') }}
{{# {{ console.time('My code') }} #}}
{{ set chart_data = simple(areacode, dwellingsLastPublished) }}
{{ set y_spacing = chart_data |> ySpacing('Dwellings') }}
{{ console.timeEnd('My code') }}
{{# {{ console.timeEnd('My code') }} #}}
{{ if chart_data.length > 0 && percentage_chart_data.length > 0 }}
{{ set seriesObject = [
{ title: 'Dwellings', x: 'Year', y: 'Dwellings', tooltip: '<strong class="subtitle">Dwellings</strong><br />{{ Year }}: <strong>{{ Dwellings | toLocaleString() }}</strong>' },
Expand Down
2 changes: 1 addition & 1 deletion src/data/areas/place-page/_data/affordableHomes.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT
*
FROM
read_csv("data/affordable-homes/by_tenure.csv")
read_parquet("data/affordable-homes/by_tenure.parquet")
WHERE
"geography_code" == ?
ORDER BY
Expand Down

0 comments on commit f875c15

Please sign in to comment.