Skip to content

Commit

Permalink
Update pyairtable and unpick schools pull
Browse files Browse the repository at this point in the history
  • Loading branch information
gilesdring committed Oct 18, 2023
1 parent 5714bb8 commit 8c6da3c
Show file tree
Hide file tree
Showing 9 changed files with 1,819 additions and 1,583 deletions.
3,255 changes: 1,758 additions & 1,497 deletions Pipfile.lock

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions scripts/metrics/schools/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
import ast


# Directory roots used by the schools metrics scripts.
WORKING_DIR = os.path.join('working', 'metrics', 'airtable')
DATA_DIR = os.path.join("data", "metrics", "schools")

# Create both directories at import time so later file writes cannot fail
# on a missing parent folder; exist_ok makes repeated imports harmless.
for _directory in (WORKING_DIR, DATA_DIR):
    os.makedirs(_directory, exist_ok=True)

# The events CSV keeps the same file name in both locations.
_EVENTS_FILENAME = 'schools_events.csv'
RAW_SCHOOLS_DATA = os.path.join(WORKING_DIR, _EVENTS_FILENAME)
SCHOOLS_DATA = os.path.join(DATA_DIR, _EVENTS_FILENAME)


def literal_converter(series):
    """Parse string-encoded Python literals held in a series.

    Every value is passed through ``ast.literal_eval``. Values that are not
    valid Python literals (plain text, non-string values, malformed input)
    are returned unchanged.

    Args:
        series: An object exposing ``apply(func)``; typically a pandas
            Series read from CSV.

    Returns:
        Whatever ``series.apply`` returns, with each parseable value replaced
        by the Python object it denotes.
    """
    def convert(value):
        try:
            return ast.literal_eval(value)
        except (
            SyntaxError,
            ValueError,
            TypeError,
            MemoryError,
            RecursionError,
        ):
            # ast.literal_eval is documented to raise any of these on
            # malformed input (e.g. "{[1]: 2}" raises TypeError because the
            # key is unhashable). Fall back to the raw value so one odd cell
            # does not abort the whole conversion.
            return value
    return series.apply(convert)
26 changes: 13 additions & 13 deletions scripts/metrics/schools/dvc.lock
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
schema: '2.0'
stages:
transform:
cmd: PYTHONPATH=scripts/ python scripts/metrics/schools/transform.py
cmd: PYTHONPATH=. python scripts/metrics/schools/transform.py
deps:
- path: scripts/metrics/schools/transform.py
hash: md5
md5: f9d56b1dfd266dbc988aa16efb51e45b
size: 2970
md5: 9f855fa85c5d5667f5cb25db54dc4e12
size: 2597
- path: working/metrics/airtable/schools_events.csv
hash: md5
md5: 3a91673eeb29b41175d06b9d47d82f98
Expand All @@ -17,19 +17,19 @@ stages:
md5: 0e54e5a0dbe6a3d9e14afb5b1cfcc1c8
size: 61461
extract:
cmd: PYTHONPATH=scripts/ python scripts/metrics/schools/extract.py
cmd: PYTHONPATH=. python scripts/metrics/schools/extract.py
deps:
- path: scripts/metrics/schools/extract.py
hash: md5
md5: e29be7bcd549e8cf37dd1aa0135a9027
size: 2071
md5: ca0d85afa313500f9affd3151b5be5d5
size: 1802
outs:
- path: working/metrics/airtable/schools_events.csv
hash: md5
md5: 3a91673eeb29b41175d06b9d47d82f98
size: 150940
prepare:
cmd: PYTHONPATH=scripts/ python scripts/metrics/schools/prepare.py
cmd: PYTHONPATH=. python scripts/metrics/schools/prepare.py
deps:
- path: data/metrics/schools/schools_events.csv
hash: md5
Expand All @@ -45,8 +45,8 @@ stages:
size: 19576
- path: scripts/metrics/schools/prepare.py
hash: md5
md5: 359f1c098e76fecfe474202e30e19175
size: 5014
md5: 87e2785d3b5f2a89e370001fd1ec26be
size: 5027
outs:
- path: docs/metrics/schools/_data/engagements_by_ward.csv
hash: md5
Expand All @@ -58,19 +58,19 @@ stages:
size: 1537
- path: docs/metrics/schools/_data/headlines.json
hash: md5
md5: 6f1db54e255f75df43b090312b5a881f
md5: e93ea7ec2f9eccde71f46f5258f423e0
size: 373
- path: docs/metrics/schools/_data/school_engagement_counts.csv
hash: md5
md5: bcaf6677d54af4f59a6ed8a8e96c05d9
size: 9271
ref_data:
cmd: PYTHONPATH=scripts/ python scripts/metrics/schools/get_reference_data.py
cmd: PYTHONPATH=. python scripts/metrics/schools/get_reference_data.py
deps:
- path: scripts/metrics/schools/get_reference_data.py
hash: md5
md5: 9f71435b8679733697fd6bb909349a7a
size: 686
md5: cb33136049d882178f56ed3e9ad8db67
size: 620
outs:
- path: data/reference/schools.csv
hash: md5
Expand Down
12 changes: 4 additions & 8 deletions scripts/metrics/schools/dvc.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
stages:
ref_data:
wdir: ../../..
cmd: PYTHONPATH=scripts/ python scripts/metrics/schools/get_reference_data.py
always_changed: true
cmd: PYTHONPATH=. python scripts/metrics/schools/get_reference_data.py
deps:
- scripts/metrics/schools/get_reference_data.py
outs:
Expand All @@ -12,8 +11,7 @@ stages:

extract:
wdir: ../../..
cmd: PYTHONPATH=scripts/ python scripts/metrics/schools/extract.py
always_changed: true
cmd: PYTHONPATH=. python scripts/metrics/schools/extract.py
deps:
- scripts/metrics/schools/extract.py
outs:
Expand All @@ -23,9 +21,7 @@ stages:

transform:
wdir: ../../..
cmd: PYTHONPATH=scripts/ python scripts/metrics/schools/transform.py
always_changed: true
frozen: false
cmd: PYTHONPATH=. python scripts/metrics/schools/transform.py
deps:
- scripts/metrics/schools/transform.py
- working/metrics/airtable/schools_events.csv
Expand All @@ -36,7 +32,7 @@ stages:

prepare:
wdir: ../../..
cmd: PYTHONPATH=scripts/ python scripts/metrics/schools/prepare.py
cmd: PYTHONPATH=. python scripts/metrics/schools/prepare.py
deps:
- scripts/metrics/schools/prepare.py
- data/metrics/schools/schools_events.csv
Expand Down
25 changes: 9 additions & 16 deletions scripts/metrics/schools/extract.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,25 @@
import os
from pyairtable import Table
import pyairtable.formulas as f
import pandas as pd
from util.logger import logging, log_formatter
from scripts.util.logger import logging, log_formatter


logger = logging.getLogger('schools.extract')
log_handler = logging.FileHandler('working/log/schools_extract.log', mode='w', encoding='utf-8')
log_handler = logging.FileHandler(
'working/log/schools_extract.log', mode='w', encoding='utf-8')
log_handler.setLevel(logging.INFO)
log_handler.setFormatter(log_formatter)
logger.addHandler(log_handler)
logger.info('Set up logging')

API_KEY = os.environ['AIRTABLE_API_KEY']

WORKING_DIR = os.path.join('working', 'metrics', 'airtable')
os.makedirs(WORKING_DIR, exist_ok=True)

SCHOOLS_DATA = os.path.join(WORKING_DIR, 'schools_events.csv')
import lib.sources.airtable as airtable
from config import SCHOOLS_DATA


def fetch_data():
BASE_ID = 'appHAh7IYG6p2w5Yo'
TABLE_NAME = 'EVENTS'
VIEW_NAME = 'OI Creative Learning Evaluation Data'

table = Table(API_KEY, BASE_ID, TABLE_NAME)

school_events = table.all(
school_events = airtable.query(
BASE_ID, TABLE_NAME,
view=VIEW_NAME,
fields=['Event Unique Identifier',
'Event name',
Expand All @@ -51,7 +44,7 @@ def fetch_data():
'Ward (from School)',
'Number of booked participants',
])
return pd.json_normalize([x['fields'] for x in school_events])
return school_events


if __name__ == '__main__':
Expand Down
23 changes: 9 additions & 14 deletions scripts/metrics/schools/get_reference_data.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,20 @@
import os
import re
import pandas as pd
from pyairtable import Table

from util.geography import fuzzy_match_leeds_wards

API_KEY = os.environ['AIRTABLE_API_KEY']
from lib.sources.airtable import query
from scripts.util.geography import fuzzy_match_leeds_wards


def fetch_data():
BASE_ID = 'appHAh7IYG6p2w5Yo'
TABLE_NAME = 'Schools'

table = Table(API_KEY, BASE_ID, TABLE_NAME)
schools = query(BASE_ID, TABLE_NAME,
fields=[
'School Name',
'Postcode',
'Ward',
])

schools = table.all(
fields=['School Name',
'Postcode',
'Ward',
])
return pd.json_normalize([x['fields'] for x in schools]).rename(columns = lambda x: re.sub(r'\W+', '_', x.lower()))
return schools.rename(columns=lambda x: re.sub(r'\W+', '_', x.lower()))


if __name__ == '__main__':
Expand Down
6 changes: 3 additions & 3 deletions scripts/metrics/schools/prepare.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os
import pandas as pd

from transform import SCHOOLS_DATA, literal_converter
from util.geography import fuzzy_match_leeds_wards
from util.logger import logging, log_formatter
from scripts.util.geography import fuzzy_match_leeds_wards
from scripts.util.logger import logging, log_formatter
from config import SCHOOLS_DATA, literal_converter

logger = logging.getLogger('schools.prepare')
log_handler = logging.FileHandler('working/log/schools_prepare.log', mode='w', encoding='utf-8')
Expand Down
19 changes: 2 additions & 17 deletions scripts/metrics/schools/transform.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,9 @@
import json
import os
import re
import ast

import pandas as pd
from extract import SCHOOLS_DATA as RAW_SCHOOLS_DATA
from util.geography import fuzzy_match_ward_name_to_code

DATA_DIR = os.path.join("data", "metrics", "schools")
os.makedirs(DATA_DIR, exist_ok=True)
SCHOOLS_DATA = os.path.join(DATA_DIR, 'schools_events.csv')


def literal_converter(series):
def convert(value):
try:
return ast.literal_eval(value)
except (SyntaxError, ValueError):
return value
return series.apply(convert)
from config import literal_converter, RAW_SCHOOLS_DATA, SCHOOLS_DATA
from scripts.util.geography import fuzzy_match_ward_name_to_code


def read_raw_data():
Expand Down
15 changes: 0 additions & 15 deletions scripts/util/airtable.py

This file was deleted.

0 comments on commit 8c6da3c

Please sign in to comment.