From c4e0e38045278cd43ea86ac56ba85f51b7949ddb Mon Sep 17 00:00:00 2001
From: Rob Harrison <48765695+robjharrison@users.noreply.github.com>
Date: Wed, 14 Aug 2024 08:39:58 +0000
Subject: [PATCH] minor html reformatting
---
index.html | 4 ++--
...ed_childrens_services_inspection_scrape.py | 18 ++++++++++++------
ofsted_childrens_services_jtai_overview.xlsx | Bin 36432 -> 36431 bytes
setup.sh | 2 ++
4 files changed, 16 insertions(+), 8 deletions(-)
mode change 100644 => 100755 ofsted_childrens_services_inspection_scrape.py
diff --git a/index.html b/index.html
index 2df323f..590eb9c 100644
--- a/index.html
+++ b/index.html
@@ -25,8 +25,8 @@
Ofsted CS JTAI Inpections Overview
Summarised outcomes of published JTAI inspection reports by Ofsted, refreshed weekly.
An expanded version of the shown summary sheet, refreshed concurrently, is available to download here as an .xlsx file.
Data summary is based on the original JTAI Outcomes Summary published periodically by the ADCS: https://www.adcs.org.uk/inspection-of-childrens-services/. Read the source ILACS tool/project background details and future work..
- Disclaimer: This summary is built from scraped data direct from https://reports.ofsted.gov.uk/ published PDF inspection report files. As a result of the nuances|variance within the inspection report content or pdf encoding, we're noting some problematic data extraction for a small number of LAs*.
*Known extraction issues: JTAI report structure varies pre|post 2023. ADCS published inspection Themes unavailable via current scrape process. Publication date is based on CSS tag data and may not always reflect actual report publication. Where 1+ case studies are reported on, only 1 is pulled through.
Feedback on specific problems|inaccuracies|suggestions welcomed.*
- Summary data last updated: 13 08 2024 17:53
+ Disclaimer: This summary is built from scraped data direct from https://reports.ofsted.gov.uk/ published PDF inspection report files.
As a result of the nuances|variance within the inspection report content or pdf encoding, we're noting problematic data extraction for a small number of LAs*.
*Known extraction issues:
- JTAI report structure varies pre|post 2023(?), hence sparse|mixed summary columns until improved|agreed approach finalised.
- ADCS published inspection Themes unavailable via current scrape process. This being worked on currently.
- Publication date, isn't available within inspection reports and is therefore based on CSS tag data and may not always reflect actual report publication.
- Where 1+ case studies are reported on (e.g. Peterborough City), only 1 summary is pulled through.
Feedback highlighting problems|inaccuracies|suggestions welcomed.
+ Summary data last updated: 14 08 2024 09:36
LA inspections last updated: []
diff --git a/ofsted_childrens_services_inspection_scrape.py b/ofsted_childrens_services_inspection_scrape.py
old mode 100644
new mode 100755
index 2123081..cd4b06d
--- a/ofsted_childrens_services_inspection_scrape.py
+++ b/ofsted_childrens_services_inspection_scrape.py
@@ -1035,10 +1035,15 @@ def save_to_html(data, column_order, local_link_column=None, web_link_column=Non
)
disclaimer_text = (
- 'Disclaimer: This summary is built from scraped data direct from https://reports.ofsted.gov.uk/ published PDF inspection report files. '
- 'As a result of the nuances|variance within the inspection report content or pdf encoding, we\'re noting some problematic data extraction for a small number of LAs*.
'
- '*Known extraction issues: JTAI report structure varies pre|post 2023. ADCS published inspection Themes unavailable via current scrape process. Publication date is based on CSS tag data and may not always reflect actual report publication. Where 1+ case studies are reported on, only 1 is pulled through.
'
- 'Feedback on specific problems|inaccuracies|suggestions welcomed.*'
+ 'Disclaimer: This summary is built from scraped data direct from https://reports.ofsted.gov.uk/ published PDF inspection report files.
'
+ 'As a result of the nuances|variance within the inspection report content or pdf encoding, we\'re noting problematic data extraction for a small number of LAs*.
'
+ '*Known extraction issues: '
+ '- JTAI report structure varies pre|post 2023(?), hence sparse|mixed summary columns until improved|agreed approach finalised.
'
+ '- ADCS published inspection Themes unavailable via current scrape process. This being worked on currently.
'
+ '- Publication date, isn\'t available within inspection reports and is therefore based on CSS tag data and may not always reflect actual report publication.
'
+ '- Where 1+ case studies are reported on (e.g. Peterborough City), only 1 summary is pulled through.
'
+ '
'
+ 'Feedback highlighting problems|inaccuracies|suggestions welcomed.'
)
# # testing
@@ -1069,8 +1074,9 @@ def save_to_html(data, column_order, local_link_column=None, web_link_column=Non
# # If a web link column is specified, convert that column's values to HTML hyperlinks
# # Shortening the hyperlink text by taking the part after the last '/'
if web_link_column:
- data[web_link_column] = data[web_link_column].apply(lambda x: f'ofsted.gov.uk/{x.rsplit("/", 1)[-1]}') # publ_date
- # if web_link_column:
+ data[web_link_column] = data[web_link_column].apply(lambda x: f'ofsted.gov.uk/{x.rsplit("/", 1)[-1]}')
+
+ # if web_link_column: # if the link is a bytes obj, this might be problematic
# data[web_link_column] = data[web_link_column].apply(lambda x: f'ofsted.gov.uk/{x.rsplit("/", 1)[-1]}' if isinstance(x, str) else x) # publ_date
# Convert column names to title/upper case
diff --git a/ofsted_childrens_services_jtai_overview.xlsx b/ofsted_childrens_services_jtai_overview.xlsx
index 36f2bd05c70054b3ce27afc9ad3d60391492424e..ac6af74b10e869a60476ca79991b26aa226d6511 100644
GIT binary patch
delta 366
zcmV-!0g?XDoC43B03=Xa9hmsk>f3$^Hs|{E$z(oP1
z9ATzHNAC^6*s&u>>XP74LELLAsSl++U_;qAJpj(rbcH2&%BiQg1IA_|q5;X-gfu_4
z=>eQ!Xh4GU4rfV*uU-#Uy4i+axp7M2&jx0#qd2)wPE5wOZIgDGSgb;%iF7O2;@NG|bh5f0Z-=eOa+djb6M9Q2vDVUbH8@F)Zzm$;+yv3PpkZBU)3
z`7%z6I9u-0f-F`fUww5W@A|JmISlzXaRgj)-+zMo53{w27z70{q=A$rlhTb;0ko4R
Mjw1$WivR!s03|x6IsgCw
delta 375
zcmV--0f_$3oC46C0zVy_cI9Gu
zly&qA)(NdjlqD&OKrzjQ+L!2Kvx*lea-J$qYptL}EjYBS?h0lIf72G;tTtf102c+2
za)g-@9lSRLW5*64sY`-G1#zdXq&}2(j}2wtbPsrzrgJR8Q%*g_9WXW{5uK2njY#uj
z>mI-vh8iR&?{Jo6_~P|orJHQ%l`E$dzBMpuJ&U9J=)`DjnkH%T-Wu{}c)Nam?^lh5
z>Q2P~RRw2+S)g9qO{&1xML1kdonMC&?*#CtHs}*?r$vT7;86%bhPY?tGk<>BtWY&e
zXZLZsh_if?&B;Sf(#NlE;PQ~|Y