From c4e0e38045278cd43ea86ac56ba85f51b7949ddb Mon Sep 17 00:00:00 2001 From: Rob Harrison <48765695+robjharrison@users.noreply.github.com> Date: Wed, 14 Aug 2024 08:39:58 +0000 Subject: [PATCH] minor html reformatting --- index.html | 4 ++-- ...ed_childrens_services_inspection_scrape.py | 18 ++++++++++++------ ofsted_childrens_services_jtai_overview.xlsx | Bin 36432 -> 36431 bytes setup.sh | 2 ++ 4 files changed, 16 insertions(+), 8 deletions(-) mode change 100644 => 100755 ofsted_childrens_services_inspection_scrape.py diff --git a/index.html b/index.html index 2df323f..590eb9c 100644 --- a/index.html +++ b/index.html @@ -25,8 +25,8 @@

Ofsted CS JTAI Inspections Overview

Summarised outcomes of published JTAI inspection reports by Ofsted, refreshed weekly.
An expanded version of the shown summary sheet, refreshed concurrently, is available to download here as an .xlsx file.
Data summary is based on the original JTAI Outcomes Summary published periodically by the ADCS: https://www.adcs.org.uk/inspection-of-childrens-services/. Read the source ILACS tool/project background details and future work.

-

Disclaimer: This summary is built from scraped data direct from https://reports.ofsted.gov.uk/ published PDF inspection report files. As a result of the nuances|variance within the inspection report content or pdf encoding, we're noting some problematic data extraction for a small number of LAs*.
*Known extraction issues: JTAI report structure varies pre|post 2023. ADCS published inspection Themes unavailable via current scrape process. Publication date is based on CSS tag data and may not always reflect actual report publication. Where 1+ case studies are reported on, only 1 is pulled through.
Feedback on specific problems|inaccuracies|suggestions welcomed.*

-

Summary data last updated: 13 08 2024 17:53

+

Disclaimer: This summary is built from scraped data direct from https://reports.ofsted.gov.uk/ published PDF inspection report files.
As a result of the nuances|variance within the inspection report content or pdf encoding, we're noting problematic data extraction for a small number of LAs*.
*Known extraction issues:

Feedback highlighting problems|inaccuracies|suggestions welcomed.

+

Summary data last updated: 14 08 2024 09:36

LA inspections last updated: []

diff --git a/ofsted_childrens_services_inspection_scrape.py b/ofsted_childrens_services_inspection_scrape.py old mode 100644 new mode 100755 index 2123081..cd4b06d --- a/ofsted_childrens_services_inspection_scrape.py +++ b/ofsted_childrens_services_inspection_scrape.py @@ -1035,10 +1035,15 @@ def save_to_html(data, column_order, local_link_column=None, web_link_column=Non ) disclaimer_text = ( - 'Disclaimer: This summary is built from scraped data direct from https://reports.ofsted.gov.uk/ published PDF inspection report files. ' - 'As a result of the nuances|variance within the inspection report content or pdf encoding, we\'re noting some problematic data extraction for a small number of LAs*.
' - '*Known extraction issues: JTAI report structure varies pre|post 2023. ADCS published inspection Themes unavailable via current scrape process. Publication date is based on CSS tag data and may not always reflect actual report publication. Where 1+ case studies are reported on, only 1 is pulled through.
' - 'Feedback on specific problems|inaccuracies|suggestions welcomed.*' + 'Disclaimer: This summary is built from scraped data direct from https://reports.ofsted.gov.uk/ published PDF inspection report files.
' + 'As a result of the nuances|variance within the inspection report content or pdf encoding, we\'re noting problematic data extraction for a small number of LAs*.
' + '*Known extraction issues: ' + 'Feedback highlighting problems|inaccuracies|suggestions welcomed.' ) # # testing @@ -1069,8 +1074,9 @@ def save_to_html(data, column_order, local_link_column=None, web_link_column=Non # # If a web link column is specified, convert that column's values to HTML hyperlinks # # Shortening the hyperlink text by taking the part after the last '/' if web_link_column: - data[web_link_column] = data[web_link_column].apply(lambda x: f'ofsted.gov.uk/{x.rsplit("/", 1)[-1]}') # publ_date - # if web_link_column: + data[web_link_column] = data[web_link_column].apply(lambda x: f'ofsted.gov.uk/{x.rsplit("/", 1)[-1]}') + + # if web_link_column: # if the link is a bytes obj, this might be problematic # data[web_link_column] = data[web_link_column].apply(lambda x: f'ofsted.gov.uk/{x.rsplit("/", 1)[-1]}' if isinstance(x, str) else x) # publ_date # Convert column names to title/upper case diff --git a/ofsted_childrens_services_jtai_overview.xlsx b/ofsted_childrens_services_jtai_overview.xlsx index 36f2bd05c70054b3ce27afc9ad3d60391492424e..ac6af74b10e869a60476ca79991b26aa226d6511 100644 GIT binary patch delta 366 zcmV-!0g?XDoC43B03=Xa9hmsk>f3$^Hs|{E$z(oP1 z9ATzHNAC^6*s&u>>XP74LELLAsSl++U_;qAJpj(rbcH2&%BiQg1IA_|q5;X-gfu_4 z=>eQ!Xh4GU4rfV*uU-#Uy4i+axp7M2&jx0#qd2)wPE5wOZIgDGSgb;%iF7O2;@NG|bh5f0Z-=eOa+djb6M9Q2vDVUbH8@F)Zzm$;+yv3PpkZBU)3 z`7%z6I9u-0f-F`fUww5W@A|JmISlzXaRgj)-+zMo53{w27z70{q=A$rlhTb;0ko4R Mjw1$WivR!s03|x6IsgCw delta 375 zcmV--0f_$3oC46C0zVy_cI9Gu zly&qA)(NdjlqD&OKrzjQ+L!2Kvx*lea-J$qYptL}EjYBS?h0lIf72G;tTtf102c+2 za)g-@9lSRLW5*64sY`-G1#zdXq&}2(j}2wtbPsrzrgJR8Q%*g_9WXW{5uK2njY#uj z>mI-vh8iR&?{Jo6_~P|orJHQ%l`E$dzBMpuJ&U9J=)`DjnkH%T-Wu{}c)Nam?^lh5 z>Q2P~RRw2+S)g9qO{&1xML1kdonMC&?*#CtHs}*?r$vT7;86%bhPY?tGk<>BtWY&e zXZLZsh_if?&B;Sf(#NlE;PQ~|Y