def generate_state_breakdown():
    """Generate per-state connection-type history as JSON and CSV files.

    Reads ``results/breakdown-suburbs.json`` (iterated here as
    date -> state -> suburb -> connection-type -> count), sums the counts of
    every suburb within each state, and writes:

    * ``results/breakdown-state.json`` -- date -> state -> connection-type -> count
    * ``results/breakdown.STATE.csv`` -- one file per entry of ``data.STATES``,
      with one row per date and one column per connection type.

    Connection types that a state does not have on a given date are written
    as 0. Columns are emitted in sorted order so that repeated runs over the
    same input produce identical files.
    """
    output = {}
    all_ctypes = set()
    for date, state_info in utils.read_json_file("results/breakdown-suburbs.json").items():
        logging.info("Processing %s", date)
        output[date] = {}
        for state, suburb_list in state_info.items():
            state_tally = {}
            # Only the per-suburb counts matter here; suburb names are not used.
            for connections in suburb_list.values():
                for ctype, ccount in connections.items():
                    state_tally[ctype] = state_tally.get(ctype, 0) + ccount
            all_ctypes.update(state_tally)
            output[date][state] = state_tally
    utils.write_json_file("results/breakdown-state.json", output)

    # Iterating a set of strings directly can yield a different order on each
    # run (string hash randomisation), which would shuffle the CSV columns.
    ctypes = sorted(all_ctypes)
    header = ["date", *ctypes]

    # Write one CSV per state. The header is written even when there are no
    # dates, so empty input yields header-only files rather than an IndexError.
    # NOTE(review): data.STATES is assumed to be an iterable of the same state
    # keys used in the input JSON -- confirm against the data module.
    for state in data.STATES:
        with open(f"results/breakdown.{state}.csv", "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(header)
            for date, states in output.items():
                tally = states.get(state, {})
                writer.writerow([date, *(tally.get(ctype, 0) for ctype in ctypes)])
sorted( suggestions, key=lambda s: difflib.SequenceMatcher(None, address, s["formattedAddress"]).ratio(), - reverse=True + reverse=True, ) if suggestions: loc_id = result["suggestions"][0]["id"] diff --git a/code/update_breakdown.py b/code/update_breakdown.py index ea73738f7b..3d78faa1b2 100755 --- a/code/update_breakdown.py +++ b/code/update_breakdown.py @@ -5,7 +5,7 @@ from datetime import datetime import utils -from adhoc_tools import get_tech_and_upgrade_breakdown +from adhoc_tools import generate_state_breakdown, get_tech_and_upgrade_breakdown from tabulate import tabulate @@ -40,3 +40,4 @@ def print_breakdowns(breakdowns): logging.basicConfig(level=logging.INFO) bd = update_breakdown() print_breakdowns(bd) + generate_state_breakdown()