diff --git a/notebooks/2024_12_MS_cybersecurity/config.yaml b/notebooks/2024_12_MS_cybersecurity/config.yaml new file mode 100644 index 0000000..913a06c --- /dev/null +++ b/notebooks/2024_12_MS_cybersecurity/config.yaml @@ -0,0 +1,59 @@ +--- +search_recipe: + scope_statements: + - "Child online safety and privacy" + - "Protecting children in the digital age" + - "Protecting children from online threats" + - "Protecting children from cyber bullying" + - "Ensuring children's safety online" + - "Ensuring children's privacy online" + - "Helping parents protect their children online" + - "Helping parents inform their children about online safety" + - "Helping parents monitor their children's online activity" + - "Helping children with digital addiction" + + keyword_sets: + - set_name: "Domain keywords" + keywords: + - "child" + - "children" + - "infant" + - "baby" + - "babies" + - "toddler" + - "toddlers" + - "pregnancy" + - "parent" + - "mother" + - "father" + - "family" + - "pupil" + - "teenager" + - "teen" + + - set_name: "Tech keywords" + keywords: + - "cybersecurity" + - "online safety" + - "data protection" + - "encryption" + - "malware" + - "phishing" + - "firewall" + - "identity theft" + - "cyber threats" + - "cyberattack" + - "cyber attack" + - "data breach" + - "social engineering" + - "threat intelligence" + - "penetration testing" + - "cyber defence" + - "cyber defense" + - "cyber forensics" + - "ransomware" + - "zero day exploit" + - "cloud security" + - "cyber bullying" + - "digital addiction" + - "inappropriate content" diff --git a/notebooks/2024_12_MS_cybersecurity/kk_01_cybersec_search.ipynb b/notebooks/2024_12_MS_cybersecurity/kk_01_cybersec_search.ipynb new file mode 100644 index 0000000..2c88bb4 --- /dev/null +++ b/notebooks/2024_12_MS_cybersecurity/kk_01_cybersec_search.ipynb @@ -0,0 +1,244 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from discovery_utils.getters import 
gtr\n", + "from discovery_utils.getters import crunchbase\n", + "from discovery_utils.utils import search\n", + "\n", + "from src import PROJECT_DIR\n", + "from src import VECTOR_DB_DIR\n", + "\n", + "OUTPUT_DIR = PROJECT_DIR / 'data/2024_12_MS/'\n", + "OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Gateway to Research search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "GTR = gtr.GtrGetter(vector_db_path=VECTOR_DB_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Search = search.SearchDataset(GTR, GTR.projects_enriched, \"config.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search_df = Search.do_search()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search_df[['title', 'url', '_score_keywords', '_score_vectors', '_score_avg']].head(50)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search_df.iloc[0:1000].to_csv(OUTPUT_DIR / 'gtr_search_results.csv', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check organisations and people\n", + "\n", + "Note, we can select only the-most-likely-to-be-relevant projects by thresholding the score and project start dates." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_organisations_df = GTR.get_aggregated_organisations((\n", + " search_df\n", + " .query('_score_avg > 0.3')\n", + " .query('start > \"2015\"')\n", + "))\n", + "top_organisations_df.head(15)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_persons_df = GTR.get_aggregated_persons((\n", + " search_df\n", + " .query('_score_avg > 0.3')\n", + " .query('start > \"2015\"')\n", + "))\n", + "top_persons_df.head(15)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top_persons_df.to_csv(OUTPUT_DIR / 'gtr_top_persons.csv', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also export the people and organisations relevant to each project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "GTR.get_project_stakeholders(search_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Crunchbase search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "CB = crunchbase.CrunchbaseGetter(vector_db_path=VECTOR_DB_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SearchCB = search.SearchDataset(CB, CB.organisations_enriched, \"config.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search_cb_df = SearchCB.do_search()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search_cb_df[['name', 'short_description', 'homepage_url', '_score_keywords', '_score_vectors', '_score_avg']]\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search_cb_df.iloc[0:1000].to_csv(OUTPUT_DIR / 'cruchbase_search_results.csv', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check relevant people" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Produce a table with people for all selected organisations\n", + "CB.get_organisations_people(search_cb_df).head(25)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cb_people_df = CB.get_aggregated_people(search_cb_df.query('_score_avg > 0.3')).reset_index()\n", + "cb_people_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cb_people_df.to_csv(OUTPUT_DIR / 'crunchbase_top_persons.csv', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "discovery-mission-radar-prototyping-ejbE0IFh-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/2024_12_MS_cybersecurity/kk_02_cybersec_analysis.ipynb b/notebooks/2024_12_MS_cybersecurity/kk_02_cybersec_analysis.ipynb new file mode 100644 index 0000000..1bf79ca --- /dev/null +++ b/notebooks/2024_12_MS_cybersecurity/kk_02_cybersec_analysis.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Examples of using analysis functionalities\n", + "\n", + "Using discovery_utils analyses functionalities for investments data\n", + "\n", + "Here, we'll find companies using their categories, but you can also use search 
results from the process shown in kk_01_cybersec_search.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from discovery_utils.utils import (\n", + " analysis_crunchbase,\n", + " analysis,\n", + " charts\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from src import PROJECT_DIR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from discovery_utils.getters import crunchbase\n", + "CB = crunchbase.CrunchbaseGetter()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "included = ['y', 'Yes - CR', 'Yes - CR ', 'y-CR', 'Maybe - CR', 'maybe', 'Maybe']\n", + "\n", + "reviewed_data_df = (\n", + " pd.read_csv(PROJECT_DIR / \"data/2024_12_MS/Cybersecurity - Mission studio 2012-12-16 - crunchbase.csv\")\n", + " .rename(columns={\"RELEVANT?\": \"relevant\"})\n", + " .query(\"relevant in @included\")\n", + ")\n", + "matching_ids = reviewed_data_df.id.to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(matching_ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can check these companies by querying the ids" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "matchings_orgs_df = CB.organisations_enriched.query(\"id in @matching_ids\")\n", + "matchings_orgs_df[['name', 'homepage_url', 'short_description']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now get the funding rounds for the matching companies - you can specify what type of funding rounds you need" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check what 
type of funding rounds there are\n", + "CB.unique_funding_round_types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "funding_rounds_df = (\n", + " CB.select_funding_rounds(org_ids=matching_ids, funding_round_types=[\"angel\", \"pre_seed\", \"seed\", \"series_a\"])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "investors_df = (\n", + " CB.funding_rounds_enriched\n", + " .query(\"funding_round_id in @funding_rounds_df.funding_round_id\")\n", + " .groupby(\"funding_round_id\")\n", + " .agg(investor_name=(\"investor_name\", list))\n", + " .reset_index()\n", + ")\n", + "\n", + "funding_rounds_df = (\n", + " funding_rounds_df\n", + " .drop(columns=[\"investor_name\"])\n", + " .merge(investors_df, on=\"funding_round_id\", how=\"left\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(funding_rounds_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "funding_rounds_df.to_csv(PROJECT_DIR / \"data/2024_12_MS/crunchbase_funding_rounds.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's generate some basic time series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_df = analysis_crunchbase.get_timeseries(matchings_orgs_df, funding_rounds_df, period='year', min_year=2014, max_year=2024)\n", + "ts_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = charts.ts_bar(\n", + " ts_df,\n", + " variable='raised_amount_gbp_total',\n", + " variable_title=\"Raised amount, £ millions\",\n", + " category_column=\"_category\",\n", + ")\n", + "charts.configure_plots(fig, chart_title=\"\")" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's look into breakdown of deal types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "deals_df, deal_counts_df = analysis_crunchbase.get_funding_by_year_and_range(funding_rounds_df, 2014, 2024)\n", + "aggregated_funding_types_df = analysis_crunchbase.aggregate_by_funding_round_types(funding_rounds_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "aggregated_funding_types_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "deals_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "deal_counts_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "analysis_crunchbase.chart_investment_types(aggregated_funding_types_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "analysis_crunchbase.chart_investment_types_counts(aggregated_funding_types_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "analysis_crunchbase.chart_deal_sizes(deals_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "analysis_crunchbase.chart_deal_sizes_counts(deal_counts_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "discovery-mission-radar-prototyping-ejbE0IFh-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/2024_12_MS_cybersecurity/kk_03_cybersec_ukri.ipynb b/notebooks/2024_12_MS_cybersecurity/kk_03_cybersec_ukri.ipynb new file mode 100644 index 0000000..39542c0 --- /dev/null +++ b/notebooks/2024_12_MS_cybersecurity/kk_03_cybersec_ukri.ipynb @@ -0,0 +1,1094 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# UKRI funding trends" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from discovery_utils.utils import (\n", + " analysis_gtr,\n", + " analysis,\n", + " charts\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from src import PROJECT_DIR" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-12-20 12:45:46,283 - discovery_utils.getters.gtr - INFO - Checking for latest version of data in S3 bucket: discovery-iss\n", + "2024-12-20 12:45:46,767 - discovery_utils.getters.gtr - INFO - Latest version found: GtR_20241215\n" + ] + } + ], + "source": [ + "from discovery_utils.getters import gtr\n", + "GTR = gtr.GtrGetter()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "included = ['y', 'Maybe?', 'yes']\n", + "\n", + "reviewed_data_df = (\n", + " pd.read_csv(PROJECT_DIR / \"data/2024_12_MS/Cybersecurity - Mission studio 2012-12-16 - ukri.csv\")\n", + " .rename(columns={\"RELEVANT?\": \"relevant\"})\n", + " .query(\"relevant in @included\")\n", + ")\n", + "matching_ids = reviewed_data_df.id.to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "50" + ] + }, + 
"execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(matching_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleurlamountstartend
2401Hark: A VR based interactive narrative game / ...https://gtr.ukri.org/projects?ref=10055721674882023-04-012023-12-31
4078Project Safety Net: The risks and opportunitie...https://gtr.ukri.org/projects?ref=288174702023-10-012027-09-30
8477Digital Defence & Activism Lessons: Equipp...https://gtr.ukri.org/projects?ref=AH/T008938/1793632020-08-172022-11-11
14526UnBias: Emancipating Users Against Algorithmic...https://gtr.ukri.org/projects?ref=EP/N02785X/111405932016-09-012019-05-31
15485Kidslox 2014; setting boundaries in the age of...https://gtr.ukri.org/projects?ref=7205671724522015-01-012015-12-31
172792PS - Prevent & Protect Through Supporthttps://gtr.ukri.org/projects?ref=100478673829892022-10-012025-09-30
19029ISIS: Protecting children in online social net...https://gtr.ukri.org/projects?ref=EP/F035438/14304672008-06-012011-10-18
26148Using Natural Language Processing and Machine ...https://gtr.ukri.org/projects?ref=225344502019-10-012022-05-31
26425Dynamic, Real time, On-demand Personalisation ...https://gtr.ukri.org/projects?ref=EP/R033838/16826962018-09-012021-02-28
29301Supporting the use of digital media in researc...https://gtr.ukri.org/projects?ref=ES/J009814/1476022011-11-012012-10-31
34745#metoo and pre-teen relationship cultures: Inv...https://gtr.ukri.org/projects?ref=243379902020-10-012025-04-11
37304BULLYING, CYBERBULLYING, AND PUPIL SAFETY AND ...https://gtr.ukri.org/projects?ref=ES/K00591X/1284722012-09-032015-09-02
41555Collaborative tools to assist carers with prot...https://gtr.ukri.org/projects?ref=180896302016-10-012017-01-19
46147Understanding young people's online sexual beh...https://gtr.ukri.org/projects?ref=288800202023-10-012027-03-31
487032PS - Prevent & Protect Through Supporthttps://gtr.ukri.org/projects?ref=100805642377652022-10-012025-09-30
56269Creating The First Safe Internet Generation – ...https://gtr.ukri.org/projects?ref=133586156002018-02-012018-03-31
58641It depends: Exploring the role of context in s...https://gtr.ukri.org/projects?ref=207211002018-10-012022-09-30
60177How Much is Too Much? Leveraging Existing and ...https://gtr.ukri.org/projects?ref=ES/T008709/12317692020-06-012023-05-31
62329Learning hub enhancements to support schools w...https://gtr.ukri.org/projects?ref=62792716392020-06-012021-03-31
64100SENTINEL: Securing Age Estimation and Digital ...https://gtr.ukri.org/projects?ref=100887403982362023-12-012025-05-31
73943Staying Safe in an Online World- Adolescents a...https://gtr.ukri.org/projects?ref=291938302024-10-012028-12-31
75451Promoting Digital Resilience in Adolescencehttps://gtr.ukri.org/projects?ref=242591702020-10-012025-04-04
79089Live interactive online classes for schoolshttps://gtr.ukri.org/projects?ref=58028748652020-05-012021-02-28
81943ISIS: Protecting children in online social net...https://gtr.ukri.org/projects?ref=EP/F035071/1338262008-06-262011-10-18
86913CyberHeels – AI powered cyber security educati...https://gtr.ukri.org/projects?ref=10078031320002023-05-012023-09-30
87660ISIS: Protecting children in online social net...https://gtr.ukri.org/projects?ref=EP/F035454/12603252008-10-092011-10-08
884490-3-year-old children's language and literacy ...https://gtr.ukri.org/projects?ref=ES/W001020/16886542022-06-062024-12-05
91192Harnessing digital data to study 21st-century ...https://gtr.ukri.org/projects?ref=MR/X028801/11549122023-04-012024-09-30
92805Safe Internet surfing with an intelligent chil...https://gtr.ukri.org/projects?ref=105366305022019-04-012019-07-31
96067CHIPgroup - Protecting Children Online from Cy...https://gtr.ukri.org/projects?ref=87581124102020-12-012022-03-31
100654Understanding Design Features of Family Apps a...https://gtr.ukri.org/projects?ref=211600002018-10-012022-11-30
118331Investigating the balance of privacy versus se...https://gtr.ukri.org/projects?ref=273855402022-10-012026-09-18
118371ProTechThem: Building Awareness for Safer and ...https://gtr.ukri.org/projects?ref=ES/V011278/17709512021-06-012025-09-30
126569Analysing and Preventing Unintended Privacy Vi...https://gtr.ukri.org/projects?ref=209789802018-09-012022-03-31
128562Play2Secure: An AI enabled personalised cybers...https://gtr.ukri.org/projects?ref=105354149502019-04-012019-07-31
129349Online Safety in Autistic Childrenhttps://gtr.ukri.org/projects?ref=212488302018-01-152021-07-16
133660'Tracking People':controversies and challengeshttps://gtr.ukri.org/projects?ref=AH/N005929/1359942016-10-012019-03-31
135456Social Media Mechanisms Affecting Adolescent M...https://gtr.ukri.org/projects?ref=MR/X034925/115869882024-03-012028-02-29
136070Reducing Online Harms via Content Regulation: ...https://gtr.ukri.org/projects?ref=243989202020-10-012025-08-31
140149Protecting Children and their Data Online: Reg...https://gtr.ukri.org/projects?ref=903362779602021-02-012022-03-31
141429Adolescent Mental Health and Development in th...https://gtr.ukri.org/projects?ref=MR/W002450/139350732021-09-012025-08-31
145252Cyberstalking Risk Indicatorhttps://gtr.ukri.org/projects?ref=75046250002013-04-012013-10-31
145589It depends: Exploring the role of context in s...https://gtr.ukri.org/projects?ref=281604702018-10-012022-09-30
148473EPSRC-Royal Society fellowship engagement (201...https://gtr.ukri.org/projects?ref=EP/L003406/11901672013-10-072017-04-06
148907Digital Personhood: Charting the digital lifespanhttps://gtr.ukri.org/projects?ref=EP/L00383X/17042202013-08-282016-08-27
151031Cyber-Security across the Life Span (cSaLSA)https://gtr.ukri.org/projects?ref=EP/P011454/15323972017-02-012020-09-02
154018(In)visible Women: Experiences of and Response...https://gtr.ukri.org/projects?ref=274601402022-09-262026-09-30
155766Testing for Transparency: designing privacy-in...https://gtr.ukri.org/projects?ref=227882802019-10-012023-12-30
159110Teaching for Digital Citizenship: Digital ethi...https://gtr.ukri.org/projects?ref=ES/X002756/16015602022-12-012025-08-31
160728Combatting gendered, sexual risks and harms on...https://gtr.ukri.org/projects?ref=AH/W000423/11730092021-02-132022-06-28
\n", + "
" + ], + "text/plain": [ + " title \\\n", + "2401 Hark: A VR based interactive narrative game / ... \n", + "4078 Project Safety Net: The risks and opportunitie... \n", + "8477 Digital Defence & Activism Lessons: Equipp... \n", + "14526 UnBias: Emancipating Users Against Algorithmic... \n", + "15485 Kidslox 2014; setting boundaries in the age of... \n", + "17279 2PS - Prevent & Protect Through Support \n", + "19029 ISIS: Protecting children in online social net... \n", + "26148 Using Natural Language Processing and Machine ... \n", + "26425 Dynamic, Real time, On-demand Personalisation ... \n", + "29301 Supporting the use of digital media in researc... \n", + "34745 #metoo and pre-teen relationship cultures: Inv... \n", + "37304 BULLYING, CYBERBULLYING, AND PUPIL SAFETY AND ... \n", + "41555 Collaborative tools to assist carers with prot... \n", + "46147 Understanding young people's online sexual beh... \n", + "48703 2PS - Prevent & Protect Through Support \n", + "56269 Creating The First Safe Internet Generation – ... \n", + "58641 It depends: Exploring the role of context in s... \n", + "60177 How Much is Too Much? Leveraging Existing and ... \n", + "62329 Learning hub enhancements to support schools w... \n", + "64100 SENTINEL: Securing Age Estimation and Digital ... \n", + "73943 Staying Safe in an Online World- Adolescents a... \n", + "75451 Promoting Digital Resilience in Adolescence \n", + "79089 Live interactive online classes for schools \n", + "81943 ISIS: Protecting children in online social net... \n", + "86913 CyberHeels – AI powered cyber security educati... \n", + "87660 ISIS: Protecting children in online social net... \n", + "88449 0-3-year-old children's language and literacy ... \n", + "91192 Harnessing digital data to study 21st-century ... \n", + "92805 Safe Internet surfing with an intelligent chil... \n", + "96067 CHIPgroup - Protecting Children Online from Cy... \n", + "100654 Understanding Design Features of Family Apps a... 
\n", + "118331 Investigating the balance of privacy versus se... \n", + "118371 ProTechThem: Building Awareness for Safer and ... \n", + "126569 Analysing and Preventing Unintended Privacy Vi... \n", + "128562 Play2Secure: An AI enabled personalised cybers... \n", + "129349 Online Safety in Autistic Children \n", + "133660 'Tracking People':controversies and challenges \n", + "135456 Social Media Mechanisms Affecting Adolescent M... \n", + "136070 Reducing Online Harms via Content Regulation: ... \n", + "140149 Protecting Children and their Data Online: Reg... \n", + "141429 Adolescent Mental Health and Development in th... \n", + "145252 Cyberstalking Risk Indicator \n", + "145589 It depends: Exploring the role of context in s... \n", + "148473 EPSRC-Royal Society fellowship engagement (201... \n", + "148907 Digital Personhood: Charting the digital lifespan \n", + "151031 Cyber-Security across the Life Span (cSaLSA) \n", + "154018 (In)visible Women: Experiences of and Response... \n", + "155766 Testing for Transparency: designing privacy-in... \n", + "159110 Teaching for Digital Citizenship: Digital ethi... \n", + "160728 Combatting gendered, sexual risks and harms on... 
\n", + "\n", + " url amount start \\\n", + "2401 https://gtr.ukri.org/projects?ref=10055721 67488 2023-04-01 \n", + "4078 https://gtr.ukri.org/projects?ref=2881747 0 2023-10-01 \n", + "8477 https://gtr.ukri.org/projects?ref=AH/T008938/1 79363 2020-08-17 \n", + "14526 https://gtr.ukri.org/projects?ref=EP/N02785X/1 1140593 2016-09-01 \n", + "15485 https://gtr.ukri.org/projects?ref=720567 172452 2015-01-01 \n", + "17279 https://gtr.ukri.org/projects?ref=10047867 382989 2022-10-01 \n", + "19029 https://gtr.ukri.org/projects?ref=EP/F035438/1 430467 2008-06-01 \n", + "26148 https://gtr.ukri.org/projects?ref=2253445 0 2019-10-01 \n", + "26425 https://gtr.ukri.org/projects?ref=EP/R033838/1 682696 2018-09-01 \n", + "29301 https://gtr.ukri.org/projects?ref=ES/J009814/1 47602 2011-11-01 \n", + "34745 https://gtr.ukri.org/projects?ref=2433799 0 2020-10-01 \n", + "37304 https://gtr.ukri.org/projects?ref=ES/K00591X/1 28472 2012-09-03 \n", + "41555 https://gtr.ukri.org/projects?ref=1808963 0 2016-10-01 \n", + "46147 https://gtr.ukri.org/projects?ref=2888002 0 2023-10-01 \n", + "48703 https://gtr.ukri.org/projects?ref=10080564 237765 2022-10-01 \n", + "56269 https://gtr.ukri.org/projects?ref=133586 15600 2018-02-01 \n", + "58641 https://gtr.ukri.org/projects?ref=2072110 0 2018-10-01 \n", + "60177 https://gtr.ukri.org/projects?ref=ES/T008709/1 231769 2020-06-01 \n", + "62329 https://gtr.ukri.org/projects?ref=62792 71639 2020-06-01 \n", + "64100 https://gtr.ukri.org/projects?ref=10088740 398236 2023-12-01 \n", + "73943 https://gtr.ukri.org/projects?ref=2919383 0 2024-10-01 \n", + "75451 https://gtr.ukri.org/projects?ref=2425917 0 2020-10-01 \n", + "79089 https://gtr.ukri.org/projects?ref=58028 74865 2020-05-01 \n", + "81943 https://gtr.ukri.org/projects?ref=EP/F035071/1 33826 2008-06-26 \n", + "86913 https://gtr.ukri.org/projects?ref=10078031 32000 2023-05-01 \n", + "87660 https://gtr.ukri.org/projects?ref=EP/F035454/1 260325 2008-10-09 \n", + "88449 
https://gtr.ukri.org/projects?ref=ES/W001020/1 688654 2022-06-06 \n", + "91192 https://gtr.ukri.org/projects?ref=MR/X028801/1 154912 2023-04-01 \n", + "92805 https://gtr.ukri.org/projects?ref=105366 30502 2019-04-01 \n", + "96067 https://gtr.ukri.org/projects?ref=87581 12410 2020-12-01 \n", + "100654 https://gtr.ukri.org/projects?ref=2116000 0 2018-10-01 \n", + "118331 https://gtr.ukri.org/projects?ref=2738554 0 2022-10-01 \n", + "118371 https://gtr.ukri.org/projects?ref=ES/V011278/1 770951 2021-06-01 \n", + "126569 https://gtr.ukri.org/projects?ref=2097898 0 2018-09-01 \n", + "128562 https://gtr.ukri.org/projects?ref=105354 14950 2019-04-01 \n", + "129349 https://gtr.ukri.org/projects?ref=2124883 0 2018-01-15 \n", + "133660 https://gtr.ukri.org/projects?ref=AH/N005929/1 35994 2016-10-01 \n", + "135456 https://gtr.ukri.org/projects?ref=MR/X034925/1 1586988 2024-03-01 \n", + "136070 https://gtr.ukri.org/projects?ref=2439892 0 2020-10-01 \n", + "140149 https://gtr.ukri.org/projects?ref=90336 277960 2021-02-01 \n", + "141429 https://gtr.ukri.org/projects?ref=MR/W002450/1 3935073 2021-09-01 \n", + "145252 https://gtr.ukri.org/projects?ref=750462 5000 2013-04-01 \n", + "145589 https://gtr.ukri.org/projects?ref=2816047 0 2018-10-01 \n", + "148473 https://gtr.ukri.org/projects?ref=EP/L003406/1 190167 2013-10-07 \n", + "148907 https://gtr.ukri.org/projects?ref=EP/L00383X/1 704220 2013-08-28 \n", + "151031 https://gtr.ukri.org/projects?ref=EP/P011454/1 532397 2017-02-01 \n", + "154018 https://gtr.ukri.org/projects?ref=2746014 0 2022-09-26 \n", + "155766 https://gtr.ukri.org/projects?ref=2278828 0 2019-10-01 \n", + "159110 https://gtr.ukri.org/projects?ref=ES/X002756/1 601560 2022-12-01 \n", + "160728 https://gtr.ukri.org/projects?ref=AH/W000423/1 173009 2021-02-13 \n", + "\n", + " end \n", + "2401 2023-12-31 \n", + "4078 2027-09-30 \n", + "8477 2022-11-11 \n", + "14526 2019-05-31 \n", + "15485 2015-12-31 \n", + "17279 2025-09-30 \n", + "19029 2011-10-18 \n", + "26148 
2022-05-31 \n", + "26425 2021-02-28 \n", + "29301 2012-10-31 \n", + "34745 2025-04-11 \n", + "37304 2015-09-02 \n", + "41555 2017-01-19 \n", + "46147 2027-03-31 \n", + "48703 2025-09-30 \n", + "56269 2018-03-31 \n", + "58641 2022-09-30 \n", + "60177 2023-05-31 \n", + "62329 2021-03-31 \n", + "64100 2025-05-31 \n", + "73943 2028-12-31 \n", + "75451 2025-04-04 \n", + "79089 2021-02-28 \n", + "81943 2011-10-18 \n", + "86913 2023-09-30 \n", + "87660 2011-10-08 \n", + "88449 2024-12-05 \n", + "91192 2024-09-30 \n", + "92805 2019-07-31 \n", + "96067 2022-03-31 \n", + "100654 2022-11-30 \n", + "118331 2026-09-18 \n", + "118371 2025-09-30 \n", + "126569 2022-03-31 \n", + "128562 2019-07-31 \n", + "129349 2021-07-16 \n", + "133660 2019-03-31 \n", + "135456 2028-02-29 \n", + "136070 2025-08-31 \n", + "140149 2022-03-31 \n", + "141429 2025-08-31 \n", + "145252 2013-10-31 \n", + "145589 2022-09-30 \n", + "148473 2017-04-06 \n", + "148907 2016-08-27 \n", + "151031 2020-09-02 \n", + "154018 2026-09-30 \n", + "155766 2023-12-30 \n", + "159110 2025-08-31 \n", + "160728 2022-06-28 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matchings_projects_df = (\n", + " GTR.projects_enriched.query(\"id in @matching_ids\")\n", + " .assign(text = lambda x: x.title + \" \" + x.abstractText)\n", + ")\n", + "matchings_projects_df[['title', 'url', 'amount', 'start', 'end']]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "matchings_projects_df[['id', 'title', 'url', 'amount', 'start', 'end']].to_csv(PROJECT_DIR / \"data/2024_12_MS/gtr_relevant_projects.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "45" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(matchings_projects_df.query(\"start > '2013'\"))" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
time_periodyearn_projectsamountamount_median
02014-01-01201400.0000000.0
12015-01-01201510.172452172452.0
22016-01-01201631.17658735994.0
32017-01-01201710.532397532397.0
42018-01-01201860.6982960.0
52019-01-01201940.0454527475.0
62020-01-01202080.47004642024.5
72021-01-01202145.156993524455.5
82022-01-01202261.910968310377.0
92023-01-01202360.65263649744.0
102024-01-01202421.586988793494.0
\n", + "
" + ], + "text/plain": [ + " time_period year n_projects amount amount_median\n", + "0 2014-01-01 2014 0 0.000000 0.0\n", + "1 2015-01-01 2015 1 0.172452 172452.0\n", + "2 2016-01-01 2016 3 1.176587 35994.0\n", + "3 2017-01-01 2017 1 0.532397 532397.0\n", + "4 2018-01-01 2018 6 0.698296 0.0\n", + "5 2019-01-01 2019 4 0.045452 7475.0\n", + "6 2020-01-01 2020 8 0.470046 42024.5\n", + "7 2021-01-01 2021 4 5.156993 524455.5\n", + "8 2022-01-01 2022 6 1.910968 310377.0\n", + "9 2023-01-01 2023 6 0.652636 49744.0\n", + "10 2024-01-01 2024 2 1.586988 793494.0" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts_df = (\n", + " analysis_gtr.get_timeseries(matchings_projects_df, 'year', 2014, 2024)\n", + " .assign(amount = lambda df: df.amount / 1_000_000)\n", + ")\n", + "ts_df" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fig = charts.ts_bar(\n", + " ts_df,\n", + " variable='n_projects',\n", + " variable_title=\"Number of projects\",\n", + " category_column=\"_category\",\n", + ")\n", + "charts.configure_plots(fig, chart_title=\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fig = charts.ts_bar(\n", + " ts_df,\n", + " variable='amount',\n", + " variable_title=\"Amount, £ millions\",\n", + " category_column=\"_category\",\n", + ")\n", + "charts.configure_plots(fig, chart_title=\"\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "discovery-mission-radar-prototyping-ejbE0IFh-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/2024_12_MS_cybersecurity/kk_04_cybersec_vis.ipynb b/notebooks/2024_12_MS_cybersecurity/kk_04_cybersec_vis.ipynb new file mode 100644 index 0000000..0f50d7b --- /dev/null +++ b/notebooks/2024_12_MS_cybersecurity/kk_04_cybersec_vis.ipynb @@ -0,0 +1,23 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualising the company landscape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}