Skip to content

Commit

Permalink
Plot counts per MCC/t-SNE cluster pair for HA/NA
Browse files Browse the repository at this point in the history
Related to #92
  • Loading branch information
huddlej committed Apr 26, 2024
1 parent 6a4a46f commit 91253db
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 0 deletions.
153 changes: 153 additions & 0 deletions ha-na-nextstrain/2022-02-23-seasonal-flu-ha-na-reassortment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
"output_mds_png = snakemake.output.HANAFullChartBrushableMDSPNG\n",
"output_tsne_html = snakemake.output.HANAFullChartBrushableTSNEHTML\n",
"output_tsne_png = snakemake.output.HANAFullChartBrushableTSNEPNG\n",
"output_tsne_mcc_counts_png = snakemake.output.tsne_mcc_counts\n",
"output_umap_html = snakemake.output.HANAFullChartBrushableUMAPHTML\n",
"output_umap_png = snakemake.output.HANAFullChartBrushableUMAPPNG\n",
"output_ha_na_html = snakemake.output.HANAChartHTML\n",
Expand Down Expand Up @@ -964,6 +965,158 @@
"tsne_final_chart.save(output_tsne_png, format=\"png\", scale_factor=2.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aacebf2a",
"metadata": {},
"outputs": [],
"source": [
"# Count rows per (MCC, t-SNE cluster) pair, keeping only rows that are not\n",
"# internal nodes, whose MCC is not \"unassigned\", and whose cluster label is not -1.\n",
"tsne_MCC_counts = (\n",
"    embeddings_df\n",
"    .loc[\n",
"        lambda records: (\n",
"            (records[\"is_internal_node\"] == False)\n",
"            & (records[\"MCC\"] != \"unassigned\")\n",
"            & (records[\"t-sne_concatenated_label\"] != -1)\n",
"        ),\n",
"        [\"MCC\", \"t-sne_concatenated_label\"],\n",
"    ]\n",
"    .value_counts()\n",
"    .reset_index(name=\"count\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39522c1f",
"metadata": {},
"outputs": [],
"source": [
"# The MCC number is the integer that follows the last underscore in the MCC name.\n",
"tsne_MCC_counts[\"MCC_number\"] = tsne_MCC_counts[\"MCC\"].map(\n",
"    lambda mcc_name: int(mcc_name.rpartition(\"_\")[2])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e794f98",
"metadata": {},
"outputs": [],
"source": [
"# Order rows by cluster label, then by MCC number, before the labels are cast to strings.\n",
"tsne_MCC_counts = tsne_MCC_counts.sort_values(\n",
"    by=[\"t-sne_concatenated_label\", \"MCC_number\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "246332c3",
"metadata": {},
"outputs": [],
"source": [
"# Cast cluster labels to int and then to str.\n",
"tsne_MCC_counts = tsne_MCC_counts.assign(**{\n",
"    \"t-sne_concatenated_label\": lambda counts: (\n",
"        counts[\"t-sne_concatenated_label\"].astype(int).astype(str)\n",
"    ),\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b586e9b",
"metadata": {},
"outputs": [],
"source": [
"tsne_MCC_counts"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cde59df4",
"metadata": {},
"outputs": [],
"source": [
"# Circle chart of counts per MCC / t-SNE cluster pair, limited to pairs with count >= 10.\n",
"# The x-axis is ordered by MCC_number; the y-axis keeps the row order of the data (sort=None).\n",
"tsne_mcc_counts_chart = (\n",
"    alt.Chart(tsne_MCC_counts.query(\"count >= 10\"))\n",
"    .mark_circle()\n",
"    .encode(\n",
"        x=alt.X(\n",
"            \"MCC:N\",\n",
"            title=\"Reassortment group from TreeKnit\",\n",
"            sort=alt.EncodingSortField(field=\"MCC_number\"),\n",
"        ),\n",
"        y=alt.Y(\n",
"            \"t-sne_concatenated_label:N\",\n",
"            title=\"Cluster from t-SNE of HA and NA\",\n",
"            sort=None,\n",
"        ),\n",
"        size=alt.Size(\"count:Q\"),\n",
"    )\n",
")\n",
"tsne_mcc_counts_chart"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b1ea4e2",
"metadata": {},
"outputs": [],
"source": [
"# Write the chart as a PNG to the output path provided by Snakemake.\n",
"tsne_mcc_counts_chart.save(\n",
"    output_tsne_mcc_counts_png,\n",
"    format=\"png\",\n",
"    scale_factor=2.0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "efb96a6b",
"metadata": {},
"outputs": [],
"source": [
"tsne_MCC_counts[\"MCC\"].drop_duplicates().shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a1459d4",
"metadata": {},
"outputs": [],
"source": [
"embeddings_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3b31327",
"metadata": {},
"outputs": [],
"source": [
"# Count rows per (clade_membership, t-SNE cluster) pair, keeping only rows that are\n",
"# not internal nodes and whose cluster label is not -1.\n",
"tsne_clade_counts = (\n",
"    embeddings_df\n",
"    .loc[\n",
"        lambda records: (\n",
"            (records[\"is_internal_node\"] == False)\n",
"            & (records[\"t-sne_concatenated_label\"] != -1)\n",
"        ),\n",
"        [\"clade_membership\", \"t-sne_concatenated_label\"],\n",
"    ]\n",
"    .value_counts()\n",
"    .reset_index(name=\"count\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c828c993",
"metadata": {},
"outputs": [],
"source": [
"# Cast cluster labels to int and then to str.\n",
"tsne_clade_counts = tsne_clade_counts.assign(**{\n",
"    \"t-sne_concatenated_label\": lambda counts: (\n",
"        counts[\"t-sne_concatenated_label\"].astype(int).astype(str)\n",
"    ),\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2c24b8a0",
"metadata": {},
"outputs": [],
"source": [
"tsne_clade_counts"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1550932d",
"metadata": {},
"outputs": [],
"source": [
"# Circle chart of counts per clade / t-SNE cluster pair, limited to pairs with count >= 10.\n",
"# The cluster labels are strings at this point, so the default nominal sort would be\n",
"# lexicographic (10 before 2). Pass an explicit numeric order for the y-axis instead.\n",
"alt.Chart(tsne_clade_counts.query(\"count >= 10\")).mark_circle().encode(\n",
"    x=\"clade_membership:N\",\n",
"    y=alt.Y(\n",
"        \"t-sne_concatenated_label:N\",\n",
"        sort=sorted(tsne_clade_counts[\"t-sne_concatenated_label\"].unique(), key=int),\n",
"    ),\n",
"    size=\"count:Q\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "ce97a775",
Expand Down
1 change: 1 addition & 0 deletions ha-na-nextstrain/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,7 @@ rule seasonal_flu_reassortment_create_notebook_docs:
HANAFullChartBrushableMDSPNG = "manuscript/figures/flu-2016-2018-ha-na-mds-by-cluster.png",
HANAFullChartBrushableTSNEHTML = "manuscript/figures/flu-2016-2018-ha-na-tsne-by-cluster.html",
HANAFullChartBrushableTSNEPNG = "manuscript/figures/flu-2016-2018-ha-na-tsne-by-cluster.png",
tsne_mcc_counts = "manuscript/figures/flu-2016-2018-ha-na-tsne-mcc-counts.png",
HANAFullChartBrushableUMAPHTML = "manuscript/figures/flu-2016-2018-ha-na-umap-by-cluster.html",
HANAFullChartBrushableUMAPPNG = "manuscript/figures/flu-2016-2018-ha-na-umap-by-cluster.png",
params:
Expand Down

0 comments on commit 91253db

Please sign in to comment.