Skip to content

Commit

Permalink
Plot counts per recombinant lineage/t-SNE cluster
Browse files Browse the repository at this point in the history
Related to #92
  • Loading branch information
huddlej committed Apr 26, 2024
1 parent 43916cd commit 5d438e3
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 1 deletion.
1 change: 1 addition & 0 deletions sars-cov-2-nextstrain-2022-2023/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,7 @@ rule sarscov2_test_create_notebook_docs:
PCA_Supplement_PNG="manuscript/figures/sarscov2-test-pca-by-{clade_membership}-clade.png",
MDS_Supplement="manuscript/figures/sarscov2-test-mds-by-{clade_membership}-clade.html",
MDS_Supplement_PNG="manuscript/figures/sarscov2-test-mds-by-{clade_membership}-clade.png",
tsne_recombinant_counts="manuscript/figures/sarscov2-test-tsne-recombinant-counts-{clade_membership}.png",
params:
clade_membership=lambda wildcards: wildcards.clade_membership,
pca_label=lambda wildcards: f"pca_label_for_{wildcards.clade_membership}",
Expand Down
161 changes: 160 additions & 1 deletion sars-cov-2-nextstrain/2022-03-29-final-figures.py.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@
"static_pca_chart = snakemake.output.PCA_Supplement_PNG\n",
"\n",
"interactive_mds_chart = snakemake.output.MDS_Supplement\n",
"static_mds_chart = snakemake.output.MDS_Supplement_PNG"
"static_mds_chart = snakemake.output.MDS_Supplement_PNG\n",
"\n",
"output_tsne_recombinant_counts_png = snakemake.output.tsne_recombinant_counts"
]
},
{
Expand Down Expand Up @@ -1132,6 +1134,163 @@
"full_chart_by_cluster.save(static_chart_by_clusters, format=\"png\", scale_factor=2.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ca62b00",
"metadata": {},
"outputs": [],
"source": [
"tsne_label_column = [column for column in embeddings_df.columns if column.startswith(\"t-sne_label\")][0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8059ea3a",
"metadata": {},
"outputs": [],
"source": [
"tsne_label_column"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a15fbda6",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2f3af09",
"metadata": {},
"outputs": [],
"source": [
"embeddings_df[tsne_label_column] != -1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a548586a",
"metadata": {},
"outputs": [],
"source": [
"(embeddings_df[\"Nextclade_pango_collapsed\"].str.startswith(\"X\").fillna(False))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a548586a",
"metadata": {},
"outputs": [],
"source": [
"# Preview the boolean mask of records whose collapsed Pango lineage starts with \"X\".\n",
"# `na=False` makes missing lineage values evaluate to False directly instead of\n",
"# producing an object-dtype result that must be patched with `.fillna(False)`.\n",
"embeddings_df[\"Nextclade_pango_collapsed\"].str.startswith(\"X\", na=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b249281c",
"metadata": {},
"outputs": [],
"source": [
"# Count records per (collapsed Pango lineage, t-SNE cluster label) pair.\n",
"# Rows are kept only when they are not internal nodes, their collapsed lineage\n",
"# starts with \"X\", and their t-SNE label is not -1 (presumably the label for\n",
"# unclustered records; confirm against the clustering step).\n",
"# Pairs observed fewer than 10 times are dropped by the final query.\n",
"tsne_recombinant_counts = embeddings_df.loc[\n",
"    (\n",
"        (embeddings_df[\"is_internal_node\"] == False) &\n",
"        # `na=False` treats missing lineages as non-matches and yields a plain\n",
"        # boolean mask, avoiding the object-dtype downcast done by `.fillna(False)`.\n",
"        (embeddings_df[\"Nextclade_pango_collapsed\"].str.startswith(\"X\", na=False)) &\n",
"        (embeddings_df[tsne_label_column] != -1)\n",
"    ),\n",
"    [\n",
"        \"Nextclade_pango_collapsed\",\n",
"        tsne_label_column,\n",
"    ]\n",
"].value_counts().reset_index(name=\"count\").query(\"count >= 10\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae75274e",
"metadata": {},
"outputs": [],
"source": [
"tsne_recombinant_counts.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0670bb6",
"metadata": {},
"outputs": [],
"source": [
"tsne_recombinant_counts_chart = alt.Chart(tsne_recombinant_counts).mark_circle().encode(\n",
" x=alt.X(\"Nextclade_pango_collapsed:N\", title=\"Recombinant Pango lineage\"),\n",
" y=alt.Y(f\"{tsne_label_column}:N\", title=\"Cluster from t-SNE\"),\n",
" size=\"count:Q\",\n",
" tooltip=[\"Nextclade_pango_collapsed:N\", f\"{tsne_label_column}:N\", \"count:Q\"],\n",
").properties(\n",
" width=600,\n",
" height=600,\n",
")\n",
"tsne_recombinant_counts_chart"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f4dad63",
"metadata": {},
"outputs": [],
"source": [
"tsne_recombinant_counts_chart.save(output_tsne_recombinant_counts_png, format=\"png\", scale_factor=2.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee0c9198",
"metadata": {},
"outputs": [],
"source": [
"tsne_recombinant_counts[\"Nextclade_pango_collapsed\"].value_counts().shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56c46ef2",
"metadata": {},
"outputs": [],
"source": [
"(tsne_recombinant_counts[\"Nextclade_pango_collapsed\"].value_counts() == 1).sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e0b7d48f",
"metadata": {},
"outputs": [],
"source": [
"tsne_recombinant_counts[tsne_label_column].value_counts().shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "942f5d1f",
"metadata": {},
"outputs": [],
"source": [
"(tsne_recombinant_counts[tsne_label_column].value_counts() == 1).sum()"
]
},
{
"cell_type": "markdown",
"id": "e303df26",
Expand Down

0 comments on commit 5d438e3

Please sign in to comment.