Skip to content

Commit

Permalink
Clean up uber-lyft notebook
Browse files (browse the repository at this point in the history)
  • Loading branch information
mrocklin committed Mar 20, 2024
1 parent 9eb8cd2 commit c52a121
Showing 1 changed file with 26 additions and 16 deletions.
42 changes: 26 additions & 16 deletions uber-lyft.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,6 +22,26 @@
"This data is available here:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09728a96-0c84-4198-ab52-4dcdfd704606",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# We can read a small piece of data with pandas\n",
"# but this is slow and not scalable\n",
"\n",
"import pandas as pd\n",
"\n",
"df = pd.read_parquet(\n",
" \"s3://coiled-data/uber/part.0.parquet\",\n",
")\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -31,12 +51,12 @@
},
"outputs": [],
"source": [
"import dask.distributed\n",
"import coiled\n",
"\n",
"cluster = coiled.Cluster(\n",
" n_workers=30,\n",
" region=\"us-east-2\", # start workers close to data to minimize costs\n",
" worker_memory=\"16 GiB\",\n",
" region=\"us-east-2\",\n",
")\n",
"\n",
"client = cluster.get_client()"
Expand All @@ -52,12 +72,10 @@
"import dask\n",
"import dask.dataframe as dd\n",
"\n",
"dask.config.set({\"dataframe.convert-string\": True}) # use PyArrow strings by default\n",
"\n",
"df = dd.read_parquet(\n",
" \"s3://coiled-datasets/uber-lyft-tlc/\",\n",
" \"s3://coiled-data/uber/\",\n",
")\n",
"df.head()"
"df"
]
},
{
Expand All @@ -76,7 +94,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "95a96932-2109-447c-9eb3-0d235de5e973",
"id": "8c589e9f-f3e9-41d3-b34c-ca42fee44729",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -85,14 +103,6 @@
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1febb99a-d608-4b12-8789-c6c7cf7f8d25",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -291,7 +301,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.10.14"
}
},
"nbformat": 4,
Expand Down

0 comments on commit c52a121

Please sign in to comment.