From 1be5604abe4ad5df76b6d6b68cb5cc6f0309d867 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:16:52 +0800 Subject: [PATCH] DOC: particlefile metadata (fixes #1701) --- docs/examples/tutorial_output.ipynb | 111 ++++++++++-------- .../examples/tutorial_parcels_structure.ipynb | 2 +- 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/docs/examples/tutorial_output.ipynb b/docs/examples/tutorial_output.ipynb index db5f62586..31ab1360d 100644 --- a/docs/examples/tutorial_output.ipynb +++ b/docs/examples/tutorial_output.ipynb @@ -21,7 +21,7 @@ "- [**Plotting**](#Plotting)\n", "- [**Animations**](#Animations)\n", "\n", - "First we need to create some parcels output to analyze. We simulate a set of particles using the setup described in the [Delay start tutorial](https://docs.oceanparcels.org/en/latest/examples/tutorial_delaystart.html).\n" + "First we need to create some parcels output to analyze. We simulate a set of particles using the setup described in the [Delay start tutorial](https://docs.oceanparcels.org/en/latest/examples/tutorial_delaystart.html). 
We will also add some user-defined metadata to the output file.\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "from datetime import timedelta\n", + "from datetime import datetime, timedelta\n", "\n", "import numpy as np\n", "\n", @@ -41,16 +41,7 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO: Output files are stored in Output.zarr.\n", - "100%|██████████| 86400.0/86400.0 [00:00<00:00, 93792.23it/s] \n" - ] - } - ], + "outputs": [], "source": [ "example_dataset_folder = parcels.download_example_dataset(\"Peninsula_data\")\n", "fieldset = parcels.FieldSet.from_parcels(\n", @@ -68,8 +59,40 @@ " fieldset=fieldset, pclass=parcels.JITParticle, lon=lon, lat=lat, time=time\n", ")\n", "\n", - "output_file = pset.ParticleFile(name=\"Output.zarr\", outputdt=timedelta(hours=2))\n", - "\n", + "output_file = pset.ParticleFile(name=\"Output.zarr\", outputdt=timedelta(hours=2))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Parcels saves some metadata in the output file with every simulation (Parcels version, CF convention information, etc.). This metadata is just a dictionary that is propagated to `xr.Dataset(attrs=...)` and is stored in the `.metadata` attribute. The user is free to manipulate this dictionary to add any custom, xarray-compatible metadata relevant to their simulation. Here we add a custom metadata field `date_created` to the output file."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "output_file.metadata[\"date_created\"] = datetime.now().isoformat()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: Output files are stored in Output.zarr.\n", + "100%|██████████| 86400.0/86400.0 [00:01<00:00, 82356.55it/s]\n" + ] + } + ], + "source": [ "pset.execute(\n", " parcels.AdvectionRK4,\n", " runtime=timedelta(hours=24),\n", @@ -85,35 +108,36 @@ "source": [ "## Reading the output file\n", "\n", - "Parcels exports output trajectories in `zarr` [format](https://zarr.readthedocs.io/en/stable/). Files in `zarr` are typically _much_ smaller in size than netcdf, although may be slightly more challenging to handle (but `xarray` has a fairly seamless `open_zarr()` method).\n" + "Parcels exports output trajectories in `zarr` [format](https://zarr.readthedocs.io/en/stable/). Files in `zarr` are typically _much_ smaller in size than netcdf, although may be slightly more challenging to handle (but `xarray` has a fairly seamless `open_zarr()` method). 
Note that when we display the dataset, we can see our custom metadata field `date_created`.\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "<xarray.Dataset>\n", + "<xarray.Dataset> Size: 3kB\n", "Dimensions: (trajectory: 10, obs: 12)\n", "Coordinates:\n", - " * obs (obs) int32 0 1 2 3 4 5 6 7 8 9 10 11\n", - " * trajectory (trajectory) int64 0 1 2 3 4 5 6 7 8 9\n", + " * obs (obs) int32 48B 0 1 2 3 4 5 6 7 8 9 10 11\n", + " * trajectory (trajectory) int64 80B 0 1 2 3 4 5 6 7 8 9\n", "Data variables:\n", - " lat (trajectory, obs) float32 dask.array\n", - " lon (trajectory, obs) float32 dask.array\n", - " time (trajectory, obs) timedelta64[ns] dask.array\n", - " z (trajectory, obs) float32 dask.array\n", + " lat (trajectory, obs) float32 480B dask.array\n", + " lon (trajectory, obs) float32 480B dask.array\n", + " time (trajectory, obs) timedelta64[ns] 960B dask.array\n", + " z (trajectory, obs) float32 480B dask.array\n", "Attributes:\n", " Conventions: CF-1.6/CF-1.7\n", + " date_created: 2024-11-20T11:07:47.494911\n", " feature_type: trajectory\n", " ncei_template_version: NCEI_NetCDF_Trajectory_Template_v2.0\n", " parcels_kernels: JITParticleAdvectionRK4\n", " parcels_mesh: flat\n", - " parcels_version: v2.4.2-367-gd9b7b447\n" + " parcels_version: 3.1.1.dev4\n" ] } ], @@ -126,17 +150,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "<xarray.DataArray 'trajectory' (trajectory: 10)>\n", + "<xarray.DataArray 'trajectory' (trajectory: 10)> Size: 80B\n", "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n", "Coordinates:\n", - " * trajectory (trajectory) int64 0 1 2 3 4 5 6 7 8 9\n" + " * trajectory (trajectory) int64 80B 0 1 2 3 4 5 6 7 8 9\n" ] } ], @@ -168,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -217,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8,
"metadata": {}, "outputs": [], "source": [ @@ -237,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -276,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": { "scrolled": true }, @@ -312,18 +336,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/erik/miniconda3/envs/parcels/lib/python3.11/site-packages/pandas/core/arrays/timedeltas.py:1093: RuntimeWarning: invalid value encountered in cast\n", - " data = (base * m + (frac * m).astype(np.int64)).view(\"timedelta64[ns]\")\n" - ] - } - ], + "outputs": [], "source": [ "# Using xarray\n", "mean_lon_x = []\n", @@ -349,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -399,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -454,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -473,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -513,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -5945,7 +5960,7 @@ "" ] }, - "execution_count": 14, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/examples/tutorial_parcels_structure.ipynb b/docs/examples/tutorial_parcels_structure.ipynb index 95543ab8a..dbd751f4f 100644 --- a/docs/examples/tutorial_parcels_structure.ipynb +++ b/docs/examples/tutorial_parcels_structure.ipynb @@ -350,7 +350,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The final part executes the simulation, given the `ParticleSet`, 
`FieldSet` and `Kernels`, that have been defined in the previous steps. If you like to store the particle data generated in the simulation, you define the `ParticleFile` to which the output of the kernel execution will be written. Then, on the `ParticleSet` you have defined, you can use the method `ParticleSet.execute()` which requires the following arguments:\n", + "The final part executes the simulation, given the `ParticleSet`, `FieldSet` and `Kernels` that have been defined in the previous steps. If you would like to store the particle data generated in the simulation, you define the `ParticleFile` to which the output of the kernel execution, and optionally any user-specified metadata (see the [Working with Parcels output](https://docs.oceanparcels.org/en/latest/examples/tutorial_output.html) tutorial for more info), will be written. Then, on the `ParticleSet` you have defined, you can use the method `ParticleSet.execute()`, which requires the following arguments:\n", "\n", "1. The kernels to be executed.\n", "2. The `runtime` defining how long the execution loop runs. Alternatively, you may define the `endtime` at which the execution loop stops.\n",
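The metadata hand-off documented in this patch can be sketched in plain `xarray`, independent of a full Parcels simulation. This is an illustrative sketch, not Parcels code: the `metadata` dict below is a hypothetical stand-in for the `ParticleFile.metadata` dictionary, and the timestamp mirrors the `date_created` value added in the notebook above.

```python
from datetime import datetime

import xarray as xr

# Hypothetical stand-in for ParticleFile.metadata; Parcels forwards this
# dictionary to xr.Dataset(attrs=...), so every key surfaces as a global
# attribute on the dataset read back from the zarr store.
metadata = {"date_created": datetime(2024, 11, 20, 11, 7, 47).isoformat()}

# Build a dataset carrying the metadata as attributes, as Parcels does
# internally when writing the output file.
ds = xr.Dataset(attrs=metadata)

print(ds.attrs["date_created"])  # 2024-11-20T11:07:47
```

Opening the real `Output.zarr` with `xr.open_zarr` exposes the same key through `ds.attrs`, alongside the attributes Parcels writes by default (`Conventions`, `parcels_version`, etc.).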