From e72fe9c6f82d9cdcefeba5777feeb96375bcf5b1 Mon Sep 17 00:00:00 2001 From: gvanhoy Date: Mon, 25 Sep 2023 09:29:38 -0400 Subject: [PATCH] Minor edits --- .../00_Sig53DatasetTutorial.ipynb | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/docs/00_dataset_tutorials/00_Sig53DatasetTutorial.ipynb b/docs/00_dataset_tutorials/00_Sig53DatasetTutorial.ipynb index 7475a21..da586dd 100644 --- a/docs/00_dataset_tutorials/00_Sig53DatasetTutorial.ipynb +++ b/docs/00_dataset_tutorials/00_Sig53DatasetTutorial.ipynb @@ -28,7 +28,7 @@ "#### Generating\n", "A Sig53-like dataset can be generated from scratch, on-the-fly, yielding a dataset with essentially inifinite number of unique exemplars (called samples in machine-learning circles) per class. The downside to generating things on-the-fly is that **training is slower.** \n", "\n", - "Before we jump into generating the whole dataset, let's start with a basic example which uses the ```ModulationsDataset```, which is the underlying class used to generate ```Sig53Dataset```. The class has a number of parameters" + "Before we jump into generating the whole dataset, let's start with a basic example which uses the ```ModulationsDataset```, which is the underlying class used to generate ```Sig53``` Dataset. The class has a number of parameters" ] }, { @@ -152,7 +152,7 @@ "metadata": {}, "source": [ "#### Iterating Over the Dataset\n", - "Now the Sig53 dataset is a specific configuration of the ```ModulationsDataset``` class. To keep the configuration fixed, we implement classes with fixed parameters of type ```Sig53Conf```. We use the ```Sig53CleanTrianQAConfig```. In this configuration, the dataset ```label``` is actually a Python ```tuple``` which includes the estimated SNR of the produced sample. \n", + "Now the Sig53 dataset is a specific configuration of the ```ModulationsDataset``` class. To keep the configuration fixed, we implement classes with fixed parameters of type ```Sig53Conf```. 
We use the ```Sig53CleanTrainQAConfig```. In this configuration, the dataset ```label``` is actually a Python ```tuple``` which includes the estimated SNR of the produced sample. \n", "\n", "Using this example, you can choose one of the fixed configurations and generate a Sig53-like dataset on-the-fly as you iterate over it, and train an ML model, store it, or perform some analysis on it!" ] @@ -180,8 +180,6 @@ ")\n", "\n", "data, (modulation, snr) = ds[0]\n", - "print(data)\n", - "print(modulation, snr)\n", "\n", "# Plot it.\n", "plt.subplot(3, 1, 1)\n", @@ -253,7 +251,7 @@ "\n", "# You can iterate through this in a similar way.\n", "# The DataLoader produces a torch.Tensor\n", - "loader = DataLoader(ds, batch_size=16, num_workers=16)\n", + "loader = DataLoader(ds, batch_size=os.cpu_count() // 2, num_workers=os.cpu_count() // 2)\n", "\n", "if os.path.exists(\"data.fc32\"):\n", " os.remove(\"data.fc32\")\n", @@ -277,7 +275,7 @@ "metadata": {}, "source": [ "#### Reading a Static Dataset\n", - "We've chosen to write our data as raw binary. We can write a Dataset class that allows us to iterate over that stored data. A Dataset class only needs to implement ```__init__``` and ```__getitem__```. We talk more about creating a custom dataset in a different tutorial." + "We've chosen to write our data as raw binary. We can write a Dataset class that allows us to iterate over that stored data. A Dataset class only needs to implement ```__init__```, ```__getitem__```, and ```__len__```. We talk more about creating a custom dataset in a different tutorial." 
] }, { @@ -304,6 +302,10 @@ " self.label_file = os.path.join(path, \"label.int8\")\n", "\n", " super().__init__(transform, target_transform, seed)\n", + " \n", + " def __len__(self) -> int:\n", + " # Each sample is 8 * 4096 * 2 bytes large.\n", + " return os.path.getsize(self.data_file) // 8 // 4096 // 2\n", "\n", " def __getitem__(self, index: int) -> Tuple:\n", " # the sample at index is 8192 double-precision floating-point numbers long.\n", @@ -448,7 +450,7 @@ "\n", "# Plot it.\n", "plt.subplot(3, 1, 1)\n", - "plt.title(\"Modulation Type = {}\".format(ModulationsDataset.default_classes[int(label)]))\n", + "plt.title(\"Modulation Type = {}\".format(ModulationsDataset.default_classes[int(mod)]))\n", "plt.ylabel(\"Time Domain\")\n", "plt.plot(data[:100].real, marker=\".\")\n", "plt.plot(data[:100].imag, marker=\".\")\n", @@ -475,14 +477,6 @@ "ax.set_xticks([])\n", "ax.set_yticks([])\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1fca0286", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {