diff --git a/geniusrise_text/instruction/bulk.py b/geniusrise_text/instruction/bulk.py
index 6c3ca2f..fb3f641 100644
--- a/geniusrise_text/instruction/bulk.py
+++ b/geniusrise_text/instruction/bulk.py
@@ -416,7 +416,50 @@ def perform_vllm(
         batch_size: int = 32,
         **kwargs: Any,
     ) -> None:
-        """ """
+        """
+        Performs bulk text generation using the vLLM inference engine, with parameters
+        for tuning model behavior, including quantization and parallel processing settings.
+        This method is designed to process large datasets efficiently by leveraging vLLM's
+        capabilities for generating high-quality text completions from the provided prompts.
+
+        Args:
+            model_name (str): The name or path of the vLLM model to use for text generation.
+            use_cuda (bool): Flag indicating whether to use CUDA for GPU acceleration.
+            precision (str): Precision of computations, e.g., "float16" or "bfloat16".
+            quantization (int): Level of quantization for model weights, 0 for none.
+            device_map (str | Dict | None): Specific device(s) to use for model inference.
+            vllm_tokenizer_mode (str): Mode of the tokenizer ("auto", "fast", or "slow").
+            vllm_download_dir (Optional[str]): Directory to download and load the model and tokenizer.
+            vllm_load_format (str): Format to load the model, e.g., "auto", "pt".
+            vllm_seed (int): Seed for random number generation.
+            vllm_max_model_len (int): Maximum sequence length the model can handle.
+            vllm_enforce_eager (bool): Enforce eager execution instead of using optimization techniques.
+            vllm_max_context_len_to_capture (int): Maximum context length for CUDA graph capture.
+            vllm_block_size (int): Block size for the caching mechanism.
+            vllm_gpu_memory_utilization (float): Fraction of GPU memory to use.
+            vllm_swap_space (int): Amount of swap space to use, in GiB.
+            vllm_sliding_window (Optional[int]): Size of the sliding window for processing.
+            vllm_pipeline_parallel_size (int): Number of pipeline parallel groups.
+            vllm_tensor_parallel_size (int): Number of tensor parallel groups.
+            vllm_worker_use_ray (bool): Whether to use Ray for model workers.
+            vllm_max_parallel_loading_workers (Optional[int]): Maximum number of workers for parallel loading.
+            vllm_disable_custom_all_reduce (bool): Disable the custom all-reduce kernel and fall back to NCCL.
+            vllm_max_num_batched_tokens (Optional[int]): Maximum number of tokens processed in a single iteration.
+            vllm_max_num_seqs (int): Maximum number of sequences processed in a single iteration.
+            vllm_max_paddings (int): Maximum number of paddings added to a batch.
+            vllm_max_lora_rank (Optional[int]): Maximum rank for LoRA adjustments.
+            vllm_max_loras (Optional[int]): Maximum number of LoRA adjustments.
+            vllm_max_cpu_loras (Optional[int]): Maximum number of LoRA adjustments stored on CPU.
+            vllm_lora_extra_vocab_size (int): Additional vocabulary size for LoRA.
+            vllm_placement_group (Optional[dict]): Ray placement group for distributed execution.
+            vllm_log_stats (bool): Whether to log statistics during model operation.
+            notification_email (Optional[str]): Email address to notify upon completion.
+            batch_size (int): Number of prompts to process per batch for efficient memory usage.
+            **kwargs: Additional keyword arguments for generation settings such as temperature, top_p, etc.
+
+        This method automates loading large datasets, generating text completions, and
+        saving the results, enabling efficient and scalable text generation.
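+
+        Example:
+            A minimal sketch, assuming ``bolt`` is an initialized instance of this
+            class and that the remaining ``vllm_*`` parameters have usable defaults;
+            the model name and sampling values below are illustrative only::
+
+                bolt.perform_vllm(
+                    model_name="mistralai/Mistral-7B-Instruct-v0.1",
+                    use_cuda=True,
+                    precision="float16",
+                    quantization=0,
+                    device_map="auto",
+                    batch_size=32,
+                    # extra kwargs are passed through as generation settings
+                    temperature=0.7,
+                    top_p=0.9,
+                )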
+ """ if ":" in model_name: model_revision = model_name.split(":")[1] tokenizer_revision = model_name.split(":")[1]