Commit 399d0c1 (0 parents): 493 changed files with 88,891 additions and 0 deletions.
@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 7254a81eeaa30845194afa3d93625a5e
tags: 645f666f9bcd5a90fca523b33c5a78b7
[Binary files: numerous `.doctrees/*.doctree` Sphinx build artifacts added (contents not shown), plus one empty file.]
@@ -0,0 +1,21 @@
# Add a Dataset
Adding a dataset involves two main steps:
1. Uploading the dataset to the Hub and providing a loading script.
2. Providing a proper dataset class in Hezar.

## Uploading the dataset to the Hub
Datasets of different types require different formats for their raw files and annotations. In Hezar, we prefer
uploading the files to the dataset's own repo. The way datasets are provided on the Hub is really up to you,
but conventionally, it's better to follow the same procedure for every dataset. The recommended way is to take
inspiration from other datasets on the Hub that cover a similar task, whether provided by Hezar or by others.
Some notes to consider:
- Providing zip files rather than folders of raw files is recommended.
- For datasets containing raw files like images, audio files, etc., use a CSV annotation file mapping files to labels (see the example below this list).
- Providing both train and test splits is a must, but a validation split is optional.
- Put all files in the `data` folder and put `X_train.zip`, `X_test.zip`, `X_validation.zip` inside it, or put all files named after splits inside a `data.zip` file.
- Don't forget to provide a dataset card (`README.md`) and specify properties such as task, license, tags, etc.
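For instance, an annotation file for an image dataset could look something like this (a purely hypothetical layout; the column names, paths, and labels are up to you):
```
path,label
images/00001.jpg,positive
images/00002.jpg,negative
```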

## Providing a loading script
Hezar has some ready-to-use templates for dataset loading scripts. You can find them [here](https://github.com/hezarai/hezar/tree/main/templates/dataset_scripts).
You can learn more about dataset loading scripts [here](https://huggingface.co/docs/datasets/dataset_script).
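For a rough idea of what such a script looks like, here is a minimal sketch based on the Hugging Face `datasets` builder API (not one of the official Hezar templates; the file names and column names are made-up assumptions):
```python
import csv

import datasets


class MyDataset(datasets.GeneratorBasedBuilder):
    """A hypothetical text classification dataset with CSV files under `data/`."""

    def _info(self):
        return datasets.DatasetInfo(
            features=datasets.Features(
                {"text": datasets.Value("string"), "label": datasets.Value("string")}
            )
        )

    def _split_generators(self, dl_manager):
        # `data/train.csv` and `data/test.csv` are assumed paths inside the dataset repo
        files = dl_manager.download_and_extract(
            {"train": "data/train.csv", "test": "data/test.csv"}
        )
        return [
            datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"path": files["train"]}),
            datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"path": files["test"]}),
        ]

    def _generate_examples(self, path):
        # Yield one (key, example) pair per CSV row
        with open(path, encoding="utf-8") as f:
            for idx, row in enumerate(csv.DictReader(f)):
                yield idx, {"text": row["text"], "label": row["label"]}
```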
It's recommended to upload the dataset to the Hub and test that it loads properly.
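For example, a quick sanity check with the plain `datasets` library could look like this (the repo id is a placeholder; depending on your `datasets` version you may also need `trust_remote_code=True`):
```python
from datasets import load_dataset

# "your-username/your-dataset" is a placeholder; replace it with the actual repo id on the Hub
dataset = load_dataset("your-username/your-dataset", split="train")
print(dataset[0])  # inspect one sample to make sure fields and labels look right
```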
@@ -0,0 +1 @@
# Contribute to Docs
@@ -0,0 +1 @@
# Add a Model
@@ -0,0 +1 @@
# Add Tests
@@ -0,0 +1 @@
# Contribute to Hezar
@@ -0,0 +1,10 @@
# Contribute

```{toctree}
contribute_to_hezar.md
add_models.md
add_datasets.md
add_docs.md
add_tests.md
pull_requests.md
```
@@ -0,0 +1 @@
# Sending a Pull Request
@@ -0,0 +1,8 @@
# Get Started
```{toctree}
:maxdepth: 1

overview.md
installation.md
quick_tour.md
```
@@ -0,0 +1,41 @@
# Installation

## Install from PyPI
Installing Hezar is as easy as any other Python library! Most of the requirements are cross-platform and installing
them on any machine is a piece of cake!

```
pip install hezar
```
### Installation variations
Hezar is packed with a lot of tools that depend on other packages. Most of the time you might not want
everything to be installed, so Hezar comes in multiple installation variants to keep the installation
light and fast for general use.

You can install the optional dependencies for each mode like so:
```
pip install hezar[nlp]  # For natural language processing
pip install hezar[vision]  # For computer vision and image processing
pip install hezar[audio]  # For audio and speech processing
pip install hezar[embeddings]  # For word embeddings
```
Or you can install everything using:
```
pip install hezar[all]
```
## Install from source
You can also install the development version of the library from source:
```
pip install git+https://github.com/hezarai/hezar.git
```

## Test installation
From a Python console or the CLI, just import `hezar` and check the version:
```python
import hezar

print(hezar.__version__)
```
```
0.23.1
```
@@ -0,0 +1,20 @@
# Overview

Welcome to Hezar! A library that makes state-of-the-art machine learning as easy as possible, aimed at the Persian
language and built by the Persian community!

In Hezar, the primary goal is to provide plug-and-play AI/ML utilities so that you don't need to know much about what's
going on under the hood. Hezar is not just a model library; it's packed with every aspect you need for any
ML pipeline, like datasets, trainers, preprocessors, feature extractors, etc.

Hezar is a library that:
- brings together all the best works in AI for Persian
- makes using AI models as easy as a couple of lines of code
- seamlessly integrates with Hugging Face Hub for all of its models
- has a highly developer-friendly interface
- has a task-based model interface which is more convenient for general users
- is packed with additional tools like word embeddings, tokenizers, feature extractors, etc.
- comes with a lot of supplementary ML tools for deployment, benchmarking, optimization, etc.
- and more!

To find out more, just take the [quick tour](quick_tour.md)!
@@ -0,0 +1,190 @@
# Quick Tour
Let's take a quick tour of some of the most important features of Hezar!

### Models
There are a bunch of ready-to-use trained models for different tasks on the Hub. You can see all the models [here](https://huggingface.co/hezarai)!

- **Text classification (sentiment analysis, categorization, etc.)**
```python
from hezar import Model

example = ["هزار، کتابخانهای کامل برای به کارگیری آسان هوش مصنوعی"]
model = Model.load("hezarai/bert-fa-sentiment-dksf")
outputs = model.predict(example)
print(outputs)
```
```
{'labels': ['positive'], 'probs': [0.812910258769989]}
```
- **Sequence Labeling (POS, NER, etc.)**
```python
from hezar import Model

pos_model = Model.load("hezarai/bert-fa-pos-lscp-500k")  # Part-of-speech
ner_model = Model.load("hezarai/bert-fa-ner-arman")  # Named entity recognition
inputs = ["شرکت هوش مصنوعی هزار"]
pos_outputs = pos_model.predict(inputs)
ner_outputs = ner_model.predict(inputs)
print(f"POS: {pos_outputs}")
print(f"NER: {ner_outputs}")
```
```
POS: [[{'token': 'شرکت', 'tag': 'Ne'}, {'token': 'هوش', 'tag': 'Ne'}, {'token': 'مصنوعی', 'tag': 'AJe'}, {'token': 'هزار', 'tag': 'NUM'}]]
NER: [[{'token': 'شرکت', 'tag': 'B-org'}, {'token': 'هوش', 'tag': 'I-org'}, {'token': 'مصنوعی', 'tag': 'I-org'}, {'token': 'هزار', 'tag': 'I-org'}]]
```
- **Language Modeling**
```python
from hezar import Model

roberta_mlm = Model.load("hezarai/roberta-fa-mlm")
inputs = ["سلام بچه ها حالتون <mask>"]
outputs = roberta_mlm.predict(inputs)
print(outputs)
```
```
{'filled_texts': ['سلام بچه ها حالتون چطوره'], 'filled_tokens': [' چطوره']}
```
- **Speech Recognition**
```python
from hezar import Model

whisper = Model.load("hezarai/whisper-small-fa")
transcripts = whisper.predict("examples/assets/speech_example.mp3")
print(transcripts)
```
```
{'transcripts': ['و این تنها محدود به محیط کار نیست']}
```
- **Image to Text (OCR)**
```python
from hezar import Model

# OCR with TrOCR
model = Model.load("hezarai/trocr-base-fa-v1")
texts = model.predict(["examples/assets/ocr_example.jpg"])
print(f"TrOCR Output: {texts}")

# OCR with CRNN
model = Model.load("hezarai/crnn-base-fa-64x256")
texts = model.predict("examples/assets/ocr_example.jpg")
print(f"CRNN Output: {texts}")
```
```
TrOCR Output: {'texts': [' چه میشه کرد، باید صبر کنیم']}
CRNN Output: {'texts': ['چه میشه کرد، باید صبر کنیم']}
```
- **Image to Text (Image Captioning)**
```python
from hezar import Model

model = Model.load("hezarai/vit-roberta-fa-image-captioning-flickr30k")
texts = model.predict("examples/assets/image_captioning_example.jpg")
print(texts)
```
```
{'texts': ['سگی با توپ تنیس در دهانش می دود.']}
```
We're constantly working on adding and training new models, and this section will hopefully keep expanding over time ;)

### Word Embeddings
- **FastText**
```python
from hezar import Embedding

fasttext = Embedding.load("hezarai/fasttext-fa-300")
most_similar = fasttext.most_similar("هزار")
print(most_similar)
```
```
[{'score': 0.7579, 'word': 'میلیون'},
{'score': 0.6943, 'word': '21هزار'},
{'score': 0.6861, 'word': 'میلیارد'},
{'score': 0.6825, 'word': '26هزار'},
{'score': 0.6803, 'word': '٣هزار'}]
```
- **Word2Vec (Skip-gram)**
```python
from hezar import Embedding

word2vec = Embedding.load("hezarai/word2vec-skipgram-fa-wikipedia")
most_similar = word2vec.most_similar("هزار")
print(most_similar)
```
```
[{'score': 0.7885, 'word': 'چهارهزار'},
{'score': 0.7788, 'word': '۱۰هزار'},
{'score': 0.7727, 'word': 'دویست'},
{'score': 0.7679, 'word': 'میلیون'},
{'score': 0.7602, 'word': 'پانصد'}]
```
- **Word2Vec (CBOW)**
```python
from hezar import Embedding

word2vec = Embedding.load("hezarai/word2vec-cbow-fa-wikipedia")
most_similar = word2vec.most_similar("هزار")
print(most_similar)
```
```
[{'score': 0.7407, 'word': 'دویست'},
{'score': 0.7400, 'word': 'میلیون'},
{'score': 0.7326, 'word': 'صد'},
{'score': 0.7276, 'word': 'پانصد'},
{'score': 0.7011, 'word': 'سیصد'}]
```

### Datasets
You can load any of the datasets on the [Hub](https://huggingface.co/hezarai) like below:
```python
from hezar import Dataset

sentiment_dataset = Dataset.load("hezarai/sentiment-dksf")  # A TextClassificationDataset instance
lscp_dataset = Dataset.load("hezarai/lscp-pos-500k")  # A SequenceLabelingDataset instance
xlsum_dataset = Dataset.load("hezarai/xlsum-fa")  # A TextSummarizationDataset instance
...
```

### Training
Hezar makes it super easy to train models using out-of-the-box models and datasets provided in the library.
```python
from hezar import (
    BertSequenceLabeling,
    BertSequenceLabelingConfig,
    Trainer,
    TrainerConfig,
    Dataset,
    Preprocessor,
)

base_model_path = "hezarai/bert-base-fa"
dataset_path = "hezarai/lscp-pos-500k"

train_dataset = Dataset.load(dataset_path, split="train", tokenizer_path=base_model_path)
eval_dataset = Dataset.load(dataset_path, split="test", tokenizer_path=base_model_path)

model = BertSequenceLabeling(BertSequenceLabelingConfig(id2label=train_dataset.config.id2label))
preprocessor = Preprocessor.load(base_model_path)

train_config = TrainerConfig(
    task="sequence_labeling",
    device="cuda",
    init_weights_from=base_model_path,
    batch_size=8,
    num_epochs=5,
    checkpoints_dir="checkpoints/",
    metrics=["seqeval"],
)

trainer = Trainer(
    config=train_config,
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=train_dataset.data_collator,
    preprocessor=preprocessor,
)
trainer.train()

trainer.push_to_hub("bert-fa-pos-lscp-500k")  # push model, config, preprocessor, trainer files and configs
```

Want to go deeper? Check out the [guides](../guide/index.md).
@@ -0,0 +1,2 @@
# Advanced Training
Docs coming soon, stay tuned!