diff --git a/.buildinfo b/.buildinfo
new file mode 100644
index 00000000..19a85665
--- /dev/null
+++ b/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: 46dac3e782f238b726e25024d3fa6112
+tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/.doctrees/contributing.doctree b/.doctrees/contributing.doctree
new file mode 100644
index 00000000..15526f40
Binary files /dev/null and b/.doctrees/contributing.doctree differ
diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle
new file mode 100644
index 00000000..6519348e
Binary files /dev/null and b/.doctrees/environment.pickle differ
diff --git a/.doctrees/get_started/index.doctree b/.doctrees/get_started/index.doctree
new file mode 100644
index 00000000..0d634fdb
Binary files /dev/null and b/.doctrees/get_started/index.doctree differ
diff --git a/.doctrees/get_started/installation.doctree b/.doctrees/get_started/installation.doctree
new file mode 100644
index 00000000..7bf3367d
Binary files /dev/null and b/.doctrees/get_started/installation.doctree differ
diff --git a/.doctrees/get_started/overview.doctree b/.doctrees/get_started/overview.doctree
new file mode 100644
index 00000000..f92b8f01
Binary files /dev/null and b/.doctrees/get_started/overview.doctree differ
diff --git a/.doctrees/get_started/quick_tour.doctree b/.doctrees/get_started/quick_tour.doctree
new file mode 100644
index 00000000..ce9ce9d4
Binary files /dev/null and b/.doctrees/get_started/quick_tour.doctree differ
diff --git a/.doctrees/guide/advanced_training.doctree b/.doctrees/guide/advanced_training.doctree
new file mode 100644
index 00000000..a8b785f5
Binary files /dev/null and b/.doctrees/guide/advanced_training.doctree differ
diff --git a/.doctrees/guide/hezar_architecture.doctree b/.doctrees/guide/hezar_architecture.doctree
new file mode 100644
index 00000000..429154e1
Binary files /dev/null and b/.doctrees/guide/hezar_architecture.doctree differ
diff --git a/.doctrees/guide/index.doctree b/.doctrees/guide/index.doctree
new file mode 100644
index 00000000..49161331
Binary files /dev/null and b/.doctrees/guide/index.doctree differ
diff --git a/.doctrees/guide/models_advanced.doctree b/.doctrees/guide/models_advanced.doctree
new file mode 100644
index 00000000..e22e0639
Binary files /dev/null and b/.doctrees/guide/models_advanced.doctree differ
diff --git a/.doctrees/guide/trainer_in_depth.doctree b/.doctrees/guide/trainer_in_depth.doctree
new file mode 100644
index 00000000..aaa88628
Binary files /dev/null and b/.doctrees/guide/trainer_in_depth.doctree differ
diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree
new file mode 100644
index 00000000..2ee06a63
Binary files /dev/null and b/.doctrees/index.doctree differ
diff --git a/.doctrees/source/hezar.builders.doctree b/.doctrees/source/hezar.builders.doctree
new file mode 100644
index 00000000..27211409
Binary files /dev/null and b/.doctrees/source/hezar.builders.doctree differ
diff --git a/.doctrees/source/hezar.configs.doctree b/.doctrees/source/hezar.configs.doctree
new file mode 100644
index 00000000..db391ca1
Binary files /dev/null and b/.doctrees/source/hezar.configs.doctree differ
diff --git a/.doctrees/source/hezar.constants.doctree b/.doctrees/source/hezar.constants.doctree
new file mode 100644
index 00000000..afdcdcac
Binary files /dev/null and b/.doctrees/source/hezar.constants.doctree differ
diff --git a/.doctrees/source/hezar.data.data_collators.doctree b/.doctrees/source/hezar.data.data_collators.doctree
new file mode 100644
index 00000000..e6a0ba83
Binary files /dev/null and b/.doctrees/source/hezar.data.data_collators.doctree differ
diff --git a/.doctrees/source/hezar.data.datasets.dataset.doctree b/.doctrees/source/hezar.data.datasets.dataset.doctree
new file mode 100644
index 00000000..938d0aa4
Binary files /dev/null and b/.doctrees/source/hezar.data.datasets.dataset.doctree differ
diff --git a/.doctrees/source/hezar.data.datasets.doctree b/.doctrees/source/hezar.data.datasets.doctree
new file mode 100644
index 00000000..6058e31e
Binary files /dev/null and b/.doctrees/source/hezar.data.datasets.doctree differ
diff --git a/.doctrees/source/hezar.data.datasets.image_captioning_dataset.doctree b/.doctrees/source/hezar.data.datasets.image_captioning_dataset.doctree
new file mode 100644
index 00000000..8dcc5da9
Binary files /dev/null and b/.doctrees/source/hezar.data.datasets.image_captioning_dataset.doctree differ
diff --git a/.doctrees/source/hezar.data.datasets.ocr_dataset.doctree b/.doctrees/source/hezar.data.datasets.ocr_dataset.doctree
new file mode 100644
index 00000000..b3ea6385
Binary files /dev/null and b/.doctrees/source/hezar.data.datasets.ocr_dataset.doctree differ
diff --git a/.doctrees/source/hezar.data.datasets.sequence_labeling_dataset.doctree b/.doctrees/source/hezar.data.datasets.sequence_labeling_dataset.doctree
new file mode 100644
index 00000000..84677937
Binary files /dev/null and b/.doctrees/source/hezar.data.datasets.sequence_labeling_dataset.doctree differ
diff --git a/.doctrees/source/hezar.data.datasets.text_classification_dataset.doctree b/.doctrees/source/hezar.data.datasets.text_classification_dataset.doctree
new file mode 100644
index 00000000..0343701d
Binary files /dev/null and b/.doctrees/source/hezar.data.datasets.text_classification_dataset.doctree differ
diff --git a/.doctrees/source/hezar.data.datasets.text_summarization_dataset.doctree b/.doctrees/source/hezar.data.datasets.text_summarization_dataset.doctree
new file mode 100644
index 00000000..77ca2f48
Binary files /dev/null and b/.doctrees/source/hezar.data.datasets.text_summarization_dataset.doctree differ
diff --git a/.doctrees/source/hezar.data.doctree b/.doctrees/source/hezar.data.doctree
new file mode 100644
index 00000000..6a6de533
Binary files /dev/null and b/.doctrees/source/hezar.data.doctree differ
diff --git a/.doctrees/source/hezar.doctree b/.doctrees/source/hezar.doctree
new file mode 100644
index 00000000..35f66441
Binary files /dev/null and b/.doctrees/source/hezar.doctree differ
diff --git a/.doctrees/source/hezar.embeddings.doctree b/.doctrees/source/hezar.embeddings.doctree
new file mode 100644
index 00000000..f8fafab7
Binary files /dev/null and b/.doctrees/source/hezar.embeddings.doctree differ
diff --git a/.doctrees/source/hezar.embeddings.embedding.doctree b/.doctrees/source/hezar.embeddings.embedding.doctree
new file mode 100644
index 00000000..afd60551
Binary files /dev/null and b/.doctrees/source/hezar.embeddings.embedding.doctree differ
diff --git a/.doctrees/source/hezar.embeddings.fasttext.doctree b/.doctrees/source/hezar.embeddings.fasttext.doctree
new file mode 100644
index 00000000..07b51177
Binary files /dev/null and b/.doctrees/source/hezar.embeddings.fasttext.doctree differ
diff --git a/.doctrees/source/hezar.embeddings.word2vec.doctree b/.doctrees/source/hezar.embeddings.word2vec.doctree
new file mode 100644
index 00000000..462a5e5f
Binary files /dev/null and b/.doctrees/source/hezar.embeddings.word2vec.doctree differ
diff --git a/.doctrees/source/hezar.metrics.accuracy.doctree b/.doctrees/source/hezar.metrics.accuracy.doctree
new file mode 100644
index 00000000..236f4e37
Binary files /dev/null and b/.doctrees/source/hezar.metrics.accuracy.doctree differ
diff --git a/.doctrees/source/hezar.metrics.bleu.doctree b/.doctrees/source/hezar.metrics.bleu.doctree
new file mode 100644
index 00000000..f4592e6f
Binary files /dev/null and b/.doctrees/source/hezar.metrics.bleu.doctree differ
diff --git a/.doctrees/source/hezar.metrics.cer.doctree b/.doctrees/source/hezar.metrics.cer.doctree
new file mode 100644
index 00000000..ec1d621d
Binary files /dev/null and b/.doctrees/source/hezar.metrics.cer.doctree differ
diff --git a/.doctrees/source/hezar.metrics.doctree b/.doctrees/source/hezar.metrics.doctree
new file mode 100644
index 00000000..c59748b8
Binary files /dev/null and b/.doctrees/source/hezar.metrics.doctree differ
diff --git a/.doctrees/source/hezar.metrics.f1.doctree b/.doctrees/source/hezar.metrics.f1.doctree
new file mode 100644
index 00000000..290b8697
Binary files /dev/null and b/.doctrees/source/hezar.metrics.f1.doctree differ
diff --git a/.doctrees/source/hezar.metrics.metric.doctree b/.doctrees/source/hezar.metrics.metric.doctree
new file mode 100644
index 00000000..135ef59e
Binary files /dev/null and b/.doctrees/source/hezar.metrics.metric.doctree differ
diff --git a/.doctrees/source/hezar.metrics.precision.doctree b/.doctrees/source/hezar.metrics.precision.doctree
new file mode 100644
index 00000000..0088431a
Binary files /dev/null and b/.doctrees/source/hezar.metrics.precision.doctree differ
diff --git a/.doctrees/source/hezar.metrics.recall.doctree b/.doctrees/source/hezar.metrics.recall.doctree
new file mode 100644
index 00000000..4de3a90b
Binary files /dev/null and b/.doctrees/source/hezar.metrics.recall.doctree differ
diff --git a/.doctrees/source/hezar.metrics.rouge.doctree b/.doctrees/source/hezar.metrics.rouge.doctree
new file mode 100644
index 00000000..d81fd694
Binary files /dev/null and b/.doctrees/source/hezar.metrics.rouge.doctree differ
diff --git a/.doctrees/source/hezar.metrics.seqeval.doctree b/.doctrees/source/hezar.metrics.seqeval.doctree
new file mode 100644
index 00000000..10bf520a
Binary files /dev/null and b/.doctrees/source/hezar.metrics.seqeval.doctree differ
diff --git a/.doctrees/source/hezar.metrics.wer.doctree b/.doctrees/source/hezar.metrics.wer.doctree
new file mode 100644
index 00000000..5cf44ed6
Binary files /dev/null and b/.doctrees/source/hezar.metrics.wer.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.bert.bert.doctree b/.doctrees/source/hezar.models.backbone.bert.bert.doctree
new file mode 100644
index 00000000..f966ef5d
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.bert.bert.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.bert.bert_config.doctree b/.doctrees/source/hezar.models.backbone.bert.bert_config.doctree
new file mode 100644
index 00000000..06b8e2d2
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.bert.bert_config.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.bert.doctree b/.doctrees/source/hezar.models.backbone.bert.doctree
new file mode 100644
index 00000000..f2396572
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.bert.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.distilbert.distilbert.doctree b/.doctrees/source/hezar.models.backbone.distilbert.distilbert.doctree
new file mode 100644
index 00000000..9ee27588
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.distilbert.distilbert.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.distilbert.distilbert_config.doctree b/.doctrees/source/hezar.models.backbone.distilbert.distilbert_config.doctree
new file mode 100644
index 00000000..6591ad74
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.distilbert.distilbert_config.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.distilbert.doctree b/.doctrees/source/hezar.models.backbone.distilbert.doctree
new file mode 100644
index 00000000..e486eba7
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.distilbert.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.doctree b/.doctrees/source/hezar.models.backbone.doctree
new file mode 100644
index 00000000..1ab3b012
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.roberta.doctree b/.doctrees/source/hezar.models.backbone.roberta.doctree
new file mode 100644
index 00000000..1750deef
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.roberta.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.roberta.roberta.doctree b/.doctrees/source/hezar.models.backbone.roberta.roberta.doctree
new file mode 100644
index 00000000..8bce6f80
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.roberta.roberta.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.roberta.roberta_config.doctree b/.doctrees/source/hezar.models.backbone.roberta.roberta_config.doctree
new file mode 100644
index 00000000..ea7ea4d3
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.roberta.roberta_config.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.vit.doctree b/.doctrees/source/hezar.models.backbone.vit.doctree
new file mode 100644
index 00000000..829f2e10
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.vit.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.vit.vit.doctree b/.doctrees/source/hezar.models.backbone.vit.vit.doctree
new file mode 100644
index 00000000..c6d13463
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.vit.vit.doctree differ
diff --git a/.doctrees/source/hezar.models.backbone.vit.vit_config.doctree b/.doctrees/source/hezar.models.backbone.vit.vit_config.doctree
new file mode 100644
index 00000000..90ed197a
Binary files /dev/null and b/.doctrees/source/hezar.models.backbone.vit.vit_config.doctree differ
diff --git a/.doctrees/source/hezar.models.doctree b/.doctrees/source/hezar.models.doctree
new file mode 100644
index 00000000..8cda518f
Binary files /dev/null and b/.doctrees/source/hezar.models.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text.doctree b/.doctrees/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text.doctree
new file mode 100644
index 00000000..ce08d2ce
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text_config.doctree b/.doctrees/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text_config.doctree
new file mode 100644
index 00000000..468b2157
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text_config.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.beit_roberta.doctree b/.doctrees/source/hezar.models.image2text.beit_roberta.doctree
new file mode 100644
index 00000000..752d258e
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.beit_roberta.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.crnn.crnn_decode_utils.doctree b/.doctrees/source/hezar.models.image2text.crnn.crnn_decode_utils.doctree
new file mode 100644
index 00000000..2cb1e7df
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.crnn.crnn_decode_utils.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.crnn.crnn_image2text.doctree b/.doctrees/source/hezar.models.image2text.crnn.crnn_image2text.doctree
new file mode 100644
index 00000000..df902234
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.crnn.crnn_image2text.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.crnn.crnn_image2text_config.doctree b/.doctrees/source/hezar.models.image2text.crnn.crnn_image2text_config.doctree
new file mode 100644
index 00000000..dbacb49e
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.crnn.crnn_image2text_config.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.crnn.doctree b/.doctrees/source/hezar.models.image2text.crnn.doctree
new file mode 100644
index 00000000..5f464fec
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.crnn.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.doctree b/.doctrees/source/hezar.models.image2text.doctree
new file mode 100644
index 00000000..31d325c9
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.trocr.doctree b/.doctrees/source/hezar.models.image2text.trocr.doctree
new file mode 100644
index 00000000..f277f7d8
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.trocr.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.trocr.trocr_image2text.doctree b/.doctrees/source/hezar.models.image2text.trocr.trocr_image2text.doctree
new file mode 100644
index 00000000..9a326233
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.trocr.trocr_image2text.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.trocr.trocr_image2text_config.doctree b/.doctrees/source/hezar.models.image2text.trocr.trocr_image2text_config.doctree
new file mode 100644
index 00000000..a02dcacf
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.trocr.trocr_image2text_config.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.vit_gpt2.doctree b/.doctrees/source/hezar.models.image2text.vit_gpt2.doctree
new file mode 100644
index 00000000..05e4732c
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.vit_gpt2.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text.doctree b/.doctrees/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text.doctree
new file mode 100644
index 00000000..775f5baf
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config.doctree b/.doctrees/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config.doctree
new file mode 100644
index 00000000..598a8ad7
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.vit_roberta.doctree b/.doctrees/source/hezar.models.image2text.vit_roberta.doctree
new file mode 100644
index 00000000..939393d4
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.vit_roberta.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text.doctree b/.doctrees/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text.doctree
new file mode 100644
index 00000000..66655542
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text.doctree differ
diff --git a/.doctrees/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text_config.doctree b/.doctrees/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text_config.doctree
new file mode 100644
index 00000000..9c7b0cc1
Binary files /dev/null and b/.doctrees/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text_config.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.bert.bert_mask_filling.doctree b/.doctrees/source/hezar.models.mask_filling.bert.bert_mask_filling.doctree
new file mode 100644
index 00000000..63500ac0
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.bert.bert_mask_filling.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.bert.bert_mask_filling_config.doctree b/.doctrees/source/hezar.models.mask_filling.bert.bert_mask_filling_config.doctree
new file mode 100644
index 00000000..097c4891
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.bert.bert_mask_filling_config.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.bert.doctree b/.doctrees/source/hezar.models.mask_filling.bert.doctree
new file mode 100644
index 00000000..45f0cbff
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.bert.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling.doctree b/.doctrees/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling.doctree
new file mode 100644
index 00000000..41336a57
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling_config.doctree b/.doctrees/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling_config.doctree
new file mode 100644
index 00000000..39a437a5
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling_config.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.distilbert.doctree b/.doctrees/source/hezar.models.mask_filling.distilbert.doctree
new file mode 100644
index 00000000..94b49bf9
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.distilbert.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.doctree b/.doctrees/source/hezar.models.mask_filling.doctree
new file mode 100644
index 00000000..9bba3294
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.roberta.doctree b/.doctrees/source/hezar.models.mask_filling.roberta.doctree
new file mode 100644
index 00000000..b977418d
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.roberta.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.roberta.roberta_mask_filling.doctree b/.doctrees/source/hezar.models.mask_filling.roberta.roberta_mask_filling.doctree
new file mode 100644
index 00000000..84a231cd
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.roberta.roberta_mask_filling.doctree differ
diff --git a/.doctrees/source/hezar.models.mask_filling.roberta.roberta_mask_filling_config.doctree b/.doctrees/source/hezar.models.mask_filling.roberta.roberta_mask_filling_config.doctree
new file mode 100644
index 00000000..beef27f8
Binary files /dev/null and b/.doctrees/source/hezar.models.mask_filling.roberta.roberta_mask_filling_config.doctree differ
diff --git a/.doctrees/source/hezar.models.model.doctree b/.doctrees/source/hezar.models.model.doctree
new file mode 100644
index 00000000..4483888e
Binary files /dev/null and b/.doctrees/source/hezar.models.model.doctree differ
diff --git a/.doctrees/source/hezar.models.model_outputs.doctree b/.doctrees/source/hezar.models.model_outputs.doctree
new file mode 100644
index 00000000..9c362018
Binary files /dev/null and b/.doctrees/source/hezar.models.model_outputs.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling.doctree b/.doctrees/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling.doctree
new file mode 100644
index 00000000..c9f2eb5b
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling_config.doctree b/.doctrees/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling_config.doctree
new file mode 100644
index 00000000..612a16da
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling_config.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.bert.doctree b/.doctrees/source/hezar.models.sequence_labeling.bert.doctree
new file mode 100644
index 00000000..a894ba29
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.bert.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling.doctree b/.doctrees/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling.doctree
new file mode 100644
index 00000000..a2511597
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config.doctree b/.doctrees/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config.doctree
new file mode 100644
index 00000000..b692819a
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.distilbert.doctree b/.doctrees/source/hezar.models.sequence_labeling.distilbert.doctree
new file mode 100644
index 00000000..dd03f64e
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.distilbert.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.doctree b/.doctrees/source/hezar.models.sequence_labeling.doctree
new file mode 100644
index 00000000..c05f92fc
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.roberta.doctree b/.doctrees/source/hezar.models.sequence_labeling.roberta.doctree
new file mode 100644
index 00000000..af792ac9
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.roberta.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling.doctree b/.doctrees/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling.doctree
new file mode 100644
index 00000000..1c345e56
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling.doctree differ
diff --git a/.doctrees/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config.doctree b/.doctrees/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config.doctree
new file mode 100644
index 00000000..05a0ee4e
Binary files /dev/null and b/.doctrees/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config.doctree differ
diff --git a/.doctrees/source/hezar.models.speech_recognition.doctree b/.doctrees/source/hezar.models.speech_recognition.doctree
new file mode 100644
index 00000000..4cd80fb7
Binary files /dev/null and b/.doctrees/source/hezar.models.speech_recognition.doctree differ
diff --git a/.doctrees/source/hezar.models.speech_recognition.whisper.doctree b/.doctrees/source/hezar.models.speech_recognition.whisper.doctree
new file mode 100644
index 00000000..e6c7a7ab
Binary files /dev/null and b/.doctrees/source/hezar.models.speech_recognition.whisper.doctree differ
diff --git a/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_feature_extractor.doctree b/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_feature_extractor.doctree
new file mode 100644
index 00000000..0a734f57
Binary files /dev/null and b/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_feature_extractor.doctree differ
diff --git a/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition.doctree b/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition.doctree
new file mode 100644
index 00000000..00c3f203
Binary files /dev/null and b/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition.doctree differ
diff --git a/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.doctree b/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.doctree
new file mode 100644
index 00000000..01e8f005
Binary files /dev/null and b/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.doctree differ
diff --git a/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_tokenizer.doctree b/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_tokenizer.doctree
new file mode 100644
index 00000000..ae383da7
Binary files /dev/null and b/.doctrees/source/hezar.models.speech_recognition.whisper.whisper_tokenizer.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.bert.bert_text_classification.doctree b/.doctrees/source/hezar.models.text_classification.bert.bert_text_classification.doctree
new file mode 100644
index 00000000..60d34a5a
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.bert.bert_text_classification.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.bert.bert_text_classification_config.doctree b/.doctrees/source/hezar.models.text_classification.bert.bert_text_classification_config.doctree
new file mode 100644
index 00000000..9efbe60d
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.bert.bert_text_classification_config.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.bert.doctree b/.doctrees/source/hezar.models.text_classification.bert.doctree
new file mode 100644
index 00000000..905d80bf
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.bert.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.distilbert.distilbert_text_classification.doctree b/.doctrees/source/hezar.models.text_classification.distilbert.distilbert_text_classification.doctree
new file mode 100644
index 00000000..82e73d24
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.distilbert.distilbert_text_classification.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.distilbert.distilbert_text_classification_config.doctree b/.doctrees/source/hezar.models.text_classification.distilbert.distilbert_text_classification_config.doctree
new file mode 100644
index 00000000..49e6665f
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.distilbert.distilbert_text_classification_config.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.distilbert.doctree b/.doctrees/source/hezar.models.text_classification.distilbert.doctree
new file mode 100644
index 00000000..fa0b6137
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.distilbert.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.doctree b/.doctrees/source/hezar.models.text_classification.doctree
new file mode 100644
index 00000000..3689e1cc
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.roberta.doctree b/.doctrees/source/hezar.models.text_classification.roberta.doctree
new file mode 100644
index 00000000..24544da1
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.roberta.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.roberta.roberta_text_classification.doctree b/.doctrees/source/hezar.models.text_classification.roberta.roberta_text_classification.doctree
new file mode 100644
index 00000000..a1298d23
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.roberta.roberta_text_classification.doctree differ
diff --git a/.doctrees/source/hezar.models.text_classification.roberta.roberta_text_classification_config.doctree b/.doctrees/source/hezar.models.text_classification.roberta.roberta_text_classification_config.doctree
new file mode 100644
index 00000000..2c28991b
Binary files /dev/null and b/.doctrees/source/hezar.models.text_classification.roberta.roberta_text_classification_config.doctree differ
diff --git a/.doctrees/source/hezar.models.text_embedding.doctree b/.doctrees/source/hezar.models.text_embedding.doctree
new file mode 100644
index 00000000..2faaa5fa
Binary files /dev/null and b/.doctrees/source/hezar.models.text_embedding.doctree differ
diff --git a/.doctrees/source/hezar.models.text_generation.doctree b/.doctrees/source/hezar.models.text_generation.doctree
new file mode 100644
index 00000000..7c051f70
Binary files /dev/null and b/.doctrees/source/hezar.models.text_generation.doctree differ
diff --git a/.doctrees/source/hezar.models.text_generation.gpt2.doctree b/.doctrees/source/hezar.models.text_generation.gpt2.doctree
new file mode 100644
index 00000000..26e50929
Binary files /dev/null and b/.doctrees/source/hezar.models.text_generation.gpt2.doctree differ
diff --git a/.doctrees/source/hezar.models.text_generation.gpt2.gpt2_text_generation.doctree b/.doctrees/source/hezar.models.text_generation.gpt2.gpt2_text_generation.doctree
new file mode 100644
index 00000000..9e436cd4
Binary files /dev/null and b/.doctrees/source/hezar.models.text_generation.gpt2.gpt2_text_generation.doctree differ
diff --git a/.doctrees/source/hezar.models.text_generation.gpt2.gpt2_text_generation_config.doctree b/.doctrees/source/hezar.models.text_generation.gpt2.gpt2_text_generation_config.doctree
new file mode 100644
index 00000000..0103599b
Binary files /dev/null and b/.doctrees/source/hezar.models.text_generation.gpt2.gpt2_text_generation_config.doctree differ
diff --git a/.doctrees/source/hezar.models.text_generation.t5.doctree b/.doctrees/source/hezar.models.text_generation.t5.doctree
new file mode 100644
index 00000000..bb89b7ce
Binary files /dev/null and b/.doctrees/source/hezar.models.text_generation.t5.doctree differ
diff --git a/.doctrees/source/hezar.models.text_generation.t5.t5_text_generation.doctree b/.doctrees/source/hezar.models.text_generation.t5.t5_text_generation.doctree
new file mode 100644
index 00000000..f2ee97bb
Binary files /dev/null and b/.doctrees/source/hezar.models.text_generation.t5.t5_text_generation.doctree differ
diff --git a/.doctrees/source/hezar.models.text_generation.t5.t5_text_generation_config.doctree b/.doctrees/source/hezar.models.text_generation.t5.t5_text_generation_config.doctree
new file mode 100644
index 00000000..9880d411
Binary files /dev/null and b/.doctrees/source/hezar.models.text_generation.t5.t5_text_generation_config.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.audio_feature_extractor.doctree b/.doctrees/source/hezar.preprocessors.audio_feature_extractor.doctree
new file mode 100644
index 00000000..3a070ae5
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.audio_feature_extractor.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.doctree b/.doctrees/source/hezar.preprocessors.doctree
new file mode 100644
index 00000000..3dc46736
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.image_processor.doctree b/.doctrees/source/hezar.preprocessors.image_processor.doctree
new file mode 100644
index 00000000..5886a3ad
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.image_processor.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.preprocessor.doctree b/.doctrees/source/hezar.preprocessors.preprocessor.doctree
new file mode 100644
index 00000000..9b29832e
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.preprocessor.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.text_normalizer.doctree b/.doctrees/source/hezar.preprocessors.text_normalizer.doctree
new file mode 100644
index 00000000..bc15c5c8
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.text_normalizer.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.tokenizers.bpe.doctree b/.doctrees/source/hezar.preprocessors.tokenizers.bpe.doctree
new file mode 100644
index 00000000..32855484
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.tokenizers.bpe.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.tokenizers.doctree b/.doctrees/source/hezar.preprocessors.tokenizers.doctree
new file mode 100644
index 00000000..aa83f646
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.tokenizers.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.tokenizers.sentencepiece_bpe.doctree b/.doctrees/source/hezar.preprocessors.tokenizers.sentencepiece_bpe.doctree
new file mode 100644
index 00000000..46ea835e
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.tokenizers.sentencepiece_bpe.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.tokenizers.sentencepiece_unigram.doctree b/.doctrees/source/hezar.preprocessors.tokenizers.sentencepiece_unigram.doctree
new file mode 100644
index 00000000..f10d278d
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.tokenizers.sentencepiece_unigram.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.tokenizers.tokenizer.doctree b/.doctrees/source/hezar.preprocessors.tokenizers.tokenizer.doctree
new file mode 100644
index 00000000..2341ceb2
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.tokenizers.tokenizer.doctree differ
diff --git a/.doctrees/source/hezar.preprocessors.tokenizers.wordpiece.doctree b/.doctrees/source/hezar.preprocessors.tokenizers.wordpiece.doctree
new file mode 100644
index 00000000..79cc1223
Binary files /dev/null and b/.doctrees/source/hezar.preprocessors.tokenizers.wordpiece.doctree differ
diff --git a/.doctrees/source/hezar.registry.doctree b/.doctrees/source/hezar.registry.doctree
new file mode 100644
index 00000000..2d5cf39a
Binary files /dev/null and b/.doctrees/source/hezar.registry.doctree differ
diff --git a/.doctrees/source/hezar.trainer.doctree b/.doctrees/source/hezar.trainer.doctree
new file mode 100644
index 00000000..204459e1
Binary files /dev/null and b/.doctrees/source/hezar.trainer.doctree differ
diff --git a/.doctrees/source/hezar.trainer.metrics_handlers.doctree b/.doctrees/source/hezar.trainer.metrics_handlers.doctree
new file mode 100644
index 00000000..245af7ca
Binary files /dev/null and b/.doctrees/source/hezar.trainer.metrics_handlers.doctree differ
diff --git a/.doctrees/source/hezar.trainer.trainer.doctree b/.doctrees/source/hezar.trainer.trainer.doctree
new file mode 100644
index 00000000..64e3913c
Binary files /dev/null and b/.doctrees/source/hezar.trainer.trainer.doctree differ
diff --git a/.doctrees/source/hezar.trainer.trainer_utils.doctree b/.doctrees/source/hezar.trainer.trainer_utils.doctree
new file mode 100644
index 00000000..43fcc3d7
Binary files /dev/null and b/.doctrees/source/hezar.trainer.trainer_utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.audio_utils.doctree b/.doctrees/source/hezar.utils.audio_utils.doctree
new file mode 100644
index 00000000..ed03dba0
Binary files /dev/null and b/.doctrees/source/hezar.utils.audio_utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.common_utils.doctree b/.doctrees/source/hezar.utils.common_utils.doctree
new file mode 100644
index 00000000..d269bc13
Binary files /dev/null and b/.doctrees/source/hezar.utils.common_utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.data_utils.doctree b/.doctrees/source/hezar.utils.data_utils.doctree
new file mode 100644
index 00000000..903f14e8
Binary files /dev/null and b/.doctrees/source/hezar.utils.data_utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.doctree b/.doctrees/source/hezar.utils.doctree
new file mode 100644
index 00000000..def85591
Binary files /dev/null and b/.doctrees/source/hezar.utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.file_utils.doctree b/.doctrees/source/hezar.utils.file_utils.doctree
new file mode 100644
index 00000000..a55593f4
Binary files /dev/null and b/.doctrees/source/hezar.utils.file_utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.hub_utils.doctree b/.doctrees/source/hezar.utils.hub_utils.doctree
new file mode 100644
index 00000000..437f658f
Binary files /dev/null and b/.doctrees/source/hezar.utils.hub_utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.image_utils.doctree b/.doctrees/source/hezar.utils.image_utils.doctree
new file mode 100644
index 00000000..eb5a1459
Binary files /dev/null and b/.doctrees/source/hezar.utils.image_utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.integration_utils.doctree b/.doctrees/source/hezar.utils.integration_utils.doctree
new file mode 100644
index 00000000..afe2e80a
Binary files /dev/null and b/.doctrees/source/hezar.utils.integration_utils.doctree differ
diff --git a/.doctrees/source/hezar.utils.logging.doctree b/.doctrees/source/hezar.utils.logging.doctree
new file mode 100644
index 00000000..3143031b
Binary files /dev/null and b/.doctrees/source/hezar.utils.logging.doctree differ
diff --git a/.doctrees/source/hezar.utils.registry_utils.doctree b/.doctrees/source/hezar.utils.registry_utils.doctree
new file mode 100644
index 00000000..8abb9e2c
Binary files /dev/null and b/.doctrees/source/hezar.utils.registry_utils.doctree differ
diff --git a/.doctrees/source/index.doctree b/.doctrees/source/index.doctree
new file mode 100644
index 00000000..80bf6c73
Binary files /dev/null and b/.doctrees/source/index.doctree differ
diff --git a/.doctrees/source/modules.doctree b/.doctrees/source/modules.doctree
new file mode 100644
index 00000000..7bf61ac4
Binary files /dev/null and b/.doctrees/source/modules.doctree differ
diff --git a/.doctrees/tutorial/datasets.doctree b/.doctrees/tutorial/datasets.doctree
new file mode 100644
index 00000000..07a41439
Binary files /dev/null and b/.doctrees/tutorial/datasets.doctree differ
diff --git a/.doctrees/tutorial/embeddings.doctree b/.doctrees/tutorial/embeddings.doctree
new file mode 100644
index 00000000..2d153171
Binary files /dev/null and b/.doctrees/tutorial/embeddings.doctree differ
diff --git a/.doctrees/tutorial/index.doctree b/.doctrees/tutorial/index.doctree
new file mode 100644
index 00000000..b48f61f5
Binary files /dev/null and b/.doctrees/tutorial/index.doctree differ
diff --git a/.doctrees/tutorial/models.doctree b/.doctrees/tutorial/models.doctree
new file mode 100644
index 00000000..1289ed40
Binary files /dev/null and b/.doctrees/tutorial/models.doctree differ
diff --git a/.doctrees/tutorial/preprocessors.doctree b/.doctrees/tutorial/preprocessors.doctree
new file mode 100644
index 00000000..57878dc8
Binary files /dev/null and b/.doctrees/tutorial/preprocessors.doctree differ
diff --git a/.doctrees/tutorial/training.doctree b/.doctrees/tutorial/training.doctree
new file mode 100644
index 00000000..0f37cc1a
Binary files /dev/null and b/.doctrees/tutorial/training.doctree differ
diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 00000000..e69de29b
diff --git a/_modules/hezar/builders.html b/_modules/hezar/builders.html
new file mode 100644
index 00000000..7330e698
--- /dev/null
+++ b/_modules/hezar/builders.html
@@ -0,0 +1,667 @@
hezar.builders - Hezar Documentation
Source code for hezar.builders

+r"""
+Builder functions are used to create an instance of a module, e.g. models, preprocessors, etc., without having to import
+their corresponding classes manually. These builders use modules' registries to do so. Every builder gets a name and
+optional config or config kwargs to build the object.
+
+Examples:
+
+    >>> from hezar.builders import build_model
+    >>> model = build_model('distilbert_text_classification', id2label={0: 'negative', 1: 'positive'})
+    >>> print(model)
+
+"""
+from typing import Optional
+
+from .configs import (
+    DatasetConfig,
+    EmbeddingConfig,
+    MetricConfig,
+    ModelConfig,
+    PreprocessorConfig,
+)
+from .constants import SplitType
+from .registry import (
+    datasets_registry,
+    embeddings_registry,
+    metrics_registry,
+    models_registry,
+    preprocessors_registry,
+)
+
+
+__all__ = [
+    "build_model",
+    "build_dataset",
+    "build_preprocessor",
+    "build_embedding",
+    "build_metric",
+]
+
+
+
+def build_model(name: str, config: Optional[ModelConfig] = None, **kwargs):
+    """
+    Build the model using its registry name. If config is None then the model is built using the default config.
+    Notice that this function only builds the model and does not perform any weights loading/initialization unless
+    these actions are done in the model's :func:`__init__`.
+
+    Args:
+        name (str): name of the model in the models' registry
+        config (ModelConfig): a ModelConfig instance
+        **kwargs: extra config parameters that are loaded to the model
+
+    Returns:
+        A Model instance
+    """
+    from .utils import list_available_models
+
+    available_models = list_available_models()
+    if name not in available_models:
+        raise ValueError(f"Unknown model name: `{name}`!\nAvailable model names: {available_models}")
+    config = config or models_registry[name].config_class()
+    model = models_registry[name].module_class(config, **kwargs)
+    return model
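For illustration, reusing the registry name from the module docstring above: calling the builder with no config instantiates the registry's default config class, while extra keyword arguments are applied on top of that default.

    >>> from hezar.builders import build_model
    >>> model = build_model("distilbert_text_classification")  # built with the registry's default config
    >>> model = build_model("distilbert_text_classification", id2label={0: "negative", 1: "positive"})  # kwargs override config fields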
+
+def build_preprocessor(name: str, config: Optional[PreprocessorConfig] = None, **kwargs):
+    """
+    Build the preprocessor using its registry name. If config is None then the preprocessor is built using the
+    default config.
+
+    Args:
+        name (str): name of the preprocessor in the preprocessors' registry
+        config (PreprocessorConfig): a PreprocessorConfig instance
+        **kwargs: extra config parameters that are loaded to the preprocessor
+
+    Returns:
+        A Preprocessor instance
+    """
+    from .utils import list_available_preprocessors
+
+    available_preprocessors = list_available_preprocessors()
+    # Validate against the available-names list, consistent with the other builders
+    if name not in available_preprocessors:
+        raise ValueError(
+            f"Unknown preprocessor name: `{name}`!\nAvailable preprocessor names: {available_preprocessors}"
+        )
+    config = config or preprocessors_registry[name].config_class()
+    preprocessor = preprocessors_registry[name].module_class(config, **kwargs)
+    return preprocessor
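A minimal sketch of the same pattern for preprocessors. The registry name below is an assumed example, so listing the available names first is the safe way to discover a real one:

    >>> from hezar.builders import build_preprocessor
    >>> from hezar.utils import list_available_preprocessors
    >>> print(list_available_preprocessors())                  # discover valid registry names
    >>> tokenizer = build_preprocessor("wordpiece_tokenizer")  # assumed registry name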
+
+def build_dataset(name: str, config: Optional[DatasetConfig] = None, split: SplitType = None, **kwargs):
+    """
+    Build the dataset using its registry name. If config is None then the dataset is built using the
+    default config.
+
+    Args:
+        name (str): name of the dataset in the datasets' registry
+        config (DatasetConfig): a DatasetConfig instance
+        split (str): Dataset split to load
+        **kwargs: extra config parameters that are loaded to the dataset
+
+    Returns:
+        A Dataset instance
+    """
+    from .utils import list_available_datasets
+
+    available_datasets = list_available_datasets()
+    if name not in available_datasets:
+        raise ValueError(
+            f"Unknown dataset name: `{name}`!\nAvailable dataset names: {available_datasets}"
+        )
+    config = config or datasets_registry[name].config_class()
+    dataset = datasets_registry[name].module_class(config, split, **kwargs)
+    return dataset
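Datasets additionally take a `split`. Both the registry name and the `path` value below are assumptions for illustration; `path` itself is a real field on `DatasetConfig`, defined in hezar.configs further down:

    >>> from hezar.builders import build_dataset
    >>> train_dataset = build_dataset(
    ...     "text_classification",        # assumed registry name
    ...     split="train",
    ...     path="hezarai/some-dataset",  # assumed Hub path, passed through DatasetConfig.path
    ... )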
+
+def build_embedding(name: str, config: Optional[EmbeddingConfig] = None, **kwargs):
+    """
+    Build the embedding using its registry name. If config is None then the embedding is built using the
+    default config.
+
+    Args:
+        name (str): Name of the embedding in the embeddings' registry
+        config (EmbeddingConfig): An EmbeddingConfig instance
+        **kwargs: Extra config parameters that are loaded to the embedding
+
+    Returns:
+        An Embedding instance
+    """
+    from .utils import list_available_embeddings
+
+    available_embeddings = list_available_embeddings()
+    if name not in available_embeddings:
+        raise ValueError(
+            f"Unknown embedding name: `{name}`!\nAvailable embedding names: {available_embeddings}"
+        )
+    config = config or embeddings_registry[name].config_class()
+    embedding = embeddings_registry[name].module_class(config, **kwargs)
+    return embedding
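Same pattern for embeddings. The registry name here is an assumed example, while `bypass_version_check` is a real `EmbeddingConfig` field (see hezar.configs below):

    >>> from hezar.builders import build_embedding
    >>> embedding = build_embedding("word2vec_cbow", bypass_version_check=True)  # assumed registry name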
+
+def build_metric(name: str, config: Optional[MetricConfig] = None, **kwargs):
+    """
+    Build the metric using its registry name. If config is None then the metric is built using the
+    default config.
+
+    Args:
+        name (str): Name of the metric in the metrics' registry
+        config (MetricConfig): A MetricConfig instance
+        **kwargs: Extra config parameters that are loaded to the metric
+
+    Returns:
+        A Metric instance
+    """
+    from .utils import list_available_metrics
+
+    available_metrics = list_available_metrics()
+    if name not in available_metrics:
+        raise ValueError(f"Unknown metric name: `{name}`!\nAvailable metric names: {available_metrics}")
+    config = config or metrics_registry[name].config_class()
+    metric = metrics_registry[name].module_class(config, **kwargs)
+    return metric
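And for metrics. `f1` is an assumed registry name, while `n_decimals` is a real `MetricConfig` field, so this shows how a kwarg lands on the default config:

    >>> from hezar.builders import build_metric
    >>> f1 = build_metric("f1", n_decimals=2)  # assumed registry name; round results to 2 decimals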
\ No newline at end of file
diff --git a/_modules/hezar/configs.html b/_modules/hezar/configs.html
new file mode 100644
index 00000000..15f4c29d
--- /dev/null
+++ b/_modules/hezar/configs.html
@@ -0,0 +1,986 @@
hezar.configs - Hezar Documentation

Source code for hezar.configs

+"""
+Configs are at the core of Hezar. All core modules like `Model`, `Preprocessor`, `Trainer`, etc. take their parameters
+as a config container which is an instance of `Config` or its derivatives. A `Config` is a Python dataclass with
+auxiliary methods for loading, saving, uploading to the Hub, etc.
+
+Examples:
+    >>> from hezar.configs import ModelConfig
+    >>> config = ModelConfig.load("hezarai/bert-base-fa")
+
+    >>> from hezar.models import BertLMConfig
+    >>> bert_config = BertLMConfig(vocab_size=50000, hidden_size=768)
+    >>> bert_config.save("saved/bert", filename="model_config.yaml")
+    >>> bert_config.push_to_hub("hezarai/bert-custom", filename="model_config.yaml")
+"""
+from __future__ import annotations
+
+import os
+import tempfile
+from dataclasses import asdict, dataclass, field
+from enum import Enum
+from pprint import pformat
+from typing import Dict, List, Literal, Optional, Tuple
+
+from huggingface_hub import create_repo, hf_hub_download, upload_file
+from omegaconf import DictConfig, OmegaConf
+
+from .constants import (
+    DEFAULT_MODEL_CONFIG_FILE,
+    HEZAR_CACHE_DIR,
+    ConfigType,
+    LRSchedulerType,
+    OptimizerType,
+    PrecisionType,
+    TaskType,
+)
+from .utils import Logger, get_module_config_class
+
+
+__all__ = [
+    "Config",
+    "ModelConfig",
+    "PreprocessorConfig",
+    "TrainerConfig",
+    "DatasetConfig",
+    "EmbeddingConfig",
+    "MetricConfig",
+]
+
+logger = Logger(__name__)
+
+CONFIG_CLASS_VARS = ["name", "config_type"]
+
+_config_to_type_mapping = {
+    "ModelConfig": ConfigType.MODEL,
+    "PreprocessorConfig": ConfigType.PREPROCESSOR,
+    "TrainerConfig": ConfigType.TRAINER,
+    "DatasetConfig": ConfigType.DATASET,
+    "EmbeddingConfig": ConfigType.EMBEDDING,
+    "CriterionConfig": ConfigType.CRITERION,
+    "MetricConfig": ConfigType.METRIC,
+}
+_type_to_config_mapping = {v: k for k, v in _config_to_type_mapping.items()}
+
+
+
+@dataclass
+class Config:
+    """
+    Base class for all configs in Hezar.
+
+    All configs are simple dataclasses that have some customized functionalities to manage their attributes. There are
+    also some Hezar specific methods: `load`, `save` and `push_to_hub`.
+    """
+
+    name: str = field(init=False, default=None)
+    config_type: str = field(init=False, default=ConfigType.BASE)
+
+    def __post_init__(self):
+        # Class variables cannot be init-able
+        for attr in CONFIG_CLASS_VARS:
+            if self.__dataclass_fields__[attr].init == True:  # noqa
+                raise ValueError(
+                    f"The parameter `{attr}` in a config should be either non-initable or unannotated! "
+                    f"\nYou should define it as either:\n"
+                    f"`{attr} = '{getattr(self, attr)}'`"
+                    f" or "
+                    f"`{attr}: str = field(default='{getattr(self, attr)}', init=False)`"
+                )
+
+        # Convert enums to values
+        for param, value in self.dict().items():
+            if isinstance(getattr(self, param), Enum):
+                setattr(self, param, str(getattr(self, param)))
+
+    def __str__(self):
+        return pformat(self.dict())
+
+    def __getitem__(self, item):
+        try:
+            return self.dict()[item]
+        except KeyError:
+            raise AttributeError(f"`{self.__class__.__name__}` does not have the parameter `{item}`!")
+
+    def __len__(self):
+        return len(self.dict())
+
+    def __iter__(self):
+        return iter(self.dict())
+    def dict(self):
+        """
+        Returns the config object as a dictionary (works on nested dataclasses too)
+
+        Returns:
+            The config object as a dictionary
+        """
+        return asdict(self)
+
+    def keys(self):
+        return list(self.dict().keys())
+
+    def get(self, key, default=None):
+        return getattr(self, key, default)
+
+    def update(self, d: dict, **kwargs):
+        """
+        Update config with a given dictionary or keyword arguments. If a key does not exist in the attributes, prints
+        a warning but sets it anyway.
+
+        Args:
+            d: A dictionary
+            **kwargs: Key/value pairs in the form of keyword arguments
+
+        Returns:
+            The config object itself but the operation happens in-place anyway
+        """
+        d.update(kwargs)
+        for k, v in d.items():
+            if k not in self.__annotations__.keys():
+                logger.warning(f"`{str(self.__class__.__name__)}` does not take `{k}` as a config parameter!")
+            setattr(self, k, v)
+        return self
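For example (the keys below are hypothetical), both the dict form and the kwargs form mutate the config in place and return it:

    >>> from hezar.configs import ModelConfig
    >>> config = ModelConfig.load("hezarai/bert-base-fa")
    >>> config = config.update({"some_field": 1})    # hypothetical key: warns if not a declared parameter, sets it anyway
    >>> config = config.update({}, another_field=2)  # hypothetical key, passed via kwargs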
+
+    @classmethod
+    def load(
+        cls,
+        hub_or_local_path: str | os.PathLike,
+        filename: Optional[str] = None,
+        subfolder: Optional[str] = None,
+        repo_type: str = None,
+        cache_dir: str = None,
+        **kwargs,
+    ) -> "Config":
+        """
+        Load config from Hub or locally if it already exists on disk (handled by HfApi)
+
+        Args:
+            hub_or_local_path: Local or Hub path for the config
+            filename: Configuration filename
+            subfolder: Optional subfolder path where the config is in
+            repo_type: Repo type e.g. model, dataset, etc.
+            cache_dir: Path to cache directory
+            **kwargs: Manual config parameters to override
+
+        Returns:
+            A Config instance
+        """
+        filename = filename or DEFAULT_MODEL_CONFIG_FILE
+        subfolder = subfolder or ""
+
+        config_path = os.path.join(hub_or_local_path, subfolder, filename)
+        is_local = os.path.isfile(config_path)
+        if os.path.isdir(hub_or_local_path) and not is_local:
+            raise EnvironmentError(
+                f"Path `{hub_or_local_path}` exists locally but the config file {filename} is missing!"
+            )
+        # If the file or repo_id does not exist locally, load from the Hub
+        if not is_local:
+            config_path = hf_hub_download(
+                hub_or_local_path,
+                filename=filename,
+                subfolder=subfolder,
+                cache_dir=cache_dir or HEZAR_CACHE_DIR,
+                repo_type=repo_type,
+            )
+        # Load config file and convert to dictionary
+        dict_config = OmegaConf.load(config_path)
+        config = OmegaConf.to_container(dict_config)
+        # Check if config_type in the file and the class are equal
+        config_type = config.get("config_type", None)
+        if config_type is None:
+            logger.warning(f"`config_type` parameter in `{filename}` is `None` or does not exist!")
+        elif config_type in _config_to_type_mapping.values():
+            if config_type != cls.config_type:
+                raise ValueError(
+                    f"The `config_type` for `{cls.__name__}` is `{cls.config_type}` "
+                    f"which is different from the `config_type` parameter in `{filename}` which is `{config_type}`!"
+                )
+        config_cls = get_module_config_class(config["name"], registry_type=config_type)
+        if config_cls is None:
+            config_cls = cls
+        config = config_cls.from_dict(config, **kwargs)
+        return config
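Tying the resolution logic above to the docstring example at the top of this module: the file is fetched from the Hub (or read locally), then the `name`/`config_type` keys select the concrete config class to instantiate. The repo id below is real (from that example); the explicit filename spells out what is presumably the library default (`DEFAULT_MODEL_CONFIG_FILE`):

    >>> from hezar.configs import ModelConfig
    >>> config = ModelConfig.load("hezarai/bert-base-fa")                      # fetched from the Hub (or cache)
    >>> config = ModelConfig.load("saved/bert", filename="model_config.yaml")  # local directory written by save()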
+
+    @classmethod
+    def from_dict(cls, dict_config: Dict | DictConfig, **kwargs):
+        """
+        Load config from a dict-like object. Nested configs are also properly converted to their classes if possible.
+        """
+        # Update config parameters with kwargs
+        dict_config.update(**kwargs)
+
+        for k, v in dict_config.items():
+            if isinstance(v, Dict) and "name" in v and "config_type" in v:
+                config_cls = get_module_config_class(v["name"], v["config_type"])
+                if config_cls is not None:
+                    dict_config[k] = config_cls.from_dict(v)
+
+        # Remove class vars to avoid TypeError (unexpected argument)
+        [dict_config.pop(k, None) for k in CONFIG_CLASS_VARS]
+
+        config = cls(**{k: v for k, v in dict_config.items() if k in cls.__annotations__})  # noqa
+
+        return config
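A sketch of the dict round-trip using the config class from the module docstring; the `"bert_lm"` name value is an assumed registry name. Note that `name`/`config_type` get popped (they are class vars) and the remaining keys are filtered against the class annotations:

    >>> from hezar.models import BertLMConfig
    >>> d = {"name": "bert_lm", "config_type": "model", "vocab_size": 50000, "hidden_size": 768}
    >>> bert_config = BertLMConfig.from_dict(d)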
+
+    def save(
+        self,
+        save_dir: str | os.PathLike,
+        filename: str,
+        subfolder: Optional[str] = None,
+        skip_none_fields: Optional[bool] = True,
+    ):
+        """
+        Save the `*config.yaml` file to a local path
+
+        Args:
+            save_dir: Save directory path
+            filename: Config file name
+            subfolder: Subfolder to save the config file
+            skip_none_fields (bool): Whether to skip saving None values or not
+        """
+        subfolder = subfolder or ""
+        config = self.dict()
+
+        if skip_none_fields:
+            # Exclude None items
+            config = {k: v for k, v in config.items() if v is not None}
+
+        # Make the directory and save the config to it
+        os.makedirs(os.path.join(save_dir, subfolder), exist_ok=True)
+        save_path = os.path.join(save_dir, subfolder, filename)
+        OmegaConf.save(config, save_path)
+
+        return save_path
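This is the second line of the module docstring example spelled out; the target directory is created if needed and the path of the written YAML file is returned:

    >>> from hezar.models import BertLMConfig
    >>> bert_config = BertLMConfig(vocab_size=50000, hidden_size=768)
    >>> bert_config.save("saved/bert", filename="model_config.yaml")
    'saved/bert/model_config.yaml'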
+
+    def push_to_hub(
+        self,
+        repo_id: str,
+        filename: str,
+        subfolder: Optional[str] = None,
+        repo_type: Optional[str] = "model",
+        skip_none_fields: Optional[bool] = True,
+        private: Optional[bool] = False,
+        commit_message: Optional[str] = None,
+    ):
+        """
+        Push the config file to the hub
+
+        Args:
+            repo_id (str): Repo name or id on the Hub
+            filename (str): Config file name
+            subfolder (str): Subfolder to save the config
+            repo_type (str): Type of the repo e.g. model, dataset, space
+            skip_none_fields (bool): Whether to skip saving None values or not
+            private (bool): Whether the repo should be private or not (ignored if the repo exists)
+            commit_message (str): Push commit message
+        """
+        path_in_repo = f"{subfolder}/{filename}" if subfolder else filename
+        subfolder = subfolder or ""
+
+        # Create the remote repo if needed
+        create_repo(repo_id, repo_type=repo_type, private=private, exist_ok=True)
+        # Save to a temporary path and prepare for push
+        cache_path = tempfile.mkdtemp()
+        config_path = self.save(cache_path, filename=filename, subfolder=subfolder, skip_none_fields=skip_none_fields)
+        # Push to the Hub
+        if commit_message is None:
+            commit_message = f"Hezar: Upload {filename}"
+        upload_file(
+            path_or_fileobj=config_path,
+            path_in_repo=path_in_repo,
+            repo_id=repo_id,
+            repo_type=repo_type,
+            commit_message=commit_message,
+        )
+        logger.log_upload_success(
+            name=f"{self.__class__.__name__}()",
+            target_path=os.path.join(repo_id, path_in_repo),
+        )
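And the matching push call from the module docstring example; it saves the YAML to a temporary directory, creates the repo if it does not exist, then uploads the file. The subfolder variant is illustrative:

    >>> bert_config.push_to_hub("hezarai/bert-custom", filename="model_config.yaml")
    >>> bert_config.push_to_hub(
    ...     "hezarai/bert-custom",
    ...     filename="model_config.yaml",
    ...     subfolder="configs",  # hypothetical subfolder: uploads to configs/model_config.yaml
    ...     commit_message="Upload custom config",
    ... )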
+
+ + + +
+[docs] +@dataclass +class ModelConfig(Config): + """ + Base dataclass for all model configs + """ + + name: str = field(init=False, default=None) + config_type: str = field(init=False, default=ConfigType.MODEL)
+ + + +
+[docs] +@dataclass +class PreprocessorConfig(Config): + """ + Base dataclass for all preprocessor configs + """ + + name: str = field(init=False, default=None) + config_type: str = field(init=False, default=ConfigType.PREPROCESSOR)
+ + + +
+[docs] +@dataclass +class DatasetConfig(Config): + """ + Base dataclass for all dataset configs + """ + + name: str = field(init=False, default=None) + config_type: str = field(init=False, default=ConfigType.DATASET) + task: TaskType | List[TaskType] = field( + default=None, metadata={"help": "Name of the task(s) this dataset is built for"} + ) + path: str = None
+ + + +
+[docs] +@dataclass +class EmbeddingConfig(Config): + """ + Base dataclass for all embedding configs + """ + + name: str = field(init=False, default=None) + config_type: str = field(init=False, default=ConfigType.EMBEDDING) + bypass_version_check: bool = field( + default=False, + metadata={"help": "Whether to bypass checking gensim/numpy/hezar version compatibility"}, + )
+ + + +
+[docs] +@dataclass +class MetricConfig(Config): + """ + Base dataclass for all metric configs + """ + + name: str = field(init=False, default=None) + config_type: str = field(init=False, default=ConfigType.METRIC) + objective: Literal["maximize", "minimize"] = None + output_keys: List | Tuple = None + n_decimals: int = 4
+ + + +
+[docs] +@dataclass +class TrainerConfig(Config): + """ + Base dataclass for all trainer configs + + Args: + task (str, TaskType): + The training task. Must be a valid name from `TaskType`. + output_dir (str): + Path to the directory to save trainer properties. + device (str): + Hardware device, e.g. `cuda:0`, `cpu`, etc. + init_weights_from (str): + Path to a model from disk or Hub to load the initial weights from. + num_dataloader_workers (int): + Number of dataloader workers, defaults to 0. + seed (int): + Control determinism of the run by setting a seed value. Defaults to 42. + optimizer (OptimizerType): + Name of the optimizer, available values include properties in `OptimizerType` enum. + learning_rate (float): + Initial learning rate for the optimizer. + weight_decay (float): + Optimizer weight decay value. + lr_scheduler (LRSchedulerType): + Optional learning rate scheduler among `LRSchedulerType` enum. + batch_size (int): + Training batch size. + eval_batch_size (int): + Evaluation batch size, defaults to `batch_size` if None. + distributed (bool): + Whether to use distributed training or not (via the `accelerate` package). + mixed_precision (PrecisionType | str): + Mixed precision type, e.g. fp16, bf16, etc. (disabled by default). + use_cpu (bool): + Whether to force training on the CPU. + evaluate_with_generate (bool): + Whether to use `generate()` in the evaluation step or not (only applicable to generative models). + metrics (List[str | MetricConfig]): + A list of metrics, validated against the `valid_metrics` of the Trainer's specific MetricsHandler. + metric_for_best_model (str): + The metric used to pick the best checkpoint, e.g. `evaluation.loss`. + num_epochs (int): + Number of total epochs to train the model. + save_freq (int): + Save the trainer stats and everything every `save_freq` epochs. + checkpoints_dir (str): + Path to the checkpoints' folder. The actual files will be saved under `{output_dir}/{checkpoints_dir}`. + logs_dir (str): + Path to the logs' folder. The actual log files will be saved under `{output_dir}/{logs_dir}`. + """ + + name: str = field(init=False, default="trainer") + config_type: str = field(init=False, default=ConfigType.TRAINER) + output_dir: str + task: str | TaskType + device: str = "cuda" + num_epochs: int = None + init_weights_from: str = None + num_dataloader_workers: int = 0 + seed: int = 42 + optimizer: str | OptimizerType = None + learning_rate: float = 2e-5 + weight_decay: float = 0.0 + lr_scheduler: str | LRSchedulerType = None + batch_size: int = None + eval_batch_size: int = None + distributed: bool = field( + init=False, + default=False, + metadata={"help": "Distributed training isn't supported yet!"} + ) + mixed_precision: PrecisionType | str | None = None + use_cpu: bool = False + evaluate_with_generate: bool = True + metrics: List[str | MetricConfig] = None + metric_for_best_model: str = "evaluation.loss" + save_freq: int = 1 + checkpoints_dir: str = "checkpoints" + logs_dir: str = "logs" + + def __post_init__(self): + """ + Perform argument sanitization and filtering here to avoid unexpected behavior in the trainer. + This method is needed because some fields in the Trainer's config are correlated, and leaving them + unchecked can lead to conflicts. + """ + super().__post_init__() + if self.task not in list(TaskType): + raise ValueError( + f"Invalid task `{self.task}` passed to `TrainerConfig`. " + f"Available options are {TaskType.list()}", + ) + if not (self.metric_for_best_model.startswith("evaluation") or self.metric_for_best_model.startswith("train")): + self.metric_for_best_model = f"evaluation.{self.metric_for_best_model}"
\ No newline at end of file
diff --git a/_modules/hezar/constants.html b/_modules/hezar/constants.html
new file mode 100644
index 00000000..ad3abdd7
--- /dev/null
+++ b/_modules/hezar/constants.html
@@ -0,0 +1,727 @@
+hezar.constants - Hezar Documentation

Source code for hezar.constants

+"""
+Home to all constant variables in Hezar
+"""
+
+import os
+from enum import Enum
+
+
+HEZAR_HUB_ID = "hezarai"
+HEZAR_CACHE_DIR = os.getenv("HEZAR_CACHE_DIR", f'{os.path.expanduser("~")}/.cache/hezar')
+
+DEFAULT_MODEL_FILE = "model.pt"
+DEFAULT_MODEL_CONFIG_FILE = "model_config.yaml"
+DEFAULT_TRAINER_SUBFOLDER = "train"
+DEFAULT_TRAINER_CONFIG_FILE = "train_config.yaml"
+DEFAULT_TRAINER_CSV_LOG_FILE = "training_logs.csv"
+DEFAULT_TRAINER_STATE_FILE = "trainer_state.yaml"
+DEFAULT_PREPROCESSOR_SUBFOLDER = "preprocessor"
+DEFAULT_NORMALIZER_CONFIG_FILE = "normalizer_config.yaml"
+DEFAULT_IMAGE_PROCESSOR_CONFIG_FILE = "image_processor_config.yaml"
+DEFAULT_FEATURE_EXTRACTOR_CONFIG_FILE = "feature_extractor_config.yaml"
+DEFAULT_TOKENIZER_FILE = "tokenizer.json"
+DEFAULT_TOKENIZER_CONFIG_FILE = "tokenizer_config.yaml"
+DEFAULT_DATASET_CONFIG_FILE = "dataset_config.yaml"
+DEFAULT_EMBEDDING_FILE = "embedding.bin"
+DEFAULT_EMBEDDING_CONFIG_FILE = "embedding_config.yaml"
+DEFAULT_EMBEDDING_SUBFOLDER = "embedding"
+
+TQDM_BAR_FORMAT = "{desc:<16}{percentage:3.0f}%|{bar:70}{r_bar}"
+
+
+
+[docs] +class ExplicitEnum(str, Enum): + def __str__(self): + return self.value + +
+[docs] + @classmethod + def list(cls): + return [x.value for x in cls.__members__.values()]
+
+ + + +
+[docs] +class Backends(ExplicitEnum): + """ + All required dependency packages and libraries. Note that the values here must be the exact module names used + for importing, for example if you set PILLOW the value must be `PIL` not `pillow`, `pil`, etc. + """ + + PYTORCH = "torch" + TRANSFORMERS = "transformers" + DATASETS = "datasets" + TOKENIZERS = "tokenizers" + ACCELERATE = "accelerate" + SOUNDFILE = "soundfile" + LIBROSA = "librosa" + WANDB = "wandb" + GENSIM = "gensim" + PILLOW = "PIL" + JIWER = "jiwer" + NLTK = "nltk" + SCIKIT = "sklearn" + SEQEVAL = "seqeval" + EVALUATE = "evaluate" + ROUGE = "rouge_score"
+ + + +
+[docs] +class TaskType(ExplicitEnum): + AUDIO_CLASSIFICATION = "audio_classification" + BACKBONE = "backbone" + IMAGE2TEXT = "image2text" + LANGUAGE_MODELING = "language_modeling" + SEQUENCE_LABELING = "sequence_labeling" + SPEECH_RECOGNITION = "speech_recognition" + TEXT_CLASSIFICATION = "text_classification" + TEXT_DETECTION = "text_detection" + TEXT_GENERATION = "text_generation"
+ + + +
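+# Members of any `ExplicitEnum` subclass behave like plain strings, e.g.:
+#
+#     str(TaskType.TEXT_CLASSIFICATION)  # -> "text_classification"
+#     TaskType.list()                    # -> ["audio_classification", "backbone", ...]
+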
+[docs] +class ConfigType(ExplicitEnum): + BASE = "base" + MODEL = "model" + DATASET = "dataset" + PREPROCESSOR = "preprocessor" + EMBEDDING = "embedding" + TRAINER = "trainer" + OPTIMIZER = "optimizer" + CRITERION = "criterion" + LR_SCHEDULER = "lr_scheduler" + METRIC = "metric"
+ + + +
+[docs] +class RegistryType(ExplicitEnum): + MODEL = "model" + DATASET = "dataset" + PREPROCESSOR = "preprocessor" + EMBEDDING = "embedding" + TRAINER = "trainer" + OPTIMIZER = "optimizer" + CRITERION = "criterion" + LR_SCHEDULER = "lr_scheduler" + METRIC = "metric"
+ + + +
+[docs] +class LossType(ExplicitEnum): + L1 = "l1" + NLL = "nll" + NLL_2D = "nll_2d" + POISSON_NLL = "poisson_nll" + GAUSSIAN_NLL = "gaussian_nll" + MSE = "mse" + BCE = "bce" + BCE_WITH_LOGITS = "bce_with_logits" + CROSS_ENTROPY = "cross_entropy" + TRIPLE_MARGIN = "triple_margin" + CTC = "ctc"
+ + + +
+[docs] +class PrecisionType(ExplicitEnum): + NO = "no" + FP8 = "fp8" + FP16 = "fp16" + BF16 = "bf16"
+ + + +
+[docs] +class OptimizerType(ExplicitEnum): + ADAM = "adam" + ADAMW = "adamw" + SDG = "sdg"
+ + + +
+[docs] +class LRSchedulerType(ExplicitEnum): + REDUCE_LR_ON_PLATEAU = "reduce_lr_on_plateau" + COSINE_LR = "cosine_lr"
+ + + +
+[docs] +class SplitType(ExplicitEnum): + TRAIN = "train" + EVAL = "eval" + VALID = "validation" + TEST = "test"
+ + + +
+[docs] +class MetricType(ExplicitEnum): + ACCURACY = "accuracy" + F1 = "f1" + RECALL = "recall" + PRECISION = "precision" + SEQEVAL = "seqeval" + CER = "cer" + WER = "wer" + BLEU = "bleu" + ROUGE = "rouge"
+ + + +
+[docs] +class RepoType(ExplicitEnum): + DATASET = "dataset" + MODEL = "model"
+ + + +
+[docs] +class ImageType(ExplicitEnum): + NUMPY = "numpy" + PILLOW = "pillow" + TORCH = "torch"
+ + + +
+[docs] +class ChannelsAxisSide(ExplicitEnum): + FIRST = "first" + LAST = "last"
+ + + +
+[docs] +class PaddingType(ExplicitEnum): + MAX_LENGTH = "max_length" + LONGEST = "longest"
+ + + +
+[docs] +class Color(ExplicitEnum): + HEADER = "\033[95m" + NORMAL = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + ITALIC = "\33[3m" + BLUE = "\033[94m" + CYAN = "\033[96m" + GREEN = "\033[92m" + YELLOW = "\033[93m" + RED = "\033[91m" + GREY = "\33[90m"
\ No newline at end of file
diff --git a/_modules/hezar/data/data_collators.html b/_modules/hezar/data/data_collators.html
new file mode 100644
index 00000000..bbc522a3
--- /dev/null
+++ b/_modules/hezar/data/data_collators.html
@@ -0,0 +1,836 @@
+hezar.data.data_collators - Hezar Documentation

Source code for hezar.data.data_collators

+import numpy as np
+import torch
+
+from ..preprocessors import Tokenizer
+from ..utils import Logger, convert_batch_dict_dtype
+
+
+__all__ = [
+    "TextPaddingDataCollator",
+    "TextGenerationDataCollator",
+    "ImageCaptioningDataCollator",
+    "SequenceLabelingDataCollator",
+    "CharLevelOCRDataCollator",
+]
+
+logger = Logger(__name__)
+
+
+
+[docs] +class TextPaddingDataCollator: + """ + A data collator that pads a batch of tokenized inputs. + + Args: + tokenizer (Tokenizer): A Hezar tokenizer instance. (only its config is going to be used) + padding_type (str): Specifies padding strategy. Defaults to `longest`, but can also be `max_length` (in this case + `max_length` cannot be None) + padding_side (str): Specifies from which side of each tensor to add paddings. Defaults to `right`, but can also be + `left`. + max_length (int): If `padding_type` is set to `max_length` this parameter must be specified. Forces all tensors to + have this value as length. + return_tensors (str): Specifies the dtype of the returning tensors in the batch. Defaults to `pt(torch.Tensor)`, but + can also be `np` or `list`. + """ + + def __init__( + self, + tokenizer: Tokenizer, + padding_type: str = "longest", + padding_side: str = "right", + max_length: int = None, + return_tensors: str = "pt", + ): + self.tokenizer = tokenizer + self.padding_type = padding_type + self.padding_side = padding_side + self.max_length = max_length + self.return_tensors = return_tensors + + self.field_to_pad_id_mapping = { + "token_ids": self.tokenizer.pad_token_id, + "token_type_ids": self.tokenizer.config.pad_token_type_id, + "tokens": "", + "special_tokens_mask": 1, + "attention_mask": 0, + } + + if padding_type == "longest" and max_length is not None: + logger.warning( + "You passed `max_length` while also setting `padding_type` to `longest` which are " + "incompatible! Instead leave `max_length` as None or set `padding_type` to `max_length`! " + "Ignoring `max_length`" + ) + self.max_length = None + + def __call__(self, encoded_batch): + """ + Add padding to every item in the batch + + Args: + encoded_batch: A batch dictionary + + Returns: + Dict: The same batch dictionary but padded + """ + encoded_batch = [convert_batch_dict_dtype(x, dtype="list") for x in encoded_batch] + permuted_batch = {} + for key in encoded_batch[0].keys(): + stack = [e for item in encoded_batch for e in item[key]] + permuted_batch[key] = stack + + encoded_batch = permuted_batch.copy() + if "label" in encoded_batch: + encoded_batch["labels"] = encoded_batch["label"] + del encoded_batch["label"] + + labels = encoded_batch.pop("labels") + input_length = self.max_length or max(len(x) for x in encoded_batch["token_ids"]) + + for field, batch in encoded_batch.items(): + padded_batch = [] + for x in batch: + if isinstance(x, torch.Tensor): + x = x.cpu().numpy().tolist() + elif isinstance(x, np.ndarray): + x = x.tolist() + difference = input_length - len(x) + paddings = [self.field_to_pad_id_mapping[field]] * difference + padded_x = x + paddings if self.padding_side == "right" else paddings + x + padded_batch.append(padded_x) + encoded_batch[field] = padded_batch + + encoded_batch["labels"] = labels + + encoded_batch = convert_batch_dict_dtype(encoded_batch, dtype=self.return_tensors) + + return encoded_batch
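+
+# Usage sketch (assumes `tokenizer` is a loaded hezar `Tokenizer` and `dataset`
+# yields tokenized samples):
+#
+#     from torch.utils.data import DataLoader
+#
+#     collator = TextPaddingDataCollator(tokenizer, padding_type="longest")
+#     loader = DataLoader(dataset, batch_size=8, collate_fn=collator)
+#     # Every field in a batch (`token_ids`, `attention_mask`, ...) is padded to equal length.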
+ + + +
+[docs] +class TextGenerationDataCollator: + """ + A data collator for text to text generation + + Args: + tokenizer (Tokenizer): A Hezar tokenizer instance. (only its config is going to be used) + padding_type (str): Specifies padding strategy. Defaults to `longest`, but can also be `max_length` (in this case + `max_length` cannot be None) + padding_side (str): Specifies from which side of each tensor to add paddings. Defaults to `right`, but can also be + `left`. + max_length (int): If `padding_type` is set to `max_length` this parameter must be specified. Forces all tensors to + have this value as length. + max_target_length (int): Maximum target length for text generation. + return_tensors (str): Specifies the dtype of the returning tensors in the batch. Defaults to `pt(torch.Tensor)`, but + can also be `np` or `list`. + """ + + def __init__( + self, + tokenizer: Tokenizer, + padding_type: str = "longest", + padding_side: str = "right", + max_length: int = None, + max_target_length: int = None, + return_tensors: str = "pt", + ): + self.tokenizer = tokenizer + self.padding_type = padding_type + self.padding_side = padding_side + self.max_length = max_length + self.max_target_length = max_target_length + self.return_tensors = return_tensors + + if padding_type == "longest" and max_length is not None: + logger.warning( + "You passed `max_length` while also setting `padding_type` to `longest` which are " + "incompatible! Instead leave `max_length` as None or set `padding_type` to `max_length`! " + "Ignoring `max_length`" + ) + self.max_length = None + + def __call__(self, encoded_batch): + """ + Add padding to every item in the batch + + Args: + encoded_batch (List[Dict]): A batch dictionary + + Returns: + Dict: The same batch dictionary but padded + """ + encoded_batch = [convert_batch_dict_dtype(x, dtype="list") for x in encoded_batch] + permuted_batch = {} + for key in encoded_batch[0].keys(): + stack = [e for item in encoded_batch for e in item[key]] + permuted_batch[key] = stack + + padded_batch = self.tokenizer.pad_encoded_batch( + permuted_batch, + padding=self.padding_type, + max_length=self.max_length, + exclude_keys=["labels"], + return_tensors=self.return_tensors, + ) + padded_batch = self.tokenizer.pad_encoded_batch( + padded_batch, + padding=self.padding_type, + max_length=self.max_target_length, + include_keys=["labels"], + return_tensors=self.return_tensors, + ) + + return padded_batch
+ + + +
+[docs] +class ImageCaptioningDataCollator: + def __init__( + self, + tokenizer: Tokenizer, + padding_type: str = "longest", + padding_side: str = "right", + max_length: int = None, + return_tensors: str = "pt", + ): + self.tokenizer = tokenizer + self.padding_type = padding_type + self.padding_side = padding_side + self.max_length = max_length + self.return_tensors = return_tensors + + if padding_type == "longest" and max_length is not None: + logger.warning( + "You passed `max_length` while also setting `padding_type` to `longest` which are " + "incompatible! Instead leave `max_length` as None or set `padding_type` to `max_length`! " + "Ignoring `max_length`" + ) + self.max_length = None + + def __call__(self, encoded_batch): + encoded_batch = [convert_batch_dict_dtype(x, dtype="list") for x in encoded_batch] + permuted_batch = {} + for key in encoded_batch[0].keys(): + stack = [e for item in encoded_batch for e in item[key]] + permuted_batch[key] = stack + + padded_batch = self.tokenizer.pad_encoded_batch( + permuted_batch, + padding=self.padding_type, + max_length=self.max_length, + exclude_keys=["pixel_values"], + return_tensors=self.return_tensors, + ) + padded_batch = convert_batch_dict_dtype(padded_batch, dtype="pt") + + return padded_batch
+ + + +
+[docs] +class SequenceLabelingDataCollator: + """ + A data collator for sequence labeling. + + Args: + tokenizer (Tokenizer): A Hezar tokenizer instance. (only its config is going to be used) + padding_type (str): Specifies padding strategy. Defaults to `longest`, but can also be `max_length` (in this case + `max_length` cannot be None) + padding_side (str): Specifies from which side of each tensor to add paddings. Defaults to `right`, but can also be + `left`. + label_pad_token_id (int): Token ID for padding labels. + max_length (int): If `padding_type` is set to `max_length` this parameter must be specified. Forces all tensors to + have this value as length. + return_tensors (str): Specifies the dtype of the returning tensors in the batch. Defaults to `pt(torch.Tensor)`, but + can also be `np` or `list`. + """ + + def __init__( + self, + tokenizer: Tokenizer, + padding_type: str = "longest", + padding_side: str = "right", + label_pad_token_id: int = -100, + max_length: int = None, + return_tensors: str = "pt", + ): + self.tokenizer = tokenizer + self.padding_type = padding_type + self.padding_side = padding_side + self.label_pad_token_id = label_pad_token_id + self.max_length = max_length + self.return_tensors = return_tensors + + def __call__(self, encoded_batch): + """ + Add padding to every item in the batch + + Args: + encoded_batch (List[Dict]): A batch dictionary + + Returns: + Dict: The same batch dictionary but padded + """ + label_name = "label" if "label" in encoded_batch[0].keys() else "labels" + labels = [feature[label_name] for feature in encoded_batch] if label_name in encoded_batch[0].keys() else None + self.tokenizer.config.padding_direction = self.padding_side + batch = self.tokenizer.pad_encoded_batch( + encoded_batch, + padding=self.padding_type, # noqa + max_length=self.max_length, + # Conversion to tensors will fail if we have labels as they are not of the same length yet. + return_tensors="pt" if labels is None else None, + ) + + if labels is None: + return batch + + batch.pop("word_ids", None) + sequence_length = torch.tensor(batch["token_ids"]).shape[1] + if self.padding_side == "right": + batch[label_name] = [ + list(label) + [self.label_pad_token_id] * (sequence_length - len(label)) for label in labels + ] + else: + batch[label_name] = [ + [self.label_pad_token_id] * (sequence_length - len(label)) + list(label) for label in labels + ] + + batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()} + return batch
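+
+# Usage sketch: pairs with a sequence labeling dataset; labels are padded with
+# `label_pad_token_id` (-100 by default) so the loss ignores padded positions:
+#
+#     collator = SequenceLabelingDataCollator(tokenizer)
+#     loader = DataLoader(dataset, batch_size=16, collate_fn=collator)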
+ + + +
+[docs] +class CharLevelOCRDataCollator: + """ + A data collator for character-level OCR. + + Args: + pad_token_id (int): Token ID for padding characters. + """ + + def __init__(self, pad_token_id: int = 0): + self.pad_token_id = pad_token_id + + def __call__(self, input_batch): + """ + Add padding to character-level OCR data. + + Args: + input_batch (Dict): Input batch containing pixel values and labels. + + Returns: + Dict: Padded input batch. + """ + if isinstance(input_batch, (list, tuple)) and isinstance(input_batch[0], dict): + input_batch = {key: [example[key] for example in input_batch] for key in input_batch[0].keys()} + input_batch["pixel_values"] = torch.stack(input_batch["pixel_values"], 0) + + max_length = max(map(len, input_batch["labels"])) + all_labels = [] + for labels in input_batch["labels"]: + labels = labels.numpy().tolist() + labels += [self.pad_token_id] * (max_length - len(labels)) + all_labels.append(labels) + input_batch["labels"] = torch.tensor(all_labels) + return input_batch
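+
+# A worked example of the padding behavior (values are illustrative):
+#
+#     collator = CharLevelOCRDataCollator(pad_token_id=0)
+#     batch = collator([
+#         {"pixel_values": torch.rand(3, 32, 128), "labels": torch.tensor([5, 2, 9])},
+#         {"pixel_values": torch.rand(3, 32, 128), "labels": torch.tensor([7, 1])},
+#     ])
+#     batch["labels"]  # -> tensor([[5, 2, 9], [7, 1, 0]])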
\ No newline at end of file
diff --git a/_modules/hezar/data/datasets/dataset.html b/_modules/hezar/data/datasets/dataset.html
new file mode 100644
index 00000000..230da8cd
--- /dev/null
+++ b/_modules/hezar/data/datasets/dataset.html
@@ -0,0 +1,605 @@
+hezar.data.datasets.dataset - Hezar Documentation

Source code for hezar.data.datasets.dataset

+from __future__ import annotations
+
+import os
+from typing import List, Optional
+
+from torch.utils.data import Dataset as TorchDataset
+
+from ...builders import build_dataset
+from ...configs import DatasetConfig
+from ...constants import (
+    DEFAULT_DATASET_CONFIG_FILE,
+    HEZAR_CACHE_DIR,
+    Backends,
+    RepoType,
+    SplitType,
+)
+from ...utils import verify_dependencies
+
+
+
+[docs] +class Dataset(TorchDataset): + """ + Base class for all datasets in Hezar. + + Args: + config: The configuration object for the dataset. + **kwargs: Additional keyword arguments. + + Attributes: + required_backends (List[str | Backends]): List of required backends for the dataset. + config_filename (str): Default dataset config file name. + cache_dir (str): Default cache directory for the dataset. + + """ + required_backends: List[str | Backends] = None + config_filename = DEFAULT_DATASET_CONFIG_FILE + cache_dir = os.path.join(HEZAR_CACHE_DIR, "datasets") + + def __init__(self, config: DatasetConfig, split=None, **kwargs): + verify_dependencies(self, self.required_backends) + self.config = config.update(kwargs) + self.preprocessor = None + self.data_collator = None + self.split = split + + def __str__(self): + dataset_name = self.config.path or self.config.name + dataset_size = len(self) + return f"{self.__class__.__name__}(path={dataset_name}['{self.split}'], size={dataset_size})" + + def __len__(self): + """ + Returns the length of the dataset. + + Raises: + NotImplementedError: This method must be implemented in derived classes. + + """ + raise NotImplementedError + + def __getitem__(self, index): + """ + Gets a specific item from the dataset. + + Args: + index: Index of the item to retrieve. + + Raises: + NotImplementedError: This method must be implemented in derived classes. + + """ + raise NotImplementedError + +
+[docs] + @classmethod + def load( + cls, + hub_path: str | os.PathLike, + config_filename: Optional[str] = None, + split: Optional[str | SplitType] = None, + cache_dir: str = None, + **kwargs, + ) -> "Dataset": + """ + Load the dataset from a hub path. + + Args: + hub_path (str | os.PathLike): Path to dataset from hub or locally. + config_filename (Optional[str]): Dataset config file name. + split (Optional[str | SplitType]): Dataset split, defaults to "train". + cache_dir (str): Path to cache directory + **kwargs: Config parameters as keyword arguments. + + Returns: + Dataset: An instance of the loaded dataset. + + """ + split = split or "train" + config_filename = config_filename or cls.config_filename + if cache_dir is not None: + cls.cache_dir = cache_dir + dataset_config = DatasetConfig.load( + hub_path, + filename=config_filename, + repo_type=RepoType.DATASET, + cache_dir=cls.cache_dir, + ) + dataset_config.path = hub_path + dataset = build_dataset(dataset_config.name, config=dataset_config, split=split, **kwargs) + return dataset
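+
+# Loading sketch (the Hub path is hypothetical; `load` resolves the dataset
+# config, then builds the registered dataset class with the requested split):
+#
+#     dataset = Dataset.load("my-username/my-dataset", split="train")
+#     print(len(dataset), dataset[0])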
\ No newline at end of file
diff --git a/_modules/hezar/data/datasets/image_captioning_dataset.html b/_modules/hezar/data/datasets/image_captioning_dataset.html
new file mode 100644
index 00000000..200b4114
--- /dev/null
+++ b/_modules/hezar/data/datasets/image_captioning_dataset.html
@@ -0,0 +1,617 @@
+hezar.data.datasets.image_captioning_dataset - Hezar Documentation

Source code for hezar.data.datasets.image_captioning_dataset

+from dataclasses import dataclass
+
+import torch
+from datasets import load_dataset
+
+from ...builders import build_preprocessor
+from ...configs import DatasetConfig
+from ...constants import Backends, TaskType
+from ...preprocessors import ImageProcessorConfig, Tokenizer
+from ...registry import register_dataset
+from ...utils import Logger, shift_tokens_right
+from ..data_collators import ImageCaptioningDataCollator
+from .dataset import Dataset
+
+
+logger = Logger(__name__)
+
+_required_backends = [Backends.SCIKIT]
+
+
+
+[docs] +@dataclass +class ImageCaptioningDatasetConfig(DatasetConfig): + """ + Configuration class for image captioning datasets. + + Args: + path (str): Path to the dataset. + tokenizer_path (str): Path to the tokenizer file. + text_column (str): Column name for text in the dataset. + images_paths_column (str): Column name for image paths in the dataset. + max_length (int): Maximum length of text. + test_split_size (float): Size of the test split. + image_processor_config (ImageProcessorConfig): Configuration for image processing. + + """ + name = "image_captioning" + task: TaskType = TaskType.IMAGE2TEXT + path: str = None + tokenizer_path: str = None + text_column: str = "label" + images_paths_column = "image_path" + max_length: int = None + test_split_size: float = 0.2 + image_processor_config: ImageProcessorConfig = None
+ + + +
+[docs] +@register_dataset("image_captioning", config_class=ImageCaptioningDatasetConfig) +class ImageCaptioningDataset(Dataset): + """ + General image captioning dataset class. + + Args: + config (ImageCaptioningDatasetConfig): The configuration object for the dataset. + split: Dataset split, defaults to None. + **kwargs: Additional keyword arguments. + """ + required_backends = _required_backends + + def __init__(self, config: ImageCaptioningDatasetConfig, split=None, **kwargs): + super().__init__(config=config, split=split, **kwargs) + self.data = self._load(split) + self.image_processor = build_preprocessor("image_processor", config=self.config.image_processor_config) + self.tokenizer = Tokenizer.load(self.config.tokenizer_path) + self.data_collator = ImageCaptioningDataCollator( + self.tokenizer, + padding_type="max_length" if self.config.max_length is not None else "longest", + max_length=self.config.max_length + ) + + def __len__(self): + """ + Returns the length of the dataset. + + Returns: + int: The length of the dataset. + + """ + return len(self.data) + + def _load(self, split=None): + """ + Load the dataset and clean up invalid samples. + + Args: + split: Dataset split, defaults to None. + + Returns: + Dataset: The cleaned dataset. + + """ + data = load_dataset(self.config.path, split=split, cache_dir=self.cache_dir) + return data + + def __getitem__(self, index): + """ + Get a specific item from the dataset. + + Args: + index: Index of the item to retrieve. + + Returns: + dict: The input data. + """ + path, text = self.data[index].values() + pixel_values = self.image_processor(path, return_tensors="pt")["pixel_values"] + tokenized_inputs = self.tokenizer(text, padding="max_length", max_length=self.config.max_length) + labels = torch.tensor([tokenized_inputs["token_ids"]]) + attention_mask = torch.tensor([tokenized_inputs["attention_mask"]]) + decoder_input_ids = shift_tokens_right( + labels, + pad_token_id=self.tokenizer.pad_token_id, + decoder_start_token_id=self.tokenizer.bos_token_id, + ) + inputs = { + "pixel_values": pixel_values, + "labels": labels, + "decoder_input_ids": decoder_input_ids, + "decoder_attention_mask": attention_mask, + } + return inputs
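+
+# Construction sketch (repo ids are hypothetical; an `image_processor_config`
+# matching the target model is also required):
+#
+#     config = ImageCaptioningDatasetConfig(
+#         path="my-username/image-captions",
+#         tokenizer_path="my-username/caption-tokenizer",
+#         image_processor_config=ImageProcessorConfig(),
+#         max_length=64,
+#     )
+#     dataset = ImageCaptioningDataset(config, split="train")
+#     sample = dataset[0]  # pixel_values, labels, decoder_input_ids, decoder_attention_mask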
\ No newline at end of file
diff --git a/_modules/hezar/data/datasets/ocr_dataset.html b/_modules/hezar/data/datasets/ocr_dataset.html
new file mode 100644
index 00000000..7dd4e872
--- /dev/null
+++ b/_modules/hezar/data/datasets/ocr_dataset.html
@@ -0,0 +1,697 @@
+hezar.data.datasets.ocr_dataset - Hezar Documentation

Source code for hezar.data.datasets.ocr_dataset

+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Dict
+
+import torch
+from datasets import load_dataset
+
+from ...builders import build_preprocessor
+from ...configs import DatasetConfig
+from ...constants import Backends, TaskType
+from ...preprocessors import ImageProcessorConfig, Tokenizer
+from ...registry import register_dataset
+from ...utils import Logger, is_text_valid, reverse_string_digits
+from ..data_collators import CharLevelOCRDataCollator
+from .dataset import Dataset
+
+
+logger = Logger(__name__)
+
+_required_backends = [Backends.SCIKIT]
+
+fa_characters = [
+    "", "آ", "ا", "ب", "پ", "ت", "ث", "ج", "چ", "ح", "خ", "د", "ذ", "ر", "ز", "ژ", "س", "ش",
+    "ص", "ض", "ط", "ظ", "ع", "غ", "ف", "ق", "ک", "گ", "ل", "م", "ن", "و", "ه", "ی", " "
+]
+fa_numbers = ["۱", "۲", "۳", "۴", "۵", "۶", "۷", "۸", "۹", "۰"]
+fa_special_characters = ["ء", "ؤ", "ئ", "أ", "ّ"]
+fa_symbols = ["/", "(", ")", "+", "-", ":", "،", "!", ".", "؛", "=", "%", "؟"]
+en_numbers = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"]
+all_characters = fa_characters + fa_numbers + fa_special_characters + fa_symbols + en_numbers
+
+ID2LABEL = dict(enumerate(all_characters))
+
+
+
+[docs] +class TextSplitType(str, Enum): + CHAR_SPLIT = "char_split" # mostly for char level ocr models + TOKENIZE = "tokenize" # mostly for transformer-based ocr models
+ + + +
+[docs] +@dataclass +class OCRDatasetConfig(DatasetConfig): + """ + Configuration class for OCR datasets. + + Args: + path (str): Path to the dataset. + text_split_type (TextSplitType): Type of text splitting (CHAR_SPLIT or TOKENIZE). + tokenizer_path (str): Path to the tokenizer file. + id2label (Dict[int, str]): Mapping of label IDs to characters. + text_column (str): Column name for text in the dataset. + images_paths_column (str): Column name for image paths in the dataset. + max_length (int): Maximum length of text. + invalid_characters (list): List of invalid characters. + reverse_digits (bool): Whether to reverse the digits in text. + image_processor_config (ImageProcessorConfig): Configuration for image processing. + + """ + name = "ocr" + task: TaskType = TaskType.IMAGE2TEXT + path: str = None + text_split_type: str | TextSplitType = TextSplitType.TOKENIZE + tokenizer_path: str = None # if left to None, text_split_type must be `char_split` + id2label: Dict[int, str] = field(default_factory=lambda: ID2LABEL) + text_column: str = "label" + images_paths_column = "image_path" + max_length: int = None + invalid_characters: list = None + reverse_text: bool = None + reverse_digits: bool = None + image_processor_config: ImageProcessorConfig = None
+ + + +
+[docs] +@register_dataset("ocr", config_class=OCRDatasetConfig) +class OCRDataset(Dataset): + """ + General OCR dataset class. + + OCR dataset supports two types of image to text dataset. One is for tokenizer-based models in which the labels are + tokens and the other is char-level models in which the labels are separated by character and the converted to ids. + This behavior is specified by the `text_split_type` in config which can be either `tokenize` or `char_split`. + + """ + required_backends = _required_backends + + def __init__(self, config: OCRDatasetConfig, split=None, **kwargs): + """ + Initializes a new OCRDataset instance. + + Args: + config (OCRDatasetConfig): The configuration object for the dataset. + split: Dataset split, defaults to None. + **kwargs: Additional keyword arguments. + + """ + super().__init__(config=config, split=split, **kwargs) + self.data = self._load(split) + self.image_processor = build_preprocessor("image_processor", config=self.config.image_processor_config) + if self.config.text_split_type == TextSplitType.TOKENIZE: + if self.config.tokenizer_path is not None: + self.tokenizer = Tokenizer.load(self.config.tokenizer_path) + self.data_collator = None # TODO resolve this in the future. + else: + raise ValueError("No `tokenizer_path` given although `text_split_type` is set to `tokenize`!") + else: + self.tokenizer = None + self.data_collator = CharLevelOCRDataCollator() + + def __len__(self): + """ + Returns the length of the dataset. + + Returns: + int: The length of the dataset. + + """ + return len(self.data) + + def _load(self, split=None): + """ + Load the dataset and clean up invalid samples. + + Args: + split: Dataset split, defaults to None. + + Returns: + Dataset: The cleaned dataset. + + """ + data = load_dataset(self.config.path, split=split, cache_dir=self.cache_dir) + # Cleanup dataset + valid_indices = [] + invalid_indices = [] + for i, sample in enumerate(list(iter(data))): + path, text = sample.values() + if len(text) <= self.config.max_length and is_text_valid(text, self.config.id2label.values()): + valid_indices.append(i) + else: + invalid_indices.append(i) + if len(invalid_indices): + logger.warning( + f"{len(invalid_indices)} invalid samples found in the dataset! " + f"Inspect them using the `invalid_data` attribute" + ) + self.invalid_data = data.select(invalid_indices) + data = data.select(valid_indices) + return data + + def _text_to_tensor(self, text): + """ + Convert text to tensor based on the configured text_split_type. + + Args: + text (str): The raw text. + + Returns: + torch.Tensor: The output tensor. + + """ + # Tokenize text inputs if text_split_type is set to `tokenize` + if self.config.text_split_type == TextSplitType.TOKENIZE: + token_ids = self.tokenizer(text, padding="max_length", max_length=self.config.max_length)["token_ids"] + # Make sure to ignore pad tokens by the loss function + token_ids = [token_id if token_id != self.tokenizer.pad_token_id else -100 for token_id in token_ids] + labels = torch.tensor(token_ids) + # If split text is not tokenizer-based + elif self.config.text_split_type == TextSplitType.CHAR_SPLIT: + if self.config.reverse_digits: + text = reverse_string_digits(text) + label2id = {v: k for k, v in self.config.id2label.items()} + labels = [label2id[x] for x in text] + labels = torch.LongTensor(labels) + else: + raise ValueError(f"Invalid `text_split_type={self.config.text_split_type}`") + + return labels + + def __getitem__(self, index): + """ + Get a specific item from the dataset. 
+ + Args: + index: Index of the item to retrieve. + + Returns: + dict: The input data. + + """ + path, text = self.data[index].values() + pixel_values = self.image_processor(path, return_tensors="pt")["pixel_values"][0] + labels = self._text_to_tensor(text) + inputs = { + "pixel_values": pixel_values, + "labels": labels, + } + return inputs
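+
+# Sketch of the char-level path: with `text_split_type="char_split"`, each
+# character is mapped through the inverse of `id2label`, e.g. with the default
+# `ID2LABEL` defined above:
+#
+#     label2id = {v: k for k, v in ID2LABEL.items()}
+#     labels = torch.LongTensor([label2id[ch] for ch in "سلام"])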
\ No newline at end of file
diff --git a/_modules/hezar/data/datasets/sequence_labeling_dataset.html b/_modules/hezar/data/datasets/sequence_labeling_dataset.html
new file mode 100644
index 00000000..703f3585
--- /dev/null
+++ b/_modules/hezar/data/datasets/sequence_labeling_dataset.html
@@ -0,0 +1,676 @@
+hezar.data.datasets.sequence_labeling_dataset - Hezar Documentation

Source code for hezar.data.datasets.sequence_labeling_dataset

+from dataclasses import dataclass
+
+from datasets import load_dataset
+
+from ...configs import DatasetConfig
+from ...constants import TaskType
+from ...preprocessors import Tokenizer
+from ...registry import register_dataset
+from ...utils import Logger
+from ..data_collators import SequenceLabelingDataCollator
+from .dataset import Dataset
+
+
+logger = Logger(__name__)
+
+
+
+[docs] +@dataclass +class SequenceLabelingDatasetConfig(DatasetConfig): + """ + Configuration class for sequence labeling datasets. + + Args: + path (str): Path to the dataset. + tokenizer_path (str): Path to the tokenizer file. + tags_field (str): Field name for tags in the dataset. + tokens_field (str): Field name for tokens in the dataset. + max_length (int): Maximum length of tokens. + ignore_index (int): Index to ignore in the loss function. + label_all_tokens (bool): Whether to label all tokens or just the first token in a word. + is_iob_schema (bool): Whether the dataset follows the IOB schema. + """ + + name = "sequence_labeling" + task: TaskType = TaskType.SEQUENCE_LABELING + path: str = None + tokenizer_path: str = None + tags_field: str = None + tokens_field: str = None + max_length: int = None + ignore_index: int = -100 + label_all_tokens: bool = True + is_iob_schema: bool = False # Usually set to True for NER & Chunker and set to False for POS
+ + + +
+[docs] +@register_dataset("sequence_labeling", config_class=SequenceLabelingDatasetConfig) +class SequenceLabelingDataset(Dataset): + """ + A sequence labeling dataset class. + As of now this class is intended for datasets existing on the Hub! + + Args: + config (SequenceLabelingDatasetConfig): Dataset config object. + split: Which split to use. + **kwargs: Extra config parameters to assign to the original config. + """ + + def __init__(self, config: SequenceLabelingDatasetConfig, split=None, **kwargs): + """ + Initializes a new SequenceLabelingDataset instance. + + Args: + config (SequenceLabelingDatasetConfig): The configuration object for the dataset. + split: Dataset split, defaults to None. + **kwargs: Additional keyword arguments. + + """ + super().__init__(config, split=split, **kwargs) + self.dataset = self._load(split) + self._extract_labels() + self.tokenizer = self._build_tokenizer() + if self.tokenizer: + self.data_collator = SequenceLabelingDataCollator(self.tokenizer) + + def _load(self, split): + """ + Load the dataset. + + Args: + split: Dataset split. + + Returns: + The whole dataset. + + """ + # TODO: In case we want to make this class work on other types like csv, json, etc. we have to do it here. + dataset = load_dataset(self.config.path, split=split, cache_dir=self.cache_dir) + return dataset + + def _build_tokenizer(self): + """ + Build the tokenizer. + + Returns: + Tokenizer: The tokenizer. + + """ + if self.config.tokenizer_path: + tokenizer = Tokenizer.load(self.config.tokenizer_path) + else: + logger.warning( + "This dataset requires a tokenizer to work. Provide it in config as `tokenizer_path` " + "or set it manually as `dataset.tokenizer = your_tokenizer` after building the dataset." + ) + tokenizer = None + return tokenizer + + def _extract_labels(self): + """ + Extract label names, ids and build dictionaries. + """ + tags_list = self.dataset.features[self.config.tags_field].feature.names + self.id2label = self.config.id2label = {k: str(v) for k, v in dict(enumerate(tags_list)).items()} + self.label2id = self.config.label2id = {v: k for k, v in self.id2label.items()} + self.num_labels = self.config.num_labels = len(tags_list) + + def __len__(self): + """ + Returns the length of the dataset. + + Returns: + int: The length of the dataset. + + """ + return len(self.dataset) + + def _tokenize_and_align(self, tokens, labels): + """ + Tokenize and align tokens and labels. + + Args: + tokens: List of tokens. + labels: List of labels. + + Returns: + dict: Tokenized and aligned inputs. + + """ + tokenized_inputs = self.tokenizer( + tokens, + is_split_into_words=True, + return_word_ids=True, + padding=True, + truncation=True, + ) + word_ids = tokenized_inputs["word_ids"] + + previous_word_idx = None + label_ids = [] + for word_idx in word_ids: + # Special tokens have a word id that is None. We set the label to -100, so they are automatically + # ignored in the loss function. + if word_idx is None: + label_ids.append(self.config.ignore_index) + # We set the label for the first token of each word. + elif word_idx != previous_word_idx: + label_ids.append(labels[word_idx]) + # For the other tokens in a word, we set the label to either the current label or -100, depending on + # the label_all_tokens flag. 
+ else: + label_ids.append(labels[word_idx] if self.config.label_all_tokens else self.config.ignore_index) + previous_word_idx = word_idx + + tokenized_inputs["labels"] = label_ids + return tokenized_inputs + + def __getitem__(self, index): + """ + Tokenize inputs and return a dict containing ids, masks, labels, etc. + + Args: + index: Sample index. + + Returns: + dict: The input data. + + """ + tokens, tags = self.dataset[index].values() + inputs = self._tokenize_and_align(tokens, tags) + return inputs
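+
+# Alignment sketch: special tokens always get `ignore_index` (-100); with
+# `label_all_tokens=True` every sub-token of a word shares the word's tag
+# (tokens and tag ids below are illustrative):
+#
+#     tokens = ["This", "sentence", "is", "tokenized"]
+#     tags = [3, 0, 0, 5]
+#     inputs = dataset._tokenize_and_align(tokens, tags)
+#     # inputs["labels"] -> e.g. [-100, 3, 0, 0, 5, 5, -100]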
\ No newline at end of file
diff --git a/_modules/hezar/data/datasets/text_classification_dataset.html b/_modules/hezar/data/datasets/text_classification_dataset.html
new file mode 100644
index 00000000..c4e5d033
--- /dev/null
+++ b/_modules/hezar/data/datasets/text_classification_dataset.html
@@ -0,0 +1,643 @@
+hezar.data.datasets.text_classification_dataset - Hezar Documentation

Source code for hezar.data.datasets.text_classification_dataset

+from dataclasses import dataclass
+
+import torch
+from datasets import load_dataset
+
+from ...configs import DatasetConfig
+from ...constants import TaskType
+from ...preprocessors import Tokenizer
+from ...registry import register_dataset
+from ...utils import Logger
+from ..data_collators import TextPaddingDataCollator
+from .dataset import Dataset
+
+
+logger = Logger(__name__)
+
+
+
+[docs] +@dataclass +class TextClassificationDatasetConfig(DatasetConfig): + """ + Configuration class for text classification datasets. + + Args: + path (str): Path to the dataset. + tokenizer_path (str): Path to the tokenizer file. + label_field (str): Field name for labels in the dataset. + text_field (str): Field name for text in the dataset. + max_length (int): Maximum length of text. + """ + + name = "text_classification" + task: TaskType = TaskType.TEXT_CLASSIFICATION + path: str = None + tokenizer_path: str = None + label_field: str = None + text_field: str = None + max_length: int = None
+ + + +
+[docs] +@register_dataset("text_classification", config_class=TextClassificationDatasetConfig) +class TextClassificationDataset(Dataset): + """ + A text classification dataset class. + As of now this class is intended for datasets existing on the Hub! + + Args: + config (TextClassificationDatasetConfig): Dataset config object. + split: Which split to use. + **kwargs: Extra config parameters to assign to the original config. + """ + + def __init__(self, config: TextClassificationDatasetConfig, split=None, **kwargs): + """ + Initializes a new TextClassificationDataset instance. + + Args: + config (TextClassificationDatasetConfig): The configuration object for the dataset. + split: Dataset split, defaults to None. + **kwargs: Additional keyword arguments. + + """ + super().__init__(config, split=split, **kwargs) + self.dataset = self._load(split) + self._extract_labels() + self.tokenizer = self._build_tokenizer() + self.data_collator = TextPaddingDataCollator( + tokenizer=self.tokenizer, + max_length=self.config.max_length, + ) + + def _load(self, split): + """ + Load the dataset. + + Args: + split: Dataset split. + + Returns: + The whole dataset. + + """ + # TODO: In case we want to make this class work on other types like csv, json, etc. we have to do it here. + dataset = load_dataset(self.config.path, split=split, cache_dir=self.cache_dir) + return dataset + + def _build_tokenizer(self): + """ + Build the tokenizer. + + Returns: + Tokenizer: The tokenizer. + + """ + if self.config.tokenizer_path: + tokenizer = Tokenizer.load(self.config.tokenizer_path) + else: + logger.warning( + "This dataset requires a tokenizer to work. Provide it in config as `tokenizer_path` " + "or set it manually as `dataset.tokenizer = your_tokenizer` after building the dataset." + ) + tokenizer = None + return tokenizer + + def _extract_labels(self): + """ + Extract label names, ids and build dictionaries. + """ + labels_list = self.dataset.features[self.config.label_field].names + self.id2label = self.config.id2label = {k: str(v) for k, v in dict(enumerate(labels_list)).items()} + self.label2id = self.config.label2id = {v: k for k, v in self.id2label.items()} + self.num_labels = self.config.num_labels = len(labels_list) + + def __len__(self): + """ + Returns the length of the dataset. + + Returns: + int: The length of the dataset. + + """ + return len(self.dataset) + + def __getitem__(self, index): + """ + Tokenize inputs and return a dict containing ids, masks, labels, etc. + + Args: + index: Sample index. + + Returns: + dict: The input data. + + """ + text = self.dataset[index][self.config.text_field] + label = self.dataset[index][self.config.label_field] + inputs = self.tokenizer( + text, + return_tensors="pt", + truncation_strategy="longest_first", + padding="longest", + return_attention_mask=True, + ) + label_idx = torch.tensor([label], dtype=torch.long) # noqa + inputs["labels"] = label_idx + + return inputs
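+
+# Construction sketch (repo ids are hypothetical; `label_field`/`text_field`
+# must match the columns of the underlying dataset):
+#
+#     config = TextClassificationDatasetConfig(
+#         path="my-username/sentiment-dataset",
+#         tokenizer_path="my-username/bert-base",
+#         label_field="label",
+#         text_field="text",
+#         max_length=128,
+#     )
+#     train_data = TextClassificationDataset(config, split="train")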
\ No newline at end of file
diff --git a/_modules/hezar/data/datasets/text_summarization_dataset.html b/_modules/hezar/data/datasets/text_summarization_dataset.html
new file mode 100644
index 00000000..42137f27
--- /dev/null
+++ b/_modules/hezar/data/datasets/text_summarization_dataset.html
@@ -0,0 +1,650 @@
+hezar.data.datasets.text_summarization_dataset - Hezar Documentation

Source code for hezar.data.datasets.text_summarization_dataset

+from dataclasses import dataclass
+
+from datasets import load_dataset
+
+from ...configs import DatasetConfig
+from ...constants import TaskType
+from ...preprocessors import Tokenizer
+from ...registry import register_dataset
+from ...utils import Logger
+from ..data_collators import TextGenerationDataCollator
+from .dataset import Dataset
+
+
+logger = Logger(__name__)
+
+
+
+[docs] +@dataclass +class TextSummarizationDatasetConfig(DatasetConfig): + """ + Configuration class for text summarization datasets. + + Args: + path (str): Path to the dataset. + tokenizer_path (str): Path to the tokenizer file. + prefix (str): Prefix for conditional generation. + text_field (str): Field name for text in the dataset. + summary_field (str): Field name for summary in the dataset. + title_field (str): Field name for title in the dataset. + max_length (int): Maximum length of text. + max_target_length (int): Maximum length of the target summary. + """ + + name = "text_summarization" + task: TaskType = TaskType.TEXT_GENERATION + path: str = None + tokenizer_path: str = None + prefix: str = None + text_field: str = None + summary_field: str = None + title_field: str = None + max_length: int = None + max_target_length: int = None
+ + + +
+[docs] +@register_dataset("text_summarization", config_class=TextSummarizationDatasetConfig) +class TextSummarizationDataset(Dataset): + """ + A text summarization dataset class. + As of now this class is intended for datasets existing on the Hub! + + Args: + config (TextSummarizationDatasetConfig): Dataset config object. + split: Which split to use. + **kwargs: Extra config parameters to assign to the original config. + """ + + def __init__(self, config: TextSummarizationDatasetConfig, split=None, **kwargs): + """ + Initializes a new TextSummarizationDataset instance. + + Args: + config (TextSummarizationDatasetConfig): The configuration object for the dataset. + split: Dataset split, defaults to None. + **kwargs: Additional keyword arguments. + + """ + super().__init__(config, split=split, **kwargs) + self.dataset = self._load(split) + self.tokenizer = self._build_tokenizer() + self.data_collator = TextGenerationDataCollator( + tokenizer=self.tokenizer, + max_length=self.config.max_length, + max_target_length=self.config.max_target_length, + padding_type="max_length" if self.config.max_length else "longest", + ) + + def _load(self, split): + """ + Load the dataset. + + Args: + split: Dataset split. + + Returns: + The whole dataset. + + """ + # TODO: In case we want to make this class work on other types like csv, json, etc. we have to do it here. + dataset = load_dataset(self.config.path, split=split, cache_dir=self.cache_dir) + return dataset + + def _build_tokenizer(self): + """ + Build the tokenizer. + + Returns: + Tokenizer: The tokenizer. + + """ + if self.config.tokenizer_path: + tokenizer = Tokenizer.load(self.config.tokenizer_path) + else: + logger.warning( + "This dataset requires a tokenizer to work. Provide it in config as `tokenizer_path` " + "or set it manually as `dataset.tokenizer = your_tokenizer` after building the dataset." + ) + tokenizer = None + return tokenizer + + def __len__(self): + """ + Returns the length of the dataset. + + Returns: + int: The length of the dataset. + + """ + return len(self.dataset) + + def __getitem__(self, index): + """ + Tokenize inputs and return a dict containing ids, masks, labels, etc. + + Args: + index: Sample index. + + Returns: + dict: The input data. + + """ + text = self.dataset[index][self.config.text_field] + if self.config.prefix is not None: + text = self.config.prefix + text # for conditional generation we might need a static prefix + summary = self.dataset[index][self.config.summary_field] + + inputs = self.tokenizer( + text, + return_tensors="pt", + max_length=self.config.max_length, + padding="max_length" if self.config.max_length else "longest", + return_attention_mask=True, + ) + labels = self.tokenizer( + summary, + return_tensors="pt", + max_length=self.config.max_length, + padding="max_length" if self.config.max_target_length else "longest", + return_attention_mask=True, + ) + + inputs["labels"] = labels["token_ids"] + + return inputs
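+
+# Construction sketch (repo ids are hypothetical; `prefix` is only needed for
+# conditional generation models that expect a static prompt prefix):
+#
+#     config = TextSummarizationDatasetConfig(
+#         path="my-username/summarization-dataset",
+#         tokenizer_path="my-username/t5-base",
+#         text_field="text",
+#         summary_field="summary",
+#         max_length=512,
+#         max_target_length=128,
+#     )
+#     dataset = TextSummarizationDataset(config, split="train")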
\ No newline at end of file
diff --git a/_modules/hezar/embeddings/embedding.html b/_modules/hezar/embeddings/embedding.html
new file mode 100644
index 00000000..17592afa
--- /dev/null
+++ b/_modules/hezar/embeddings/embedding.html
@@ -0,0 +1,900 @@
+hezar.embeddings.embedding - Hezar Documentation

Source code for hezar.embeddings.embedding

+from __future__ import annotations
+
+import os
+import tempfile
+from typing import Dict, List
+
+from huggingface_hub import HfApi, hf_hub_download
+
+from ..builders import build_embedding
+from ..configs import EmbeddingConfig
+from ..constants import (
+    DEFAULT_EMBEDDING_CONFIG_FILE,
+    DEFAULT_EMBEDDING_FILE,
+    DEFAULT_EMBEDDING_SUBFOLDER,
+    HEZAR_CACHE_DIR,
+    Backends,
+)
+from ..utils import Logger, get_lib_version, verify_dependencies
+
+
+logger = Logger(__name__)
+
+# The code below is a workaround for a Gensim limitation: models can only be
+# loaded with the same gensim & numpy versions they were saved with.
+REQUIRED_GENSIM_VERSION = "4.3.2"
+REQUIRED_NUMPY_VERSION = "1.24"
+
+
+# Check if the right combo of gensim/numpy versions are installed
+def _verify_gensim_installation():
+    if (
+        not get_lib_version("numpy").startswith(REQUIRED_NUMPY_VERSION)
+        or not get_lib_version("gensim").startswith(REQUIRED_GENSIM_VERSION)
+    ):
+        raise ImportError(
+            f"The embeddings module in this version of Hezar, requires a combo of numpy>={REQUIRED_NUMPY_VERSION} and "
+            f"gensim=={REQUIRED_GENSIM_VERSION}. Please install them by running: \n"
+            f"`pip install numpy~={REQUIRED_NUMPY_VERSION} gensim=={REQUIRED_GENSIM_VERSION}`\n"
+            f"and make sure to restart your runtime if you're on a notebook environment!\n"
+            f"You can also set `bypass_version_check=True` in the embedding's config so that this error is not raised."
+        )
+
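+
+# The check can be bypassed (at your own risk) via the embedding config's
+# `bypass_version_check` field, e.g.:
+#
+#     config = EmbeddingConfig(bypass_version_check=True)  # or set it on a loaded config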
+
+
+[docs] +class Embedding: + """ + Base class for all embeddings. + + Args: + config: An EmbeddingConfig object to construct the embedding. + embedding_file (str): Path to the embedding file. + vectors_file (str): Path to the vectors file. + **kwargs: Extra embedding config parameters passed as keyword arguments. + """ + + required_backends: List[str | Backends] = [] + + filename = DEFAULT_EMBEDDING_FILE + vectors_filename = f"{filename}.wv.vectors.npy" + config_filename = DEFAULT_EMBEDDING_CONFIG_FILE + subfolder = DEFAULT_EMBEDDING_SUBFOLDER + + def __init__(self, config: EmbeddingConfig, embedding_file: str = None, vectors_file: str = None, **kwargs): + verify_dependencies(self, self.required_backends) # Check if all the required dependencies are installed + self.config = config.update(kwargs) + if not self.config.bypass_version_check: + _verify_gensim_installation() + + self.model = self.from_file(embedding_file, vectors_file) if embedding_file else self.build() +
+[docs] + def build(self): + """ + Build the embedding model. + """ + raise NotImplementedError
+ + +
+[docs] + def from_file(self, embedding_path, vectors_path): + """ + Load the embedding model from file. + + Args: + embedding_path (str): Path to the embedding file. + vectors_path (str): Path to the vectors file. + """ + raise NotImplementedError
+ + + def __call__(self, inputs: str | List[str], **kwargs): + """ + Get vectors for input words. + + Args: + inputs (str | List[str]): Input word(s). + **kwargs: Additional keyword arguments. + + Returns: + List: List of word vectors. + """ + if isinstance(inputs, str): + inputs = [inputs] + vectors = [self.word_vectors[w] for w in inputs] + return vectors + +
+[docs] + def train(self, dataset, epochs): + """ + Train the embedding model on a dataset. + + Args: + dataset: The training dataset. + epochs: Number of training epochs. + """ + raise NotImplementedError
+ + +
+[docs] + def word2index(self, word): + """ + Get the index of a word in the vocabulary. + + Args: + word (str): Input word. + + Returns: + int: Index of the word. + """ + return self.vocab.get(word, -1)
+ + +
+[docs] + def index2word(self, index): + """ + Get the word corresponding to a given index. + + Args: + index (int): Input index. + + Returns: + str: Word corresponding to the index. + """ + keyed_vocab = {v: k for k, v in self.vocab.items()} + return keyed_vocab[index]
+ + +
+[docs] + def similarity(self, word1: str, word2: str): + """ + Get the similarity between two words. + + Args: + word1 (str): First word. + word2 (str): Second word. + """ + raise NotImplementedError
+ + +
+[docs] + def doesnt_match(self, words: List[str]): + """ + Get the word that doesn't match the others in a list. + + Args: + words (List[str]): List of words. + """ + raise NotImplementedError
+ + +
+[docs] + def most_similar(self, word: str, top_n: int = 5): + """ + Get the most similar words to a given word. + + Args: + word (str): Input word. + top_n (int): Number of similar words to retrieve. + """ + raise NotImplementedError
+ + +
+[docs] + def get_normed_vectors(self): + """ + Get normalized word vectors. + """ + raise NotImplementedError
+ + +
+[docs] + @classmethod + def load( + cls, + hub_or_local_path, + config_filename=None, + embedding_file=None, + vectors_file=None, + subfolder=None, + cache_dir=None, + **kwargs, + ) -> "Embedding": + """ + Load an embedding model from a local or Hugging Face Hub path. + + Args: + hub_or_local_path: Path to the local directory or the Hugging Face Hub repository. + config_filename (str): Configuration file name. + embedding_file (str): Embedding file name. + vectors_file (str): Vectors file name. + subfolder (str): Subfolder within the repository. + cache_dir (str): Path to cache directory + **kwargs: Additional keyword arguments. + + Returns: + Embedding: Loaded Embedding object. + """ + config_filename = config_filename or cls.config_filename + embedding_file = embedding_file or cls.filename + vectors_file = vectors_file or cls.vectors_filename + subfolder = subfolder or cls.subfolder + cache_dir = cache_dir or HEZAR_CACHE_DIR + + config = EmbeddingConfig.load( + hub_or_local_path, + filename=config_filename, + subfolder=subfolder, + cache_dir=cache_dir, + ) + + if os.path.isdir(hub_or_local_path): + embedding_path = os.path.join(hub_or_local_path, subfolder, embedding_file) + vectors_path = os.path.join(hub_or_local_path, subfolder, vectors_file) + else: + embedding_path = hf_hub_download( + hub_or_local_path, + filename=embedding_file, + subfolder=subfolder, + cache_dir=cache_dir, + resume_download=True, + ) + vectors_path = hf_hub_download( + hub_or_local_path, + filename=vectors_file, + subfolder=subfolder, + cache_dir=cache_dir, + resume_download=True, + ) + + embedding = build_embedding( + config.name, + config=config, + embedding_file=embedding_path, + vectors_file=vectors_path, + **kwargs, + ) + + return embedding
+ + +
+[docs] + def save( + self, + path: str | os.PathLike, + filename: str = None, + subfolder: str = None, + save_config: bool = True, + config_filename: str = None, + ): + """ + Save the embedding model to a specified path. + + Args: + path (str | os.PathLike): Path to save the embedding model. + filename (str): Name of the embedding file. + subfolder (str): Subfolder within the path. + save_config (bool): Whether to save the configuration. + config_filename (str): Configuration file name. + """ + raise NotImplementedError
+ + +
+[docs] + def push_to_hub( + self, + repo_id, + commit_message=None, + subfolder=None, + filename=None, + vectors_filename=None, + config_filename=None, + private=False, + ): + """ + Push the embedding model to the Hugging Face Hub. + + Args: + repo_id: ID of the Hugging Face Hub repository. + commit_message (str): Commit message. + subfolder (str): Subfolder within the repository. + filename (str): Name of the embedding file. + vectors_filename (str): Name of the vectors file. + config_filename (str): Configuration file name. + private (bool): Whether the repository is private. + """ + subfolder = subfolder or self.subfolder + filename = filename or self.filename + vectors_filename = vectors_filename or self.vectors_filename + config_filename = config_filename or self.config_filename + + api = HfApi() + # create remote repo + api.create_repo(repo_id, exist_ok=True) + # save to tmp and prepare for push + cache_path = tempfile.mkdtemp() + # save embedding model file + embedding_save_dir = os.path.join(cache_path) + os.makedirs(embedding_save_dir, exist_ok=True) + + if commit_message is None: + commit_message = "Hezar: Upload embedding and config" + + self.save(embedding_save_dir, filename, subfolder=subfolder, save_config=False) + + self.config.push_to_hub( + repo_id, + config_filename, + subfolder=subfolder, + repo_type="model", + private=private, + commit_message=commit_message, + ) + + api.upload_file( + repo_id=repo_id, + path_or_fileobj=os.path.join(embedding_save_dir, subfolder, filename), + repo_type="model", + path_in_repo=f"{subfolder}/{filename}", + commit_message=commit_message, + ) + logger.log_upload_success( + name=f"{self.__class__.__name__}(name={self.config.name})", + target_path=f"{os.path.join(repo_id, subfolder, filename)}", + ) + + api.upload_file( + repo_id=repo_id, + path_or_fileobj=os.path.join(embedding_save_dir, subfolder, vectors_filename), + repo_type="model", + path_in_repo=f"{subfolder}/{vectors_filename}", + commit_message=commit_message, + ) + logger.log_upload_success( + name=f"`{self.__class__.__name__}(name={self.config.name})`", + target_path=f"`{os.path.join(repo_id, subfolder, vectors_filename)}`", + )
+ + +
+[docs] + def torch_embedding(self): + """ + Convert the embedding model to a PyTorch Embedding layer. + + Returns: + torch.nn.Embedding: PyTorch Embedding layer. + """ + import torch + + weights = torch.FloatTensor(self.vectors) + embedding_layer = torch.nn.Embedding.from_pretrained(weights) + return embedding_layer
+ + + @property + def word_vectors(self): + """ + Get key:value pairs of word:vector. + """ + raise NotImplementedError + + @property + def vectors(self): + """ + Get the all vectors array/tensor. + """ + raise NotImplementedError + + @property + def vocab(self) -> Dict[str, int]: + """ + Get the vocabulary. + """ + raise NotImplementedError
+ +
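
For orientation, a minimal usage sketch of the base API above. The import path follows the package layout, and the Hub repo id is an assumption for illustration; any repo laid out the way `Embedding.load` expects would work:

from hezar.embeddings import Embedding

# Assumed repo id for illustration only
embedding = Embedding.load("hezarai/word2vec-skipgram-fa-wikipedia")
vectors = embedding(["کتاب"])          # __call__ returns a list of word vectors
print(embedding.word2index("کتاب"))    # -1 if the word is out of vocabulary

`load` resolves the concrete subclass (e.g. Word2Vec or FastText) through `build_embedding`, so subclass-only methods like `most_similar` are available on the returned object.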
\ No newline at end of file diff --git a/_modules/hezar/embeddings/fasttext.html b/_modules/hezar/embeddings/fasttext.html new file mode 100644 index 00000000..344de326 --- /dev/null +++ b/_modules/hezar/embeddings/fasttext.html @@ -0,0 +1,755 @@
+hezar.embeddings.fasttext - Hezar Documentation

Source code for hezar.embeddings.fasttext

+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import List, Literal
+
+from ..constants import Backends
+from ..registry import register_embedding
+from ..utils import is_backend_available
+from .embedding import Embedding, EmbeddingConfig
+
+
+if is_backend_available(Backends.GENSIM):
+    from gensim.models import fasttext
+
+_required_backends = [
+    Backends.GENSIM,
+]
+
+
+
+@dataclass
+class FastTextConfig(EmbeddingConfig):
+    """
+    Configuration class for FastText embeddings.
+
+    Attributes:
+        name (str): Name of the embedding.
+        dataset_path (str): Path to the dataset.
+        vector_size (int): Size of the word vectors.
+        window (int): Window size for context words.
+        alpha (float): Learning rate.
+        min_count (int): Ignores all words with a total frequency lower than this.
+        seed (int): Seed for random number generation.
+        workers (int): Number of workers for training.
+        min_alpha (float): Minimum learning rate.
+        train_algorithm (Literal["skipgram", "cbow"]): Training algorithm, either 'skipgram' or 'cbow'.
+        cbow_mean (int): If 1, use the mean of the context word vectors, otherwise the sum (CBOW only). Default is 1.
+        epochs (int): Number of training epochs. Default is 5.
+    """
+
+    name = "fasttext"
+    dataset_path: str = None
+    vector_size: int = 300
+    window: int = 5
+    alpha: float = 0.025
+    min_count: int = 1
+    seed: int = 1
+    workers: int = 3
+    min_alpha: float = 0.0001
+    train_algorithm: Literal["skipgram", "cbow"] = "skipgram"
+    cbow_mean: int = 1
+    epochs: int = 5
+
+
+@register_embedding("fasttext", config_class=FastTextConfig)
+class FastText(Embedding):
+    """
+    FastText embedding class.
+
+    Args:
+        config (FastTextConfig): Configuration object.
+        embedding_file (str): Path to the embedding file.
+        vectors_file (str): Path to the vectors file.
+        **kwargs: Additional config parameters given as keyword arguments.
+    """
+
+    required_backends = _required_backends
+
+    def __init__(self, config: FastTextConfig, embedding_file: str = None, vectors_file: str = None, **kwargs):
+        super().__init__(config, embedding_file=embedding_file, vectors_file=vectors_file, **kwargs)
+
+    def build(self):
+        """
+        Build the FastText embedding model.
+
+        Returns:
+            fasttext.FastText: FastText embedding model.
+        """
+        embedding_model = fasttext.FastText(
+            vector_size=self.config.vector_size,
+            window=self.config.window,
+            sg=1 if self.config.train_algorithm == "skipgram" else 0,
+            workers=self.config.workers,
+            alpha=self.config.alpha,
+            min_alpha=self.config.min_alpha,
+            min_count=self.config.min_count,
+        )
+        return embedding_model
+
+    def from_file(self, embedding_path, vectors_path):
+        """
+        Load the FastText embedding model from file.
+
+        Args:
+            embedding_path (str): Path to the embedding file.
+            vectors_path (str): Path to the vectors file.
+
+        Returns:
+            fasttext.FastText: Loaded FastText embedding model.
+        """
+        if not os.path.isfile(vectors_path):
+            raise ValueError(
+                f"Could not load or find vectors file at `{vectors_path}`! "
+                f"Please make sure it's been downloaded properly!"
+            )
+
+        embedding_model = fasttext.FastText.load(embedding_path)
+
+        return embedding_model
+
+    def train(
+        self,
+        dataset: List[str],
+        epochs: int = 5,
+    ):
+        """
+        Train the FastText embedding model.
+
+        Args:
+            dataset (List[str]): List of tokenized sentences for training.
+            epochs (int): Number of training epochs.
+        """
+        self.model.build_vocab(dataset)
+        self.model.train(
+            dataset,
+            epochs=epochs,
+            total_examples=self.model.corpus_count,
+            total_words=self.model.corpus_total_words,
+        )
+
+    def save(
+        self,
+        path: str | os.PathLike,
+        filename: str = None,
+        subfolder: str = None,
+        save_config: bool = True,
+        config_filename: str = None,
+    ):
+        """
+        Save the FastText embedding model to a specified path.
+
+        Args:
+            path (str | os.PathLike): Path to save the embedding model.
+            filename (str): Name of the embedding file.
+            subfolder (str): Subfolder within the path.
+            save_config (bool): Whether to save the configuration.
+            config_filename (str): Configuration file name.
+        """
+        filename = filename or self.filename
+        config_filename = config_filename or self.config_filename
+        subfolder = subfolder or self.subfolder
+
+        save_dir = os.path.join(path, subfolder)
+        os.makedirs(save_dir, exist_ok=True)
+        if save_config:  # Only save the config when requested
+            self.config.save(path, config_filename, subfolder=subfolder)
+
+        self.model.save(os.path.join(save_dir, filename))
+
+    def similarity(self, word1: str, word2: str):
+        """
+        Get the similarity between two words.
+
+        Args:
+            word1 (str): First word.
+            word2 (str): Second word.
+
+        Returns:
+            float: Similarity score.
+        """
+        if not isinstance(word1, str) or not isinstance(word2, str):
+            raise ValueError(
+                f"`Embedding.similarity()` takes two string objects!\n"
+                f"`word1`: {type(word1)}, `word2`: {type(word2)}"
+            )
+        similarity = self.word_vectors.similarity(word1, word2)
+        return similarity
+
+    def doesnt_match(self, words: List[str]):
+        """
+        Get the word that doesn't match the others in a list.
+
+        Args:
+            words (List[str]): List of words.
+
+        Returns:
+            str: Word that doesn't match.
+        """
+        doesnt_match = self.word_vectors.doesnt_match(words)
+        return doesnt_match
+
+    def most_similar(self, word: str, top_n: int = 5):
+        """
+        Get the most similar words to a given word.
+
+        Args:
+            word (str): Input word.
+            top_n (int): Number of similar words to retrieve.
+
+        Returns:
+            List[Dict[str, str | float]]: List of dictionaries containing 'word' and 'score'.
+        """
+        if not isinstance(word, str):
+            raise ValueError(f"`word` must be `str`, got `{type(word)}`!")
+        most_similar = self.word_vectors.most_similar(word, topn=top_n)
+        most_similar = [{"word": word, "score": f"{score:.4f}"} for word, score in most_similar]
+        return most_similar
+
+    def get_normed_vectors(self):
+        """
+        Get normalized word vectors.
+        """
+        normed_vectors = self.word_vectors.get_normed_vectors()
+        return normed_vectors
+
+    @property
+    def word_vectors(self):
+        """
+        Get word vectors.
+        """
+        return self.model.wv
+
+    @property
+    def vectors(self):
+        """
+        Get all vectors.
+        """
+        return self.model.wv.vectors
+
+    @property
+    def vocab(self):
+        """
+        Get vocabulary.
+        """
+        return self.model.wv.key_to_index
+ +
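A minimal training sketch under the defaults above; the toy corpus is illustrative (gensim expects an iterable of tokenized sentences) and the import path is assumed from the package layout:

from hezar.embeddings import FastText, FastTextConfig

corpus = [["hezar", "is", "an", "nlp", "library"], ["fasttext", "builds", "subword", "vectors"]]
fasttext = FastText(FastTextConfig(vector_size=100, window=3))
fasttext.train(corpus, epochs=5)
fasttext.save("my-fasttext")  # writes the model and its config under the default subfolder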
\ No newline at end of file diff --git a/_modules/hezar/embeddings/word2vec.html b/_modules/hezar/embeddings/word2vec.html new file mode 100644 index 00000000..833e1895 --- /dev/null +++ b/_modules/hezar/embeddings/word2vec.html @@ -0,0 +1,772 @@
+hezar.embeddings.word2vec - Hezar Documentation

Source code for hezar.embeddings.word2vec

+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import List, Literal
+
+from ..constants import Backends
+from ..registry import register_embedding
+from ..utils import is_backend_available
+from .embedding import Embedding, EmbeddingConfig
+
+
+if is_backend_available(Backends.GENSIM):
+    from gensim.models import word2vec
+
+_required_backends = [
+    Backends.GENSIM,
+]
+
+
+
+@dataclass
+class Word2VecConfig(EmbeddingConfig):
+    """
+    Configuration class for Word2Vec embeddings.
+
+    Attributes:
+        name (str): Name of the embedding.
+        dataset_path (str): Path to the dataset.
+        vector_size (int): Size of the word vectors.
+        window (int): Window size for context words.
+        alpha (float): Learning rate.
+        min_count (int): Ignores all words with a total frequency lower than this.
+        seed (int): Seed for random number generation.
+        workers (int): Number of workers for training.
+        min_alpha (float): Minimum learning rate.
+        cbow_mean (int): If 1, use the mean of the context word vectors, otherwise the sum (CBOW only). Default is 1.
+        epochs (int): Number of training epochs. Default is 5.
+        train_algorithm (Literal["skipgram", "cbow"]): Training algorithm, either 'skipgram' or 'cbow'.
+        save_format (Literal["binary", "text"]): Format for saving the model, either 'binary' or 'text'.
+    """
+
+    name = "word2vec"
+    dataset_path: str = None
+    vector_size: int = 300
+    window: int = 5
+    alpha: float = 0.025
+    min_count: int = 1
+    seed: int = 1
+    workers: int = 3
+    min_alpha: float = 0.0001
+    cbow_mean: int = 1
+    epochs: int = 5
+    train_algorithm: Literal["skipgram", "cbow"] = "skipgram"
+    save_format: Literal["binary", "text"] = "binary"
+
+
+@register_embedding("word2vec", config_class=Word2VecConfig)
+class Word2Vec(Embedding):
+    """
+    Word2Vec embedding class.
+
+    Args:
+        config (Word2VecConfig): Configuration object.
+        embedding_file (str): Path to the embedding file.
+        vectors_file (str): Path to the vectors file.
+        **kwargs: Additional config parameters given as keyword arguments.
+    """
+
+    required_backends = _required_backends
+
+    def __init__(self, config: Word2VecConfig, embedding_file: str = None, vectors_file: str = None, **kwargs):
+        super().__init__(config, embedding_file=embedding_file, vectors_file=vectors_file, **kwargs)
+
+    def build(self):
+        """
+        Build the Word2Vec embedding model.
+
+        Returns:
+            gensim.models.Word2Vec: Word2Vec embedding model.
+        """
+        embedding_model = word2vec.Word2Vec(
+            vector_size=self.config.vector_size,
+            window=self.config.window,
+            sg=1 if self.config.train_algorithm == "skipgram" else 0,
+            workers=self.config.workers,
+            alpha=self.config.alpha,
+            min_alpha=self.config.min_alpha,
+            min_count=self.config.min_count,
+        )
+        return embedding_model
+
+    def from_file(self, embedding_path, vectors_path):
+        """
+        Load the Word2Vec embedding model from file.
+
+        Args:
+            embedding_path (str): Path to the embedding file.
+            vectors_path (str): Path to the vectors file.
+
+        Raises:
+            ValueError: If the vectors file is not found.
+
+        Returns:
+            gensim.models.Word2Vec: Loaded Word2Vec embedding model.
+        """
+        if not os.path.isfile(vectors_path):
+            raise ValueError(
+                f"Could not load or find vectors file at `{vectors_path}`! "
+                f"Please make sure it's been downloaded properly!"
+            )
+
+        embedding_model = word2vec.Word2Vec.load(embedding_path)
+
+        return embedding_model
+
+    def train(
+        self,
+        dataset: List[str],
+        epochs: int = 5,
+    ):
+        """
+        Train the Word2Vec embedding model.
+
+        Args:
+            dataset (List[str]): List of tokenized sentences for training.
+            epochs (int): Number of training epochs.
+        """
+        self.model.build_vocab(dataset)
+        self.model.train(
+            dataset,
+            epochs=epochs,
+            total_examples=self.model.corpus_count,
+            total_words=self.model.corpus_total_words,
+        )
+
+    def save(
+        self,
+        path: str | os.PathLike,
+        filename: str = None,
+        subfolder: str = None,
+        save_config: bool = True,
+        config_filename: str = None,
+    ):
+        """
+        Save the Word2Vec embedding model to a specified path.
+
+        Args:
+            path (str | os.PathLike): Path to save the embedding model.
+            filename (str): Name of the embedding file.
+            subfolder (str): Subfolder within the path.
+            save_config (bool): Whether to save the configuration.
+            config_filename (str): Configuration file name.
+        """
+        filename = filename or self.filename
+        config_filename = config_filename or self.config_filename
+        subfolder = subfolder or self.subfolder
+
+        save_dir = os.path.join(path, subfolder)
+        os.makedirs(save_dir, exist_ok=True)
+        if save_config:  # Only save the config when requested
+            self.config.save(path, config_filename, subfolder=subfolder)
+
+        self.model.save(os.path.join(save_dir, filename))
+
+    def similarity(self, word1: str, word2: str):
+        """
+        Get the similarity between two words.
+
+        Args:
+            word1 (str): First word.
+            word2 (str): Second word.
+
+        Returns:
+            float: Similarity score.
+        """
+        if not isinstance(word1, str) or not isinstance(word2, str):
+            raise ValueError(
+                f"`Embedding.similarity()` takes two string objects!\n"
+                f"`word1`: {type(word1)}, `word2`: {type(word2)}"
+            )
+        similarity = self.word_vectors.similarity(word1, word2)
+        return similarity
+
+    def doesnt_match(self, words: List[str]):
+        """
+        Get the word that doesn't match the others in a list.
+
+        Args:
+            words (List[str]): List of words.
+
+        Returns:
+            str: Word that doesn't match.
+        """
+        doesnt_match = self.word_vectors.doesnt_match(words)
+        return doesnt_match
+
+    def most_similar(self, word: str, top_n: int = 5):
+        """
+        Get the most similar words to a given word.
+
+        Args:
+            word (str): Input word.
+            top_n (int): Number of similar words to retrieve.
+
+        Returns:
+            List[Dict[str, str | float]]: List of dictionaries containing 'word' and 'score'.
+        """
+        if not isinstance(word, str):
+            raise ValueError(f"`word` must be `str`, got `{type(word)}`!")
+        most_similar = self.word_vectors.most_similar(word, topn=top_n)
+        most_similar = [{"word": word, "score": f"{score:.4f}"} for word, score in most_similar]
+        return most_similar
+
+    def get_normed_vectors(self):
+        """
+        Get normalized word vectors.
+
+        Returns:
+            Any: Normed word vectors.
+        """
+        normed_vectors = self.word_vectors.get_normed_vectors()
+        return normed_vectors
+
+    @property
+    def word_vectors(self):
+        """
+        Get word vectors.
+
+        Returns:
+            gensim.models.keyedvectors.KeyedVectors: Word vectors.
+        """
+        return self.model.wv
+
+    @property
+    def vectors(self):
+        """
+        Get all vectors.
+
+        Returns:
+            numpy.ndarray: All vectors.
+        """
+        return self.model.wv.vectors
+
+    @property
+    def vocab(self):
+        """
+        Get vocabulary.
+
+        Returns:
+            Dict[str, int]: Vocabulary.
+        """
+        return self.model.wv.key_to_index
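
The same flow applies here; a sketch with the CBOW variant (toy corpus, import path assumed):

from hezar.embeddings import Word2Vec, Word2VecConfig

corpus = [["hello", "world"], ["hello", "hezar"]]
word2vec = Word2Vec(Word2VecConfig(vector_size=100, train_algorithm="cbow"))
word2vec.train(corpus, epochs=5)
print(word2vec.similarity("hello", "world"))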
\ No newline at end of file diff --git a/_modules/hezar/metrics/accuracy.html b/_modules/hezar/metrics/accuracy.html new file mode 100644 index 00000000..7317e88f --- /dev/null +++ b/_modules/hezar/metrics/accuracy.html @@ -0,0 +1,580 @@
+hezar.metrics.accuracy - Hezar Documentation

Source code for hezar.metrics.accuracy

+from dataclasses import dataclass
+from typing import Iterable
+
+from sklearn.metrics import accuracy_score
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from .metric import Metric
+
+
+_required_backends = [
+    Backends.SCIKIT,
+]
+
+
+
+@dataclass
+class AccuracyConfig(MetricConfig):
+    """
+    Configuration class for the Accuracy metric.
+
+    Args:
+        name (MetricType): The type of metric, Accuracy in this case.
+        normalize (bool): If True, return the fraction of correctly classified samples, otherwise the count.
+        sample_weight (Iterable[float]): Sample weights.
+        output_keys (tuple): Keys to filter the metric results for output.
+    """
+    name = MetricType.ACCURACY
+    objective: str = "maximize"
+    normalize: bool = True
+    sample_weight: Iterable[float] = None
+    output_keys: tuple = ("accuracy",)
+
+
+@register_metric("accuracy", config_class=AccuracyConfig)
+class Accuracy(Metric):
+    """
+    Accuracy metric for numeric arrays backed by scikit-learn's `accuracy_score`.
+
+    Args:
+        config (AccuracyConfig): Metric config object
+        **kwargs: Extra config parameters passed as kwargs to update the `config`
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: AccuracyConfig, **kwargs):
+        super().__init__(config, **kwargs)
+
+    def compute(
+        self,
+        predictions=None,
+        targets=None,
+        normalize=None,
+        sample_weight=None,
+        n_decimals=None,
+        output_keys=None,
+    ):
+        """
+        Compute the accuracy score for the given predictions against targets.
+
+        Args:
+            predictions: A list of prediction labels
+            targets: A list of ground truth labels
+            normalize: If True, return the fraction of correctly classified samples, otherwise the count
+            sample_weight: Sample weights
+            n_decimals: Floating point decimals for the final score
+            output_keys: Filter the output keys
+
+        Returns:
+            A dictionary of the metric results
+        """
+        normalize = normalize or self.config.normalize
+        sample_weight = sample_weight or self.config.sample_weight
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        score = accuracy_score(
+            targets,
+            predictions,
+            normalize=normalize,
+            sample_weight=sample_weight,
+        )
+
+        results = {"accuracy": round(float(score), n_decimals)}
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
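A minimal usage sketch (the import path is assumed from the module layout):

from hezar.metrics import Accuracy, AccuracyConfig

accuracy = Accuracy(AccuracyConfig())
print(accuracy.compute(predictions=[0, 1, 1, 0], targets=[0, 1, 0, 0]))
# 3 of 4 correct -> {'accuracy': 0.75}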
\ No newline at end of file diff --git a/_modules/hezar/metrics/bleu.html b/_modules/hezar/metrics/bleu.html new file mode 100644 index 00000000..93e0dbef --- /dev/null +++ b/_modules/hezar/metrics/bleu.html @@ -0,0 +1,580 @@
+hezar.metrics.bleu - Hezar Documentation

Source code for hezar.metrics.bleu

+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from ..utils import is_backend_available
+from .metric import Metric
+
+
+if is_backend_available(Backends.NLTK):
+    from nltk.translate.bleu_score import corpus_bleu
+
+_required_backends = [
+    Backends.NLTK,
+]
+
+
+
+@dataclass
+class BLEUConfig(MetricConfig):
+    """
+    Configuration class for the BLEU metric.
+
+    Args:
+        name (MetricType): The type of metric, BLEU in this case.
+        output_keys (tuple): Keys to filter the metric results for output.
+    """
+    name = MetricType.BLEU
+    objective: str = "maximize"
+    output_keys: tuple = ("bleu",)
+
+
+@register_metric("bleu", config_class=BLEUConfig)
+class BLEU(Metric):
+    """
+    BLEU metric for evaluating text generation models like translation, summarization, etc.
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: BLEUConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+
+    def compute(
+        self,
+        predictions: Iterable[str] | str = None,
+        targets: Iterable[str] | str = None,
+        weights=(0.25, 0.25, 0.25, 0.25),
+        n_decimals=None,
+        output_keys=None,
+        **kwargs,
+    ):
+        """
+        Computes the BLEU score for the given predictions against targets.
+
+        Args:
+            predictions (Iterable[str] | str): Predicted sentences or tokens.
+            targets (Iterable[str] | str): Ground truth sentences or tokens.
+            weights (tuple): Weights for n-gram precision, default is (0.25, 0.25, 0.25, 0.25).
+            n_decimals (int): Number of decimals for the final score.
+            output_keys (tuple): Filter the output keys.
+
+        Returns:
+            dict: A dictionary of the metric results, with keys specified by `output_keys`.
+        """
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        # Tokenize flat string inputs; pre-tokenized inputs are passed through as-is
+        predictions = [x.split() if isinstance(x, str) else x for x in predictions]
+        targets = [x.split() if isinstance(x, str) else x for x in targets]
+
+        score = corpus_bleu(targets, predictions, weights=weights)
+
+        results = {"bleu": round(float(score), n_decimals)}
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
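A usage sketch (import path assumed). Note that `compute` only tokenizes flat strings, so references are best passed pre-tokenized in the nested shape `corpus_bleu` expects (one list of references per example):

from hezar.metrics import BLEU, BLEUConfig

bleu = BLEU(BLEUConfig())
predictions = ["the cat sat on the mat"]
targets = [[["the", "cat", "sat", "on", "the", "mat"]]]  # one example, one tokenized reference
print(bleu.compute(predictions=predictions, targets=targets))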
\ No newline at end of file diff --git a/_modules/hezar/metrics/cer.html b/_modules/hezar/metrics/cer.html new file mode 100644 index 00000000..718e3f67 --- /dev/null +++ b/_modules/hezar/metrics/cer.html @@ -0,0 +1,614 @@
+hezar.metrics.cer - Hezar Documentation

Source code for hezar.metrics.cer

+from dataclasses import dataclass
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from ..utils import is_backend_available
+from .metric import Metric
+
+
+if is_backend_available(Backends.JIWER):
+    import jiwer
+    import jiwer.transforms as tr
+
+_DESCRIPTION = "Character Error Rate (CER) using `jiwer`. Commonly used for Speech Recognition and OCR systems"
+
+_required_backends = [
+    Backends.JIWER,
+]
+
+
+
+@dataclass
+class CERConfig(MetricConfig):
+    """
+    Configuration class for the CER (Character Error Rate) metric.
+
+    Args:
+        name (MetricType): The type of metric, CER in this case.
+        sentence_delimiter (str): Delimiter for separating sentences in texts.
+        concatenate_texts (bool): Whether to process all texts as one batch before computing CER.
+        output_keys (tuple): Keys to filter the metric results for output.
+    """
+    name = MetricType.CER
+    objective: str = "minimize"
+    sentence_delimiter: str = " "
+    concatenate_texts: bool = False
+    output_keys: tuple = ("cer",)
+
+
+@register_metric("cer", config_class=CERConfig, description=_DESCRIPTION)
+class CER(Metric):
+    """
+    CER metric for evaluating Character Error Rate using `jiwer`.
+
+    Args:
+        config (CERConfig): Metric configuration object.
+        **kwargs: Extra configuration parameters passed as kwargs to update the `config`.
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: CERConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+        # Reducing texts to lists of characters makes jiwer's word-level
+        # measures operate on characters, which is exactly CER.
+        self.transform = tr.Compose(
+            [
+                tr.RemoveMultipleSpaces(),
+                tr.Strip(),
+                tr.ReduceToSingleSentence(self.config.sentence_delimiter),
+                tr.ReduceToListOfListOfChars(),
+            ]
+        )
+
+    def compute(
+        self,
+        predictions=None,
+        targets=None,
+        concatenate_texts=None,
+        n_decimals=None,
+        output_keys=None,
+        **kwargs,
+    ):
+        """
+        Computes the Character Error Rate (CER) for the given predictions against targets.
+
+        Args:
+            predictions: Predicted texts.
+            targets: Ground truth texts.
+            concatenate_texts (bool): Whether to process all texts as one batch before computing CER.
+            n_decimals (int): Number of decimals for the final score.
+            output_keys (tuple): Filter the output keys.
+
+        Returns:
+            dict: A dictionary of the metric results, with keys specified by `output_keys`.
+        """
+        concatenate_texts = concatenate_texts or self.config.concatenate_texts
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        if concatenate_texts:
+            score = jiwer.process_words(
+                targets,
+                predictions,
+                reference_transform=self.transform,
+                hypothesis_transform=self.transform,
+            ).wer
+        else:
+            incorrect = 0
+            total = 0
+            for prediction, reference in zip(predictions, targets):
+                measures = jiwer.process_words(
+                    reference,
+                    prediction,
+                    reference_transform=self.transform,
+                    hypothesis_transform=self.transform,
+                )
+                incorrect += measures.substitutions + measures.deletions + measures.insertions
+                total += measures.substitutions + measures.deletions + measures.hits
+
+            score = incorrect / total
+
+        results = {"cer": round(float(score), n_decimals)}
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
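A usage sketch (import path assumed):

from hezar.metrics import CER, CERConfig

cer = CER(CERConfig())
print(cer.compute(predictions=["hezar"], targets=["hezzar"]))
# 1 deleted character over 6 reference characters -> cer = 1/6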
\ No newline at end of file diff --git a/_modules/hezar/metrics/f1.html b/_modules/hezar/metrics/f1.html new file mode 100644 index 00000000..cb4e652f --- /dev/null +++ b/_modules/hezar/metrics/f1.html @@ -0,0 +1,603 @@
+hezar.metrics.f1 - Hezar Documentation

Source code for hezar.metrics.f1

+from dataclasses import dataclass
+from typing import Iterable
+
+from sklearn.metrics import f1_score
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from .metric import Metric
+
+
+_required_backends = [
+    Backends.SCIKIT,
+]
+
+
+
+@dataclass
+class F1Config(MetricConfig):
+    """
+    Configuration class for the F1 metric.
+
+    Args:
+        name (MetricType): The type of metric, F1 in this case.
+        pos_label (int): Label of the positive class.
+        average (str): Type of averaging for the F1 score.
+        sample_weight (Iterable[float]): Sample weights for the F1 score.
+        output_keys (tuple): Keys to filter the metric results for output.
+    """
+    name = MetricType.F1
+    objective: str = "maximize"
+    pos_label: int = 1
+    average: str = "macro"
+    sample_weight: Iterable[float] = None
+    output_keys: tuple = ("f1",)
+
+
+@register_metric("f1", config_class=F1Config)
+class F1(Metric):
+    """
+    F1 metric for evaluating classification performance using sklearn's `f1_score`.
+
+    Args:
+        config (F1Config): Metric configuration object.
+        **kwargs: Extra configuration parameters passed as kwargs to update the `config`.
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: F1Config, **kwargs):
+        super().__init__(config, **kwargs)
+
+    def compute(
+        self,
+        predictions=None,
+        targets=None,
+        labels=None,
+        pos_label=None,
+        average=None,
+        sample_weight=None,
+        zero_division="warn",
+        n_decimals=None,
+        output_keys=None,
+    ):
+        """
+        Computes the F1 score for the given predictions against targets.
+
+        Args:
+            predictions: Predicted labels.
+            targets: Ground truth labels.
+            labels: List of labels to include in the calculation.
+            pos_label (int): Label of the positive class.
+            average (str): Type of averaging for the F1 score.
+            sample_weight (Iterable[float]): Sample weights for the F1 score.
+            zero_division (str): Strategy to use for zero-division, default is "warn".
+            n_decimals (int): Number of decimals for the final score.
+            output_keys (tuple): Filter the output keys.
+
+        Returns:
+            dict: A dictionary of the metric results, with keys specified by `output_keys`.
+        """
+        pos_label = pos_label or self.config.pos_label
+        average = average or self.config.average
+        sample_weight = sample_weight or self.config.sample_weight
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        score = f1_score(
+            targets,
+            predictions,
+            labels=labels,
+            pos_label=pos_label,
+            average=average,
+            sample_weight=sample_weight,
+            zero_division=zero_division,
+        )
+
+        # `f1_score` returns a 0-d array for averaged scores
+        results = {"f1": round(float(score), n_decimals)}
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
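A usage sketch with the macro-averaged default (import path assumed):

from hezar.metrics import F1, F1Config

f1 = F1(F1Config(average="macro"))
print(f1.compute(predictions=[0, 1, 1, 0], targets=[0, 1, 0, 1]))
# per-class F1 is 0.5 for both classes -> macro F1 = 0.5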
\ No newline at end of file diff --git a/_modules/hezar/metrics/metric.html b/_modules/hezar/metrics/metric.html new file mode 100644 index 00000000..111fcd2c --- /dev/null +++ b/_modules/hezar/metrics/metric.html @@ -0,0 +1,535 @@
+hezar.metrics.metric - Hezar Documentation

Source code for hezar.metrics.metric

+from __future__ import annotations
+
+from typing import Dict, List
+
+from ..configs import MetricConfig
+from ..constants import Backends
+from ..utils import verify_dependencies
+
+
+
+class Metric:
+    """
+    The base metric class for all metrics in Hezar.
+
+    Metrics are thin wrappers around ready-to-use implementations (e.g. scikit-learn's), and it's strongly
+    recommended not to reinvent the wheel: if a metric is already implemented by some package, use it! The only
+    reason to implement such a module is to make sure all metrics are treated the same way across the framework.
+    """
+
+    required_backends: List[str | Backends] = []
+
+    def __init__(self, config: MetricConfig, **kwargs):
+        # Check if all the required dependencies are installed
+        verify_dependencies(self, self.required_backends)
+
+        self.config = config.update(kwargs)
+
+    def compute(self, predictions=None, targets=None, **kwargs) -> Dict:
+        """
+        Compute the metric value for the given predictions against the targets.
+
+        Args:
+            predictions: Prediction values
+            targets: Ground truth values
+            **kwargs: Extra arguments depending on the metric
+
+        Returns:
+            A dictionary of the results and scores
+        """
+        raise NotImplementedError
+
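A minimal subclassing sketch following the contract above. `ExactMatchConfig` and `ExactMatch` are hypothetical and only mirror the shape of the built-in metrics; import paths are assumed:

from dataclasses import dataclass

from hezar.configs import MetricConfig
from hezar.metrics import Metric

@dataclass
class ExactMatchConfig(MetricConfig):  # hypothetical config
    name = "exact_match"

class ExactMatch(Metric):  # hypothetical metric
    def compute(self, predictions=None, targets=None, **kwargs):
        # Fraction of predictions that match their targets exactly
        score = sum(p == t for p, t in zip(predictions, targets)) / len(targets)
        return {"exact_match": round(score, self.config.n_decimals)}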
\ No newline at end of file diff --git a/_modules/hezar/metrics/precision.html b/_modules/hezar/metrics/precision.html new file mode 100644 index 00000000..5ff03313 --- /dev/null +++ b/_modules/hezar/metrics/precision.html @@ -0,0 +1,608 @@
+hezar.metrics.precision - Hezar Documentation

Source code for hezar.metrics.precision

+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable
+
+from sklearn.metrics import precision_score
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from .metric import Metric
+
+
+_required_backends = [
+    Backends.SCIKIT,
+]
+
+
+
+@dataclass
+class PrecisionConfig(MetricConfig):
+    """
+    Configuration class for the Precision metric.
+
+    Args:
+        name (MetricType): The type of metric, Precision in this case.
+        pos_label (int): Label of the positive class.
+        average (str): Type of averaging for the precision score.
+        sample_weight (Iterable[float]): Sample weights for the precision score.
+        zero_division (str | float): Strategy for zero-division, default is 0.0.
+        output_keys (tuple): Keys to filter the metric results for output.
+    """
+    name = MetricType.PRECISION
+    objective: str = "maximize"
+    pos_label: int = 1
+    average: str = "macro"
+    sample_weight: Iterable[float] = None
+    zero_division: str | float = 0.0
+    output_keys: tuple = ("precision",)
+
+
+@register_metric("precision", config_class=PrecisionConfig)
+class Precision(Metric):
+    """
+    Precision metric for evaluating classification performance using sklearn's `precision_score`.
+
+    Args:
+        config (PrecisionConfig): Metric configuration object.
+        **kwargs: Extra configuration parameters passed as kwargs to update the `config`.
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: PrecisionConfig, **kwargs):
+        super().__init__(config, **kwargs)
+
+    def compute(
+        self,
+        predictions=None,
+        targets=None,
+        labels=None,
+        pos_label=None,
+        average=None,
+        sample_weight=None,
+        zero_division=None,
+        n_decimals=None,
+        output_keys=None,
+    ):
+        """
+        Computes the Precision score for the given predictions against targets.
+
+        Args:
+            predictions: Predicted labels.
+            targets: Ground truth labels.
+            labels: List of labels to include in the calculation.
+            pos_label (int): Label of the positive class.
+            average (str): Type of averaging for the precision score.
+            sample_weight (Iterable[float]): Sample weights for the precision score.
+            zero_division (str | float): Strategy for zero-division, default is 0.0.
+            n_decimals (int): Number of decimals for the final score.
+            output_keys (tuple): Filter the output keys.
+
+        Returns:
+            dict: A dictionary of the metric results, with keys specified by `output_keys`.
+        """
+        pos_label = pos_label or self.config.pos_label
+        average = average or self.config.average
+        sample_weight = sample_weight or self.config.sample_weight
+        zero_division = zero_division or self.config.zero_division
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        score = precision_score(
+            targets,
+            predictions,
+            labels=labels,
+            pos_label=pos_label,
+            average=average,
+            sample_weight=sample_weight,
+            zero_division=zero_division,
+        )
+
+        # `precision_score` returns a 0-d array for averaged scores
+        results = {"precision": round(float(score), n_decimals)}
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
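A usage sketch for the binary case (import path assumed):

from hezar.metrics import Precision, PrecisionConfig

precision = Precision(PrecisionConfig())
print(precision.compute(predictions=[1, 0, 1, 1], targets=[1, 0, 0, 1], average="binary"))
# TP=2, FP=1 -> precision = 2/3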
\ No newline at end of file diff --git a/_modules/hezar/metrics/recall.html b/_modules/hezar/metrics/recall.html new file mode 100644 index 00000000..b3af472a --- /dev/null +++ b/_modules/hezar/metrics/recall.html @@ -0,0 +1,608 @@
+hezar.metrics.recall - Hezar Documentation

Source code for hezar.metrics.recall

+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable
+
+from sklearn.metrics import recall_score
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from .metric import Metric
+
+
+_required_backends = [
+    Backends.SCIKIT,
+]
+
+
+
+@dataclass
+class RecallConfig(MetricConfig):
+    """
+    Configuration class for the Recall metric.
+
+    Args:
+        name (MetricType): The type of metric, Recall in this case.
+        pos_label (int): Label of the positive class.
+        average (str): Type of averaging for the recall score.
+        sample_weight (Iterable[float]): Sample weights for the recall score.
+        zero_division (str | float): Strategy for zero-division, default is 0.0.
+        output_keys (tuple): Keys to filter the metric results for output.
+    """
+    name = MetricType.RECALL
+    objective: str = "maximize"
+    pos_label: int = 1
+    average: str = "macro"
+    sample_weight: Iterable[float] = None
+    zero_division: str | float = 0.0
+    output_keys: tuple = ("recall",)
+
+
+@register_metric("recall", config_class=RecallConfig)
+class Recall(Metric):
+    """
+    Recall metric for evaluating classification performance using sklearn's `recall_score`.
+
+    Args:
+        config (RecallConfig): Metric configuration object.
+        **kwargs: Extra configuration parameters passed as kwargs to update the `config`.
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: RecallConfig, **kwargs):
+        super().__init__(config, **kwargs)
+
+    def compute(
+        self,
+        predictions=None,
+        targets=None,
+        labels=None,
+        pos_label=None,
+        average=None,
+        sample_weight=None,
+        zero_division=None,
+        n_decimals=None,
+        output_keys=None,
+    ):
+        """
+        Computes the Recall score for the given predictions against targets.
+
+        Args:
+            predictions: Predicted labels.
+            targets: Ground truth labels.
+            labels: List of labels to include in the calculation.
+            pos_label (int): Label of the positive class.
+            average (str): Type of averaging for the recall score.
+            sample_weight (Iterable[float]): Sample weights for the recall score.
+            zero_division (str | float): Strategy for zero-division, default is 0.0.
+            n_decimals (int): Number of decimals for the final score.
+            output_keys (tuple): Filter the output keys.
+
+        Returns:
+            dict: A dictionary of the metric results, with keys specified by `output_keys`.
+        """
+        pos_label = pos_label or self.config.pos_label
+        average = average or self.config.average
+        sample_weight = sample_weight or self.config.sample_weight
+        zero_division = zero_division or self.config.zero_division
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        score = recall_score(
+            targets,
+            predictions,
+            labels=labels,
+            pos_label=pos_label,
+            average=average,
+            sample_weight=sample_weight,
+            zero_division=zero_division,
+        )
+
+        # `recall_score` returns a 0-d array for averaged scores
+        results = {"recall": round(float(score), n_decimals)}
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
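A usage sketch for the binary case (import path assumed):

from hezar.metrics import Recall, RecallConfig

recall = Recall(RecallConfig())
print(recall.compute(predictions=[1, 0, 1, 1], targets=[1, 0, 0, 1], average="binary"))
# TP=2, FN=0 -> recall = 1.0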
\ No newline at end of file diff --git a/_modules/hezar/metrics/rouge.html b/_modules/hezar/metrics/rouge.html new file mode 100644 index 00000000..bbdea5ab --- /dev/null +++ b/_modules/hezar/metrics/rouge.html @@ -0,0 +1,598 @@
+hezar.metrics.rouge - Hezar Documentation

Source code for hezar.metrics.rouge

+from dataclasses import dataclass
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from ..utils import is_backend_available
+from .metric import Metric
+
+
+if is_backend_available(Backends.ROUGE):
+    from rouge_score import rouge_scorer, scoring
+
+_DESCRIPTION = "Rouge estimation. Commonly used for Text Summarization"
+
+_required_backends = [
+    Backends.ROUGE,
+]
+
+
+
+@dataclass
+class ROUGEConfig(MetricConfig):
+    """
+    Configuration class for the ROUGE metric.
+
+    Args:
+        name (MetricType): The type of metric, ROUGE in this case.
+        use_stemmer (bool): Whether to apply stemming when computing ROUGE.
+        use_aggregator (bool): Whether to aggregate scores over multiple references.
+        multi_ref (bool): Whether multiple references are present per prediction.
+        output_keys (tuple): Keys to filter the metric results for output.
+    """
+    name = MetricType.ROUGE
+    objective: str = "maximize"
+    use_stemmer: bool = False
+    use_aggregator: bool = True
+    multi_ref: bool = True
+    output_keys: tuple = ("rouge1", "rouge2", "rougeL", "rougeLsum")
+
+
+@register_metric("rouge", config_class=ROUGEConfig, description=_DESCRIPTION)
+class ROUGE(Metric):
+    """
+    ROUGE metric for evaluating text summarization using `rouge_score`.
+
+    Args:
+        config (ROUGEConfig): Metric configuration object.
+        **kwargs: Extra configuration parameters passed as kwargs to update the `config`.
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: ROUGEConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+        rouge_types = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
+        self.scorer = rouge_scorer.RougeScorer(
+            rouge_types=rouge_types,
+            use_stemmer=self.config.use_stemmer,
+        )
+
+    def compute(
+        self,
+        predictions=None,
+        targets=None,
+        use_aggregator=None,
+        n_decimals=None,
+        output_keys=None,
+        **kwargs,
+    ):
+        """
+        Computes the ROUGE scores for the given predictions against targets.
+
+        Args:
+            predictions: Predicted summaries.
+            targets: Ground truth summaries.
+            use_aggregator (bool): Whether to aggregate scores over multiple references.
+            n_decimals (int): Number of decimals for the final score.
+            output_keys (tuple): Filter the output keys.
+
+        Returns:
+            dict: A dictionary of the metric results, with keys specified by `output_keys`.
+        """
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        aggregator = scoring.BootstrapAggregator()
+
+        for ref, pred in zip(targets, predictions):
+            if self.config.multi_ref:
+                score = self.scorer.score_multi(ref, pred)
+            else:
+                score = self.scorer.score(ref, pred)
+
+            aggregator.add_scores(score)
+
+        # Report the mid (point-estimate) F-measure of each ROUGE type
+        results = aggregator.aggregate()
+        for key in results:
+            results[key] = round(results[key].mid.fmeasure, n_decimals)
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
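A usage sketch with a single reference per prediction (import path assumed):

from hezar.metrics import ROUGE, ROUGEConfig

rouge = ROUGE(ROUGEConfig(multi_ref=False))  # one reference string per prediction
print(rouge.compute(predictions=["the cat sat"], targets=["the cat sat down"]))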
\ No newline at end of file diff --git a/_modules/hezar/metrics/seqeval.html b/_modules/hezar/metrics/seqeval.html new file mode 100644 index 00000000..4aeb5a5c --- /dev/null +++ b/_modules/hezar/metrics/seqeval.html @@ -0,0 +1,619 @@
+hezar.metrics.seqeval - Hezar Documentation

Source code for hezar.metrics.seqeval

+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import List, Optional
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from ..utils import Logger, is_backend_available
+from .metric import Metric
+
+
+if is_backend_available(Backends.SEQEVAL):
+    from seqeval.metrics import accuracy_score, classification_report
+
+_required_backends = [
+    Backends.SEQEVAL,
+]
+
+logger = Logger(__name__)
+
+
+
+@dataclass
+class SeqevalConfig(MetricConfig):
+    """
+    Configuration class for the Seqeval metric.
+
+    Args:
+        name (MetricType): The type of metric, Seqeval in this case.
+        output_keys (tuple): Keys to filter the metric results for output.
+        suffix (bool): Whether the labels have suffixes.
+        mode (Optional[str]): Evaluation mode for seqeval.
+        sample_weight (Optional[List[int]]): Sample weights for the seqeval metrics.
+        zero_division (str | int): Strategy for zero-division, default is 0.
+    """
+    name = MetricType.SEQEVAL
+    objective: str = "maximize"
+    output_keys: tuple = ("accuracy", "recall", "precision", "f1")
+    suffix: bool = False
+    mode: Optional[str] = None
+    sample_weight: Optional[List[int]] = None
+    zero_division: str | int = 0
+
+
+@register_metric("seqeval", config_class=SeqevalConfig)
+class Seqeval(Metric):
+    """
+    Seqeval metric for sequence labeling tasks using `seqeval`.
+
+    Args:
+        config (SeqevalConfig): Metric configuration object.
+        **kwargs: Extra configuration parameters passed as kwargs to update the `config`.
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: SeqevalConfig, **kwargs):
+        super().__init__(config, **kwargs)
+
+    def compute(
+        self,
+        predictions=None,
+        targets=None,
+        suffix: bool = None,
+        mode: Optional[str] = None,
+        sample_weight: Optional[List[int]] = None,
+        zero_division: str | int = None,
+        n_decimals: int = None,
+        output_keys=None,
+        **kwargs,
+    ):
+        """
+        Computes the Seqeval scores for the given predictions against targets.
+
+        Args:
+            predictions: Predicted labels.
+            targets: Ground truth labels.
+            suffix (bool): Whether the labels have suffixes.
+            mode (Optional[str]): Evaluation mode for seqeval.
+            sample_weight (Optional[List[int]]): Sample weights for the seqeval metrics.
+            zero_division (str | int): Strategy for zero-division, default is 0.
+            n_decimals (int): Number of decimals for the final score.
+            output_keys (tuple): Filter the output keys.
+
+        Returns:
+            dict: A dictionary of the metric results, with keys specified by `output_keys`.
+        """
+        suffix = suffix or self.config.suffix
+        mode = mode or self.config.mode
+        sample_weight = sample_weight or self.config.sample_weight
+        zero_division = zero_division or self.config.zero_division
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        report = classification_report(
+            y_true=targets,
+            y_pred=predictions,
+            suffix=suffix,
+            output_dict=True,
+            mode=mode,
+            sample_weight=sample_weight,
+            zero_division=zero_division,
+        )
+        report.pop("macro avg")
+        report.pop("weighted avg")
+        overall_score = report.pop("micro avg")
+
+        results = {
+            "accuracy": accuracy_score(y_true=targets, y_pred=predictions),
+            "f1": overall_score["f1-score"],
+            "recall": overall_score["recall"],
+            "precision": overall_score["precision"],
+        }
+
+        results = {k: round(float(v), n_decimals) for k, v in results.items()}
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
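A usage sketch with IOB-tagged sequences (import path assumed):

from hezar.metrics import Seqeval, SeqevalConfig

seqeval = Seqeval(SeqevalConfig())
predictions = [["B-PER", "I-PER", "O", "B-LOC"]]
targets = [["B-PER", "I-PER", "O", "O"]]
print(seqeval.compute(predictions=predictions, targets=targets))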
\ No newline at end of file diff --git a/_modules/hezar/metrics/wer.html b/_modules/hezar/metrics/wer.html new file mode 100644 index 00000000..ed876ce6 --- /dev/null +++ b/_modules/hezar/metrics/wer.html @@ -0,0 +1,592 @@
+hezar.metrics.wer - Hezar Documentation

Source code for hezar.metrics.wer

+from dataclasses import dataclass
+
+from ..configs import MetricConfig
+from ..constants import Backends, MetricType
+from ..registry import register_metric
+from ..utils import is_backend_available
+from .metric import Metric
+
+
+if is_backend_available(Backends.JIWER):
+    import jiwer
+
+_DESCRIPTION = "Word Error Rate (WER) using `jiwer`. Commonly used for Speech Recognition systems"
+
+_required_backends = [
+    Backends.JIWER,
+]
+
+
+
+@dataclass
+class WERConfig(MetricConfig):
+    """
+    Configuration class for the WER metric.
+
+    Args:
+        name (MetricType): The type of metric, WER in this case.
+        concatenate_texts (bool): Whether to process all texts as one batch before computing WER.
+        output_keys (tuple): Keys to filter the metric results for output.
+    """
+    name = MetricType.WER
+    objective: str = "minimize"
+    concatenate_texts: bool = False
+    output_keys: tuple = ("wer",)
+
+
+@register_metric("wer", config_class=WERConfig, description=_DESCRIPTION)
+class WER(Metric):
+    """
+    WER metric for evaluating Word Error Rate using `jiwer`.
+
+    Args:
+        config (WERConfig): Metric configuration object.
+        **kwargs: Extra configuration parameters passed as kwargs to update the `config`.
+    """
+    required_backends = _required_backends
+
+    def __init__(self, config: WERConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+
+    def compute(
+        self,
+        predictions=None,
+        targets=None,
+        concatenate_texts=None,
+        n_decimals=None,
+        output_keys=None,
+        **kwargs,
+    ):
+        """
+        Computes the WER for the given predictions against targets.
+
+        Args:
+            predictions: Predicted texts.
+            targets: Ground truth texts.
+            concatenate_texts (bool): Whether to process all texts as one batch before computing WER.
+            n_decimals (int): Number of decimals for the final score.
+            output_keys (tuple): Filter the output keys.
+
+        Returns:
+            dict: A dictionary of the metric results, with keys specified by `output_keys`.
+        """
+        concatenate_texts = concatenate_texts or self.config.concatenate_texts
+        n_decimals = n_decimals or self.config.n_decimals
+        output_keys = output_keys or self.config.output_keys
+
+        if concatenate_texts:
+            score = jiwer.process_words(targets, predictions).wer
+        else:
+            incorrect = 0
+            total = 0
+            for prediction, reference in zip(predictions, targets):
+                measures = jiwer.process_words(reference, prediction)
+                incorrect += measures.substitutions + measures.deletions + measures.insertions
+                total += measures.substitutions + measures.deletions + measures.hits
+
+            score = incorrect / total
+
+        results = {"wer": round(float(score), n_decimals)}
+
+        if output_keys:
+            results = {k: v for k, v in results.items() if k in output_keys}
+
+        return results
+
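A usage sketch (import path assumed):

from hezar.metrics import WER, WERConfig

wer = WER(WERConfig())
print(wer.compute(predictions=["hello there world"], targets=["hello world"]))
# 1 inserted word over 2 reference words -> {'wer': 0.5}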
\ No newline at end of file diff --git a/_modules/hezar/models/backbone/bert/bert.html b/_modules/hezar/models/backbone/bert/bert.html new file mode 100644 index 00000000..1907e594 --- /dev/null +++ b/_modules/hezar/models/backbone/bert/bert.html @@ -0,0 +1,588 @@
+hezar.models.backbone.bert.bert - Hezar Documentation

Source code for hezar.models.backbone.bert.bert

+"""
+A BERT base language model (HuggingFace Transformers) wrapped by a Hezar Model class
+"""
+from __future__ import annotations
+
+from typing import List
+
+from ....constants import Backends
+from ....models import Model
+from ....registry import register_model
+from ....utils import is_backend_available
+from .bert_config import BERTConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import BertConfig, BertModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+@register_model("bert", config_class=BERTConfig)
+class BERT(Model):
+    required_backends = _required_backends
+    tokenizer_name = "wordpiece_tokenizer"
+    skip_keys_on_load = ["model.embeddings.position_ids", "bert.embeddings.position_ids"]  # For older versions
+
+    def __init__(self, config, **kwargs):
+        super().__init__(config=config, **kwargs)
+        self.bert = BertModel(BertConfig(**self.config))
+
+    def forward(
+        self,
+        token_ids,
+        attention_mask=None,
+        token_type_ids=None,
+        position_ids=None,
+        head_mask=None,
+        inputs_embeds=None,
+        encoder_hidden_states=None,
+        encoder_attention_mask=None,
+        past_key_values=None,
+        use_cache=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        **kwargs,
+    ):
+        outputs = self.bert(
+            input_ids=token_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            encoder_hidden_states=encoder_hidden_states,
+            encoder_attention_mask=encoder_attention_mask,
+            past_key_values=past_key_values,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=True,
+        )
+        return outputs
+
+    def preprocess(self, inputs: str | List[str], **kwargs):
+        if isinstance(inputs, str):
+            inputs = [inputs]
+        if "text_normalizer" in self.preprocessor:
+            normalizer = self.preprocessor["text_normalizer"]
+            inputs = normalizer(inputs)
+        tokenizer = self.preprocessor[self.tokenizer_name]
+        inputs = tokenizer(inputs, return_tensors="pt", device=self.device)
+        return inputs
+
+    def post_process(self, model_outputs, **kwargs):
+        hidden_states = model_outputs.get("hidden_states", None)
+        attentions = model_outputs.get("attentions", None)
+        outputs = {
+            "last_hidden_state": model_outputs.get("last_hidden_state"),
+            "hidden_states": hidden_states,
+            "attentions": attentions,
+        }
+        return outputs
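A short inference sketch for this backbone (the hub id is the one used in the `Model.load` docstring in `hezar/models/model.py` later in this diff; assumes the `transformers`/`tokenizers` backends and that `predict` chains the three methods above):

    from hezar.models import Model

    bert = Model.load("hezarai/bert-base-fa")
    outputs = bert.predict(["a sample sentence"])  # preprocess -> forward -> post_process
    print(outputs["last_hidden_state"].shape)      # (batch_size, seq_len, hidden_size)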
+
\ No newline at end of file
diff --git a/_modules/hezar/models/backbone/bert/bert_config.html b/_modules/hezar/models/backbone/bert/bert_config.html
new file mode 100644
index 00000000..29fc1143
--- /dev/null
+++ b/_modules/hezar/models/backbone/bert/bert_config.html
@@ -0,0 +1,520 @@
Source code for hezar.models.backbone.bert.bert_config

+from dataclasses import dataclass
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+@dataclass
+class BERTConfig(ModelConfig):
+    name = "bert"
+    task: str = TaskType.BACKBONE
+    vocab_size: int = 42000
+    hidden_size: int = 768
+    num_hidden_layers: int = 12
+    num_attention_heads: int = 12
+    intermediate_size: int = 3072
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.1
+    attention_probs_dropout_prob: float = 0.1
+    max_position_embeddings: int = 512
+    type_vocab_size: int = 2
+    initializer_range: float = 0.02
+    layer_norm_eps: float = 1e-12
+    pad_token_id: int = 0
+    position_embedding_type: str = "absolute"
+    use_cache: bool = True
+    classifier_dropout: float = None
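Since the annotated attributes above are dataclass fields, a config can be built with overrides directly (a sketch; the model is randomly initialized, e.g. for pretraining from scratch):

    config = BERTConfig(num_hidden_layers=6, vocab_size=50000)
    model = BERT(config)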
+
\ No newline at end of file
diff --git a/_modules/hezar/models/backbone/distilbert/distilbert.html b/_modules/hezar/models/backbone/distilbert/distilbert.html
new file mode 100644
index 00000000..cc080b8c
--- /dev/null
+++ b/_modules/hezar/models/backbone/distilbert/distilbert.html
@@ -0,0 +1,575 @@
Source code for hezar.models.backbone.distilbert.distilbert

+"""
+A DistilBERT base language model (HuggingFace Transformers) wrapped by a Hezar Model class
+"""
+from __future__ import annotations
+
+from typing import List
+
+from ....constants import Backends
+from ....models import Model
+from ....registry import register_model
+from ....utils import is_backend_available
+from .distilbert_config import DistilBERTConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import DistilBertConfig, DistilBertModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+@register_model("distilbert", config_class=DistilBERTConfig)
+class DistilBERT(Model):
+    required_backends = _required_backends
+    tokenizer_name = "wordpiece_tokenizer"
+
+    def __init__(self, config, **kwargs):
+        super().__init__(config=config, **kwargs)
+        self.distilbert = DistilBertModel(DistilBertConfig(**self.config))
+
+    def forward(
+        self,
+        token_ids,
+        attention_mask=None,
+        head_mask=None,
+        inputs_embeds=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        **kwargs,
+    ):
+        outputs = self.distilbert(
+            input_ids=token_ids,
+            attention_mask=attention_mask,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=True,
+        )
+        return outputs
+
+    def preprocess(self, inputs: str | List[str], **kwargs):
+        if isinstance(inputs, str):
+            inputs = [inputs]
+        if "text_normalizer" in self.preprocessor:
+            normalizer = self.preprocessor["text_normalizer"]
+            inputs = normalizer(inputs)
+        tokenizer = self.preprocessor[self.tokenizer_name]
+        inputs = tokenizer(inputs, return_tensors="pt", device=self.device)
+        return inputs
+
+    def post_process(self, model_outputs, **kwargs):
+        hidden_states = model_outputs.get("hidden_states", None)
+        attentions = model_outputs.get("attentions", None)
+        outputs = {
+            "last_hidden_state": model_outputs.get("last_hidden_state"),
+            "hidden_states": hidden_states,
+            "attentions": attentions,
+        }
+        return outputs
+
\ No newline at end of file
diff --git a/_modules/hezar/models/backbone/distilbert/distilbert_config.html b/_modules/hezar/models/backbone/distilbert/distilbert_config.html
new file mode 100644
index 00000000..c19dcb58
--- /dev/null
+++ b/_modules/hezar/models/backbone/distilbert/distilbert_config.html
@@ -0,0 +1,518 @@
Source code for hezar.models.backbone.distilbert.distilbert_config

+from dataclasses import dataclass
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+@dataclass
+class DistilBERTConfig(ModelConfig):
+    name = "distilbert"
+    task: str = TaskType.LANGUAGE_MODELING
+    activation: str = "gelu"
+    attention_dropout: float = 0.1
+    dim: int = 768
+    dropout: float = 0.1
+    hidden_dim: int = 3072
+    initializer_range: float = 0.02
+    max_position_embeddings: int = 512
+    n_heads: int = 12
+    n_layers: int = 6
+    output_past: bool = True
+    pad_token_id: int = 0
+    qa_dropout: float = 0.1
+    tie_weights_: bool = True
+    vocab_size: int = 42000
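Because `Model.__init__` (see `hezar/models/model.py` at the end of this diff) applies `config.update(kwargs)`, the fields above can also be overridden at model construction time, a sketch:

    model = DistilBERT(DistilBERTConfig(), n_layers=4, dropout=0.2)
    assert model.config.n_layers == 4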
+
\ No newline at end of file
diff --git a/_modules/hezar/models/backbone/roberta/roberta.html b/_modules/hezar/models/backbone/roberta/roberta.html
new file mode 100644
index 00000000..380dde14
--- /dev/null
+++ b/_modules/hezar/models/backbone/roberta/roberta.html
@@ -0,0 +1,587 @@
Source code for hezar.models.backbone.roberta.roberta

+"""
+RoBERTa base language model (HuggingFace Transformers) wrapped by a Hezar Model class
+"""
+from __future__ import annotations
+
+from typing import List
+
+from ....constants import Backends
+from ....models import Model
+from ....registry import register_model
+from ....utils import is_backend_available
+from .roberta_config import RoBERTaConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import RobertaConfig, RobertaModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+@register_model("roberta", config_class=RoBERTaConfig)
+class RoBERTa(Model):
+    required_backends = _required_backends
+    tokenizer_name = "bpe_tokenizer"
+    skip_keys_on_load = ["model.embeddings.position_ids", "roberta.embeddings.position_ids"]  # For older versions
+
+    def __init__(self, config, **kwargs):
+        super().__init__(config=config, **kwargs)
+        self.roberta = RobertaModel(RobertaConfig(**self.config))
+
+    def forward(
+        self,
+        token_ids,
+        attention_mask=None,
+        token_type_ids=None,
+        position_ids=None,
+        head_mask=None,
+        inputs_embeds=None,
+        encoder_hidden_states=None,
+        encoder_attention_mask=None,
+        past_key_values=None,
+        use_cache=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        **kwargs,
+    ):
+        outputs = self.roberta(
+            input_ids=token_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            encoder_hidden_states=encoder_hidden_states,
+            encoder_attention_mask=encoder_attention_mask,
+            past_key_values=past_key_values,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+        )
+        return outputs
+
+    def preprocess(self, inputs: str | List[str], **kwargs):
+        if isinstance(inputs, str):
+            inputs = [inputs]
+        if "text_normalizer" in self.preprocessor:
+            normalizer = self.preprocessor["text_normalizer"]
+            inputs = normalizer(inputs)
+        tokenizer = self.preprocessor[self.tokenizer_name]
+        inputs = tokenizer(inputs, return_tensors="pt", device=self.device)
+        return inputs
+
+    def post_process(self, model_outputs, **kwargs):
+        hidden_states = model_outputs.get("hidden_states", None)
+        attentions = model_outputs.get("attentions", None)
+        outputs = {
+            "last_hidden_state": model_outputs.get("last_hidden_state"),
+            "hidden_states": hidden_states,
+            "attentions": attentions,
+        }
+        return outputs
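The inference chain these three methods implement, spelled out manually (a sketch; assumes the model's preprocessors are loaded and that the tokenizer output dict carries a `token_ids` key, as `forward` expects):

    inputs = roberta.preprocess("a sample sentence")
    model_outputs = roberta(**inputs)             # calls forward(...)
    processed = roberta.post_process(model_outputs)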
+
\ No newline at end of file
diff --git a/_modules/hezar/models/backbone/roberta/roberta_config.html b/_modules/hezar/models/backbone/roberta/roberta_config.html
new file mode 100644
index 00000000..ca807491
--- /dev/null
+++ b/_modules/hezar/models/backbone/roberta/roberta_config.html
@@ -0,0 +1,522 @@
Source code for hezar.models.backbone.roberta.roberta_config

+from dataclasses import dataclass
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+@dataclass
+class RoBERTaConfig(ModelConfig):
+    name = "roberta"
+    task: str = TaskType.LANGUAGE_MODELING
+    attention_probs_dropout_prob: float = 0.1
+    bos_token_id: int = 0
+    eos_token_id: int = 2
+    gradient_checkpointing: bool = False
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.1
+    hidden_size: int = 768
+    initializer_range: float = 0.02  # annotation fixed: the default is a float, not an int
+    intermediate_size: int = 3072
+    layer_norm_eps: float = 1e-12
+    max_position_embeddings: int = 514
+    num_attention_heads: int = 12
+    num_hidden_layers: int = 12
+    pad_token_id: int = 1
+    position_embedding_type: str = "absolute"
+    type_vocab_size: int = 1
+    use_cache: bool = True
+    vocab_size: int = 42000
+
\ No newline at end of file
diff --git a/_modules/hezar/models/backbone/vit/vit.html b/_modules/hezar/models/backbone/vit/vit.html
new file mode 100644
index 00000000..89cfced2
--- /dev/null
+++ b/_modules/hezar/models/backbone/vit/vit.html
@@ -0,0 +1,570 @@
Source code for hezar.models.backbone.vit.vit

+from __future__ import annotations
+
+from typing import Dict, List
+
+import numpy as np
+import torch
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from .vit_config import ViTConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import ViTConfig as ViTConfig_
+    from transformers import ViTModel
+
+if is_backend_available(Backends.PILLOW):
+    from PIL import Image
+
+_required_backends = [Backends.TRANSFORMERS, Backends.TOKENIZERS, Backends.PILLOW]
+
+
+
+@register_model("vit", config_class=ViTConfig)
+class ViT(Model):
+    required_backends = _required_backends
+    image_processor = "image_processor"
+    loss_fn_name = "cross_entropy"
+
+    def __init__(self, config: ViTConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+        self.vit = ViTModel(ViTConfig_(**self.config))
+
+    def forward(
+        self,
+        pixel_values=None,
+        bool_masked_pos=None,
+        head_mask=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        interpolate_pos_encoding=None,
+    ):
+        outputs = self.vit(
+            pixel_values=pixel_values,
+            bool_masked_pos=bool_masked_pos,
+            head_mask=head_mask,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            interpolate_pos_encoding=interpolate_pos_encoding,
+        )
+
+        return outputs
+
+    def preprocess(self, inputs: List[str] | List[np.ndarray] | List["Image"] | List[torch.Tensor], **kwargs):
+        image_processor = self.preprocessor[self.image_processor]
+        processed_outputs = image_processor(inputs, **kwargs)
+        return processed_outputs
+
+    def post_process(self, model_outputs: Dict[str, torch.Tensor]):
+        outputs = {
+            "last_hidden_state": model_outputs.get("last_hidden_state", None),
+            "pooler_output": model_outputs.get("pooler_output", None),
+            "hidden_states": model_outputs.get("hidden_states", None),
+            "attentions": model_outputs.get("attentions", None),
+        }
+        return outputs
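Sketch of embedding images with this backbone (the repo id below is hypothetical; `preprocess` accepts paths, numpy arrays, PIL images or tensors per the signature above):

    vit = Model.load("hezarai/vit-base")  # hypothetical repo id
    outputs = vit.predict(["/path/to/image.jpg"])
    print(outputs["pooler_output"].shape)  # (batch_size, hidden_size)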
+
\ No newline at end of file
diff --git a/_modules/hezar/models/backbone/vit/vit_config.html b/_modules/hezar/models/backbone/vit/vit_config.html
new file mode 100644
index 00000000..4a7b6a3b
--- /dev/null
+++ b/_modules/hezar/models/backbone/vit/vit_config.html
@@ -0,0 +1,516 @@
Source code for hezar.models.backbone.vit.vit_config

+from dataclasses import dataclass
+
+from ....configs import ModelConfig
+
+
+
+@dataclass
+class ViTConfig(ModelConfig):
+    name = "vit"
+    hidden_size: int = 768
+    num_hidden_layers: int = 12
+    num_attention_heads: int = 12
+    intermediate_size: int = 3072
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.0
+    attention_probs_dropout_prob: float = 0.0
+    initializer_range: float = 0.02
+    layer_norm_eps: float = 1e-12
+    image_size: int = 224
+    patch_size: int = 16
+    num_channels: int = 3
+    qkv_bias: bool = True
+    encoder_stride: int = 16
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/beit_roberta/beit_roberta_image2text.html b/_modules/hezar/models/image2text/beit_roberta/beit_roberta_image2text.html
new file mode 100644
index 00000000..4fd61e79
--- /dev/null
+++ b/_modules/hezar/models/image2text/beit_roberta/beit_roberta_image2text.html
@@ -0,0 +1,608 @@
Source code for hezar.models.image2text.beit_roberta.beit_roberta_image2text

+from __future__ import annotations
+
+from typing import List
+
+import numpy as np
+import torch
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import Image2TextOutput
+from .beit_roberta_image2text_config import BeitRobertaImage2TextConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import (
+        BeitConfig,
+        BeitModel,
+        GenerationConfig,
+        RobertaConfig,
+        RobertaForCausalLM,
+        VisionEncoderDecoderModel,
+    )
+
+if is_backend_available(Backends.PILLOW):
+    from PIL import Image
+
+_required_backends = [Backends.TRANSFORMERS, Backends.TOKENIZERS, Backends.PILLOW]
+
+
+
+@register_model("beit_roberta_image2text", config_class=BeitRobertaImage2TextConfig)
+class BeitRobertaImage2Text(Model):
+    """
+    BEiT + RoBERTa for image to text
+    """
+
+    is_generative = True
+    required_backends = _required_backends
+    image_processor = "image_processor"
+    tokenizer_name = "bpe_tokenizer"
+    loss_fn_name = "cross_entropy"
+
+    def __init__(self, config: BeitRobertaImage2TextConfig, **kwargs):
+        super().__init__(config, **kwargs)
+        encoder = BeitModel(config=BeitConfig(**self.config.encoder))
+        decoder = RobertaForCausalLM(config=RobertaConfig(**self.config.decoder))
+        self.beit_roberta = VisionEncoderDecoderModel(encoder=encoder, decoder=decoder)
+
+    def forward(
+        self,
+        pixel_values,
+        decoder_input_ids=None,
+        decoder_attention_mask=None,
+        encoder_outputs=None,
+        past_key_values=None,
+        decoder_inputs_embeds=None,
+        use_cache=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        **kwargs,
+    ):
+        outputs = self.beit_roberta(
+            pixel_values=pixel_values,
+            decoder_input_ids=decoder_input_ids,
+            decoder_attention_mask=decoder_attention_mask,
+            encoder_outputs=encoder_outputs,
+            past_key_values=past_key_values,
+            decoder_inputs_embeds=decoder_inputs_embeds,
+            labels=None,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+        )
+
+        return outputs
+
+    def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
+        loss = self.criterion(logits.reshape(-1, self.beit_roberta.decoder.config.vocab_size), labels.reshape(-1))
+        return loss
+
+    def generate(self, pixel_values, generation_config=None, **kwargs):
+        if generation_config is None:
+            generation_config = self.config.dict()["generation"]
+        generation_config = GenerationConfig(**generation_config)
+        outputs = self.beit_roberta.generate(inputs=pixel_values, generation_config=generation_config, **kwargs)
+
+        return outputs
+
+    def preprocess(self, inputs: List[str] | List[np.ndarray] | List["Image"] | List[torch.Tensor], **kwargs):
+        image_processor = self.preprocessor[self.image_processor]
+        processed_outputs = image_processor(inputs, **kwargs)
+        return processed_outputs
+
+    def post_process(self, model_outputs, **kwargs):
+        tokenizer = self.preprocessor[self.tokenizer_name]
+        decoded_outputs = tokenizer.decode(model_outputs.cpu().numpy().tolist())
+        outputs = [Image2TextOutput(text=text) for text in decoded_outputs]
+        return outputs
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/beit_roberta/beit_roberta_image2text_config.html b/_modules/hezar/models/image2text/beit_roberta/beit_roberta_image2text_config.html
new file mode 100644
index 00000000..07cfa707
--- /dev/null
+++ b/_modules/hezar/models/image2text/beit_roberta/beit_roberta_image2text_config.html
@@ -0,0 +1,586 @@
Source code for hezar.models.image2text.beit_roberta.beit_roberta_image2text_config

+from dataclasses import dataclass, field
+
+from ....configs import ModelConfig
+
+
+
+@dataclass
+class DecoderConfig(ModelConfig):
+    name = "beit_roberta_decoder"
+    is_decoder: bool = True
+    add_cross_attention: bool = True
+    attention_probs_dropout_prob: float = 0.1
+    bos_token_id: int = 0
+    eos_token_id: int = 2
+    classifier_dropout: float = None
+    gradient_checkpointing: bool = False
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.1
+    hidden_size: int = 768
+    initializer_range: float = 0.02  # annotation fixed: the default is a float, not an int
+    intermediate_size: int = 3072
+    layer_norm_eps: float = 1e-12
+    max_position_embeddings: int = 514
+    num_attention_heads: int = 12
+    num_hidden_layers: int = 12
+    pad_token_id: int = 1
+    position_embedding_type: str = "absolute"
+    type_vocab_size: int = 1
+    use_cache: bool = True
+    vocab_size: int = 42000
+
+@dataclass
+class EncoderConfig(ModelConfig):
+    name = "beit_roberta_encoder"
+    vocab_size = 8192
+    hidden_size = 768
+    num_hidden_layers = 12
+    num_attention_heads = 12
+    intermediate_size = 3072
+    hidden_act = "gelu"
+    hidden_dropout_prob = 0.0
+    attention_probs_dropout_prob = 0.0
+    initializer_range = 0.02
+    layer_norm_eps = 1e-12
+    image_size = 224
+    patch_size = 16
+    num_channels = 3
+    use_mask_token = False
+    use_absolute_position_embeddings = False
+    use_relative_position_bias = False
+    use_shared_relative_position_bias = False
+    layer_scale_init_value = 0.1
+    drop_path_rate = 0.1
+    use_mean_pooling = True
+    out_indices = [3, 5, 7, 11]
+    pool_scales = [1, 2, 3, 6]
+    use_auxiliary_head = True
+    auxiliary_loss_weight = 0.4
+    auxiliary_channels = 256
+    auxiliary_num_convs = 1
+    auxiliary_concat_input = False
+    semantic_loss_ignore_index = 255
+
+@dataclass
+class GenerationConfig:
+    bos_token_id: int = 0
+    decoder_start_token_id: int = 0
+    early_stopping: bool = True
+    eos_token_id: int = 2
+    length_penalty: float = 2.0
+    max_length: int = 64
+    no_repeat_ngram_size: int = 3
+    num_beams: int = 4
+    pad_token_id: int = 1
+
+@dataclass
+class BeitRobertaImage2TextConfig(ModelConfig):
+    name = "beit_roberta_image2text"
+    encoder: EncoderConfig = field(default_factory=EncoderConfig)
+    decoder: DecoderConfig = field(default_factory=DecoderConfig)
+    generation: GenerationConfig = field(default_factory=GenerationConfig)
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/crnn/crnn_decode_utils.html b/_modules/hezar/models/image2text/crnn/crnn_decode_utils.html
new file mode 100644
index 00000000..0bfcd3c1
--- /dev/null
+++ b/_modules/hezar/models/image2text/crnn/crnn_decode_utils.html
@@ -0,0 +1,534 @@
Source code for hezar.models.image2text.crnn.crnn_decode_utils

+import numpy as np
+import torch
+
+
+def _reconstruct(labels, blank=0):
+    new_labels = []
+    # merge same labels
+    previous = None
+    for label in labels:
+        if label != previous:
+            new_labels.append(label)
+            previous = label
+    # delete blank
+    new_labels = [label for label in new_labels if label != blank]
+
+    return new_labels
+
+
+
+def greedy_decode(emission_log_prob, blank=0):
+    labels = np.argmax(emission_log_prob, axis=-1)
+    labels = _reconstruct(labels, blank=blank)
+    return labels
+
+def ctc_decode(log_probs, id2label=None, blank=0):
+    emission_log_probs = np.transpose(log_probs.cpu().detach().numpy(), (1, 0, 2))
+    batch_size, max_length, _ = emission_log_probs.shape
+
+    # size of emission_log_probs: (batch, length, class)
+    decoded_ids = []
+    for emission_log_prob in emission_log_probs:
+        ids = greedy_decode(emission_log_prob, blank=blank)
+        if id2label:
+            ids = [id2label[label] for label in ids]
+        ids.extend([blank] * (max_length - len(ids)))
+        decoded_ids.append(ids)
+    return torch.tensor(decoded_ids)
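A worked example of the greedy CTC collapse implemented by `_reconstruct` above (merge repeated labels, then drop blanks):

    # argmax path:      [0, 3, 3, 0, 0, 7, 7, 7, 0]   (0 = blank)
    # merge repeats ->  [0, 3, 0, 7, 0]
    # drop blanks   ->  [3, 7]
    assert _reconstruct([0, 3, 3, 0, 0, 7, 7, 7, 0], blank=0) == [3, 7]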
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/crnn/crnn_image2text.html b/_modules/hezar/models/image2text/crnn/crnn_image2text.html
new file mode 100644
index 00000000..353ad717
--- /dev/null
+++ b/_modules/hezar/models/image2text/crnn/crnn_image2text.html
@@ -0,0 +1,633 @@
Source code for hezar.models.image2text.crnn.crnn_image2text

+import torch
+from torch import nn
+
+from ....registry import register_model
+from ....utils import reverse_string_digits
+from ...model import Model
+from ...model_outputs import Image2TextOutput
+from .crnn_decode_utils import ctc_decode
+from .crnn_image2text_config import CRNNImage2TextConfig
+
+
+
+@register_model("crnn_image2text", config_class=CRNNImage2TextConfig)
+class CRNNImage2Text(Model):
+    """
+    A robust CRNN model for character level OCR based on the original paper.
+    """
+
+    is_generative = True
+    image_processor = "image_processor"
+    loss_fn_name = "ctc"
+    loss_fn_kwargs = {"zero_infinity": True}
+
+    def __init__(self, config: CRNNImage2TextConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+        self.cnn = nn.Sequential(
+            ConvBlock(self.config.n_channels, 64, 3, 1, 1),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            ConvBlock(64, 128, 3, 1, 1),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            ConvBlock(128, 256, 3, 1, 1),
+            ConvBlock(256, 256, 3, 1, 1),
+            nn.MaxPool2d(kernel_size=(2, 1)),
+            ConvBlock(256, 512, 3, 1, 1, batch_norm=True),
+            ConvBlock(512, 512, 3, 1, 1, batch_norm=True),
+            nn.MaxPool2d(kernel_size=(2, 1)),
+            ConvBlock(512, 512, 3, 1, 1),
+        )
+        # map CNN to sequence
+        self.map2seq = nn.Linear(self.config.map2seq_in_dim, self.config.map2seq_out_dim)
+        # RNN
+        self.rnn1 = nn.LSTM(self.config.map2seq_out_dim, self.config.rnn_dim, bidirectional=True)
+        self.rnn2 = nn.LSTM(2 * self.config.rnn_dim, self.config.rnn_dim, bidirectional=True)
+        # classifier
+        self.classifier = nn.Linear(2 * self.config.rnn_dim, len(self.config.id2label))
+
+    def forward(self, pixel_values, **kwargs):
+        # CNN block
+        x = self.cnn(pixel_values)
+        # reformat array
+        batch, channel, height, width = x.size()
+        x = x.view(batch, channel * height, width)
+        x = x.permute(2, 0, 1)
+        x = self.map2seq(x)
+        x, _ = self.rnn1(x)
+        x, _ = self.rnn2(x)
+        x = self.classifier(x)
+        x = nn.functional.log_softmax(x, 2)
+        outputs = {"logits": x}
+        return outputs
+
+    def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor):
+        batch_size = logits.size(1)
+        labels_lengths = torch.count_nonzero(labels, dim=1).flatten()
+        labels = labels[labels != self.config.blank_id]
+        input_lengths = torch.LongTensor([logits.size(0)] * batch_size)
+
+        loss = self.criterion(logits, labels, input_lengths, labels_lengths)
+
+        return loss
+
+    def generate(self, pixel_values):
+        logits = self(pixel_values)["logits"]
+        output_ids = ctc_decode(logits, blank=self.config.blank_id)
+        probs, values = logits.permute(1, 0, 2).softmax(2).max(2)
+        mean_probs = probs.mean(1)
+        return {"generated_ids": output_ids, "scores": mean_probs}
+
+    def preprocess(self, inputs, **kwargs):
+        image_processor = self.preprocessor[self.image_processor]
+        processed_outputs = image_processor(inputs, **kwargs)
+        return processed_outputs
+
+    def post_process(self, generation_outputs, return_scores=False):
+        if isinstance(generation_outputs, torch.Tensor):
+            generated_ids = generation_outputs
+            # No scores available; plain zeros suffice (the redundant
+            # `torch.tensor(torch.zeros(...))` wrapper has been dropped).
+            scores = torch.zeros(generated_ids.shape)
+        else:
+            generated_ids, scores = generation_outputs.values()
+
+        outputs = []
+        generated_ids = generated_ids.cpu().numpy().tolist()
+        scores = scores.cpu().numpy().tolist()
+        for decoded_ids, score in zip(generated_ids, scores):
+            chars = [self.config.id2label[id_] for id_ in decoded_ids]
+            text = "".join(chars)
+            if self.config.reverse_output_digits:
+                text = reverse_string_digits(text)
+            if return_scores:
+                outputs.append(Image2TextOutput(text=text, score=score))
+            else:
+                outputs.append(Image2TextOutput(text=text))
+        return outputs
+
+class ConvBlock(nn.Module):
+    def __init__(self, input_channel, output_channel, kernel_sizes, strides, paddings, batch_norm: bool = False):
+        super(ConvBlock, self).__init__()
+        self.do_batch_norm = batch_norm
+        self.conv = nn.Conv2d(input_channel, output_channel, kernel_sizes, strides, paddings)
+        self.bn = nn.BatchNorm2d(output_channel)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        if self.do_batch_norm:
+            x = self.bn(x)
+        x = self.relu(x)
+        return x
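An end-to-end OCR sketch for this model (the repo id is hypothetical, and routing `return_scores` through `predict` to `post_process` is assumed; "generation" here is the plain CTC greedy decode above, not autoregressive decoding):

    model = Model.load("hezarai/crnn-fa-printed")  # hypothetical repo id
    results = model.predict(["/path/to/line_image.png"], return_scores=True)
    print(results[0].text, results[0].score)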
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/crnn/crnn_image2text_config.html b/_modules/hezar/models/image2text/crnn/crnn_image2text_config.html
new file mode 100644
index 00000000..b6ccbdc1
--- /dev/null
+++ b/_modules/hezar/models/image2text/crnn/crnn_image2text_config.html
@@ -0,0 +1,513 @@
Source code for hezar.models.image2text.crnn.crnn_image2text_config

+from dataclasses import dataclass
+from typing import Dict
+
+from ....configs import ModelConfig
+
+
+
+@dataclass
+class CRNNImage2TextConfig(ModelConfig):
+    name = "crnn_image2text"
+    id2label: Dict[int, str] = None
+    blank_id: int = 0
+    n_channels: int = 1
+    image_height: int = 32
+    image_width: int = 128
+    map2seq_in_dim: int = 2048
+    map2seq_out_dim: int = 64
+    rnn_dim: int = 256
+    reverse_prediction_text: bool = None
+    reverse_output_digits: bool = None
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/trocr/trocr_image2text.html b/_modules/hezar/models/image2text/trocr/trocr_image2text.html
new file mode 100644
index 00000000..1f0bcbf1
--- /dev/null
+++ b/_modules/hezar/models/image2text/trocr/trocr_image2text.html
@@ -0,0 +1,608 @@
Source code for hezar.models.image2text.trocr.trocr_image2text

+from __future__ import annotations
+
+from typing import List
+
+import numpy as np
+import torch
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import Image2TextOutput
+from .trocr_image2text_config import TrOCRImage2TextConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import (
+        GenerationConfig,
+        RobertaConfig,
+        RobertaForCausalLM,
+        VisionEncoderDecoderModel,
+        ViTConfig,
+        ViTModel,
+    )
+
+if is_backend_available(Backends.PILLOW):
+    from PIL import Image
+
+_required_backends = [Backends.TRANSFORMERS, Backends.TOKENIZERS, Backends.PILLOW]
+
+
+
+@register_model("trocr_image2text", config_class=TrOCRImage2TextConfig)
+class TrOCRImage2Text(Model):
+    """
+    TrOCR for optical character recognition
+    """
+
+    is_generative = True
+    required_backends = _required_backends
+    image_processor = "image_processor"
+    tokenizer_name = "bpe_tokenizer"
+    loss_fn_name = "cross_entropy"
+
+    def __init__(self, config: TrOCRImage2TextConfig, **kwargs):
+        super().__init__(config, **kwargs)
+        encoder = ViTModel(config=ViTConfig(**self.config.encoder))
+        decoder = RobertaForCausalLM(config=RobertaConfig(**self.config.decoder))
+        self.trocr = VisionEncoderDecoderModel(encoder=encoder, decoder=decoder)
+
+    def forward(
+        self,
+        pixel_values,
+        decoder_input_ids=None,
+        decoder_attention_mask=None,
+        encoder_outputs=None,
+        past_key_values=None,
+        decoder_inputs_embeds=None,
+        use_cache=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        **kwargs,
+    ):
+        outputs = self.trocr(
+            pixel_values=pixel_values,
+            decoder_input_ids=decoder_input_ids,
+            decoder_attention_mask=decoder_attention_mask,
+            encoder_outputs=encoder_outputs,
+            past_key_values=past_key_values,
+            decoder_inputs_embeds=decoder_inputs_embeds,
+            labels=None,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+        )
+
+        return outputs
+
+    def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
+        loss = self.criterion(logits.reshape(-1, self.trocr.decoder.config.vocab_size), labels.reshape(-1))
+        return loss
+
+    def generate(self, pixel_values, generation_config=None, **kwargs):
+        if generation_config is None:
+            generation_config = self.config.dict()["generation"]
+        generation_config = GenerationConfig(**generation_config)
+        outputs = self.trocr.generate(inputs=pixel_values, generation_config=generation_config, **kwargs)
+
+        return outputs
+
+    def preprocess(self, inputs: List[str] | List[np.ndarray] | List["Image"] | List[torch.Tensor], **kwargs):
+        image_processor = self.preprocessor[self.image_processor]
+        processed_outputs = image_processor(inputs, **kwargs)
+        return processed_outputs
+
+    def post_process(self, model_outputs, **kwargs):
+        tokenizer = self.preprocessor[self.tokenizer_name]
+        decoded_outputs = tokenizer.decode(model_outputs.cpu().numpy().tolist())
+        outputs = [Image2TextOutput(text=text) for text in decoded_outputs]
+        return outputs
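Inference sketch (the repo id is hypothetical; for generative models like this one, `predict` is assumed to route through `generate` rather than `forward`):

    model = Model.load("hezarai/trocr-base-fa")  # hypothetical repo id
    outputs = model.predict(["/path/to/handwritten_line.jpg"])
    print(outputs[0].text)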
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/trocr/trocr_image2text_config.html b/_modules/hezar/models/image2text/trocr/trocr_image2text_config.html
new file mode 100644
index 00000000..5ddf78cc
--- /dev/null
+++ b/_modules/hezar/models/image2text/trocr/trocr_image2text_config.html
@@ -0,0 +1,570 @@
Source code for hezar.models.image2text.trocr.trocr_image2text_config

+from dataclasses import dataclass, field
+
+from ....configs import ModelConfig
+
+
+
+@dataclass
+class DecoderConfig(ModelConfig):
+    is_decoder: bool = True
+    add_cross_attention: bool = True
+    attention_probs_dropout_prob: float = 0.1
+    bos_token_id: int = 0
+    eos_token_id: int = 2
+    classifier_dropout: float = None
+    gradient_checkpointing: bool = False
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.1
+    hidden_size: int = 768
+    initializer_range: float = 0.02  # annotation fixed: the default is a float, not an int
+    intermediate_size: int = 3072
+    layer_norm_eps: float = 1e-12
+    max_position_embeddings: int = 514
+    num_attention_heads: int = 12
+    num_hidden_layers: int = 12
+    pad_token_id: int = 1
+    position_embedding_type: str = "absolute"
+    type_vocab_size: int = 1
+    use_cache: bool = True
+    vocab_size: int = 42000
+
+@dataclass
+class EncoderConfig(ModelConfig):
+    hidden_size: int = 768
+    num_hidden_layers: int = 12
+    num_attention_heads: int = 12
+    intermediate_size: int = 3072
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.0
+    attention_probs_dropout_prob: float = 0.0
+    initializer_range: float = 0.02
+    layer_norm_eps: float = 1e-12
+    image_size: int = 224
+    patch_size: int = 16
+    num_channels: int = 3
+    qkv_bias: bool = True
+    encoder_stride: int = 16
+
+@dataclass
+class GenerationConfig:
+    bos_token_id: int = 0
+    decoder_start_token_id: int = 0
+    early_stopping: bool = True
+    eos_token_id: int = 2
+    length_penalty: float = 2.0
+    max_length: int = 64
+    no_repeat_ngram_size: int = 3
+    num_beams: int = 4
+    pad_token_id: int = 1
+
+@dataclass
+class TrOCRImage2TextConfig(ModelConfig):
+    name = "trocr_image2text"
+    encoder: EncoderConfig = field(default_factory=EncoderConfig)
+    decoder: DecoderConfig = field(default_factory=DecoderConfig)
+    generation: GenerationConfig = field(default_factory=GenerationConfig)
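Per the `generate` method shown above, a plain dict is wrapped into a `transformers.GenerationConfig`, so decoding parameters can be overridden per call (a sketch):

    gen = model.config.dict()["generation"]
    gen["num_beams"] = 1      # greedy decoding
    gen["max_length"] = 32
    ids = model.generate(pixel_values, generation_config=gen)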
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/vit_gpt2/vit_gpt2_image2text.html b/_modules/hezar/models/image2text/vit_gpt2/vit_gpt2_image2text.html
new file mode 100644
index 00000000..ebc1e059
--- /dev/null
+++ b/_modules/hezar/models/image2text/vit_gpt2/vit_gpt2_image2text.html
@@ -0,0 +1,608 @@
Source code for hezar.models.image2text.vit_gpt2.vit_gpt2_image2text

+from __future__ import annotations
+
+from typing import List
+
+import numpy as np
+import torch
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import Image2TextOutput
+from .vit_gpt2_image2text_config import ViTGPT2Image2TextConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import (
+        GenerationConfig,
+        GPT2Config,
+        GPT2LMHeadModel,
+        VisionEncoderDecoderModel,
+        ViTConfig,
+        ViTModel,
+    )
+
+if is_backend_available(Backends.PILLOW):
+    from PIL import Image
+
+_required_backends = [Backends.TRANSFORMERS, Backends.TOKENIZERS, Backends.PILLOW]
+
+
+
+@register_model("vit_gpt2_image2text", config_class=ViTGPT2Image2TextConfig)
+class ViTGPT2Image2Text(Model):
+    """
+    ViT + GPT2 for image to text generation (image captioning)
+    """
+
+    is_generative = True
+    required_backends = _required_backends
+    image_processor = "image_processor"
+    tokenizer_name = "bpe_tokenizer"
+    loss_fn_name = "cross_entropy"
+
+    def __init__(self, config: ViTGPT2Image2TextConfig, **kwargs):
+        super().__init__(config, **kwargs)
+        encoder = ViTModel(config=ViTConfig(**self.config.encoder))
+        decoder = GPT2LMHeadModel(config=GPT2Config(**self.config.decoder))
+        self.vit_gpt2 = VisionEncoderDecoderModel(encoder=encoder, decoder=decoder)
+
+    def forward(
+        self,
+        pixel_values,
+        decoder_input_ids=None,
+        decoder_attention_mask=None,
+        encoder_outputs=None,
+        past_key_values=None,
+        decoder_inputs_embeds=None,
+        use_cache=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        **kwargs,
+    ):
+        outputs = self.vit_gpt2(
+            pixel_values=pixel_values,
+            decoder_input_ids=decoder_input_ids,
+            decoder_attention_mask=decoder_attention_mask,
+            encoder_outputs=encoder_outputs,
+            past_key_values=past_key_values,
+            decoder_inputs_embeds=decoder_inputs_embeds,
+            labels=None,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+        )
+
+        return outputs
+
+    def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
+        loss = self.criterion(logits.reshape(-1, self.vit_gpt2.decoder.config.vocab_size), labels.reshape(-1))
+        return loss
+
+    def generate(self, pixel_values, generation_config=None, **kwargs):
+        if generation_config is None:
+            generation_config = self.config.dict()["generation"]
+        generation_config = GenerationConfig(**generation_config)
+        outputs = self.vit_gpt2.generate(inputs=pixel_values, generation_config=generation_config, **kwargs)
+
+        return outputs
+
+    def preprocess(self, inputs: List[str] | List[np.ndarray] | List["Image"] | List[torch.Tensor], **kwargs):
+        image_processor = self.preprocessor[self.image_processor]
+        processed_outputs = image_processor(inputs, **kwargs)
+        return processed_outputs
+
+    def post_process(self, model_outputs, **kwargs):
+        tokenizer = self.preprocessor[self.tokenizer_name]
+        decoded_outputs = tokenizer.decode(model_outputs.cpu().numpy().tolist())
+        outputs = [Image2TextOutput(text=text) for text in decoded_outputs]
+        return outputs
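Captioning sketch (the repo id is hypothetical):

    captioner = Model.load("hezarai/vit-gpt2-fa-image-captioning")  # hypothetical repo id
    captions = captioner.predict(["/path/to/photo.jpg"])
    print(captions[0].text)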
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/vit_gpt2/vit_gpt2_image2text_config.html b/_modules/hezar/models/image2text/vit_gpt2/vit_gpt2_image2text_config.html
new file mode 100644
index 00000000..a3ff553d
--- /dev/null
+++ b/_modules/hezar/models/image2text/vit_gpt2/vit_gpt2_image2text_config.html
@@ -0,0 +1,571 @@
Source code for hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config

+from dataclasses import dataclass, field
+
+from ....configs import ModelConfig
+
+
+
+@dataclass
+class DecoderConfig(ModelConfig):
+    name = "vit_gpt2_decoder"
+    add_cross_attention: bool = True
+    vocab_size: int = 42001
+    attn_pdrop: float = 0.1
+    bos_token_id: int = 5
+    embd_pdrop: float = 0.1
+    eos_token_id: int = 5
+    gradient_checkpointing: bool = False
+    initializer_range: float = 0.02
+    layer_norm_epsilon: float = 1e-05
+    model_type: str = "gpt2"
+    n_ctx: int = 1024
+    n_embd: int = 768
+    n_head: int = 12
+    n_inner: int = None
+    n_layer: int = 12
+    n_positions: int = 1024
+    resid_pdrop: float = 0.1
+    summary_activation: bool = False
+    summary_first_dropout: float = 0.1
+    use_cache: bool = True
+
+@dataclass
+class EncoderConfig(ModelConfig):
+    name = "vit_gpt2_encoder"
+    hidden_size: int = 768
+    num_hidden_layers: int = 12
+    num_attention_heads: int = 12
+    intermediate_size: int = 3072
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.0
+    attention_probs_dropout_prob: float = 0.0
+    initializer_range: float = 0.02
+    layer_norm_eps: float = 1e-12
+    image_size: int = 224
+    patch_size: int = 16
+    num_channels: int = 3
+    qkv_bias: bool = True
+    encoder_stride: int = 16
+
+@dataclass
+class GenerationConfig(ModelConfig):
+    bos_token_id: int = 0
+    decoder_start_token_id: int = 0
+    early_stopping: bool = True
+    eos_token_id: int = 2
+    length_penalty: float = 2.0
+    max_new_tokens: int = 24
+    no_repeat_ngram_size: int = 3
+    num_beams: int = 4
+    pad_token_id: int = 1
+
+@dataclass
+class ViTGPT2Image2TextConfig(ModelConfig):
+    name = "vit_gpt2_image2text"
+    encoder: EncoderConfig = field(default_factory=EncoderConfig)
+    decoder: DecoderConfig = field(default_factory=DecoderConfig)
+    generation: GenerationConfig = field(default_factory=GenerationConfig)
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/vit_roberta/vit_roberta_image2text.html b/_modules/hezar/models/image2text/vit_roberta/vit_roberta_image2text.html
new file mode 100644
index 00000000..51dc0574
--- /dev/null
+++ b/_modules/hezar/models/image2text/vit_roberta/vit_roberta_image2text.html
@@ -0,0 +1,619 @@
Source code for hezar.models.image2text.vit_roberta.vit_roberta_image2text

+from __future__ import annotations
+
+from typing import List
+
+import numpy as np
+import torch
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import Image2TextOutput
+from .vit_roberta_image2text_config import ViTRobertaImage2TextConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import (
+        GenerationConfig,
+        RobertaConfig,
+        RobertaForCausalLM,
+        VisionEncoderDecoderModel,
+        ViTConfig,
+        ViTModel,
+    )
+
+if is_backend_available(Backends.PILLOW):
+    from PIL import Image
+
+_required_backends = [Backends.TRANSFORMERS, Backends.TOKENIZERS, Backends.PILLOW]
+
+
+
+@register_model("vit_roberta_image2text", config_class=ViTRobertaImage2TextConfig)
+class ViTRobertaImage2Text(Model):
+    """
+    ViT + RoBERTa for image to text
+    """
+
+    is_generative = True
+    required_backends = _required_backends
+    image_processor = "image_processor"
+    tokenizer_name = "bpe_tokenizer"
+    loss_fn_name = "cross_entropy"
+
+    def __init__(self, config: ViTRobertaImage2TextConfig, **kwargs):
+        super().__init__(config, **kwargs)
+        encoder = ViTModel(config=ViTConfig(**self.config.encoder))
+        decoder = RobertaForCausalLM(config=RobertaConfig(**self.config.decoder))
+        self.vit_roberta = VisionEncoderDecoderModel(encoder=encoder, decoder=decoder)
+
+    def forward(
+        self,
+        pixel_values,
+        decoder_input_ids=None,
+        decoder_attention_mask=None,
+        encoder_outputs=None,
+        past_key_values=None,
+        decoder_inputs_embeds=None,
+        use_cache=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        **kwargs,
+    ):
+        outputs = self.vit_roberta(
+            pixel_values=pixel_values,
+            decoder_input_ids=decoder_input_ids,
+            decoder_attention_mask=decoder_attention_mask,
+            encoder_outputs=encoder_outputs,
+            past_key_values=past_key_values,
+            decoder_inputs_embeds=decoder_inputs_embeds,
+            labels=None,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+        )
+
+        return dict(outputs)
+
+    def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
+        self.criterion.ignore_index = self.config.decoder["pad_token_id"]
+        loss = self.criterion(logits.reshape(-1, self.vit_roberta.decoder.config.vocab_size), labels.reshape(-1))
+        return loss
+
+    def generate(self, pixel_values, generation_config=None, **kwargs):
+        tokenizer = self.preprocessor["bpe_tokenizer"]
+        if generation_config is None:
+            generation_config = self.config.dict()["generation"]
+            generation_config["decoder_start_token_id"] = tokenizer.pad_token_id
+        generation_config = GenerationConfig(**generation_config)
+        outputs = self.vit_roberta.generate(inputs=pixel_values, generation_config=generation_config, **kwargs)
+
+        return outputs
+
+    def preprocess(self, inputs: List[str] | List[np.ndarray] | List["Image"] | List[torch.Tensor], **kwargs):
+        image_processor = self.preprocessor[self.image_processor]
+        processed_outputs = image_processor(inputs, **kwargs)
+        return processed_outputs
+
+    def post_process(self, model_outputs: torch.Tensor, **kwargs):
+        tokenizer = self.preprocessor[self.tokenizer_name]
+        decoded_outputs = tokenizer.decode(model_outputs.cpu().numpy().tolist())
+        outputs = [Image2TextOutput(text=text) for text in decoded_outputs]
+        return outputs
+
+    @property
+    def encoder(self):
+        return self.vit_roberta.encoder
+
+    @property
+    def decoder(self):
+        return self.vit_roberta.decoder
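The `encoder`/`decoder` properties above expose the underlying HF modules directly, which is handy e.g. for freezing the vision encoder during fine-tuning (a sketch):

    for param in model.encoder.parameters():
        param.requires_grad = False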
+
\ No newline at end of file
diff --git a/_modules/hezar/models/image2text/vit_roberta/vit_roberta_image2text_config.html b/_modules/hezar/models/image2text/vit_roberta/vit_roberta_image2text_config.html
new file mode 100644
index 00000000..898af8cb
--- /dev/null
+++ b/_modules/hezar/models/image2text/vit_roberta/vit_roberta_image2text_config.html
@@ -0,0 +1,571 @@
Source code for hezar.models.image2text.vit_roberta.vit_roberta_image2text_config

+from dataclasses import dataclass, field
+
+from ....configs import ModelConfig
+
+
+
+@dataclass
+class DecoderConfig(ModelConfig):
+    is_decoder: bool = True
+    add_cross_attention: bool = True
+    attention_probs_dropout_prob: float = 0.1
+    bos_token_id: int = 0
+    eos_token_id: int = 2
+    classifier_dropout: float = None
+    gradient_checkpointing: bool = False
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.1
+    hidden_size: int = 768
+    initializer_range: float = 0.02  # annotation fixed: the default is a float, not an int
+    intermediate_size: int = 3072
+    layer_norm_eps: float = 1e-12
+    max_position_embeddings: int = 514
+    num_attention_heads: int = 12
+    num_hidden_layers: int = 12
+    pad_token_id: int = 2
+    position_embedding_type: str = "absolute"
+    type_vocab_size: int = 1
+    use_cache: bool = True
+    vocab_size: int = 42000
+
+@dataclass
+class EncoderConfig(ModelConfig):
+    hidden_size: int = 768
+    num_hidden_layers: int = 12
+    num_attention_heads: int = 12
+    intermediate_size: int = 3072
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.0
+    attention_probs_dropout_prob: float = 0.0
+    initializer_range: float = 0.02
+    layer_norm_eps: float = 1e-12
+    image_size: int = 224
+    patch_size: int = 16
+    num_channels: int = 3
+    qkv_bias: bool = True
+    encoder_stride: int = 16
+
+@dataclass
+class GenerationConfig:
+    bos_token_id: int = 0
+    decoder_start_token_id: int = 0
+    return_dict_in_generate: bool = False
+    early_stopping: bool = True
+    eos_token_id: int = 2
+    length_penalty: float = 2.0
+    max_length: int = 64
+    no_repeat_ngram_size: int = 3
+    num_beams: int = 4
+    pad_token_id: int = 2
+
+@dataclass
+class ViTRobertaImage2TextConfig(ModelConfig):
+    name = "vit_roberta_image2text"
+    encoder: EncoderConfig = field(default_factory=EncoderConfig)
+    decoder: DecoderConfig = field(default_factory=DecoderConfig)
+    generation: GenerationConfig = field(default_factory=GenerationConfig)
+ +
+
\ No newline at end of file
diff --git a/_modules/hezar/models/model.html b/_modules/hezar/models/model.html
new file mode 100644
index 00000000..d0eaffb9
--- /dev/null
+++ b/_modules/hezar/models/model.html
@@ -0,0 +1,1062 @@
Source code for hezar.models.model

+"""
+Hezar models inherit the base class `Model`. A `Model` itself is a PyTorch Module to implement neural networks but has
+some extra Hezar-specific functionalities and methods, e.g., pushing to hub, loading from hub, etc.
+
+Examples:
+    >>> # Load from hub
+    >>> from hezar.models import Model
+    >>> model = Model.load("hezarai/bert-base-fa")
+"""
+from __future__ import annotations
+
+import inspect
+import os
+import re
+import tempfile
+from collections import OrderedDict
+from typing import Any, Dict, Iterable, List, Mapping, Optional
+
+import torch
+from huggingface_hub import create_repo, hf_hub_download, upload_file
+from torch import nn
+
+from ..builders import build_model
+from ..configs import ModelConfig
+from ..constants import (
+    DEFAULT_MODEL_CONFIG_FILE,
+    DEFAULT_MODEL_FILE,
+    HEZAR_CACHE_DIR,
+    Backends,
+    LossType,
+    RegistryType,
+)
+from ..preprocessors import Preprocessor, PreprocessorsContainer
+from ..utils import Logger, get_module_class, verify_dependencies
+from .model_outputs import ModelOutput
+
+
+logger = Logger(__name__)
+
+criterions_mapping = {
+    LossType.L1: nn.L1Loss,
+    LossType.NLL: nn.NLLLoss,
+    LossType.NLL_2D: nn.NLLLoss2d,
+    LossType.POISSON_NLL: nn.PoissonNLLLoss,
+    LossType.GAUSSIAN_NLL: nn.GaussianNLLLoss,
+    LossType.MSE: nn.MSELoss,
+    LossType.BCE: nn.BCELoss,
+    LossType.BCE_WITH_LOGITS: nn.BCEWithLogitsLoss,
+    LossType.CROSS_ENTROPY: nn.CrossEntropyLoss,
+    LossType.TRIPLE_MARGIN: nn.TripletMarginLoss,
+    LossType.CTC: nn.CTCLoss
+}
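A sketch of how a subclass hooks into this mapping through the class attributes defined on `Model` below (`loss_fn_name`, `loss_fn_kwargs`); the subclass name is hypothetical:

    class MyBinaryTagger(Model):
        loss_fn_name = LossType.BCE_WITH_LOGITS
        loss_fn_kwargs = {"reduction": "mean"}
        # the criterion is then an nn.BCEWithLogitsLoss instance,
        # built by `_set_criterion` at __init__ time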
+
+
+
+class Model(nn.Module):
+    """
+    Base class for all neural network models in Hezar.
+
+    Args:
+        config: A dataclass model config
+    """
+
+    required_backends: List[Backends | str] = []
+    # Default file names
+    model_filename = DEFAULT_MODEL_FILE
+    config_filename = DEFAULT_MODEL_CONFIG_FILE
+
+    # Specify if the model is a generative model. If True, the model must also implement the `generate` method
+    is_generative: bool = False
+
+    # Keys to ignore on loading state dicts
+    skip_keys_on_load = []
+
+    # Loss function name
+    loss_fn_name: str | LossType = LossType.CROSS_ENTROPY
+    loss_fn_kwargs: Dict[str, Any] = {}
+
+    def __init__(self, config: ModelConfig, *args, **kwargs):
+        verify_dependencies(self, self.required_backends)
+        super().__init__()
+        self.config = config.update(kwargs)
+        self._preprocessor = None
+        self._criterion = self._set_criterion(self.loss_fn_name, **self.loss_fn_kwargs)
+
+    def __repr__(self):
+        representation = super().__repr__()
+        pattern = r"\('?_criterion'?\): [^\)]+\)\s*"
+        representation = re.sub(pattern, '', representation)
+        return representation
+
+    @staticmethod
+    def _set_criterion(loss_fn_name: str, **loss_fn_kwargs: Dict[str, Any]):
+        if loss_fn_name not in criterions_mapping:
+            raise ValueError(f"Invalid criterion name `{loss_fn_name}`. Available: {list(criterions_mapping.keys())}")
+        loss_fn = criterions_mapping[loss_fn_name](**loss_fn_kwargs)
+        return loss_fn
+
+[docs] + @classmethod + def load( + cls, + hub_or_local_path: str | os.PathLike, + load_locally: Optional[bool] = False, + load_preprocessor: Optional[bool] = True, + model_filename: Optional[str] = None, + config_filename: Optional[str] = None, + save_path: Optional[str | os.PathLike] = None, + cache_dir: Optional[str | os.PathLike] = None, + **kwargs, + ) -> "Model": + """ + Load the model from a local path or the Hub. + + It's recommended to call this method on :class:`hezar.Model` itself rather than on a specific model class, + unless you know that the class matches the one on the Hub, because the returned model is always of the type + specified on the Hub! + + Args: + hub_or_local_path: Path to the model living on the Hub or local disk. + load_locally: Force loading from local path + load_preprocessor: Whether to load the preprocessor(s) or not + model_filename: Optional model filename. + config_filename: Optional config filename + save_path: Save model to this path after loading + cache_dir: Path to cache directory, defaults to `~/.cache/hezar` + + Returns: + The fully loaded Hezar model + """ + # Get device if provided in the kwargs + device = kwargs.pop("device", None) + # Load config + config_filename = config_filename or cls.config_filename + cache_dir = cache_dir or HEZAR_CACHE_DIR + config = ModelConfig.load(hub_or_local_path=hub_or_local_path, filename=config_filename, cache_dir=cache_dir) + # Get the exact model class based on the registry name under config.name + model_cls = get_module_class(config.name, registry_type=RegistryType.MODEL) + # Handle compatibility of the calling class and the one in the config + if cls.__name__ == "Model": + # Build model with config + model = build_model(config.name, config, **kwargs) + else: + if cls.__name__ != model_cls.__name__: + logger.warning( + f"You attempted to load a model using `{cls.__name__}` " + f"but the model in `{hub_or_local_path}` is of type `{model_cls.__name__}`, " + f"so the output model will be of type {model_cls.__name__} anyway!" + ) + model = model_cls(config, **kwargs) + + model_filename = model_filename or model_cls.model_filename or cls.model_filename + # Does the path exist locally? + is_local = load_locally or os.path.isdir(hub_or_local_path) + if not is_local: + model_path = hf_hub_download( + hub_or_local_path, + filename=model_filename, + cache_dir=cache_dir, + resume_download=True, + ) + else: + model_path = os.path.join(hub_or_local_path, model_filename) + # Load the state dict from the weights file + state_dict = torch.load(model_path, map_location=torch.device("cpu")) + model.load_state_dict(state_dict) + if device: + model.to(device) + if save_path: + model.save(save_path) + # Load the preprocessor(s) + if load_preprocessor: + preprocessor = Preprocessor.load(hub_or_local_path, force_return_dict=True, cache_dir=cache_dir) + model.preprocessor = preprocessor + return model
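+
+# Usage sketch for `load` (repo id taken from the module docstring example; `device`
+# is popped from the kwargs as handled above):
+# >>> from hezar.models import Model
+# >>> model = Model.load("hezarai/bert-base-fa", device="cuda")
+# The returned object is the concrete registered class (e.g. a BERT model), not `Model`.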
+ + +
+[docs] + def load_state_dict(self, state_dict: Mapping[str, Any], **kwargs): + """ + Flexibly load the state dict into the model. + + Any incompatible or missing key is ignored and all other layer weights are + loaded. In that case, a warning with additional info is logged. + + Args: + state_dict: Model state dict + """ + if len(self.skip_keys_on_load): + for key in self.skip_keys_on_load: + if key in state_dict: + state_dict.pop(key, None) # noqa + try: + super().load_state_dict(state_dict, strict=True) + except RuntimeError: + compatible_state_dict = OrderedDict() + src_state_dict = self.state_dict() + + incompatible_keys = [] + + for (src_key, src_weight), (trg_key, trg_weight) in zip(src_state_dict.items(), state_dict.items()): + if src_weight.shape == trg_weight.shape: + compatible_state_dict[src_key] = trg_weight + else: + # Keep the model's own weight when the target weight shape is incompatible + compatible_state_dict[src_key] = src_weight + incompatible_keys.append(src_key) + + missing_keys, _ = super().load_state_dict(compatible_state_dict, strict=False) + if len(missing_keys) or len(incompatible_keys): + logger.warning( + "Partially loading the weights as the model architecture and the given state dict are " + "incompatible! \nIgnore this warning if you plan on fine-tuning this model\n" + f"Incompatible keys: {incompatible_keys}\n" + f"Missing keys: {missing_keys}\n" + )
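+
+# Behavior sketch: keys in `skip_keys_on_load` are dropped first, then a strict load is
+# attempted; on a shape mismatch the model keeps its own initial weight for that key and
+# a warning is logged instead of raising. Illustrative only ("weights.pt" is hypothetical):
+# >>> state_dict = torch.load("weights.pt", map_location="cpu")
+# >>> model.load_state_dict(state_dict)   # tolerates missing/incompatible layers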
+ + +
+[docs] + def save( + self, + path: str | os.PathLike, + filename: Optional[str] = None, + save_preprocessor: Optional[bool] = True, + config_filename: Optional[str] = None, + ): + """ + Save model weights and config to a local path + + Args: + path: A local directory to save model, config, etc. + filename: Model weights filename + save_preprocessor: Whether to save the preprocessor(s) along with the model or not + config_filename: Model config filename + """ + # Save model and config to the target directory + config_filename = config_filename or self.config_filename + filename = filename or self.model_filename + os.makedirs(path, exist_ok=True) + + self.config.save(save_dir=path, filename=config_filename) + + model_save_path = os.path.join(path, filename) + torch.save(self.state_dict(), model_save_path) + + if save_preprocessor: + if self.preprocessor is not None: + self.preprocessor.save(path)
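+
+# Usage sketch: everything is written into one directory, using the class-level default
+# file names unless overridden (assuming the defaults are `model.pt`/`model_config.yaml`):
+# >>> model.save("my-model-dir")   # hypothetical directory
+# my-model-dir/ then holds the weights, the config, and the preprocessor files.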
+ + +
+[docs] + def push_to_hub( + self, + repo_id: str, + filename: Optional[str] = None, + config_filename: Optional[str] = None, + push_preprocessor: Optional[bool] = True, + commit_message: Optional[str] = None, + private: Optional[bool] = False, + ): + """ + Push the model and required files to the hub + + Args: + repo_id: The path (id or repo name) on the hub + filename: Model file name + config_filename: Config file name + push_preprocessor: Whether to push preprocessor(s) or not + commit_message (str): Commit message for this push + private (bool): Whether to create a private repo or not + """ + config_filename = config_filename or self.config_filename + filename = filename or self.model_filename + + # create remote repo + create_repo(repo_id, repo_type="model", exist_ok=True, private=private) + + # save to tmp and prepare for push + cache_path = tempfile.mkdtemp() + self.save(cache_path, filename=filename, config_filename=config_filename) + if commit_message is None: + commit_message = "Hezar: Upload model and config" + + # upload config file + self.config.push_to_hub( + repo_id, + filename=config_filename, + repo_type="model", + commit_message=commit_message, + ) + # upload preprocessor(s) + if push_preprocessor: + if self.preprocessor is not None: + self.preprocessor.push_to_hub(repo_id, commit_message=commit_message, private=private) + + # upload model file + weights_path = os.path.join(cache_path, filename) + upload_file( + path_or_fileobj=weights_path, + path_in_repo=filename, + repo_id=repo_id, + commit_message=commit_message, + ) + + logger.log_upload_success( + name=f"{self.__class__.__name__}(name={self.config.name})", + target_path=os.path.join(repo_id, filename), + )
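+
+# Usage sketch (repo id is hypothetical): the model is saved to a temp dir first, then
+# the config, preprocessor(s), and weights are uploaded, in that order, as coded above.
+# >>> model.push_to_hub("my-username/my-model", commit_message="Upload model", private=True)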
+ + +
+[docs] + def forward(self, *model_inputs, **kwargs) -> Dict: + """ + Forward inputs through the model and return logits, etc. + + Args: + model_inputs: The required inputs for the model forward + + Returns: + A dict of outputs like logits, loss, etc. + """ + raise NotImplementedError
+ + +
+[docs] + def compute_loss(self, inputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: + """ + Compute loss on the model outputs against the given labels + + Args: + inputs: Input tensor to compute loss on + targets: Target tensor + + Returns: + Loss tensor + """ + raise NotImplementedError
+ + +
+[docs] + def generate(self, *model_inputs, **kwargs) -> torch.Tensor: + """ + Generation method for all generative models. Generative models have the `is_generative` attribute set to True. + The behavior of this method is usually controlled by `generation` part of the model's config. + + Args: + model_inputs: Model inputs for generation, usually the same as forward's `model_inputs` + **kwargs: Generation kwargs + + Returns: + Generated output tensor + """ + raise NotImplementedError
+ + +
+[docs] + def preprocess(self, *raw_inputs: Any | List[Any], **kwargs): + """ + Given raw inputs, preprocess the inputs and prepare them for model's `forward()`. + + Args: + raw_inputs: Raw model inputs + **kwargs: Extra kwargs specific to the model. See the model's specific class for more info + + Returns: + A dict of inputs for model forward + """ + return raw_inputs
+ + +
+[docs] + def post_process(self, *model_outputs: Dict[str, torch.Tensor] | torch.Tensor, **kwargs): + """ + Process model outputs and return human-readable results. Called in `self.predict()` + + Args: + model_outputs: model outputs to process + **kwargs: extra arguments specific to the derived class + + Returns: + Processed model output values and converted to human-readable results + """ + return model_outputs
+ + +
+[docs] + @torch.inference_mode() + def predict( + self, + inputs: Any | List[Any], + device: str | torch.device = None, + unpack_forward_inputs: bool = True, + **kwargs, + ) -> Dict | List[Dict] | torch.Tensor | Iterable | ModelOutput: + """ + Perform an end-to-end prediction on raw inputs. + + If the model is a generative model, it has to implement the `generate()` method too which will be called + instead of `forward()`. (`forward()` method is called internally within the `generate()` method) + + Args: + inputs: Raw inputs e.g, a list of texts, path to images, etc. + device: What device to perform inference on + unpack_forward_inputs: Whether to unpack forward inputs. Set to False if you want to send preprocess outputs + directly to the forward/generate method without unpacking it. Note that this only applies to the cases that + the preprocess method's output is a dict-like/mapping object. + **kwargs: Other arguments for `preprocess`, `forward`, `generate` and `post_process`. each will be passed to + the correct method automatically. + + Returns: + Output dict of results + """ + # Unpack kwargs for each step + preprocess_kwargs, forward_kwargs, post_process_kwargs = self._unpack_prediction_kwargs(**kwargs) + invalid_kwargs = { + k: v for k, v in kwargs.items() if k not in {**preprocess_kwargs, **forward_kwargs, **post_process_kwargs} + } + if len(invalid_kwargs): + logger.warning( + f"Unrecognized arguments {list(invalid_kwargs.keys())} passed to `predict` method for " + f"`{self.__class__.__name__}`" + ) + + # Put model in eval mode + self.eval() + + # Preprocessing step + model_inputs = self.preprocess(inputs, **preprocess_kwargs) + + # Map inputs and model to device + device = device or self.device + model_inputs = self._move_inputs_to_device(model_inputs, device) + self.to(device) + + # Specify model inference function + inference_fn = self.generate if self.is_generative else self.__call__ + + # Model inference step (forward for regular models and generate for generative models) + if isinstance(model_inputs, Mapping) and unpack_forward_inputs: + model_outputs = inference_fn(**model_inputs, **forward_kwargs) + else: + model_outputs = inference_fn(model_inputs, **forward_kwargs) + + # Post-processing step + processed_outputs = self.post_process(model_outputs, **post_process_kwargs) + return processed_outputs
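+
+# Usage sketch: extra kwargs are routed by signature inspection (see
+# `_unpack_prediction_kwargs` below), so preprocess/forward/post_process options can be
+# mixed in a single call; the argument names are model-dependent and illustrative here:
+# >>> outputs = model.predict(["a sample text"], device="cuda", return_scores=True)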
+ + + @staticmethod + def _move_inputs_to_device(inputs, device): + """ + Move all input tensors in the inputs to the device + + Args: + inputs: A torch.Tensor or a batch dict that contains tensors in its values + device: A torch compatible device + + Returns: + Same inputs moved to the device + """ + if isinstance(inputs, torch.Tensor): + inputs = inputs.to(device) + elif isinstance(inputs, Mapping): + inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()} + else: + raise ValueError( + f"Cannot move inputs of type `{type(inputs)}` to `{device}`. " + f"Inputs data type must be either `torch.Tensor` or a mapping object like `dict`!" + ) + return inputs + + def _unpack_prediction_kwargs(self, **kwargs): + """ + The `predict` method can accept extra parameters for each of the `preprocess`, `forward/generate` + and `post_process` methods. These parameters are passed as keyword arguments so that we have to make sure that + each of them are passed to the correct method. + + Args: + **kwargs: The kwargs to be unpacked + + Returns: + A 3-sized tuple of (preprocess_kwargs, forward_kwargs, post_process_kwargs) + """ + # Whether to use forward or generate based on model type + inference_fn = type(self).generate if self.is_generative else type(self).forward + + def _get_positional_kwargs(fn): + params = dict(inspect.signature(fn).parameters) + params = {k: v for k, v in params.items() if v.default != v.empty} + return params + + # Get keyword arguments from the child class (ignore positional arguments) + preprocess_kwargs_keys = list(_get_positional_kwargs(type(self).preprocess).keys()) + post_process_kwargs_keys = list(_get_positional_kwargs(type(self).post_process).keys()) + forward_kwargs_keys = list(_get_positional_kwargs(inference_fn).keys()) + + preprocess_kwargs = {k: kwargs.get(k) for k in preprocess_kwargs_keys if k in kwargs} + forward_kwargs = {k: kwargs.get(k) for k in forward_kwargs_keys if k in kwargs} + post_process_kwargs = {k: kwargs.get(k) for k in post_process_kwargs_keys if k in kwargs} + + return preprocess_kwargs, forward_kwargs, post_process_kwargs + + @property + def device(self): + """ + Get the model's device. This method is only safe when all weights of the model are on the same device. + """ + return next(self.parameters()).device + + @property + def criterion(self): + return self._criterion + + @criterion.setter + def criterion(self, value): + if isinstance(value, str): + self._criterion = self._set_criterion(value) + elif isinstance(value, nn.Module): + self._criterion = value + else: + raise ValueError(f"Criterion value must be either a name or a PyTorch `nn.Module`, got {type(value)}!") + + @property + def preprocessor(self) -> PreprocessorsContainer: + return self._preprocessor + + @preprocessor.setter + def preprocessor(self, value: Preprocessor | PreprocessorsContainer | List[Preprocessor]): + """ + A safe setter method for model's preprocessor. Value must be either a Preprocessor, a list of Preprocessors or + a PreprocessorsContainer instance. 
+ """ + if isinstance(value, Preprocessor): + preprocessor = PreprocessorsContainer() + preprocessor[value.config.name] = value + elif isinstance(value, Mapping): + preprocessor = PreprocessorsContainer(**value) + elif isinstance(value, list): + preprocessor = PreprocessorsContainer(**{p.config.name: p for p in value}) + elif value is None: + preprocessor = None + else: + raise ValueError( + f"Preprocessor value must be a `Preprocessor` " + f"or a list of Preprocessor objects" + f"or `PreprocessorContainer` instance not `{type(value)}`!" + ) + self._preprocessor = preprocessor + + @property + def num_parameters(self): + return sum(p.numel() for p in self.parameters()) + + @property + def num_trainable_parameters(self): + return sum(p.numel() for p in self.parameters() if p.requires_grad)
+ \ No newline at end of file diff --git a/_modules/hezar/models/model_outputs.html b/_modules/hezar/models/model_outputs.html new file mode 100644 index 00000000..d2ddc39c --- /dev/null +++ b/_modules/hezar/models/model_outputs.html @@ -0,0 +1,610 @@

Source code for hezar.models.model_outputs

+"""
+Define all model outputs here
+"""
+from dataclasses import asdict, dataclass
+from typing import Dict, List, Optional
+
+
+
+[docs] +@dataclass +class ModelOutput: + """ + Base class for all models' prediction outputs (`model.predict()`/`model.post_process()` outputs). + + Note that prediction outputs must all be a list of `ModelOutput` objects since we consider only batch inferences. + + The helper functions in the class enable it to be treated as a mapping or a dict object. + """ + +
+[docs] + def dict(self): + return asdict(self)
+ + + def __str__(self): + return str({k: v for k, v in self.dict().items() if v is not None}) + + def __repr__(self): + return str(self) + + def __getitem__(self, item): + try: + return self.dict()[item] + except KeyError: + raise AttributeError(f"`{self.__class__.__name__}` has no attribute `{item}`!") + + def __len__(self): + return len(self.dict()) + + def __iter__(self): + return iter(self.dict()) + +
+[docs] + def keys(self): + return list(self.dict().keys())
+ + +
+[docs] + def values(self): + return list(self.dict().values())
+ + +
+[docs] + def items(self): + return self.dict().items()
+
+ + + +
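+
+# Behavior sketch: the dunder methods above make every output dataclass below act like a
+# read-only dict (values are illustrative):
+# >>> out = TextClassificationOutput(label="positive", score=0.98)
+# >>> out["label"], out.keys()
+# ('positive', ['label', 'score'])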
+[docs] +@dataclass(repr=False) +class LanguageModelingOutput(ModelOutput): + token: Optional[str] = None + sequence: Optional[str] = None + token_id: Optional[int] = None + score: Optional[float] = None
+ + + +
+[docs] +@dataclass(repr=False) +class TextClassificationOutput(ModelOutput): + label: Optional[str] = None + score: Optional[float] = None
+ + + +
+[docs] +@dataclass(repr=False) +class SequenceLabelingOutput(ModelOutput): + token: Optional[List[List[str]]] = None + label: Optional[List[List[str]]] = None + start: Optional[int] = None + end: Optional[int] = None + score: Optional[List[List[float]]] = None
+ + + +
+[docs] +@dataclass(repr=False) +class TextGenerationOutput(ModelOutput): + text: Optional[str] = None
+ + + +
+[docs] +@dataclass(repr=False) +class SpeechRecognitionOutput(ModelOutput): + text: Optional[str] = None + chunks: Optional[List[Dict]] = None
+ + + +
+[docs] +@dataclass(repr=False) +class Image2TextOutput(ModelOutput): + text: Optional[str] = None + score: Optional[float] = None
+ \ No newline at end of file diff --git a/_modules/hezar/models/sequence_labeling/bert/bert_sequence_labeling.html b/_modules/hezar/models/sequence_labeling/bert/bert_sequence_labeling.html new file mode 100644 index 00000000..20e09244 --- /dev/null +++ b/_modules/hezar/models/sequence_labeling/bert/bert_sequence_labeling.html @@ -0,0 +1,646 @@

Source code for hezar.models.sequence_labeling.bert.bert_sequence_labeling

+"""
+A BERT model for sequence labeling built using HuggingFace Transformers
+"""
+from __future__ import annotations
+
+from typing import Dict, List
+
+import torch
+import torch.nn as nn
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import SequenceLabelingOutput
+from .bert_sequence_labeling_config import BertSequenceLabelingConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import BertConfig, BertModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+[docs] +@register_model("bert_sequence_labeling", BertSequenceLabelingConfig) +class BertSequenceLabeling(Model): + """ + BERT model for sequence labeling + """ + + required_backends = _required_backends + tokenizer_name = "wordpiece_tokenizer" + skip_keys_on_load = ["model.embeddings.position_ids", "bert.embeddings.position_ids"] # For older versions + + def __init__(self, config: BertSequenceLabelingConfig, **kwargs): + super().__init__(config, **kwargs) + self.bert = BertModel(self._build_inner_config()) + classifier_dropout = ( + config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob + ) + self.dropout = nn.Dropout(classifier_dropout) + self.classifier = nn.Linear(config.hidden_size, config.num_labels) + + def _build_inner_config(self): + if self.config.num_labels is None and self.config.id2label is None: + raise ValueError("Both `num_labels` and `id2label` are None. Please provide at least one of them!") + if self.config.id2label and self.config.num_labels is None: + self.config.num_labels = len(self.config.id2label) + bert_config = BertConfig(**self.config) + return bert_config + +
+[docs] + def forward( + self, + token_ids, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + output_attentions=None, + output_hidden_states=None, + **kwargs, + ) -> Dict: + lm_outputs = self.bert( + token_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + sequence_output = lm_outputs[0] + + sequence_output = self.dropout(sequence_output) + logits = self.classifier(sequence_output) + + outputs = { + "logits": logits, + "hidden_states": lm_outputs.hidden_states, + "attentions": lm_outputs.attentions, + "tokens": kwargs.get("tokens", None), + "offsets": kwargs.get("offsets_mapping", None) + } + return outputs
+ + +
+[docs] + def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: + criterion = nn.CrossEntropyLoss() + loss = criterion(logits.view(-1, self.config.num_labels), labels.view(-1)) + return loss
+ + +
+[docs] + def preprocess(self, inputs: str | List[str], **kwargs): + if isinstance(inputs, str): + inputs = [inputs] + if "text_normalizer" in self.preprocessor: + normalizer = self.preprocessor["text_normalizer"] + inputs = normalizer(inputs) + tokenizer = self.preprocessor[self.tokenizer_name] + inputs = tokenizer( + inputs, + return_word_ids=True, + return_tokens=True, + return_offsets_mapping=True, + padding=True, + truncation=True, + return_tensors="pt", + device=self.device, + ) + return inputs
+ + +
+[docs] + def post_process( + self, + model_outputs: Dict[str, torch.Tensor], + return_offsets: bool = False, + return_scores: bool = False, + ): + logits = model_outputs["logits"].softmax(2) + tokens = model_outputs["tokens"] + offsets = model_outputs["offsets"] + probs, predictions = logits.max(2) + predictions = [[self.config.id2label[p.item()] for p in prediction] for prediction in predictions] + outputs = [] + for tokens_list, prediction, probs_, offsets_mapping in zip(tokens, predictions, probs, offsets): + results = [] + for token, label, prob, offset in zip(tokens_list, prediction, probs_, offsets_mapping): + if token not in self.config.prediction_skip_tokens: + token_results = {"token": token, "label": label} + if return_scores: + token_results["score"] = prob.item() + if return_offsets: + start, end = offset + token_results["start"] = start + token_results["end"] = end + results.append(SequenceLabelingOutput(**token_results)) + outputs.append(results) + return outputs
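+
+# End-to-end usage sketch for this model (repo id is hypothetical):
+# >>> from hezar.models import Model
+# >>> model = Model.load("hezarai/bert-fa-ner")
+# >>> model.predict("This is a sample sentence", return_scores=True, return_offsets=True)
+# Each input yields a list of SequenceLabelingOutput objects with token/label (plus
+# score/start/end when requested); [CLS]/[SEP] are dropped via `prediction_skip_tokens`.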
+ \ No newline at end of file diff --git a/_modules/hezar/models/sequence_labeling/bert/bert_sequence_labeling_config.html b/_modules/hezar/models/sequence_labeling/bert/bert_sequence_labeling_config.html new file mode 100644 index 00000000..758c93c0 --- /dev/null +++ b/_modules/hezar/models/sequence_labeling/bert/bert_sequence_labeling_config.html @@ -0,0 +1,523 @@

Source code for hezar.models.sequence_labeling.bert.bert_sequence_labeling_config

+from dataclasses import dataclass, field
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+[docs] +@dataclass +class BertSequenceLabelingConfig(ModelConfig): + name = "bert_sequence_labeling" + task: str = TaskType.SEQUENCE_LABELING + num_labels: int = None + id2label: dict = None + vocab_size: int = 42000 + hidden_size: int = 768 + num_hidden_layers: int = 12 + num_attention_heads: int = 12 + intermediate_size: int = 3072 + hidden_act: str = "gelu" + hidden_dropout_prob: float = 0.1 + attention_probs_dropout_prob: float = 0.1 + max_position_embeddings: int = 512 + type_vocab_size: int = 2 + initializer_range: float = 0.02 + layer_norm_eps: float = 1e-12 + pad_token_id: int = 0 + position_embedding_type: str = "absolute" + use_cache: bool = True + classifier_dropout: float = None + prediction_skip_tokens: list = field(default_factory=lambda: ["[SEP]", "[CLS]"])
+ +
+ \ No newline at end of file diff --git a/_modules/hezar/models/sequence_labeling/distilbert/distilbert_sequence_labeling.html b/_modules/hezar/models/sequence_labeling/distilbert/distilbert_sequence_labeling.html new file mode 100644 index 00000000..2fa23290 --- /dev/null +++ b/_modules/hezar/models/sequence_labeling/distilbert/distilbert_sequence_labeling.html @@ -0,0 +1,642 @@

Source code for hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling

+"""
+A DistilBERT model for sequence labeling built using HuggingFace Transformers
+"""
+from __future__ import annotations
+
+from typing import Dict, List
+
+import torch
+from torch import nn
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import SequenceLabelingOutput
+from .distilbert_sequence_labeling_config import DistilBertSequenceLabelingConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import DistilBertConfig, DistilBertModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+[docs] +@register_model("distilbert_sequence_labeling", DistilBertSequenceLabelingConfig) +class DistilBertSequenceLabeling(Model): + required_backends = _required_backends + tokenizer_name = "wordpiece_tokenizer" + + def __init__(self, config: DistilBertSequenceLabelingConfig, **kwargs): + super().__init__(config, **kwargs) + self.distilbert = DistilBertModel(self._build_inner_config()) + classifier_dropout = ( + config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob + ) + self.dropout = nn.Dropout(classifier_dropout) + self.classifier = nn.Linear(config.dim, config.num_labels) + + def _build_inner_config(self): + """ + Build the inner config for DistilBERT. If `num_labels` is not provided, it is inferred from `id2label`. + + Returns: + A `transformers.DistilBertConfig` instance built from this model's config + """ + if self.config.num_labels is None and self.config.id2label is None: + raise ValueError("Both `num_labels` and `id2label` are None. Please provide at least one of them!") + if self.config.id2label is not None and self.config.num_labels is None: + self.config.num_labels = len(self.config.id2label) + config = DistilBertConfig(**self.config) + return config + 
+[docs] + def forward( + self, + token_ids, + attention_mask=None, + head_mask=None, + inputs_embeds=None, + output_attentions=None, + output_hidden_states=None, + **kwargs, + ) -> Dict: + lm_outputs = self.distilbert( + input_ids=token_ids, + attention_mask=attention_mask, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + sequence_output = lm_outputs[0] + + sequence_output = self.dropout(sequence_output) + logits = self.classifier(sequence_output) + + outputs = { + "logits": logits, + "hidden_states": lm_outputs.hidden_states, + "attentions": lm_outputs.attentions, + "tokens": kwargs.get("tokens", None), + "offsets": kwargs.get("offsets_mapping", None) + } + return outputs
+ + +
+[docs] + def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: + criterion = nn.CrossEntropyLoss() + loss = criterion(logits.view(-1, self.config.num_labels), labels.view(-1)) + return loss
+ + +
+[docs] + def preprocess(self, inputs: str | List[str], **kwargs): + if isinstance(inputs, str): + inputs = [inputs] + if "text_normalizer" in self.preprocessor: + normalizer = self.preprocessor["text_normalizer"] + inputs = normalizer(inputs) + tokenizer = self.preprocessor[self.tokenizer_name] + inputs = tokenizer( + inputs, + return_word_ids=True, + return_tokens=True, + return_offsets_mapping=True, + padding=True, + truncation=True, + return_tensors="pt", + device=self.device, + ) + return inputs
+ + +
+[docs] + def post_process( + self, + model_outputs: Dict[str, torch.Tensor], + return_offsets: bool = False, + return_scores: bool = False, + ): + logits = model_outputs["logits"].softmax(2) + tokens = model_outputs["tokens"] + offsets = model_outputs["offsets"] + probs, predictions = logits.max(2) + predictions = [[self.config.id2label[p.item()] for p in prediction] for prediction in predictions] + outputs = [] + for tokens_list, prediction, probs_, offsets_mapping in zip(tokens, predictions, probs, offsets): + results = [] + for token, label, prob, offset in zip(tokens_list, prediction, probs_, offsets_mapping): + if token not in self.config.prediction_skip_tokens: + token_results = {"token": token, "label": label} + if return_scores: + token_results["score"] = prob.item() + if return_offsets: + start, end = offset + token_results["start"] = start + token_results["end"] = end + results.append(SequenceLabelingOutput(**token_results)) + outputs.append(results) + return outputs
+ \ No newline at end of file diff --git a/_modules/hezar/models/sequence_labeling/distilbert/distilbert_sequence_labeling_config.html b/_modules/hezar/models/sequence_labeling/distilbert/distilbert_sequence_labeling_config.html new file mode 100644 index 00000000..8d9caa97 --- /dev/null +++ b/_modules/hezar/models/sequence_labeling/distilbert/distilbert_sequence_labeling_config.html @@ -0,0 +1,524 @@

Source code for hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config

+from dataclasses import dataclass, field
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+[docs] +@dataclass +class DistilBertSequenceLabelingConfig(ModelConfig): + name = "distilbert_sequence_labeling" + task: str = TaskType.SEQUENCE_LABELING + num_labels: int = None + id2label: dict = None + activation: str = "gelu" + attention_dropout: float = 0.1 + dim: int = 768 + dropout: float = 0.1 + initializer_range: float = 0.02 + max_position_embeddings: int = 512 + n_heads: int = 12 + n_layers: int = 6 + output_past: bool = True + pad_token_id: int = 0 + qa_dropout: float = 0.1 + tie_weights_: bool = True + vocab_size: int = 42000 + + hidden_dropout_prob: float = 0.1 + use_cache: bool = True + classifier_dropout: float = None + prediction_skip_tokens: list = field(default_factory=lambda: ["[SEP]", "[CLS]"])
+ \ No newline at end of file diff --git a/_modules/hezar/models/sequence_labeling/roberta/roberta_sequence_labeling.html b/_modules/hezar/models/sequence_labeling/roberta/roberta_sequence_labeling.html new file mode 100644 index 00000000..ca3a1062 --- /dev/null +++ b/_modules/hezar/models/sequence_labeling/roberta/roberta_sequence_labeling.html @@ -0,0 +1,673 @@

Source code for hezar.models.sequence_labeling.roberta.roberta_sequence_labeling

+"""
+A RoBERTa Language Model (HuggingFace Transformers) wrapped by a Hezar Model class
+"""
+from __future__ import annotations
+
+from typing import Dict, List
+
+import torch
+from torch import nn, tanh
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import SequenceLabelingOutput
+from .roberta_sequence_labeling_config import RobertaSequenceLabelingConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import RobertaConfig, RobertaModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+[docs] +@register_model("roberta_sequence_labeling", config_class=RobertaSequenceLabelingConfig) +class RobertaSequenceLabeling(Model): + """ + A standard 🤗Transformers RoBERTa model for sequence labeling + + Args: + config: The whole model config including arguments needed for the inner 🤗Transformers model. + """ + + required_backends = _required_backends + tokenizer_name = "bpe_tokenizer" + skip_keys_on_load = ["roberta.embeddings.position_ids", "model.embeddings.position_ids"] + + def __init__(self, config, **kwargs): + super().__init__(config=config, **kwargs) + self.roberta = RobertaModel(self._build_inner_config(), add_pooling_layer=False) + self.classifier = RobertaClassificationHead(self.config) + + def _build_inner_config(self): + if self.config.num_labels is None and self.config.id2label is None: + raise ValueError("Both `num_labels` and `id2label` are None. Please provide at least one of them!") + if self.config.id2label and self.config.num_labels is None: + self.config.num_labels = len(self.config.id2label) + config = RobertaConfig(**self.config) + return config + +
+[docs] + def forward( + self, + token_ids, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + **kwargs, + ): + lm_outputs = self.roberta( + token_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = lm_outputs[0] + logits = self.classifier(sequence_output) + + outputs = { + "logits": logits, + "hidden_states": lm_outputs.hidden_states, + "attentions": lm_outputs.attentions, + "tokens": kwargs.get("tokens", None), + "offsets": kwargs.get("offsets_mapping", None) + } + return outputs
+ + +
+[docs] + def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: + criterion = nn.CrossEntropyLoss() + loss = criterion(logits.view(-1, self.config.num_labels), labels.view(-1)) + return loss
+ + +
+[docs] + def preprocess(self, inputs: str | List[str], **kwargs): + if isinstance(inputs, str): + inputs = [inputs] + if "text_normalizer" in self.preprocessor: + normalizer = self.preprocessor["text_normalizer"] + inputs = normalizer(inputs) + tokenizer = self.preprocessor[self.tokenizer_name] + inputs = tokenizer( + inputs, + return_word_ids=True, + return_tokens=True, + return_offsets_mapping=True, + padding=True, + truncation=True, + return_tensors="pt", + device=self.device, + ) + return inputs
+ + +
+[docs] + def post_process( + self, + model_outputs: Dict[str, torch.Tensor], + return_offsets: bool = False, + return_scores: bool = False, + ): + logits = model_outputs["logits"].softmax(2) + tokens = model_outputs["tokens"] + offsets = model_outputs["offsets"] + probs, predictions = logits.max(2) + predictions = [[self.config.id2label[p.item()] for p in prediction] for prediction in predictions] + outputs = [] + for tokens_list, prediction, probs_, offsets_mapping in zip(tokens, predictions, probs, offsets): + results = [] + for token, label, prob, offset in zip(tokens_list, prediction, probs_, offsets_mapping): + if token not in self.config.prediction_skip_tokens: + token_results = {"token": token, "label": label} + if return_scores: + token_results["score"] = prob.item() + if return_offsets: + start, end = offset + token_results["start"] = start + token_results["end"] = end + results.append(SequenceLabelingOutput(**token_results)) + outputs.append(results) + return outputs
+
+ + + +
+[docs] +class RobertaClassificationHead(nn.Module): + """Head for sentence-level classification tasks.""" + + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + classifier_dropout = ( + config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob + ) + self.dropout = nn.Dropout(classifier_dropout) + self.out_proj = nn.Linear(config.hidden_size, config.num_labels) + +
+[docs] + def forward(self, inputs, **kwargs): + x = inputs # Apply to all tokens + x = self.dropout(x) + x = self.dense(x) + x = tanh(x) + x = self.dropout(x) + x = self.out_proj(x) + return x
+
+ \ No newline at end of file diff --git a/_modules/hezar/models/sequence_labeling/roberta/roberta_sequence_labeling_config.html b/_modules/hezar/models/sequence_labeling/roberta/roberta_sequence_labeling_config.html new file mode 100644 index 00000000..1549b4f6 --- /dev/null +++ b/_modules/hezar/models/sequence_labeling/roberta/roberta_sequence_labeling_config.html @@ -0,0 +1,527 @@

Source code for hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config

+from dataclasses import dataclass
+from typing import Tuple
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+[docs] +@dataclass +class RobertaSequenceLabelingConfig(ModelConfig): + name = "roberta_sequence_labeling" + task: str = TaskType.SEQUENCE_LABELING + num_labels: int = None + id2label: dict = None + attention_probs_dropout_prob: float = 0.1 + bos_token_id: int = 0 + eos_token_id: int = 2 + gradient_checkpointing: bool = False + hidden_act: str = "gelu" + hidden_dropout_prob: float = 0.1 + hidden_size: int = 768 + classifier_dropout: float = None + initializer_range: float = 0.02 + intermediate_size: int = 3072 + layer_norm_eps: float = 1e-12 + max_position_embeddings: int = 514 + num_attention_heads: int = 12 + num_hidden_layers: int = 12 + pad_token_id: int = 1 + position_embedding_type: str = "absolute" + type_vocab_size: int = 1 + use_cache: bool = True + vocab_size: int = 42000 + prediction_skip_tokens: Tuple[str, ...] = ("<s>", "</s>", "<pad>")
+ \ No newline at end of file diff --git a/_modules/hezar/models/speech_recognition/whisper/whisper_feature_extractor.html b/_modules/hezar/models/speech_recognition/whisper/whisper_feature_extractor.html new file mode 100644 index 00000000..83f78ffc --- /dev/null +++ b/_modules/hezar/models/speech_recognition/whisper/whisper_feature_extractor.html @@ -0,0 +1,692 @@

Source code for hezar.models.speech_recognition.whisper.whisper_feature_extractor

+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import List
+
+import numpy as np
+import torch
+
+from ....preprocessors import AudioFeatureExtractor, AudioFeatureExtractorConfig
+from ....registry import register_preprocessor
+from ....utils import (
+    Logger,
+    convert_batch_dict_dtype,
+    mel_filter_bank,
+    spectrogram,
+    window_function,
+)
+
+
+logger = Logger(__name__)
+
+
+
+[docs] +@dataclass +class WhisperFeatureExtractorConfig(AudioFeatureExtractorConfig): + name = "whisper_feature_extractor" + feature_size: int = 80 + sampling_rate: int = 16000 + hop_length: int = 160 + chunk_length: int = 30 + n_fft: int = 400 + padding: str = "longest" + padding_value: float = 0.0 + padding_side: str = "right" + return_attention_mask: bool = False
+ + + +
+[docs] +@register_preprocessor("whisper_feature_extractor", config_class=WhisperFeatureExtractorConfig) +class WhisperFeatureExtractor(AudioFeatureExtractor): + """ + A feature extractor for Whisper model. + + This feature extractor inherits from `AudioFeatureExtractor` which contains most of the main methods. + + This class extracts mel-filter bank features from raw speech using a custom numpy implementation of the `Short Time + Fourier Transform` which should match pytorch's `torch.stft` equivalent. + """ + + def __init__(self, config: WhisperFeatureExtractorConfig, **kwargs): + super().__init__(config=config, **kwargs) + self.n_samples = self.config.chunk_length * self.config.sampling_rate + self.nb_max_frames = self.n_samples // self.config.hop_length + self.mel_filters = mel_filter_bank( + num_frequency_bins=1 + self.config.n_fft // 2, + num_mel_filters=self.config.feature_size, + min_frequency=0.0, + max_frequency=8000.0, + sampling_rate=self.config.sampling_rate, + norm="slaney", + mel_scale="slaney", + ) + + def __call__( + self, + raw_speech: np.ndarray | List[float] | List[np.ndarray] | List[List[float]], + device: str = None, + truncation: bool = True, + pad_to_multiple_of: int = None, + return_tensors: str = None, + return_attention_mask: bool = None, + padding: str = "max_length", + max_length: int = None, + sampling_rate: int = None, + do_normalize: bool = None, + **kwargs, + ): + sampling_rate = sampling_rate or self.config.sampling_rate + if sampling_rate is not None: + if sampling_rate != self.config.sampling_rate: + raise ValueError( + f"The model corresponding to this feature extractor: {self.__class__.__name__} was trained using a" + f" sampling rate of {self.config.sampling_rate}. Please make sure that the provided `raw_speech` " + f"input was sampled with {self.config.sampling_rate} and not {sampling_rate}." + ) + else: + logger.warning( + "It is strongly recommended to pass the `sampling_rate` argument to this function. " + "Failing to do so can result in silent errors that might be hard to debug." 
+ ) + + is_batched_numpy = isinstance(raw_speech, np.ndarray) and len(raw_speech.shape) > 1 + if is_batched_numpy and len(raw_speech.shape) > 2: + raise ValueError(f"Only mono-channel audio is supported for input to {self}") + is_batched = is_batched_numpy or ( + isinstance(raw_speech, (list, tuple)) and (isinstance(raw_speech[0], (np.ndarray, tuple, list))) + ) + + if is_batched: + raw_speech = [np.asarray([speech], dtype=np.float32).T for speech in raw_speech] + elif not is_batched and not isinstance(raw_speech, np.ndarray): + raw_speech = np.asarray(raw_speech, dtype=np.float32) + elif isinstance(raw_speech, np.ndarray) and raw_speech.dtype is np.dtype(np.float64): + raw_speech = raw_speech.astype(np.float32) + + # always return batch + if not is_batched: + raw_speech = [np.asarray([raw_speech]).T] + + batched_speech = {"input_features": raw_speech} + + padded_inputs = self.pad( + batched_speech, + padding=padding, + max_length=max_length if max_length else self.n_samples, + truncation=truncation, + pad_to_multiple_of=pad_to_multiple_of, + return_attention_mask=return_attention_mask or do_normalize, + return_tensors="np", + ) + + # zero-mean and unit-variance normalization + if do_normalize: + padded_inputs["input_features"] = self.zero_mean_unit_var_norm( + padded_inputs["input_features"], + attention_mask=padded_inputs["attention_mask"], + padding_value=self.config.padding_value, + ) + padded_inputs["input_features"] = np.stack(padded_inputs["input_features"], axis=0) + + # make sure list is in array format + input_features = padded_inputs.get("input_features").transpose(2, 0, 1) + + input_features = [self._np_extract_fbank_features(waveform) for waveform in input_features[0]] + + if isinstance(input_features[0], List): + padded_inputs["input_features"] = [np.asarray(feature, dtype=np.float32) for feature in input_features] + else: + padded_inputs["input_features"] = input_features + + if return_attention_mask: + # rescale from sample (48000) to feature (3000) + padded_inputs["attention_mask"] = padded_inputs["attention_mask"][:, :: self.config.hop_length] + + if return_tensors is not None: + padded_inputs = {k: np.asarray(v) for k, v in padded_inputs.items()} + padded_inputs = convert_batch_dict_dtype(padded_inputs, dtype=return_tensors) + if device: + padded_inputs = { + k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in padded_inputs.items() + } + + return padded_inputs + + def _np_extract_fbank_features(self, waveform: np.array) -> np.ndarray: + """ + Compute the log-mel spectrogram of the provided audio, gives similar results to Whisper's original torch + implementation with 1e-5 tolerance. + """ + log_spec = spectrogram( + waveform, + window_function(self.config.n_fft, "hann"), + frame_length=self.config.n_fft, + hop_length=self.config.hop_length, + power=2.0, + mel_filters=self.mel_filters, + log_mel="log10", + ) + log_spec = log_spec[:, :-1] + log_spec = np.maximum(log_spec, log_spec.max() - 8.0) + log_spec = (log_spec + 4.0) / 4.0 + return log_spec + +
+[docs] + @staticmethod + def zero_mean_unit_var_norm( + input_values: List[np.ndarray], attention_mask: List[np.ndarray], padding_value: float = 0.0 + ) -> List[np.ndarray]: + """ + Every array in the list is normalized to have zero mean and unit variance + """ + if attention_mask is not None: + attention_mask = np.array(attention_mask, np.int32) + normed_input_values = [] + + for vector, length in zip(input_values, attention_mask.sum(-1)): + normed_slice = (vector - vector[:length].mean()) / np.sqrt(vector[:length].var() + 1e-7) + if length < normed_slice.shape[0]: + normed_slice[length:] = padding_value + + normed_input_values.append(normed_slice) + else: + normed_input_values = [(x - x.mean()) / np.sqrt(x.var() + 1e-7) for x in input_values] + + return normed_input_values
+
+ \ No newline at end of file diff --git a/_modules/hezar/models/speech_recognition/whisper/whisper_speech_recognition.html b/_modules/hezar/models/speech_recognition/whisper/whisper_speech_recognition.html new file mode 100644 index 00000000..cb99dc61 --- /dev/null +++ b/_modules/hezar/models/speech_recognition/whisper/whisper_speech_recognition.html @@ -0,0 +1,705 @@

Source code for hezar.models.speech_recognition.whisper.whisper_speech_recognition

+from __future__ import annotations
+
+from typing import List
+
+import numpy as np
+import torch
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available, load_audio_files
+from ...model import Model
+from ...model_outputs import SpeechRecognitionOutput
+from .whisper_speech_recognition_config import WhisperSpeechRecognitionConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import WhisperConfig, WhisperForConditionalGeneration
+
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+    Backends.LIBROSA,
+]
+
+
+
+[docs] +@register_model("whisper_speech_recognition", config_class=WhisperSpeechRecognitionConfig) +class WhisperSpeechRecognition(Model): + """ + Whisper model for automatic speech recognition + """ + + is_generative = True + required_backends = _required_backends + feature_extractor_name = "whisper_feature_extractor" + tokenizer_name = "whisper_bpe_tokenizer" + loss_fn_name = "cross_entropy" + + def __init__(self, config: WhisperSpeechRecognitionConfig, **kwargs): + super().__init__(config, **kwargs) + self.whisper = WhisperForConditionalGeneration(WhisperConfig(**self.config)) + +
+[docs] + def forward( + self, + input_features, + attention_mask=None, + decoder_input_ids=None, + decoder_attention_mask=None, + head_mask=None, + decoder_head_mask=None, + cross_attn_head_mask=None, + encoder_outputs=None, + past_key_values=None, + decoder_inputs_embeds=None, + use_cache=None, + output_attentions=None, + output_hidden_states=None, + **kwargs, + ): + outputs = self.whisper( + input_features=input_features, + attention_mask=attention_mask, + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + head_mask=head_mask, + decoder_head_mask=decoder_head_mask, + cross_attn_head_mask=cross_attn_head_mask, + encoder_outputs=encoder_outputs, + past_key_values=past_key_values, + decoder_inputs_embeds=decoder_inputs_embeds, + labels=None, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + return outputs
+ + +
+[docs] + def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: + loss = self.criterion(logits.view(-1, self.config.vocab_size), labels.view(-1)) + return loss
+ + +
+[docs] + def generate( + self, + input_features, + forced_decoder_ids=None, + generation_config=None, + logits_processor=None, + stopping_criteria=None, + prefix_allowed_tokens_fn=None, + synced_gpus=None, + return_timestamps=None, + task=None, + language=None, + is_multilingual=None, + prompt_ids=None, + **kwargs, + ): + generation_outputs = self.whisper.generate( + inputs=input_features, + generation_config=generation_config, + logits_processor=logits_processor, + stopping_criteria=stopping_criteria, + prefix_allowed_tokens_fn=prefix_allowed_tokens_fn, + synced_gpus=synced_gpus, + return_timestamps=return_timestamps, + task=task, + language=language, + is_multilingual=is_multilingual, + prompt_ids=prompt_ids, + forced_decoder_ids=forced_decoder_ids, + **kwargs, + ) + return generation_outputs
+ + +
+[docs] + def prepare_inputs_for_generation( + self, + decoder_input_ids, + past_key_values=None, + use_cache=None, + encoder_outputs=None, + attention_mask=None, + **kwargs, + ): + return self.whisper.prepare_inputs_for_generation( + decoder_input_ids=decoder_input_ids, + past_key_values=past_key_values, + use_cache=use_cache, + encoder_outputs=encoder_outputs, + attention_mask=attention_mask, + **kwargs, + )
+ + +
+[docs] + def get_encoder(self): + return self.whisper.get_encoder()
+ + +
+[docs] + def get_decoder(self): + return self.whisper.get_decoder()
+ + +
+[docs] + def resize_token_embeddings(self, new_num_tokens: int) -> torch.nn.Embedding: + new_embeddings = self.whisper.resize_token_embeddings(new_num_tokens) + return new_embeddings
+ + +
+[docs] + def get_output_embeddings(self): + return self.whisper.proj_out
+ + +
+[docs] + def set_output_embeddings(self, new_embeddings): + self.whisper.set_output_embeddings(new_embeddings)
+ + +
+[docs] + def get_input_embeddings(self) -> torch.nn.Module: + return self.whisper.get_input_embeddings()
+ + +
+[docs] + def freeze_encoder(self): + self.whisper.freeze_encoder()
+ + +
+[docs] + def preprocess(self, inputs: str | np.ndarray | List[np.ndarray] | List[str], **kwargs): + if isinstance(inputs, str) or (isinstance(inputs, List) and isinstance(inputs[0], str)): + inputs = load_audio_files(inputs) + + tokenizer = self.preprocessor[self.tokenizer_name] + feature_extractor = self.preprocessor[self.feature_extractor_name] + + forced_decoder_ids = tokenizer.get_decoder_prompt_ids(language="persian", task="transcribe") + inputs = feature_extractor(inputs, sampling_rate=self.config.sampling_rate, return_tensors="pt") + inputs["forced_decoder_ids"] = forced_decoder_ids + return inputs
+ + +
+[docs] + def post_process(self, model_outputs, **kwargs): + tokenizer = self.preprocessor[self.tokenizer_name] + transcripts = tokenizer.decode(model_outputs, decode_with_timestamps=True, skip_special_tokens=True) + outputs = [SpeechRecognitionOutput(text=transcript) for transcript in transcripts] + return outputs
+
+ \ No newline at end of file diff --git a/_modules/hezar/models/speech_recognition/whisper/whisper_speech_recognition_config.html b/_modules/hezar/models/speech_recognition/whisper/whisper_speech_recognition_config.html new file mode 100644 index 00000000..be4114b6 --- /dev/null +++ b/_modules/hezar/models/speech_recognition/whisper/whisper_speech_recognition_config.html @@ -0,0 +1,543 @@

Source code for hezar.models.speech_recognition.whisper.whisper_speech_recognition_config

+from dataclasses import dataclass, field
+from typing import List
+
+from ....configs import ModelConfig
+
+
+
+[docs] +@dataclass +class WhisperSpeechRecognitionConfig(ModelConfig): + name = "whisper_speech_recognition" + vocab_size: int = 51865 + num_mel_bins: int = 80 + encoder_layers: int = 6 + encoder_attention_heads: int = 4 + decoder_layers: int = 6 + decoder_attention_heads: int = 4 + num_hidden_layers: int = 12 + decoder_ffn_dim: int = 1536 + encoder_ffn_dim: int = 1536 + encoder_layerdrop: float = 0.0 + decoder_layerdrop: float = 0.0 + decoder_start_token_id: int = 50257 + use_cache: bool = True + sampling_rate: int = 16000 + is_encoder_decoder: bool = True + activation_function: str = "gelu" + d_model: int = 256 + dropout: float = 0.0 + torch_dtype: str = "float32" + attention_dropout: float = 0.0 + activation_dropout: float = 0.0 + init_std: float = 0.02 + scale_embedding: bool = False + max_source_positions: int = 1500 + max_target_positions: int = 448 + pad_token_id: int = 50256 + bos_token_id: int = 50257 + eos_token_id: int = 50256 + suppress_tokens: List[int] = None + begin_suppress_tokens: List[int] = field(default_factory=lambda: [220, 50256]) + use_weighted_layer_sum: bool = False + classifier_proj_size: int = 256 + apply_spec_augment: bool = False + mask_time_prob: float = 0.05 + mask_time_length: int = 10 + mask_time_min_masks: int = 2 + mask_feature_prob: float = 0.0 + mask_feature_length: int = 10 + mask_feature_min_masks: int = 0 + max_new_tokens: int = 448
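+
+# Note on the defaults above: max_source_positions = 1500 matches the 3000 log-mel
+# frames of a 30-second chunk after Whisper's stride-2 convolutional downsampling
+# (3000 / 2 = 1500), and max_target_positions = 448 caps the decoded token length.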
\ No newline at end of file
diff --git a/_modules/hezar/models/speech_recognition/whisper/whisper_tokenizer.html b/_modules/hezar/models/speech_recognition/whisper/whisper_tokenizer.html
new file mode 100644
index 00000000..38b176cc
--- /dev/null
+++ b/_modules/hezar/models/speech_recognition/whisper/whisper_tokenizer.html
@@ -0,0 +1,1236 @@

Source code for hezar.models.speech_recognition.whisper.whisper_tokenizer

+from dataclasses import dataclass, field
+from typing import List
+
+import numpy as np
+
+from ....constants import Backends
+from ....preprocessors import BPEConfig, BPETokenizer
+from ....registry import register_preprocessor
+from ....utils import Logger, is_backend_available
+
+
+if is_backend_available(Backends.TOKENIZERS):
+    from tokenizers import processors
+
+_required_backends = [
+    Backends.TOKENIZERS,
+]
+
+logger = Logger(__name__)
+
+LANGUAGES = {
+    "en": "english",
+    "zh": "chinese",
+    "de": "german",
+    "es": "spanish",
+    "ru": "russian",
+    "ko": "korean",
+    "fr": "french",
+    "ja": "japanese",
+    "pt": "portuguese",
+    "tr": "turkish",
+    "pl": "polish",
+    "ca": "catalan",
+    "nl": "dutch",
+    "ar": "arabic",
+    "sv": "swedish",
+    "it": "italian",
+    "id": "indonesian",
+    "hi": "hindi",
+    "fi": "finnish",
+    "vi": "vietnamese",
+    "he": "hebrew",
+    "uk": "ukrainian",
+    "el": "greek",
+    "ms": "malay",
+    "cs": "czech",
+    "ro": "romanian",
+    "da": "danish",
+    "hu": "hungarian",
+    "ta": "tamil",
+    "no": "norwegian",
+    "th": "thai",
+    "ur": "urdu",
+    "hr": "croatian",
+    "bg": "bulgarian",
+    "lt": "lithuanian",
+    "la": "latin",
+    "mi": "maori",
+    "ml": "malayalam",
+    "cy": "welsh",
+    "sk": "slovak",
+    "te": "telugu",
+    "fa": "persian",
+    "lv": "latvian",
+    "bn": "bengali",
+    "sr": "serbian",
+    "az": "azerbaijani",
+    "sl": "slovenian",
+    "kn": "kannada",
+    "et": "estonian",
+    "mk": "macedonian",
+    "br": "breton",
+    "eu": "basque",
+    "is": "icelandic",
+    "hy": "armenian",
+    "ne": "nepali",
+    "mn": "mongolian",
+    "bs": "bosnian",
+    "kk": "kazakh",
+    "sq": "albanian",
+    "sw": "swahili",
+    "gl": "galician",
+    "mr": "marathi",
+    "pa": "punjabi",
+    "si": "sinhala",
+    "km": "khmer",
+    "sn": "shona",
+    "yo": "yoruba",
+    "so": "somali",
+    "af": "afrikaans",
+    "oc": "occitan",
+    "ka": "georgian",
+    "be": "belarusian",
+    "tg": "tajik",
+    "sd": "sindhi",
+    "gu": "gujarati",
+    "am": "amharic",
+    "yi": "yiddish",
+    "lo": "lao",
+    "uz": "uzbek",
+    "fo": "faroese",
+    "ht": "haitian creole",
+    "ps": "pashto",
+    "tk": "turkmen",
+    "nn": "nynorsk",
+    "mt": "maltese",
+    "sa": "sanskrit",
+    "lb": "luxembourgish",
+    "my": "myanmar",
+    "bo": "tibetan",
+    "tl": "tagalog",
+    "mg": "malagasy",
+    "as": "assamese",
+    "tt": "tatar",
+    "haw": "hawaiian",
+    "ln": "lingala",
+    "ha": "hausa",
+    "ba": "bashkir",
+    "jw": "javanese",
+    "su": "sundanese",
+}
+
+# language code lookup by name, with a few language aliases
+TO_LANGUAGE_CODE = {
+    **{language: code for code, language in LANGUAGES.items()},
+    "burmese": "my",
+    "valencian": "ca",
+    "flemish": "nl",
+    "haitian": "ht",
+    "letzeburgesch": "lb",
+    "pushto": "ps",
+    "panjabi": "pa",
+    "moldavian": "ro",
+    "moldovan": "ro",
+    "sinhalese": "si",
+    "castilian": "es",
+}
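The alias table above folds alternate language names onto canonical codes; a quick illustration of the lookup directions, grounded in the two dictionaries just defined:

# Sketch: resolving a language name (or alias) to its Whisper code and back.
assert TO_LANGUAGE_CODE["castilian"] == "es"   # alias -> code
assert TO_LANGUAGE_CODE["spanish"] == "es"     # canonical name -> code
assert LANGUAGES["es"] == "spanish"            # code -> canonical name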
+
+TASK_IDS = ["translate", "transcribe"]
+
+ADDITIONAL_SPECIAL_TOKENS = [
+    "<|endoftext|>",
+    "<|endoftext|>",
+    "<|startoftranscript|>",
+    "<|en|>",
+    "<|zh|>",
+    "<|de|>",
+    "<|es|>",
+    "<|ru|>",
+    "<|ko|>",
+    "<|fr|>",
+    "<|ja|>",
+    "<|pt|>",
+    "<|tr|>",
+    "<|pl|>",
+    "<|ca|>",
+    "<|nl|>",
+    "<|ar|>",
+    "<|sv|>",
+    "<|it|>",
+    "<|id|>",
+    "<|hi|>",
+    "<|fi|>",
+    "<|vi|>",
+    "<|he|>",
+    "<|uk|>",
+    "<|el|>",
+    "<|ms|>",
+    "<|cs|>",
+    "<|ro|>",
+    "<|da|>",
+    "<|hu|>",
+    "<|ta|>",
+    "<|no|>",
+    "<|th|>",
+    "<|ur|>",
+    "<|hr|>",
+    "<|bg|>",
+    "<|lt|>",
+    "<|la|>",
+    "<|mi|>",
+    "<|ml|>",
+    "<|cy|>",
+    "<|sk|>",
+    "<|te|>",
+    "<|fa|>",
+    "<|lv|>",
+    "<|bn|>",
+    "<|sr|>",
+    "<|az|>",
+    "<|sl|>",
+    "<|kn|>",
+    "<|et|>",
+    "<|mk|>",
+    "<|br|>",
+    "<|eu|>",
+    "<|is|>",
+    "<|hy|>",
+    "<|ne|>",
+    "<|mn|>",
+    "<|bs|>",
+    "<|kk|>",
+    "<|sq|>",
+    "<|sw|>",
+    "<|gl|>",
+    "<|mr|>",
+    "<|pa|>",
+    "<|si|>",
+    "<|km|>",
+    "<|sn|>",
+    "<|yo|>",
+    "<|so|>",
+    "<|af|>",
+    "<|oc|>",
+    "<|ka|>",
+    "<|be|>",
+    "<|tg|>",
+    "<|sd|>",
+    "<|gu|>",
+    "<|am|>",
+    "<|yi|>",
+    "<|lo|>",
+    "<|uz|>",
+    "<|fo|>",
+    "<|ht|>",
+    "<|ps|>",
+    "<|tk|>",
+    "<|nn|>",
+    "<|mt|>",
+    "<|sa|>",
+    "<|lb|>",
+    "<|my|>",
+    "<|bo|>",
+    "<|tl|>",
+    "<|mg|>",
+    "<|as|>",
+    "<|tt|>",
+    "<|haw|>",
+    "<|ln|>",
+    "<|ha|>",
+    "<|ba|>",
+    "<|jw|>",
+    "<|su|>",
+    "<|translate|>",
+    "<|transcribe|>",
+    "<|startoflm|>",
+    "<|startofprev|>",
+    "<|nocaptions|>",
+    "<|notimestamps|>",
+]
+
+
+
+[docs]
+@dataclass
+class WhisperBPEConfig(BPEConfig):
+    name = "whisper_bpe_tokenizer"
+    unk_token: str = "<|endoftext|>"
+    bos_token: str = "<|startoftranscript|>"
+    eos_token: str = "<|endoftext|>"
+    translate_token: str = "<|translate|>"
+    transcribe_token: str = "<|transcribe|>"
+    notimestamps_token: str = "<|notimestamps|>"
+    additional_special_tokens: List = field(default_factory=lambda: ADDITIONAL_SPECIAL_TOKENS)
+    padding_direction: str = "right"
+    add_prefix_space: bool = False
+    add_bos_token: bool = False
+    model_max_length: int = 1024
+    language: str = None
+    task: str = None
+    predict_timestamps: bool = False
+
+
+[docs]
+@register_preprocessor("whisper_bpe_tokenizer", config_class=WhisperBPEConfig)
+class WhisperBPETokenizer(BPETokenizer):
+    required_backends = _required_backends
+
+    def __init__(self, config, tokenizer_file=None, **kwargs):
+        super().__init__(config, tokenizer_file=tokenizer_file, **kwargs)
+        self.language = self.config.language
+        self.task = self.config.task
+        self.predict_timestamps = self.config.predict_timestamps
+
+[docs]
+    def decode(
+        self,
+        token_ids,
+        skip_special_tokens: bool = False,
+        output_offsets: bool = False,
+        time_precision=0.02,
+        decode_with_timestamps: bool = False,
+        **kwargs,
+    ):
+        """
+        Override decode method to enable timestamps and offsets.
+        """
+        text = super().decode(token_ids, skip_special_tokens=skip_special_tokens, **kwargs)
+        if decode_with_timestamps:
+            text = [
+                self._decode_with_timestamps(
+                    token_id,
+                    time_precision=time_precision,
+                    skip_special_tokens=skip_special_tokens,
+                )
+                for token_id in token_ids
+            ]
+        # retrieve offsets
+        if output_offsets:
+            offsets = self._compute_offsets(token_ids, time_precision=time_precision)
+            return {"text": text, "offsets": offsets}
+        return text
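A sketch of what the two optional outputs look like, assuming `tokenizer` is a loaded WhisperBPETokenizer and `generated_ids` is a batch of decoder outputs; note that the offsets path below can only process a single sequence at a time:

# Plain decode of a batch:
texts = tokenizer.decode(generated_ids, skip_special_tokens=True)
# With timestamps annotated inline, e.g. "<|0.00|> some text <|1.08|>":
texts_ts = tokenizer.decode(generated_ids, decode_with_timestamps=True)
# With per-segment text and (start, end) times, one sequence at a time:
result = tokenizer.decode(generated_ids[0], output_offsets=True)
# result -> {"text": ..., "offsets": [{"text": ..., "timestamp": (0.0, 1.08)}, ...]}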
+
+    def _decode_with_timestamps(self, token_ids, skip_special_tokens=False, time_precision=0.02) -> str:
+        """
+        Timestamp tokens are above the special tokens' id range and are ignored by `decode()`. This method decodes
+        given tokens with timestamp tokens annotated, e.g. "<|1.08|>".
+        """
+        timestamp_begin = self.token_to_id(self.config.notimestamps_token) + 1
+        outputs = [[]]
+        for token in token_ids:
+            if token >= timestamp_begin:
+                timestamp = f"<|{(token - timestamp_begin) * time_precision:.2f}|>"
+                outputs.append(timestamp)
+                outputs.append([])
+            else:
+                outputs[-1].append(token)
+        outputs = self.decode(outputs, skip_special_tokens=skip_special_tokens)
+        return "".join(outputs)
+
+    def _compute_offsets(self, token_ids, time_precision=0.02):
+        """
+        Compute offsets for a given tokenized input.
+
+        Args:
+            token_ids:
+                List of tokenized input ids. Can be obtained using the `__call__` method.
+            time_precision (`float`, `optional`, defaults to 0.02):
+                The time ratio to convert from token to time.
+        """
+        offsets = []
+        token_ids = np.array(token_ids)
+        if token_ids.shape[0] > 1 and len(token_ids.shape) > 1:
+            raise ValueError("Can only process a single input at a time")
+        timestamp_begin = self.token_to_id(self.config.notimestamps_token) + 1
+        timestamp_tokens = token_ids >= timestamp_begin
+
+        consecutive = np.where(timestamp_tokens[:-1] & timestamp_tokens[1:])[0] + 1
+        if consecutive.shape[0] == 0 and timestamp_tokens.sum() <= 1:
+            # either there are no timestamps or there are no consecutive ones
+            return []
+        elif np.where(timestamp_tokens)[0][-1] + 1 not in consecutive:
+            # we add the final timestamp if it is not already in the list
+            consecutive = np.append(consecutive, np.where(timestamp_tokens)[0][-1] + 1)
+
+        last_slice = np.where(timestamp_tokens)[0][0]
+        for current_slice in consecutive:
+            sliced_tokens = token_ids[last_slice:current_slice]
+            if len(sliced_tokens) > 1:
+                start_timestamp_position = sliced_tokens[0].item() - timestamp_begin
+                end_timestamp_position = sliced_tokens[-1].item() - timestamp_begin
+                offsets.append(
+                    {
+                        "text": self.decode(sliced_tokens),
+                        "timestamp": (
+                            start_timestamp_position * time_precision,
+                            end_timestamp_position * time_precision,
+                        ),
+                    }
+                )
+            last_slice = current_slice
+
+        return offsets
+
+[docs]
+    def get_prompt_ids(self, text: str, return_tensors="np"):
+        """Converts prompt text to IDs that can be passed to [`~WhisperForConditionalGeneration.generate`]."""
+        batch_encoding = self([("<|startofprev|>", " " + text.strip())], add_special_tokens=False)
+
+        # Check for special tokens
+        prompt_text_ids = batch_encoding["input_ids"][1:]
+        special_token_id = next((x for x in prompt_text_ids if x >= self.special_ids[0]), None)
+        if special_token_id is not None:
+            token = self.convert_ids_to_tokens(special_token_id)
+            raise ValueError(f"Encountered text in the prompt corresponding to disallowed special token: {token}.")
+
+        batch_encoding.convert_to_tensors(tensor_type=return_tensors)
+        return batch_encoding["input_ids"]
+
+    @staticmethod
+    def _strip_prompt(token_ids, prompt_token_id: int, decoder_start_token_id: int):
+        has_prompt = isinstance(token_ids, list) and token_ids and token_ids[0] == prompt_token_id
+        if has_prompt:
+            if decoder_start_token_id in token_ids:
+                return token_ids[token_ids.index(decoder_start_token_id):]
+            else:
+                return []
+
+        return token_ids
+
+[docs]
+    def set_prefix_tokens(self, language: str = None, task: str = None, predict_timestamps: bool = None):
+        self.language = language if language is not None else self.language
+        self.task = task if task is not None else self.task
+        self.predict_timestamps = predict_timestamps if predict_timestamps is not None else self.predict_timestamps
+
+        prefix_token_ids = self.prefix_tokens
+        prefixes = self.convert_ids_to_tokens(prefix_token_ids)
+        prefix_template = " ".join([f"{token}:0" for token in prefixes])
+        self._tokenizer.post_processor = processors.TemplateProcessing(
+            single=f"{prefix_template} $A:0 {self.eos_token}:0",
+            pair=f"{prefix_template} $A:0 $B:1 {self.eos_token}:1",
+            special_tokens=[
+                (self.eos_token, self.eos_token_id),
+                *zip(prefixes, prefix_token_ids),
+            ],
+        )
+
+    @property
+    def prefix_tokens(self) -> List[int]:
+        translate_token_id = self.token_to_id(self.config.translate_token)
+        transcribe_token_id = self.token_to_id(self.config.transcribe_token)
+        notimestamps_token_id = self.token_to_id(self.config.notimestamps_token)
+        langs = tuple(LANGUAGES.keys())
+
+        if self.language is not None:
+            self.language = self.language.lower()
+            if self.language in TO_LANGUAGE_CODE:
+                language_id = TO_LANGUAGE_CODE[self.language]
+            elif self.language in TO_LANGUAGE_CODE.values():
+                language_id = self.language
+            else:
+                is_language_code = len(self.language) == 2
+                raise ValueError(
+                    f"Unsupported language: {self.language}. Language should be one of:"
+                    f" {list(TO_LANGUAGE_CODE.values()) if is_language_code else list(TO_LANGUAGE_CODE.keys())}."
+                )
+
+        if self.task is not None:
+            if self.task not in TASK_IDS:
+                raise ValueError(f"Unsupported task: {self.task}. Task should be in: {TASK_IDS}")
+
+        bos_sequence = [self.bos_token_id]
+        if self.language is not None:
+            bos_sequence.append(self.bos_token_id + 1 + langs.index(language_id))
+        if self.task is not None:
+            bos_sequence.append(transcribe_token_id if self.task == "transcribe" else translate_token_id)
+        if not self.predict_timestamps:
+            bos_sequence.append(notimestamps_token_id)
+        return bos_sequence
+
+[docs]
+    def get_decoder_prompt_ids(self, task=None, language=None, no_timestamps=True):
+        self.set_prefix_tokens(task=task, language=language, predict_timestamps=not no_timestamps)
+        # prefix tokens are of the form: <|startoftranscript|> <|lang_id|> <|task|> <|notimestamps|>
+        # we don't want to force the bos token at position 1, as this is the starting token
+        # when we generate, so we slice the prefix tokens to: <|lang_id|> <|task|> <|notimestamps|>
+        # to get the forced tokens
+        forced_tokens = self.prefix_tokens[1:]
+        forced_decoder_ids = [(rank + 1, token) for rank, token in enumerate(forced_tokens)]
+        return forced_decoder_ids
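A sketch of the returned structure, assuming a loaded tokenizer instance; the concrete token ids depend on the vocabulary file, so they are shown as placeholders:

# Forced decoder ids pin the language/task/timestamp prefix at generation
# time; positions start at 1 because position 0 is the BOS token.
forced = tokenizer.get_decoder_prompt_ids(language="persian", task="transcribe")
# forced -> [(1, <id of <|fa|>>), (2, <id of <|transcribe|>>), (3, <id of <|notimestamps|>>)]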
+
+    def _decode_asr(self, model_outputs, *, return_timestamps, return_language, time_precision):
+        """
+        Internal method meant to only be used by the ASR pipeline. Handles all the little quirks specific to Whisper
+        to handle the various options not allowed in other seq2seq models.
+        """
+
+        # =========== Overview ============
+        # - iterate over all outputs
+        # - all tokens within output
+        # - Each token can be
+        #   - language token
+        #   - special token
+        #   - timestamp token
+        #   - text token
+        # - We accumulate the text tokens.
+        # - We split on end timestamps.
+        # - Lots of complexity comes from stride and timestamps.
+
+        last_language = None
+
+        def new_chunk():
+            return {"language": last_language, "timestamp": [None, None], "text": ""}
+
+        # Welcome to the state machine!
+        chunks = []
+        chunk = new_chunk()
+        time_offset = 0.0
+        timestamp_begin = self.convert_tokens_to_ids("<|notimestamps|>") + 1
+        previous_tokens = []
+        skip = False
+        right_stride_start = None
+
+        all_special_ids = set(self.special_ids)
+        # - iterate over all outputs
+        for chunk_id, output in enumerate(model_outputs):
+            # We can drop everything to a Python list, it's going to make
+            # our lives easier
+            token_ids = output["tokens"][0].tolist()
+
+            # These keep track of timestamps within strides, which need to be
+            # skipped so that all tokens resolve into a single chunk.
+            last_timestamp = None
+            first_timestamp = timestamp_begin
+
+            if "stride" in output:
+                chunk_len, stride_left, stride_right = output["stride"]
+                # Offset the timings to account for the other `model_outputs`.
+                time_offset -= stride_left
+                right_stride_start = chunk_len - stride_right
+
+                # Keeping track of timestamps within strides.
+                # We're going to NOT split on those, and delay until we're
+                # out of BOTH strides. Otherwise lots of issues and corner
+                # cases occur.
+                if stride_left:
+                    first_timestamp = stride_left / time_precision + timestamp_begin
+                if stride_right:
+                    for token in reversed(token_ids):
+                        if token >= timestamp_begin:
+                            # There can be several tokens in the right stride,
+                            # but the last one is ALWAYS going to be skipped.
+                            if (
+                                last_timestamp is not None
+                                and (token - timestamp_begin) * time_precision < right_stride_start
+                            ):
+                                break
+                            last_timestamp = token
+
+            current_tokens = []
+
+            # - all tokens within output
+            for i, token in enumerate(token_ids):
+                # 4 possible states for each token
+                # - 1/ Language code
+                # - 2/ all other special tokens (which we ignore)
+                # - 3/ Timestamp
+                # - 4/ Regular text
+                if token in all_special_ids:
+                    # Either language code or other
+                    text = self.decode([token])
+                    # Removing outer shell <|XX|>
+                    text = text[2:-2]
+                    language = LANGUAGES.get(text, None)
+                    if language is not None:
+                        # 1/ Indeed some language
+                        # TODO Handle when language is different from the previous
+                        # one, and we cannot use timestamped tokens to create chunks
+                        if last_language and language != last_language and not return_timestamps:
+                            previous_tokens.append(current_tokens)
+                            resolved_tokens = self._find_longest_common_sequence(previous_tokens)
+                            resolved_text = self.decode(resolved_tokens)
+                            chunk["text"] = resolved_text
+                            chunks.append(chunk)
+
+                            # Flush all our temporary context
+                            previous_tokens = []
+                            current_tokens = []
+                            chunk = new_chunk()
+                        chunk["language"] = language
+                        last_language = language
+                    else:
+                        # 2/ This is a regular special token, ignoring it
+                        pass
+                elif token >= timestamp_begin:
+                    # 3/ Timestamp token
+                    time = (token - timestamp_begin) * time_precision + time_offset
+                    time = round(time, 2)
+                    if last_timestamp and token >= last_timestamp:
+                        # Whisper emitted a timestamp token, but it falls within
+                        # our stride, so we're going to skip it for the time being
+                        # and resolve this later.
+                        # Skip is necessary because timestamp tokens always come
+                        # in pairs, so we need to skip the next one too (which
+                        # would mark the start of another chunk).
+                        skip = True
+                    elif skip or (previous_tokens and token < first_timestamp):
+                        skip = False
+                    elif chunk["timestamp"][0] is None:
+                        chunk["timestamp"][0] = time
+                    else:
+                        # This is the end of the timestamp chunk
+                        if time == chunk["timestamp"][0]:
+                            # This is a bug in timestamp token output
+                            # where we're taking the duplicate token
+                            # as a stop where it should be a start.
+                            # This is an issue in the underlying model output.
+                            pass
+                        else:
+                            chunk["timestamp"][1] = time
+                            # Handling merges
+                            previous_tokens.append(current_tokens)
+                            resolved_tokens = self._find_longest_common_sequence(previous_tokens)
+                            resolved_text = self.decode(resolved_tokens)
+                            chunk["text"] = resolved_text
+                            chunks.append(chunk)
+
+                            # Flush all our temporary context
+                            previous_tokens = []
+                            current_tokens = []
+                            chunk = new_chunk()
+                else:
+                    # 4/ Regular token
+                    # We just append to the list of all tokens so we can handle
+                    # merges later and decode into text.
+                    current_tokens.append(token)
+
+            if "stride" in output:
+                time_offset += chunk_len - stride_right
+
+            # Leftover tokens
+            if current_tokens:
+                previous_tokens.append(current_tokens)
+            elif not (any(p for p in previous_tokens)):
+                chunk = new_chunk()
+                previous_tokens = []
+                current_tokens = []
+
+        if previous_tokens:
+            if return_timestamps:
+                logger.warning(
+                    "There was an error while processing timestamps, we haven't found a timestamp as last token. Was"
+                    " WhisperTimeStampLogitsProcessor used?"
+                )
+            # Happens when we don't use timestamps
+            resolved_tokens = self._find_longest_common_sequence(previous_tokens)
+            resolved_text = self.decode(resolved_tokens)
+            chunk["text"] = resolved_text
+            chunks.append(chunk)
+
+        # Preparing and cleaning up the pipeline output
+        full_text = "".join(chunk["text"] for chunk in chunks)
+        if return_timestamps or return_language:
+            for chunk in chunks:
+                if not return_timestamps:
+                    chunk.pop("timestamp")
+                else:
+                    chunk["timestamp"] = tuple(chunk["timestamp"])
+                if not return_language:
+                    chunk.pop("language")
+            optional = {"chunks": chunks}
+        else:
+            optional = {}
+        return full_text, optional
+
+    @staticmethod
+    def _find_longest_common_sequence(sequences):
+        # It would be much harder to do O(n) because of fault tolerance.
+        # We actually have a good property which is that the total sequence
+        # MUST be those subsequences in order.
+        left_sequence = sequences[0]
+        left_length = len(left_sequence)
+        total_sequence = []
+        for right_sequence in sequences[1:]:
+            max_ = 0.0
+            max_indices = (left_length, left_length, 0, 0)
+            # Here we're sliding matches
+            # [a, b, c, d]
+            #        [c, d, f]
+            # =        [c] == [d]
+            #
+            # [a, b, c, d]
+            #       [c, d, f]
+            # =     [c, d] == [c, d]
+            #
+            # [a, b, c, d]
+            #    [c, d, f]
+            # = [b, c, d] == [c, d, f]
+            #
+            # [a, b, c, d]
+            # [c, d, f]
+            # = [a, b, c] == [c, d, f]
+            #
+            # [a, b, c, d]
+            # [d, f]
+            # = [a, b] == [d, f]
+            #
+            # [a, b, c, d]
+            # [f]
+            # = [a] == [f]
+            right_length = len(right_sequence)
+            for i in range(1, left_length + right_length):
+                # epsilon to favor long perfect matches
+                eps = i / 10000.0
+
+                # Slightly convoluted because we don't want out-of-bound indices.
+                # This will be necessary for a small conflict resolution
+                # optimization later.
+                left_start = max(0, left_length - i)
+                left_stop = min(left_length, left_length + right_length - i)
+                left = np.array(left_sequence[left_start:left_stop])
+
+                right_start = max(0, i - left_length)
+                right_stop = min(right_length, i)
+                right = np.array(right_sequence[right_start:right_stop])
+
+                # We can only match subsequences of the same size.
+                if len(left) != len(right):
+                    raise RuntimeError(
+                        "There is a bug within whisper `decode_asr` function, please report it. "
+                        "Dropping to prevent bad inference."
+                    )
+
+                matches = np.sum(left == right)
+                matching = matches / i + eps
+                if matches > 1 and matching > max_:
+                    max_ = matching
+                    max_indices = (left_start, left_stop, right_start, right_stop)
+
+            (left_start, left_stop, right_start, right_stop) = max_indices
+
+            # This is a small conflict optimization since those sequences
+            # overlap in audio. We're going to give more confidence to the
+            # left sequence for the left of the overlap, and to the right
+            # sequence for the right of the overlap.
+            left_mid = (left_stop + left_start) // 2
+            right_mid = (right_stop + right_start) // 2
+            total_sequence.extend(left_sequence[:left_mid])
+            left_sequence = right_sequence[right_mid:]
+            left_length = len(left_sequence)
+
+        total_sequence.extend(left_sequence)
+
+        return total_sequence
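The sliding-match merge is easiest to see on toy data. Since `_find_longest_common_sequence` is a static method that only needs numpy, it can be exercised directly (a sketch; the method is private, so this is for illustration only):

# Two overlapping token chunks, as produced by strided ASR decoding.
# The best overlap is [10, 11]; the merge keeps the left chunk up to the
# middle of the overlap and the right chunk from that point on.
chunk_a = [7, 8, 9, 10, 11]
chunk_b = [10, 11, 12, 13]
merged = WhisperBPETokenizer._find_longest_common_sequence([chunk_a, chunk_b])
print(merged)  # [7, 8, 9, 10, 11, 12, 13]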
\ No newline at end of file
diff --git a/_modules/hezar/models/text_classification/bert/bert_text_classification.html b/_modules/hezar/models/text_classification/bert/bert_text_classification.html
new file mode 100644
index 00000000..75415f5c
--- /dev/null
+++ b/_modules/hezar/models/text_classification/bert/bert_text_classification.html
@@ -0,0 +1,639 @@

Source code for hezar.models.text_classification.bert.bert_text_classification

+"""
+A BERT model for text classification built using HuggingFace Transformers
+"""
+from __future__ import annotations
+
+from typing import Dict, List
+
+import torch
+from torch import nn
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import TextClassificationOutput
+from .bert_text_classification_config import BertTextClassificationConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import BertConfig, BertModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+[docs]
+@register_model(model_name="bert_text_classification", config_class=BertTextClassificationConfig)
+class BertTextClassification(Model):
+    """
+    A standard 🤗Transformers Bert model for text classification
+
+    Args:
+        config: The whole model config including arguments needed for the inner 🤗Transformers model.
+    """
+
+    required_backends = _required_backends
+    tokenizer_name = "wordpiece_tokenizer"
+    skip_keys_on_load = [
+        "model.embeddings.position_ids",  # For older versions
+        "bert.embeddings.position_ids",
+        "model.bert.embeddings.position_ids",
+    ]
+
+    def __init__(self, config: BertTextClassificationConfig, **kwargs):
+        super().__init__(config, **kwargs)
+        self.bert = BertModel(self._build_inner_config())
+        classifier_dropout = (
+            self.config.classifier_dropout
+            if self.config.classifier_dropout is not None
+            else self.config.hidden_dropout_prob
+        )
+        self.dropout = nn.Dropout(classifier_dropout)
+        self.classifier = nn.Linear(self.config.hidden_size, self.config.num_labels)
+
+    def _build_inner_config(self):
+        if self.config.num_labels is None and self.config.id2label is None:
+            raise ValueError("Both `num_labels` and `id2label` are None. Please provide at least one of them!")
+        if self.config.id2label and self.config.num_labels is None:
+            self.config.num_labels = len(self.config.id2label)
+        bert_config = BertConfig(**self.config)
+        return bert_config
+
+[docs]
+    def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
+        criterion = nn.CrossEntropyLoss()
+        loss = criterion(logits.view(-1, self.config.num_labels), labels.view(-1))
+        return loss
+
+[docs]
+    def forward(
+        self,
+        token_ids,
+        attention_mask=None,
+        token_type_ids=None,
+        position_ids=None,
+        head_mask=None,
+        inputs_embeds=None,
+        encoder_hidden_states=None,
+        encoder_attention_mask=None,
+        past_key_values=None,
+        use_cache=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        **kwargs,
+    ) -> Dict:
+        lm_outputs = self.bert(
+            token_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            encoder_hidden_states=encoder_hidden_states,
+            encoder_attention_mask=encoder_attention_mask,
+            past_key_values=past_key_values,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=True,
+        )
+        pooled_output = lm_outputs[1]
+        pooled_output = self.dropout(pooled_output)
+        logits = self.classifier(pooled_output)
+        outputs = {
+            "logits": logits,
+            "hidden_states": lm_outputs.hidden_states,
+            "attentions": lm_outputs.attentions,
+        }
+        return outputs
+
+[docs]
+    def preprocess(self, inputs: str | List[str], **kwargs):
+        if isinstance(inputs, str):
+            inputs = [inputs]
+        if "text_normalizer" in self.preprocessor:
+            normalizer = self.preprocessor["text_normalizer"]
+            inputs = normalizer(inputs)
+        tokenizer = self.preprocessor[self.tokenizer_name]
+        inputs = tokenizer(inputs, return_tensors="pt", device=self.device)
+        return inputs
+
+[docs]
+    def post_process(self, model_outputs: dict, top_k=1):
+        output_logits = model_outputs["logits"]
+        outputs = []
+        for logits in output_logits:
+            probs = logits.softmax(dim=-1)
+            scores, label_ids = probs.sort(descending=True)
+            row = []
+            for i, (score, label_id) in enumerate(zip(scores, label_ids)):
+                if i == top_k:
+                    break
+                label_str = self.config.id2label[label_id.item()]
+                score = score.item()
+                row.append(TextClassificationOutput(label=label_str, score=score))
+            outputs.append(row)
+        return outputs
\ No newline at end of file
diff --git a/_modules/hezar/models/text_classification/bert/bert_text_classification_config.html b/_modules/hezar/models/text_classification/bert/bert_text_classification_config.html
new file mode 100644
index 00000000..f61b84a0
--- /dev/null
+++ b/_modules/hezar/models/text_classification/bert/bert_text_classification_config.html
@@ -0,0 +1,522 @@

Source code for hezar.models.text_classification.bert.bert_text_classification_config

+from dataclasses import dataclass
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+[docs]
+@dataclass
+class BertTextClassificationConfig(ModelConfig):
+    name = "bert_text_classification"
+    task: str = TaskType.TEXT_CLASSIFICATION
+    num_labels: int = None
+    id2label: dict = None
+    vocab_size: int = 42000
+    hidden_size: int = 768
+    num_hidden_layers: int = 12
+    num_attention_heads: int = 12
+    intermediate_size: int = 3072
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.1
+    attention_probs_dropout_prob: float = 0.1
+    max_position_embeddings: int = 512
+    type_vocab_size: int = 2
+    initializer_range: float = 0.02
+    layer_norm_eps: float = 1e-12
+    pad_token_id: int = 0
+    position_embedding_type: str = "absolute"
+    use_cache: bool = True
+    classifier_dropout: float = None
\ No newline at end of file
diff --git a/_modules/hezar/models/text_classification/distilbert/distilbert_text_classification.html b/_modules/hezar/models/text_classification/distilbert/distilbert_text_classification.html
new file mode 100644
index 00000000..a919ac67
--- /dev/null
+++ b/_modules/hezar/models/text_classification/distilbert/distilbert_text_classification.html
@@ -0,0 +1,623 @@

Source code for hezar.models.text_classification.distilbert.distilbert_text_classification

+"""
+A DistilBERT model for text classification built using HuggingFace Transformers
+"""
+from __future__ import annotations
+
+from typing import Dict, List
+
+import torch
+from torch import nn
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import TextClassificationOutput
+from .distilbert_text_classification_config import DistilBertTextClassificationConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import DistilBertConfig, DistilBertModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+[docs] +@register_model(model_name="distilbert_text_classification", config_class=DistilBertTextClassificationConfig) +class DistilBertTextClassification(Model): + """ + A standard 🤗Transformers DistilBert model for text classification + + Args: + config: The whole model config including arguments needed for the inner 🤗Transformers model. + """ + + required_backends = _required_backends + tokenizer_name = "wordpiece_tokenizer" + + def __init__(self, config: DistilBertTextClassificationConfig, **kwargs): + super().__init__(config, **kwargs) + self.distilbert = DistilBertModel(self._build_inner_config()) + self.pre_classifier = nn.Linear(self.config.dim, self.config.dim) + self.classifier = nn.Linear(self.config.dim, self.config.num_labels) + self.dropout = nn.Dropout(self.config.seq_classif_dropout) + + def _build_inner_config(self): + if self.config.num_labels is None and self.config.id2label is None: + raise ValueError("Both `num_labels` and `id2label` are None. Please provide at least one of them!") + if self.config.id2label and self.config.num_labels is None: + self.config.num_labels = len(self.config.id2label) + bert_config = DistilBertConfig(**self.config) + return bert_config + +
+[docs] + def forward( + self, + token_ids, + attention_mask=None, + head_mask=None, + inputs_embeds=None, + output_attentions=None, + output_hidden_states=None, + **kwargs, + ) -> Dict: + lm_outputs = self.distilbert( + input_ids=token_ids, + attention_mask=attention_mask, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=True, + ) + hidden_state = lm_outputs[0] + pooled_output = hidden_state[:, 0] # classification output + pooled_output = self.pre_classifier(pooled_output) + pooled_output = nn.ReLU()(pooled_output) + pooled_output = self.dropout(pooled_output) + logits = self.classifier(pooled_output) + + outputs = { + "logits": logits, + "hidden_states": lm_outputs.hidden_states, + "attentions": lm_outputs.attentions, + } + return outputs
+ + +
+[docs] + def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: + criterion = nn.CrossEntropyLoss() + loss = criterion(logits.view(-1, self.config.num_labels), labels.view(-1)) + return loss
+ + +
+[docs] + def preprocess(self, inputs: str | List[str], **kwargs): + if isinstance(inputs, str): + inputs = [inputs] + if "text_normalizer" in self.preprocessor: + normalizer = self.preprocessor["text_normalizer"] + inputs = normalizer(inputs) + tokenizer = self.preprocessor[self.tokenizer_name] + inputs = tokenizer(inputs, return_tensors="pt", device=self.device) + return inputs
+ + +
+[docs] + def post_process(self, model_outputs: dict, top_k=1): + output_logits = model_outputs["logits"] + outputs = [] + for logits in output_logits: + probs = logits.softmax(dim=-1) + scores, label_ids = probs.sort(descending=True) + row = [] + for i, (score, label_id) in enumerate(zip(scores, label_ids)): + if i == top_k: + break + label_str = self.config.id2label[label_id.item()] + score = score.item() + row.append(TextClassificationOutput(label=label_str, score=score)) + outputs.append(row) + return outputs
+
+ +
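Unlike the BERT wrapper above, DistilBERT has no pooler layer, so `forward()` pools by taking the first ([CLS]) position of the last hidden state. A small standalone sketch of that pooling step on a dummy tensor:

# Sketch: manual [CLS]-style pooling as done in forward() above.
import torch

last_hidden_state = torch.randn(2, 16, 768)   # (batch, seq_len, dim)
pooled = last_hidden_state[:, 0]              # first token of each sequence
print(pooled.shape)                           # torch.Size([2, 768])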
\ No newline at end of file
diff --git a/_modules/hezar/models/text_classification/distilbert/distilbert_text_classification_config.html b/_modules/hezar/models/text_classification/distilbert/distilbert_text_classification_config.html
new file mode 100644
index 00000000..77b241f1
--- /dev/null
+++ b/_modules/hezar/models/text_classification/distilbert/distilbert_text_classification_config.html
@@ -0,0 +1,521 @@

Source code for hezar.models.text_classification.distilbert.distilbert_text_classification_config

+from dataclasses import dataclass
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+[docs]
+@dataclass
+class DistilBertTextClassificationConfig(ModelConfig):
+    name = "distilbert_text_classification"
+    task: str = TaskType.TEXT_CLASSIFICATION
+    num_labels: int = None
+    id2label: dict = None
+    activation: str = "gelu"
+    attention_dropout: float = 0.1
+    dim: int = 768
+    dropout: float = 0.1
+    hidden_dim: int = 3072
+    initializer_range: float = 0.02
+    max_position_embeddings: int = 512
+    n_heads: int = 12
+    n_layers: int = 6
+    output_past: bool = True
+    pad_token_id: int = 0
+    qa_dropout: float = 0.1
+    seq_classif_dropout: float = 0.2
+    tie_weights_: bool = True
+    vocab_size: int = 42000
\ No newline at end of file
diff --git a/_modules/hezar/models/text_classification/roberta/roberta_text_classification.html b/_modules/hezar/models/text_classification/roberta/roberta_text_classification.html
new file mode 100644
index 00000000..697b4400
--- /dev/null
+++ b/_modules/hezar/models/text_classification/roberta/roberta_text_classification.html
@@ -0,0 +1,651 @@

Source code for hezar.models.text_classification.roberta.roberta_text_classification

+"""
+A RoBERTa Language Model (HuggingFace Transformers) wrapped by a Hezar Model class
+"""
+from __future__ import annotations
+
+from typing import List
+
+import torch
+from torch import nn, tanh
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import TextClassificationOutput
+from .roberta_text_classification_config import RobertaTextClassificationConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import RobertaConfig, RobertaModel
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+[docs] +@register_model("roberta_text_classification", config_class=RobertaTextClassificationConfig) +class RobertaTextClassification(Model): + """ + A standard 🤗Transformers RoBERTa model for text classification + + Args: + config: The whole model config including arguments needed for the inner 🤗Transformers model. + """ + + required_backends = _required_backends + tokenizer_name = "bpe_tokenizer" + skip_keys_on_load = ["model.embeddings.position_ids", "roberta.embeddings.position_ids"] # For older versions + + def __init__(self, config, **kwargs): + super().__init__(config=config, **kwargs) + self.roberta = RobertaModel(self._build_inner_config(), add_pooling_layer=False) + self.classifier = RobertaClassificationHead(self.config) + + def _build_inner_config(self): + if self.config.num_labels is None and self.config.id2label is None: + raise ValueError("Both `num_labels` and `id2label` are None. Please provide at least one of them!") + if self.config.id2label and self.config.num_labels is None: + self.config.num_labels = len(self.config.id2label) + bert_config = RobertaConfig(**self.config) + return bert_config + +
+[docs] + def forward( + self, + token_ids, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + **kwargs, + ): + lm_outputs = self.roberta( + token_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = lm_outputs[0] + logits = self.classifier(sequence_output) + + outputs = { + "logits": logits, + "hidden_states": lm_outputs.hidden_states, + "attentions": lm_outputs.attentions, + } + return outputs
+ + +
+[docs] + def compute_loss(self, inputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: + criterion = nn.CrossEntropyLoss() + loss = criterion(inputs.view(-1, self.config.num_labels), targets.view(-1)) + return loss
+ + +
+[docs] + def preprocess(self, inputs: str | List[str], **kwargs): + if isinstance(inputs, str): + inputs = [inputs] + if "text_normalizer" in self.preprocessor: + normalizer = self.preprocessor["text_normalizer"] + inputs = normalizer(inputs) + tokenizer = self.preprocessor[self.tokenizer_name] + inputs = tokenizer(inputs, return_tensors="pt", device=self.device) + return inputs
+ + +
+[docs] + def post_process(self, model_outputs: dict, top_k=1): + output_logits = model_outputs["logits"] + outputs = [] + for logits in output_logits: + probs = logits.softmax(dim=-1) + scores, label_ids = probs.sort(descending=True) + row = [] + for i, (score, label_id) in enumerate(zip(scores, label_ids)): + if i == top_k: + break + label_str = self.config.id2label[label_id.item()] + score = score.item() + row.append(TextClassificationOutput(label=label_str, score=score)) + outputs.append(row) + return outputs
+
+ + + +
+[docs] +class RobertaClassificationHead(nn.Module): + """Head for sentence-level classification tasks.""" + + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + classifier_dropout = ( + config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob + ) + self.dropout = nn.Dropout(classifier_dropout) + self.out_proj = nn.Linear(config.hidden_size, config.num_labels) + +
+[docs] + def forward(self, inputs, **kwargs): + x = inputs[:, 0, :] # take <s> token (equiv. to [CLS]) + x = self.dropout(x) + x = self.dense(x) + x = tanh(x) + x = self.dropout(x) + x = self.out_proj(x) + return x
+
+ +
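The classification head is a plain `nn.Module`, so it can be exercised in isolation. A sketch with a dummy config-like object (field names follow `RobertaTextClassificationConfig`; the values are arbitrary):

# Sketch: run the head on a dummy sequence output.
import torch
from types import SimpleNamespace

cfg = SimpleNamespace(hidden_size=768, classifier_dropout=None, hidden_dropout_prob=0.1, num_labels=3)
head = RobertaClassificationHead(cfg)
sequence_output = torch.randn(2, 16, 768)  # (batch, seq_len, hidden)
logits = head(sequence_output)             # uses the <s> token at position 0
print(logits.shape)                        # torch.Size([2, 3])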
\ No newline at end of file
diff --git a/_modules/hezar/models/text_classification/roberta/roberta_text_classification_config.html b/_modules/hezar/models/text_classification/roberta/roberta_text_classification_config.html
new file mode 100644
index 00000000..8e9799c1
--- /dev/null
+++ b/_modules/hezar/models/text_classification/roberta/roberta_text_classification_config.html
@@ -0,0 +1,525 @@

Source code for hezar.models.text_classification.roberta.roberta_text_classification_config

+from dataclasses import dataclass
+
+from ....configs import ModelConfig
+from ....constants import TaskType
+
+
+
+[docs]
+@dataclass
+class RobertaTextClassificationConfig(ModelConfig):
+    name = "roberta_text_classification"
+    task: str = TaskType.TEXT_CLASSIFICATION
+    num_labels: int = None
+    id2label: dict = None
+    attention_probs_dropout_prob: float = 0.1
+    bos_token_id: int = 0
+    eos_token_id: int = 2
+    gradient_checkpointing: bool = False
+    hidden_act: str = "gelu"
+    hidden_dropout_prob: float = 0.1
+    hidden_size: int = 768
+    classifier_dropout: float = None
+    initializer_range: float = 0.02
+    intermediate_size: int = 3072
+    layer_norm_eps: float = 1e-12
+    max_position_embeddings: int = 514
+    num_attention_heads: int = 12
+    num_hidden_layers: int = 12
+    pad_token_id: int = 1
+    position_embedding_type: str = "absolute"
+    type_vocab_size: int = 1
+    use_cache: bool = True
+    vocab_size: int = 42000
\ No newline at end of file
diff --git a/_modules/hezar/models/text_generation/gpt2/gpt2_text_generation.html b/_modules/hezar/models/text_generation/gpt2/gpt2_text_generation.html
new file mode 100644
index 00000000..c99c7833
--- /dev/null
+++ b/_modules/hezar/models/text_generation/gpt2/gpt2_text_generation.html
@@ -0,0 +1,604 @@

Source code for hezar.models.text_generation.gpt2.gpt2_text_generation

+from __future__ import annotations
+
+from typing import List
+
+import torch
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import TextGenerationOutput
+from .gpt2_text_generation_config import GPT2TextGenerationConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import (
+        GenerationConfig,
+        GPT2Config,
+        GPT2LMHeadModel,
+    )
+
+_required_backends = [Backends.TRANSFORMERS, Backends.TOKENIZERS]
+
+
+
+[docs] +@register_model("gpt2_text_generation", config_class=GPT2TextGenerationConfig) +class GPT2TextGeneration(Model): + is_generative = True + tokenizer_name = "bpe_tokenizer" + required_backends = _required_backends + loss_fn_name = "cross_entropy" + + def __init__(self, config: GPT2TextGenerationConfig, **kwargs): + super().__init__(config, **kwargs) + self.gpt2 = GPT2LMHeadModel(config=GPT2Config(**self.config)) + +
+[docs] + def forward( + self, + token_ids, + past_key_values=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + use_cache=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + **kwargs + ): + outputs = self.gpt2( + input_ids=token_ids, + past_key_values=past_key_values, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + labels=None, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + return dict(outputs)
+ + +
+[docs] + def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: + labels = labels.to(logits.device) + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Compute loss + loss = self.criterion(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)) + return loss
+ + +
+[docs] + def generate(self, token_ids, **kwargs): + self.config.generation.update(kwargs or {}) + generation_config = GenerationConfig(**self.config.generation) + generated_ids = self.gpt2.generate(token_ids, generation_config=generation_config) + return generated_ids
+ + +
+[docs] + def preprocess(self, texts: str | List[str], **kwargs): + tokenizer = self.preprocessor[self.tokenizer_name] + inputs = tokenizer(texts, return_tensors="pt", device=self.device) + return inputs
+ + +
+[docs] + def post_process(self, generated_ids: torch.Tensor): + tokenizer = self.preprocessor[self.tokenizer_name] + decoded_outputs = tokenizer.decode(generated_ids.cpu().numpy().tolist()) + outputs = [TextGenerationOutput(text=text) for text in decoded_outputs] + return outputs
+
+ +
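Since `generate()` merges its kwargs into the stored generation config, decoding parameters can be overridden per call. A usage sketch, assuming `predict` routes generation kwargs through to `generate()` and using an assumed example Hub id:

# Sketch: text generation with per-call overrides of the generation config.
from hezar.models import Model

gpt2 = Model.load("hezarai/gpt2-base-fa")  # assumed Hub id
outputs = gpt2.predict("در یک روز آفتابی", max_new_tokens=32, num_beams=1)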
\ No newline at end of file
diff --git a/_modules/hezar/models/text_generation/gpt2/gpt2_text_generation_config.html b/_modules/hezar/models/text_generation/gpt2/gpt2_text_generation_config.html
new file mode 100644
index 00000000..91e646fc
--- /dev/null
+++ b/_modules/hezar/models/text_generation/gpt2/gpt2_text_generation_config.html
@@ -0,0 +1,539 @@

Source code for hezar.models.text_generation.gpt2.gpt2_text_generation_config

+from dataclasses import dataclass, field
+
+from ....configs import ModelConfig
+
+
+
+[docs]
+@dataclass
+class GenerationConfig(ModelConfig):
+    bos_token_id: int = 0
+    decoder_start_token_id: int = 0
+    early_stopping: bool = True
+    eos_token_id: int = 2
+    length_penalty: float = 2.0
+    max_new_tokens: int = 50
+    no_repeat_ngram_size: int = 3
+    num_beams: int = 4
+    pad_token_id: int = 1
+
+
+[docs]
+@dataclass
+class GPT2TextGenerationConfig(ModelConfig):
+    name = "gpt2_text_generation"
+    add_cross_attention: bool = False
+    vocab_size: int = 42001
+    attn_pdrop: float = 0.1
+    bos_token_id: int = 5
+    embd_pdrop: float = 0.1
+    eos_token_id: int = 5
+    gradient_checkpointing: bool = False
+    initializer_range: float = 0.02
+    layer_norm_epsilon: float = 1e-05
+    model_type: str = "gpt2"
+    n_ctx: int = 1024
+    n_embd: int = 768
+    n_head: int = 12
+    n_inner: int = None
+    n_layer: int = 12
+    n_positions: int = 1024
+    resid_pdrop: float = 0.1
+    summary_activation: bool = False
+    summary_first_dropout: float = 0.1
+    use_cache: bool = True
+    generation: GenerationConfig = field(default_factory=GenerationConfig)
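Because `generation` is a nested dataclass field with a `default_factory`, generation behavior can also be tuned when the model config itself is built. A short sketch:

# Sketch: override nested generation defaults at config construction time.
config = GPT2TextGenerationConfig(
    generation=GenerationConfig(max_new_tokens=100, num_beams=1),
)
print(config.generation.max_new_tokens)  # 100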
\ No newline at end of file
diff --git a/_modules/hezar/models/text_generation/t5/t5_text_generation.html b/_modules/hezar/models/text_generation/t5/t5_text_generation.html
new file mode 100644
index 00000000..b2fdefe7
--- /dev/null
+++ b/_modules/hezar/models/text_generation/t5/t5_text_generation.html
@@ -0,0 +1,623 @@

Source code for hezar.models.text_generation.t5.t5_text_generation

+from __future__ import annotations
+
+from typing import Dict, List
+
+import torch
+
+from ....constants import Backends
+from ....registry import register_model
+from ....utils import is_backend_available
+from ...model import Model
+from ...model_outputs import TextGenerationOutput
+from .t5_text_generation_config import T5TextGenerationConfig
+
+
+if is_backend_available(Backends.TRANSFORMERS):
+    from transformers import T5Config, T5ForConditionalGeneration
+
+_required_backends = [
+    Backends.TRANSFORMERS,
+    Backends.TOKENIZERS,
+]
+
+
+
+[docs] +@register_model("t5_text_generation", config_class=T5TextGenerationConfig) +class T5TextGeneration(Model): + """ + T5 for text to text generation + """ + + is_generative = True + required_backends = _required_backends + tokenizer_name = "sentencepiece_unigram_tokenizer" + loss_fn_name = "cross_entropy" + + def __init__(self, config: T5TextGenerationConfig, **kwargs): + super().__init__(config=config, **kwargs) + + self.t5 = T5ForConditionalGeneration(T5Config(**self.config)) + +
+[docs] + def forward( + self, + token_ids, + labels=None, + attention_mask=None, + decoder_input_ids=None, + decoder_attention_mask=None, + head_mask=None, + decoder_head_mask=None, + cross_attn_head_mask=None, + encoder_outputs=None, + past_key_values=None, + inputs_embeds=None, + decoder_inputs_embeds=None, + use_cache=None, + output_attentions=None, + output_hidden_states=None, + **kwargs, + ) -> Dict: + + if labels is not None and decoder_input_ids is None and decoder_inputs_embeds is None: + # get decoder inputs from shifting lm labels to the right + decoder_input_ids = self._shift_right(labels) + + outputs = self.t5( + input_ids=token_ids, + attention_mask=attention_mask, + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + head_mask=head_mask, + decoder_head_mask=decoder_head_mask, + cross_attn_head_mask=cross_attn_head_mask, + encoder_outputs=encoder_outputs, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + decoder_inputs_embeds=decoder_inputs_embeds, + labels=None, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + ) + + return dict(outputs)
+ + + def _shift_right(self, input_ids): + return self.t5._shift_right(input_ids) + +
+[docs] + def compute_loss(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor: + labels = labels.clone() + labels[labels == self.config.pad_token_id] = -100 + loss = self.criterion(logits.view(-1, logits.size(-1)), labels.view(-1)) + return loss
+ + +
+[docs] + def generate(self, token_ids, attention_mask=None, **kwargs): + # TODO Merge kwargs into generation config so users can control generation from kwargs + model_inputs = {"input_ids": token_ids, "attention_mask": attention_mask} + generation_kwargs = {"min_length": self.config.min_length, "max_length": self.config.max_length} + output_ids = self.t5.generate(**model_inputs, **generation_kwargs) + return output_ids
+ + +
+[docs] + def preprocess(self, inputs: str | List[str], prefix=None): + if isinstance(inputs, str): + inputs = [inputs] + prefix = prefix or self.config.input_prefix + if prefix: + inputs = [f"{prefix}{x}" for x in inputs] + tokenizer = self.preprocessor[self.tokenizer_name] + inputs = tokenizer(inputs, return_tensors="pt", device=self.device) + return inputs
+ + +
+[docs] + def post_process(self, generated_ids: torch.Tensor, **kwargs): + tokenizer = self.preprocessor[self.tokenizer_name] + decoded_outputs = tokenizer.decode(generated_ids.cpu().numpy().tolist()) + outputs = [TextGenerationOutput(text=text) for text in decoded_outputs] + return outputs
+
+ +
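T5 is prompted through a text prefix: `preprocess()` above prepends either an explicit `prefix` argument or `config.input_prefix` to every input. A usage sketch, assuming `predict` routes the `prefix` keyword through to `preprocess()`; the Hub id and the prefix string are both assumed examples:

# Sketch: prefix-style prompting for a T5 text-to-text model.
from hezar.models import Model

t5 = Model.load("hezarai/t5-base-fa")  # assumed Hub id
outputs = t5.predict("یک متن نمونه", prefix="summarize: ")  # hypothetical prefix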
\ No newline at end of file
diff --git a/_modules/hezar/models/text_generation/t5/t5_text_generation_config.html b/_modules/hezar/models/text_generation/t5/t5_text_generation_config.html
new file mode 100644
index 00000000..575e5c5f
--- /dev/null
+++ b/_modules/hezar/models/text_generation/t5/t5_text_generation_config.html
@@ -0,0 +1,524 @@

Source code for hezar.models.text_generation.t5.t5_text_generation_config

+from dataclasses import dataclass
+
+from ....configs import ModelConfig
+
+
+
+[docs]
+@dataclass
+class T5TextGenerationConfig(ModelConfig):
+    name = "t5_text_generation"
+    vocab_size: int = 32103
+    d_model: int = 768
+    d_kv: int = 64
+    d_ff: int = 2048
+    num_layers: int = 12
+    num_decoder_layers: int = 12
+    num_heads: int = 12
+    relative_attention_num_buckets: int = 32
+    relative_attention_max_distance: int = 128
+    dropout_rate: float = 0.1
+    layer_norm_epsilon: float = 1e-6
+    initializer_factor: float = 1.0
+    feed_forward_proj: str = "gated-gelu"
+    is_encoder_decoder: bool = True
+    tie_word_embeddings: bool = False
+    use_cache: bool = True
+    pad_token_id: int = 0
+    decoder_start_token_id: int = 0
+    eos_token_id: int = 1
+    min_length: int = 0
+    max_length: int = 100
+    input_prefix: str = None
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/audio_feature_extractor.html b/_modules/hezar/preprocessors/audio_feature_extractor.html
new file mode 100644
index 00000000..2a10a14c
--- /dev/null
+++ b/_modules/hezar/preprocessors/audio_feature_extractor.html
@@ -0,0 +1,841 @@

Source code for hezar.preprocessors.audio_feature_extractor

+from dataclasses import dataclass
+from typing import Mapping
+
+import numpy as np
+
+from ..builders import build_preprocessor
+from ..configs import PreprocessorConfig
+from ..constants import DEFAULT_FEATURE_EXTRACTOR_CONFIG_FILE
+from ..utils import convert_batch_dict_dtype
+from .preprocessor import Preprocessor
+
+
+
+@dataclass
+class AudioFeatureExtractorConfig(PreprocessorConfig):
+    feature_size: int = None
+    sampling_rate: int = 16000
+    padding: str = None
+    padding_value: float = 0.0
+    padding_side: str = None
+
+
+class AudioFeatureExtractor(Preprocessor):
+    """
+    Base class for all audio feature extractors.
+    """
+
+    model_input_name = "input_features"
+    config_filename = DEFAULT_FEATURE_EXTRACTOR_CONFIG_FILE
+
+    def __init__(self, config: AudioFeatureExtractorConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+
+    def __call__(self, inputs, **kwargs):
+        raise NotImplementedError
+
+    def pad(
+        self,
+        processed_features,
+        padding=None,
+        max_length=None,
+        truncation=None,
+        pad_to_multiple_of=None,
+        return_attention_mask=None,
+        return_tensors=None,
+    ):
+        """
+        Pad input values / input vectors or a batch of input values / input vectors up to predefined length or to the
+        max sequence length in the batch.
+
+        Args:
+            processed_features: Processed inputs to add padding to
+            padding: Padding strategy which can be `longest`, `max_length`, `False`
+            max_length: Max input length (Only effective if padding is `max_length` too, ignored otherwise)
+            truncation: Whether to truncate long inputs or not
+            pad_to_multiple_of: If set will pad the sequence to a multiple of the provided value.
+            return_attention_mask: Whether to return the attention mask.
+            return_tensors: Tensors return type among `pt`, `np`, `list`
+        """
+        return_attention_mask = return_attention_mask or self.config.return_attention_mask
+        padding = padding or self.config.padding
+        if isinstance(processed_features, (list, tuple)) and isinstance(processed_features[0], Mapping):
+            processed_features = {
+                key: np.array([example[key] for example in processed_features]) for key in processed_features[0].keys()
+            }
+        if self.model_input_name not in processed_features:
+            raise ValueError(
+                f"Processed inputs must have a `{self.model_input_name}` key!\n"
+                f"Provided keys: {list(processed_features.keys())}"
+            )
+
+        required_input = processed_features[self.model_input_name]
+
+        if len(required_input) == 0:
+            if return_attention_mask:
+                processed_features["attention_mask"] = []
+            return processed_features
+
+        first_element = required_input[0]
+        if isinstance(first_element, (list, tuple)):
+            # first_element might be an empty list/tuple in some edge cases so we grab the first non-empty element.
+            index = 0
+            while len(required_input[index]) == 0:
+                index += 1
+            if index < len(required_input):
+                first_element = required_input[index][0]
+
+        # processed_features = convert_batch_dict_dtype(processed_features, dtype="np")
+        padding_strategy = self._get_padding_strategy(padding=padding, max_length=max_length)
+
+        required_input = processed_features[self.model_input_name]
+
+        batch_size = len(required_input)
+        if not all(len(v) == batch_size for v in processed_features.values()):
+            raise ValueError("Some items in the output dictionary have a different batch size than others.")
+
+        truncated_inputs = []
+        for i in range(batch_size):
+            inputs = {k: v[i] for k, v in processed_features.items()}
+            # Truncate each example before padding
+            inputs_slice = self._truncate(
+                inputs,
+                max_length=max_length,
+                pad_to_multiple_of=pad_to_multiple_of,
+                truncation=truncation,
+            )
+            truncated_inputs.append(inputs_slice)
+
+        if padding_strategy == "longest":
+            # make sure that `max_length` cannot be longer than the longest truncated length
+            max_length = max(len(input_slice[self.model_input_name]) for input_slice in truncated_inputs)
+            padding_strategy = "max_length"
+
+        batch_outputs = {}
+        for i in range(batch_size):
+            # Pad each truncated example up to `max_length`
+            outputs = self._pad(
+                truncated_inputs[i],
+                max_length=max_length,
+                padding_strategy=padding_strategy,
+                pad_to_multiple_of=pad_to_multiple_of,
+                return_attention_mask=return_attention_mask,
+            )
+            for key, value in outputs.items():
+                if key not in batch_outputs:
+                    batch_outputs[key] = []
+                if value.dtype is np.dtype(np.float64):
+                    value = value.astype(np.float32)
+                batch_outputs[key].append(value)
+
+        batch_outputs = {k: np.array(v) for k, v in batch_outputs.items()}
+
+        padded_features = convert_batch_dict_dtype(batch_outputs, dtype=return_tensors)
+
+        return padded_features
+
+    def _pad(
+        self,
+        processed_features,
+        max_length=None,
+        padding_strategy=None,
+        pad_to_multiple_of=None,
+        return_attention_mask=None,
+    ) -> dict:
+        """
+        Pad inputs based on padding strategy and max length
+
+        Args:
+            processed_features: Processed inputs to add padding to
+            padding_strategy: Padding strategy which can be `longest`, `max_length`, `False`
+            max_length: Max input length (Only effective if padding is `max_length` too, ignored otherwise)
+            pad_to_multiple_of: If set will pad the sequence to a multiple of the provided value.
+            return_attention_mask: Whether to return the attention mask.
+
+        Returns:
+            Batch dict of the padded features
+        """
+        required_input = processed_features[self.model_input_name]
+
+        if padding_strategy == "longest":
+            max_length = len(required_input)
+
+        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
+            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
+
+        needs_to_be_padded = padding_strategy is not None and len(required_input) < max_length
+
+        if return_attention_mask and "attention_mask" not in processed_features:
+            processed_features["attention_mask"] = np.ones(len(required_input), dtype=np.int32)
+
+        if needs_to_be_padded:
+            difference = max_length - len(required_input)
+            if self.config.padding_side == "right":
+                if return_attention_mask:
+                    processed_features["attention_mask"] = np.pad(processed_features["attention_mask"], (0, difference))
+                padding_shape = ((0, difference), (0, 0)) if self.config.feature_size > 1 else (0, difference)
+                processed_features[self.model_input_name] = np.pad(
+                    required_input, padding_shape, "constant", constant_values=self.config.padding_value
+                )
+            elif self.config.padding_side == "left":
+                if return_attention_mask:
+                    processed_features["attention_mask"] = np.pad(processed_features["attention_mask"], (difference, 0))
+                padding_shape = ((difference, 0), (0, 0)) if self.config.feature_size > 1 else (difference, 0)
+                processed_features[self.model_input_name] = np.pad(
+                    required_input, padding_shape, "constant", constant_values=self.config.padding_value
+                )
+            else:
+                raise ValueError("Invalid padding strategy:" + str(self.config.padding_side))
+
+        return processed_features
+
+    def _get_padding_strategy(self, padding=False, max_length=None):
+        """
+        Find the correct padding strategy
+        """
+        if padding == "longest" or padding is True:
+            padding_strategy = "longest"
+            if self.config.padding_value is None:
+                raise ValueError(f"Setting padding to `{padding_strategy}`, but `config.padding_value` is `None`!")
+
+        elif padding == "max_length":
+            if max_length is None:
+                raise ValueError("Setting `padding=max_length` but leaving `max_length` as `None` is invalid!")
+            padding_strategy = "max_length"
+
+        else:
+            padding_strategy = None
+
+        return padding_strategy
+
+    def _truncate(
+        self,
+        processed_features,
+        max_length: int = None,
+        pad_to_multiple_of: int = None,
+        truncation: bool = None,
+    ):
+        """
+        Truncate inputs to predefined length or max length in the batch
+
+        Args:
+            processed_features: Dictionary of input values
+            max_length: maximum length of the returned list and optionally padding length
+            pad_to_multiple_of: Integer if set will pad the sequence to a multiple of the provided value.
+            truncation: Activates truncation to cut input sequences longer than `max_length` to `max_length`.
+        """
+        if not truncation:
+            return processed_features
+        elif truncation and max_length is None:
+            raise ValueError("When setting ``truncation=True``, make sure that ``max_length`` is defined.")
+
+        required_input = processed_features[self.model_input_name]
+
+        # find `max_length` that fits `pad_to_multiple_of`
+        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
+            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
+
+        needs_to_be_truncated = len(required_input) > max_length
+
+        if needs_to_be_truncated:
+            processed_features[self.model_input_name] = processed_features[self.model_input_name][:max_length]
+            if "attention_mask" in processed_features:
+                processed_features["attention_mask"] = processed_features["attention_mask"][:max_length]
+
+        return processed_features
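The padding arithmetic in `_pad` is plain numpy; a self-contained sketch of the right-padding branch with made-up values (pad a 1-D feature vector to a multiple of 8):

    import numpy as np

    features = np.array([0.3, -0.1, 0.7], dtype=np.float32)  # toy 1-D feature vector
    pad_to_multiple_of, padding_value = 8, 0.0
    max_length = ((len(features) // pad_to_multiple_of) + 1) * pad_to_multiple_of  # -> 8
    difference = max_length - len(features)
    padded = np.pad(features, (0, difference), "constant", constant_values=padding_value)
    attention_mask = np.pad(np.ones(len(features), dtype=np.int32), (0, difference))
    print(padded.shape, list(attention_mask))  # (8,) [1, 1, 1, 0, 0, 0, 0, 0]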
+
+    def save(
+        self,
+        path,
+        subfolder=None,
+        config_filename=None,
+    ):
+        """
+        Save the feature extractor to the path. This is normally equal to only saving the
+        `feature_extractor_config.yaml` file.
+
+        Args:
+            path: Main path to save the feature extractor files
+            subfolder: Optional subfolder, defaults to `preprocessor`
+            config_filename: Optional config file name, defaults to `feature_extractor_config.yaml`
+        """
+        subfolder = subfolder or self.preprocessor_subfolder
+        config_filename = config_filename or self.config_filename
+
+        self.config.save(path, filename=config_filename, subfolder=subfolder)
+
+    def push_to_hub(
+        self,
+        repo_id,
+        subfolder=None,
+        commit_message=None,
+        private=None,
+        config_filename=None,
+    ):
+        """
+        Push the feature extractor files to a repo on the Hub.
+
+        Args:
+            repo_id: Hub repo id
+            subfolder: Subfolder to save, defaults to `self.preprocessor_subfolder` (`preprocessor`)
+            commit_message: Commit message for the push
+            private: If the repo does not already exist, specify whether the created repo must be private or not
+            config_filename: Config filename, defaults to `self.config_filename` (`feature_extractor_config.yaml`)
+        """
+        subfolder = subfolder or self.preprocessor_subfolder
+        config_filename = config_filename or self.config_filename
+
+        if commit_message is None:
+            commit_message = "Hezar: Upload feature extractor"
+
+        self.config.push_to_hub(
+            repo_id,
+            subfolder=subfolder,
+            filename=config_filename,
+            private=private,
+            commit_message=commit_message,
+        )
+
+    @classmethod
+    def load(
+        cls,
+        hub_or_local_path,
+        subfolder: str = None,
+        config_filename: str = None,
+        cache_dir: str = None,
+        **kwargs,
+    ):
+        """
+        Load a feature extractor from the Hub or a local path.
+
+        Args:
+            hub_or_local_path: Hub repo id or local path
+            subfolder: Preprocessor subfolder path
+            config_filename: Config file name
+            cache_dir: Path to cache directory
+            **kwargs: Extra kwargs
+
+        Returns:
+            An AudioFeatureExtractor object
+        """
+        subfolder = subfolder or cls.preprocessor_subfolder
+        config_filename = config_filename or cls.config_filename
+
+        config = AudioFeatureExtractorConfig.load(
+            hub_or_local_path,
+            subfolder=subfolder,
+            filename=config_filename,
+            cache_dir=cache_dir,
+        )
+
+        feature_extractor = build_preprocessor(config.name, config=config, **kwargs)
+
+        return feature_extractor
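A short loading sketch (the repo id is a placeholder; the import assumes the class is re-exported from `hezar.preprocessors`, and concrete subclasses implement `__call__`):

    from hezar.preprocessors import AudioFeatureExtractor

    feature_extractor = AudioFeatureExtractor.load("hezarai/whisper-small-fa")  # hypothetical repo id
    print(type(feature_extractor).__name__, feature_extractor.config.sampling_rate)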
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/image_processor.html b/_modules/hezar/preprocessors/image_processor.html
new file mode 100644
index 00000000..c510e104
--- /dev/null
+++ b/_modules/hezar/preprocessors/image_processor.html
@@ -0,0 +1,761 @@

Source code for hezar.preprocessors.image_processor

+from dataclasses import dataclass, field
+from typing import Iterable, List, Tuple
+
+import numpy as np
+
+from ..builders import build_preprocessor
+from ..configs import PreprocessorConfig
+from ..constants import (
+    DEFAULT_IMAGE_PROCESSOR_CONFIG_FILE,
+    DEFAULT_PREPROCESSOR_SUBFOLDER,
+    Backends,
+    ImageType,
+)
+from ..registry import register_preprocessor
+from ..utils import (
+    convert_batch_dict_dtype,
+    convert_image_type,
+    gray_scale_image,
+    load_image,
+    mirror_image,
+    normalize_image,
+    rescale_image,
+    resize_image,
+    transpose_channels_axis_side,
+)
+from .preprocessor import Preprocessor
+
+
+# List of backends required for the image processor
+_required_backends = [
+    Backends.PILLOW,
+]
+
+_DESCRIPTION = r"""
+A general image processor to perform various image transformations in a composable/configurable pipeline.
+"""
+
+# Aliases for different image types
+_image_type_aliases = {
+    "pt": ImageType.TORCH,
+    "pytorch": ImageType.TORCH,
+    "torch": ImageType.TORCH,
+    "np": ImageType.NUMPY,
+    "numpy": ImageType.NUMPY,
+    "pil": ImageType.PILLOW,
+    "pillow": ImageType.PILLOW,
+}
+
+
+
+@dataclass
+class ImageProcessorConfig(PreprocessorConfig):
+    """
+    Configuration class for the ImageProcessor.
+    """
+    name = "image_processor"
+    mean: List[float] = None
+    std: List[float] = None
+    rescale: float = None
+    resample: int = None
+    size: Tuple[int, int] = field(
+        default=None,
+        metadata={"description": "Image size tuple (width, height)"},
+    )
+    mirror: bool = False
+    gray_scale: bool = False
+
+
+@register_preprocessor("image_processor", config_class=ImageProcessorConfig, description=_DESCRIPTION)
+class ImageProcessor(Preprocessor):
+    """
+    General image processor to perform sequential transforms on a list of images.
+    """
+
+    required_backends = _required_backends
+
+    preprocessor_subfolder = DEFAULT_PREPROCESSOR_SUBFOLDER
+    image_processor_config_file = DEFAULT_IMAGE_PROCESSOR_CONFIG_FILE
+
+    def __init__(self, config: ImageProcessorConfig, **kwargs):
+        """
+        Initializes the ImageProcessor.
+
+        Args:
+            config (ImageProcessorConfig): Configuration for the image processor.
+            **kwargs: Additional keyword arguments.
+        """
+        super().__init__(config, **kwargs)
+
+    def __call__(
+        self,
+        images: List,
+        device: str = None,
+        mean: float = None,
+        std: float = None,
+        rescale: float = None,
+        size: Tuple[int, int] = None,
+        resample: float = None,
+        mirror: bool = None,
+        gray_scale: bool = None,
+        return_tensors: str = "pt",
+        **kwargs,
+    ):
+        """
+        Perform sequential image processing on a list of input images.
+
+        Args:
+            images (List): A list of input images of types torch, numpy, pillow.
+            mean (float): Image mean value for normalization.
+            std (float): Image std value for normalization.
+            rescale (float): Scale factor for rescaling the image.
+            size (Tuple[int, int]): Image size tuple (width, height) for resizing.
+            resample (float): Resample method value based on Image.Resampling.
+            mirror (bool): Flag to mirror the images.
+            gray_scale (bool): Flag to convert images to grayscale.
+            return_tensors (str): The type of the output images.
+            **kwargs: Extra parameters.
+
+        Returns:
+            dict: Transformed images list.
+        """
+        mean = mean or self.config.mean
+        std = std or self.config.std
+        rescale = rescale or self.config.rescale
+        size = size or self.config.size
+        resample = resample or self.config.resample
+        mirror = mirror or self.config.mirror
+        gray_scale = gray_scale or self.config.gray_scale
+
+        if not isinstance(images, Iterable) or isinstance(images, str):
+            images = [images]
+
+        # Load images if inputs are list of files
+        images = [load_image(x, return_type="numpy") if isinstance(x, str) else x for x in images]
+
+        # Cast image types
+        images = [convert_image_type(image, target_type="numpy") for image in images]
+
+        # Convert to grayscale
+        if gray_scale:
+            images = [gray_scale_image(image, return_type="numpy") for image in images]
+
+        # Mirror images if mirror is set
+        if mirror:
+            images = [mirror_image(image, return_type="numpy") for image in images]
+
+        if size is not None:
+            if not isinstance(size, Iterable) or len(size) > 2:
+                raise ValueError(f"The parameter `size` must be a tuple/list of (width, height), got `{size}`")
+            images = [resize_image(image, size=size, resample=resample) for image in images]
+
+        if rescale is not None:
+            images = [rescale_image(image, scale=rescale) for image in images]
+
+        if mean is not None and std is not None:
+            images = [normalize_image(image, mean=mean, std=std, channel_axis="last") for image in images]
+
+        # Transpose channels axis
+        images = [transpose_channels_axis_side(image, axis_side="first") for image in images]
+
+        # Return images batch dict
+        images = np.array([convert_image_type(image, target_type="numpy") for image in images], dtype=np.float32)
+
+        images = convert_batch_dict_dtype({"pixel_values": images}, dtype=return_tensors)
+
+        if device:
+            import torch
+
+            images = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in images.items()}
+
+        return images
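A minimal end-to-end sketch of the transform pipeline above (the imports assume these classes are re-exported from `hezar.preprocessors`; the normalization constants are arbitrary illustration values):

    import numpy as np
    from hezar.preprocessors import ImageProcessor, ImageProcessorConfig

    config = ImageProcessorConfig(size=(32, 32), rescale=1 / 255.0, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    processor = ImageProcessor(config)
    image = np.random.randint(0, 256, size=(64, 48, 3), dtype=np.uint8)  # fake HxWxC image
    batch = processor([image], return_tensors="np")
    print(batch["pixel_values"].shape)  # (1, 3, 32, 32): resized, rescaled, normalized, channels-first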
+
+    @classmethod
+    def load(
+        cls,
+        hub_or_local_path,
+        subfolder: str = None,
+        force_return_dict: bool = False,
+        config_filename: str = None,
+        cache_dir: str = None,
+        **kwargs,
+    ) -> "ImageProcessor":
+        """
+        Load an ImageProcessor from a specified path.
+
+        Args:
+            hub_or_local_path: Path to the hub repo or local directory.
+            subfolder (str): Subfolder within the specified path.
+            force_return_dict (bool): Flag to force return as a dictionary.
+            config_filename (str): Configuration filename.
+            cache_dir: Path to cache directory.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            ImageProcessor: Loaded image processor instance.
+        """
+        subfolder = subfolder or cls.preprocessor_subfolder
+        config_filename = config_filename or cls.image_processor_config_file
+        config = ImageProcessorConfig.load(
+            hub_or_local_path,
+            filename=config_filename,
+            subfolder=subfolder,
+            cache_dir=cache_dir,
+        )
+        image_processor = build_preprocessor(config.name, config, **kwargs)
+        return image_processor
+
+    def save(
+        self,
+        path,
+        subfolder=None,
+        config_filename=None,
+    ):
+        """
+        Save the ImageProcessor configuration.
+
+        Args:
+            path: Path to save the configuration.
+            subfolder (str): Subfolder within the specified path.
+            config_filename (str): Configuration filename.
+        """
+        subfolder = subfolder or self.preprocessor_subfolder
+        config_filename = config_filename or self.image_processor_config_file
+        self.config.save(path, subfolder=subfolder, filename=config_filename)
+
+    def push_to_hub(
+        self,
+        repo_id,
+        subfolder=None,
+        commit_message=None,
+        private=None,
+        config_filename=None,
+    ):
+        """
+        Push the ImageProcessor configuration to the hub.
+
+        Args:
+            repo_id: ID of the repository.
+            subfolder (str): Subfolder within the repository.
+            commit_message (str): Commit message.
+            private (bool): Flag indicating whether the repository is private.
+            config_filename (str): Configuration filename.
+        """
+        subfolder = subfolder or self.preprocessor_subfolder
+        config_filename = config_filename or self.image_processor_config_file
+
+        if commit_message is None:
+            commit_message = "Hezar: Upload image processor files"
+
+        self.config.push_to_hub(
+            repo_id,
+            subfolder=subfolder,
+            filename=config_filename,
+            private=private,
+            commit_message=commit_message,
+        )
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/preprocessor.html b/_modules/hezar/preprocessors/preprocessor.html
new file mode 100644
index 00000000..46bbbee9
--- /dev/null
+++ b/_modules/hezar/preprocessors/preprocessor.html
@@ -0,0 +1,660 @@

Source code for hezar.preprocessors.preprocessor

+from __future__ import annotations
+
+import os
+from collections import OrderedDict
+from typing import List
+
+from huggingface_hub import hf_hub_download
+from omegaconf import OmegaConf
+
+from ..configs import PreprocessorConfig
+from ..constants import DEFAULT_PREPROCESSOR_SUBFOLDER, HEZAR_CACHE_DIR, Backends, RegistryType, RepoType
+from ..utils import get_module_class, list_repo_files, verify_dependencies
+
+
+
+class Preprocessor:
+    """
+    Base class for all data preprocessors.
+
+    Args:
+        config: Preprocessor properties
+    """
+
+    required_backends: List[str | Backends] = []
+
+    preprocessor_subfolder = DEFAULT_PREPROCESSOR_SUBFOLDER
+
+    def __init__(self, config: PreprocessorConfig, **kwargs):
+        verify_dependencies(self, self.required_backends)  # Check if all the required dependencies are installed
+
+        self.config = config.update(kwargs)
+
+    def __call__(self, inputs, **kwargs):
+        """
+        An abstract call method for a preprocessor. All preprocessors must implement this.
+
+        Args:
+            inputs: Raw inputs to process. Usually a list or a dict
+            **kwargs: Extra keyword arguments depending on the preprocessor
+        """
+        raise NotImplementedError
+
+    def save(self, path, **kwargs):
+        raise NotImplementedError
+
+    def push_to_hub(
+        self,
+        repo_id,
+        subfolder=None,
+        commit_message=None,
+        private=None,
+        **kwargs,
+    ):
+        raise NotImplementedError
+
+    @classmethod
+    def load(
+        cls,
+        hub_or_local_path,
+        subfolder: str = None,
+        force_return_dict: bool = False,
+        cache_dir: str = None,
+        **kwargs,
+    ):
+        """
+        Load a preprocessor or a pipeline of preprocessors from a local or Hub path. This method automatically detects
+        any preprocessor in the path. If there's only one preprocessor, it is returned directly; if there are more, a
+        dictionary of preprocessors is returned.
+
+        This method must also be overridden by subclasses, as it is called internally for every possible
+        preprocessor found in the repo.
+
+        Args:
+            hub_or_local_path: Path to hub or local repo
+            subfolder: Subfolder for the preprocessor.
+            force_return_dict: Whether to return a dict even if there's only one preprocessor available on the repo
+            cache_dir: Path to cache directory
+            **kwargs: Extra kwargs
+
+        Returns:
+            A Preprocessor subclass instance or a dict of Preprocessor subclass instances
+        """
+        subfolder = subfolder or cls.preprocessor_subfolder
+        cache_dir = cache_dir or HEZAR_CACHE_DIR
+        preprocessor_files = list_repo_files(hub_or_local_path, subfolder=subfolder)
+        preprocessors = PreprocessorsContainer()
+        for f in preprocessor_files:
+            if f.endswith(".yaml"):
+                if os.path.isdir(hub_or_local_path):
+                    config_file = os.path.join(hub_or_local_path, subfolder, f)
+                else:
+                    config_file = hf_hub_download(
+                        hub_or_local_path,
+                        filename=f,
+                        subfolder=subfolder,
+                        repo_type=RepoType.MODEL,
+                        cache_dir=cache_dir,
+                    )
+                config = OmegaConf.load(config_file)
+                name = config.get("name", None)
+                if name:
+                    preprocessor_cls = get_module_class(name, registry_type=RegistryType.PREPROCESSOR)
+                    preprocessor = preprocessor_cls.load(hub_or_local_path, subfolder=subfolder, cache_dir=cache_dir)
+                    preprocessors[name] = preprocessor
+                else:
+                    raise ValueError(f"The config file `{config_file}` does not have the property `name`!")
+        if len(preprocessors) == 1 and not force_return_dict:
+            return list(preprocessors.values())[0]
+
+        return preprocessors
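To make the single-vs-many behavior above concrete, a short usage sketch (the repo id is a placeholder; the import assumes `Preprocessor` is re-exported from `hezar.preprocessors`):

    from hezar.preprocessors import Preprocessor

    # Hypothetical repo id. A repo shipping several preprocessor configs under its
    # `preprocessor/` subfolder yields a PreprocessorsContainer keyed by preprocessor name.
    preprocessors = Preprocessor.load("hezarai/bert-base-fa", force_return_dict=True)
    print(list(preprocessors.keys()))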
+
+
+class PreprocessorsContainer(OrderedDict):
+    """
+    A class to hold the preprocessors by their name
+    """
+
+    def __getattr__(self, item):
+        """
+        Override this method to be able to access preprocessors as attributes
+
+        Examples:
+            >>> preprocessor["text_normalizer"] is preprocessor.text_normalizer  # noqa
+            ... True
+        """
+        if item in self:
+            return self[item]
+        else:
+            # Fall back to regular attribute lookup (and return it, so missing attributes raise as expected)
+            return super().__getattribute__(item)
+
+    def save(self, path):
+        """
+        Save every preprocessor item in the container
+        """
+        for name, preprocessor in self.items():
+            preprocessor.save(path)
+
+    def push_to_hub(
+        self,
+        repo_id,
+        subfolder=None,
+        commit_message=None,
+        private=None,
+    ):
+        """
+        Push every preprocessor item in the container
+        """
+        for name, preprocessor in self.items():
+            preprocessor.push_to_hub(repo_id, subfolder=subfolder, commit_message=commit_message, private=private)
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/text_normalizer.html b/_modules/hezar/preprocessors/text_normalizer.html
new file mode 100644
index 00000000..0d913b76
--- /dev/null
+++ b/_modules/hezar/preprocessors/text_normalizer.html
@@ -0,0 +1,655 @@

Source code for hezar.preprocessors.text_normalizer

+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import Dict, List, Mapping, Tuple
+
+from ..builders import build_preprocessor
+from ..configs import PreprocessorConfig
+from ..constants import (
+    DEFAULT_NORMALIZER_CONFIG_FILE,
+    DEFAULT_PREPROCESSOR_SUBFOLDER,
+    Backends,
+)
+from ..registry import register_preprocessor
+from ..utils import Logger, is_backend_available
+from .preprocessor import Preprocessor
+
+
+if is_backend_available(Backends.TOKENIZERS):
+    from tokenizers import Regex, normalizers
+
+_required_backends = [
+    Backends.TOKENIZERS,
+]
+
+logger = Logger(__name__)
+
+
+
+@dataclass
+class TextNormalizerConfig(PreprocessorConfig):
+    name = "text_normalizer"
+    replace_patterns: List[Tuple[str, str]] | List[List[str]] | List[Dict[str, List]] = None
+    nfkd: bool = True
+    nfkc: bool = True
+
+    def __post_init__(self):
+        # Flatten a mapping of pattern lists in replace_patterns to a flat list of (src, trg) pairs
+        if self.replace_patterns is not None and len(self.replace_patterns):
+            if isinstance(self.replace_patterns, Mapping):
+                patterns = []
+                for v in self.replace_patterns.values():
+                    patterns += v
+                self.replace_patterns = patterns
+
+
+@register_preprocessor("text_normalizer", config_class=TextNormalizerConfig)
+class TextNormalizer(Preprocessor):
+    """
+    A simple configurable text normalizer
+    """
+
+    required_backends = _required_backends
+
+    preprocessor_subfolder = DEFAULT_PREPROCESSOR_SUBFOLDER
+    normalizer_config_file = DEFAULT_NORMALIZER_CONFIG_FILE
+
+    def __init__(self, config: TextNormalizerConfig, **kwargs):
+        super().__init__(config, **kwargs)
+
+    def __call__(
+        self,
+        inputs: str | List[str],
+        replace_patterns: List[Tuple[str, str]] | List[List[str]] = None,
+        nfkd: bool = None,
+        nfkc: bool = None,
+        **kwargs,
+    ):
+        if isinstance(inputs, str):
+            inputs = [inputs]
+
+        nfkd = nfkd or self.config.nfkd
+        nfkc = nfkc or self.config.nfkc
+        replace_patterns = replace_patterns or self.config.replace_patterns
+
+        if nfkd:
+            inputs = [normalizers.NFKD().normalize_str(x) for x in inputs]
+        if nfkc:
+            inputs = [normalizers.NFKC().normalize_str(x) for x in inputs]
+
+        if replace_patterns is not None:
+            # Use the resolved `replace_patterns` (argument or config fallback), not the config value only
+            replacer = normalizers.Sequence(
+                [normalizers.Replace(Regex(src), trg) for src, trg in replace_patterns]  # noqa
+            )
+            inputs = [replacer.normalize_str(x) for x in inputs]
+
+        return inputs
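A self-contained usage sketch (the import assumes these classes are re-exported from `hezar.preprocessors`; the replace patterns map Arabic-script characters to their Persian forms):

    from hezar.preprocessors import TextNormalizer, TextNormalizerConfig

    normalizer = TextNormalizer(TextNormalizerConfig(replace_patterns=[("ك", "ک"), ("ي", "ی")]))
    print(normalizer("متن تستي با حروف عربي"))  # returns a list with the normalized string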
+
+    @classmethod
+    def load(
+        cls,
+        hub_or_local_path,
+        subfolder=None,
+        config_filename=None,
+        cache_dir=None,
+        **kwargs,
+    ) -> "TextNormalizer":
+        config_filename = config_filename or cls.normalizer_config_file
+        subfolder = subfolder or cls.preprocessor_subfolder
+        config = TextNormalizerConfig.load(
+            hub_or_local_path,
+            filename=config_filename,
+            subfolder=subfolder,
+            cache_dir=cache_dir,
+        )
+        normalizer = build_preprocessor(config.name, config, **kwargs)
+        return normalizer
+
+    def push_to_hub(
+        self,
+        repo_id,
+        commit_message: str = None,
+        subfolder: str = None,
+        config_filename: str = None,
+        private: bool = None,
+    ):
+        """
+        Push normalizer config and other optional files to the Hub.
+
+        Args:
+            repo_id: Repo id on the Hub
+            commit_message: Commit message
+            subfolder: Optional subfolder for the normalizer
+            config_filename: Optional normalizer config filename
+            private: Whether to create a private repo if it does not exist already
+        """
+        subfolder = subfolder or self.preprocessor_subfolder
+        config_filename = config_filename or self.normalizer_config_file
+
+        if commit_message is None:
+            commit_message = "Hezar: Upload normalizer"
+
+        # upload config
+        self.config.push_to_hub(
+            repo_id=repo_id,
+            filename=config_filename,
+            subfolder=subfolder,
+            commit_message=commit_message,
+        )
+
+    def save(
+        self,
+        path,
+        subfolder=None,
+        config_filename=None,
+    ):
+        config_filename = config_filename or self.normalizer_config_file
+        subfolder = subfolder or self.preprocessor_subfolder
+        os.makedirs(path, exist_ok=True)
+        self.config.save(path, filename=config_filename, subfolder=subfolder)
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/tokenizers/bpe.html b/_modules/hezar/preprocessors/tokenizers/bpe.html
new file mode 100644
index 00000000..a6f3ab45
--- /dev/null
+++ b/_modules/hezar/preprocessors/tokenizers/bpe.html
@@ -0,0 +1,616 @@

Source code for hezar.preprocessors.tokenizers.bpe

+from dataclasses import dataclass, field
+from typing import List
+
+from ...constants import DEFAULT_TOKENIZER_CONFIG_FILE, DEFAULT_TOKENIZER_FILE, Backends
+from ...registry import register_preprocessor
+from ...utils import is_backend_available
+from .tokenizer import Tokenizer, TokenizerConfig
+
+
+if is_backend_available(Backends.TOKENIZERS):
+    from tokenizers import Tokenizer as HFTokenizer
+    from tokenizers import decoders, models, pre_tokenizers, processors, trainers
+
+_required_backends = [
+    Backends.TOKENIZERS,
+]
+
+
+
+@dataclass
+class BPEConfig(TokenizerConfig):
+    name = "bpe_tokenizer"
+    max_length: int = 512
+    truncation_strategy: str = "longest_first"
+    truncation_direction: str = "right"
+    stride: int = 0
+    padding_strategy: str = "longest"
+    padding_direction: str = "right"
+    pad_to_multiple_of: int = 0
+    bos_token: str = "<s>"
+    eos_token: str = "</s>"
+    unk_token: str = "<unk>"
+    sep_token: str = "<sep>"
+    pad_token: str = "<pad>"
+    cls_token: str = "<cls>"
+    mask_token: str = "<mask>"
+    additional_special_tokens: List[str] = None
+    dropout: float = None
+    continuing_subword_prefix: str = ""
+    end_of_word_suffix: str = ""
+    fuse_unk: bool = False
+    vocab_size: int = 30000
+    min_frequency: int = 2
+    limit_alphabet: int = 1000
+    initial_alphabet: list = field(default_factory=list)
+    show_progress: bool = True
+
+
+@register_preprocessor("bpe_tokenizer", config_class=BPEConfig)
+class BPETokenizer(Tokenizer):
+    """
+    A standard Byte-level BPE tokenizer using 🤗HuggingFace Tokenizers
+
+    Args:
+        config: Preprocessor config for the tokenizer
+        **kwargs: Extra/manual config parameters
+    """
+
+    required_backends = _required_backends
+
+    tokenizer_filename = DEFAULT_TOKENIZER_FILE
+    tokenizer_config_filename = DEFAULT_TOKENIZER_CONFIG_FILE
+    token_ids_name = "token_ids"
+
+    def __init__(self, config, tokenizer_file=None, **kwargs):
+        super().__init__(config, tokenizer_file=tokenizer_file, **kwargs)
+
+    def build(self):
+        tokenizer = HFTokenizer(
+            models.BPE(
+                dropout=self.config.dropout,
+                unk_token=self.config.unk_token,
+                continuing_subword_prefix=self.config.continuing_subword_prefix,
+                end_of_word_suffix=self.config.end_of_word_suffix,
+                fuse_unk=self.config.fuse_unk,
+            )
+        )
+        tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)  # noqa
+        tokenizer.decoder = decoders.ByteLevel()  # noqa (assigned twice in the original source; once is enough)
+        tokenizer.post_processor = processors.ByteLevel(trim_offsets=False)  # noqa
+
+        return tokenizer
+
+    def train(self, files: List[str], **train_kwargs):
+        """Train the model using the given files"""
+        self.config.update(train_kwargs)
+
+        trainer = trainers.BpeTrainer(
+            vocab_size=self.config.vocab_size,  # noqa
+            min_frequency=self.config.min_frequency,  # noqa
+            show_progress=self.config.show_progress,  # noqa
+            special_tokens=self.config.special_tokens,  # noqa
+            initial_alphabet=pre_tokenizers.ByteLevel.alphabet(),  # noqa
+        )
+        if isinstance(files, str):
+            files = [files]
+        self._tokenizer.train(files, trainer=trainer)
+
+    def train_from_iterator(self, dataset: List[str], **train_kwargs):
+        """Train the model using the given dataset (an iterable of raw text strings)"""
+        self.config.update(train_kwargs)
+
+        trainer = trainers.BpeTrainer(
+            vocab_size=self.config.vocab_size,  # noqa
+            min_frequency=self.config.min_frequency,  # noqa
+            show_progress=self.config.show_progress,  # noqa
+            special_tokens=self.config.special_tokens,  # noqa
+            initial_alphabet=pre_tokenizers.ByteLevel.alphabet(),  # noqa
+        )
+        self._tokenizer.train_from_iterator(dataset, trainer=trainer, length=len(dataset))
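Independent of Hezar, the same training flow can be reproduced with the underlying 🤗 tokenizers primitives that this class wraps (the corpus lines are made up):

    from tokenizers import Tokenizer, models, pre_tokenizers, trainers

    tok = Tokenizer(models.BPE(unk_token="<unk>"))
    tok.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
    trainer = trainers.BpeTrainer(
        vocab_size=1000,
        min_frequency=2,
        special_tokens=["<pad>", "<unk>", "<s>", "</s>"],
        initial_alphabet=pre_tokenizers.ByteLevel.alphabet(),
    )
    corpus = ["a tiny corpus line", "another tiny corpus line"]
    tok.train_from_iterator(corpus, trainer=trainer, length=len(corpus))
    print(tok.encode("tiny corpus").tokens)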
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/tokenizers/sentencepiece_bpe.html b/_modules/hezar/preprocessors/tokenizers/sentencepiece_bpe.html
new file mode 100644
index 00000000..f1d7f693
--- /dev/null
+++ b/_modules/hezar/preprocessors/tokenizers/sentencepiece_bpe.html
@@ -0,0 +1,621 @@

Source code for hezar.preprocessors.tokenizers.sentencepiece_bpe

+from dataclasses import dataclass, field
+from typing import List
+
+from ...constants import DEFAULT_TOKENIZER_CONFIG_FILE, DEFAULT_TOKENIZER_FILE, Backends
+from ...registry import register_preprocessor
+from ...utils import is_backend_available
+from .tokenizer import Tokenizer, TokenizerConfig
+
+
+if is_backend_available(Backends.TOKENIZERS):
+    from tokenizers import Tokenizer as HFTokenizer
+    from tokenizers import decoders, models, normalizers, pre_tokenizers, trainers
+
+_required_backends = [
+    Backends.TOKENIZERS,
+]
+
+
+
+@dataclass
+class SentencePieceBPEConfig(TokenizerConfig):
+    name = "sentencepiece_bpe_tokenizer"
+    max_length: int = 512
+    truncation_strategy: str = "longest_first"
+    truncation_direction: str = "right"
+    stride: int = 0
+    padding_strategy: str = "longest"
+    padding_direction: str = "right"
+    bos_token: str = "<s>"
+    eos_token: str = "</s>"
+    unk_token: str = "<unk>"
+    sep_token: str = "<sep>"
+    pad_token: str = "<pad>"
+    cls_token: str = "<cls>"
+    mask_token: str = "<mask>"
+    additional_special_tokens: List[str] = None
+    pad_to_multiple_of: int = 0
+    dropout: float = None
+    continuing_subword_prefix: str = ""
+    replacement: str = "_"
+    add_prefix_space: bool = True
+    end_of_word_suffix: str = ""
+    fuse_unk: bool = False
+    vocab_size: int = 30000
+    min_frequency: int = 2
+    limit_alphabet: int = 1000
+    initial_alphabet: list = field(default_factory=list)
+    show_progress: bool = True
+
+
+@register_preprocessor("sentencepiece_bpe_tokenizer", config_class=SentencePieceBPEConfig)
+class SentencePieceBPETokenizer(Tokenizer):
+    """
+    A standard SentencePiece BPE tokenizer using 🤗HuggingFace Tokenizers
+
+    Args:
+        config: Preprocessor config for the tokenizer
+        **kwargs: Extra/manual config parameters
+    """
+
+    required_backends = _required_backends
+
+    tokenizer_filename = DEFAULT_TOKENIZER_FILE
+    tokenizer_config_filename = DEFAULT_TOKENIZER_CONFIG_FILE
+    token_ids_name = "token_ids"
+
+    def __init__(self, config, tokenizer_file=None, **kwargs):
+        super().__init__(config, tokenizer_file=tokenizer_file, **kwargs)
+
+    def build(self):
+        tokenizer = HFTokenizer(
+            models.BPE(
+                dropout=self.config.dropout,
+                unk_token=self.config.unk_token,
+                continuing_subword_prefix=self.config.continuing_subword_prefix,
+                end_of_word_suffix=self.config.end_of_word_suffix,
+                fuse_unk=self.config.fuse_unk,
+            )
+        )
+        tokenizer.normalizer = normalizers.NFKC()  # noqa
+        tokenizer.pre_tokenizer = pre_tokenizers.Metaspace(  # noqa
+            replacement=self.config.replacement, add_prefix_space=self.config.add_prefix_space
+        )
+        tokenizer.decoder = decoders.Metaspace(  # noqa
+            replacement=self.config.replacement, add_prefix_space=self.config.add_prefix_space
+        )
+
+        return tokenizer
+
+    def train(self, files: List[str], **train_kwargs):
+        """Train the model using the given files"""
+        self.config.update(train_kwargs)
+
+        trainer = trainers.BpeTrainer(
+            vocab_size=self.config.vocab_size,  # noqa
+            min_frequency=self.config.min_frequency,  # noqa
+            show_progress=self.config.show_progress,  # noqa
+            special_tokens=self.config.special_tokens,  # noqa
+            initial_alphabet=self.config.initial_alphabet,  # noqa
+        )
+        if isinstance(files, str):
+            files = [files]
+        self._tokenizer.train(files, trainer=trainer)
+
+    def train_from_iterator(self, dataset: List[str], **train_kwargs):
+        """Train the model using the given dataset (an iterable of raw text strings)"""
+        self.config.update(train_kwargs)
+
+        trainer = trainers.BpeTrainer(
+            vocab_size=self.config.vocab_size,  # noqa
+            min_frequency=self.config.min_frequency,  # noqa
+            show_progress=self.config.show_progress,  # noqa
+            special_tokens=self.config.special_tokens,  # noqa
+            initial_alphabet=self.config.initial_alphabet,  # noqa
+        )
+        self._tokenizer.train_from_iterator(dataset, trainer=trainer, length=len(dataset))
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/tokenizers/sentencepiece_unigram.html b/_modules/hezar/preprocessors/tokenizers/sentencepiece_unigram.html
new file mode 100644
index 00000000..df0aadae
--- /dev/null
+++ b/_modules/hezar/preprocessors/tokenizers/sentencepiece_unigram.html
@@ -0,0 +1,619 @@

Source code for hezar.preprocessors.tokenizers.sentencepiece_unigram

+from dataclasses import dataclass, field
+from typing import List
+
+from ...constants import DEFAULT_TOKENIZER_CONFIG_FILE, DEFAULT_TOKENIZER_FILE, Backends
+from ...registry import register_preprocessor
+from ...utils import is_backend_available
+from .tokenizer import Tokenizer, TokenizerConfig
+
+
+if is_backend_available(Backends.TOKENIZERS):
+    from tokenizers import Tokenizer as HFTokenizer
+    from tokenizers import Regex, decoders, models, normalizers, pre_tokenizers, trainers
+
+_required_backends = [
+    Backends.TOKENIZERS,
+]
+
+
+
+@dataclass
+class SentencePieceUnigramConfig(TokenizerConfig):
+    name = "sentencepiece_unigram_tokenizer"
+    max_length: int = 512
+    truncation_strategy: str = "longest_first"
+    truncation_direction: str = "right"
+    stride: int = 0
+    padding_strategy: str = "longest"
+    padding_direction: str = "right"
+    bos_token: str = "<s>"
+    eos_token: str = "</s>"
+    unk_token: str = "<unk>"
+    sep_token: str = "<sep>"
+    pad_token: str = "<pad>"
+    cls_token: str = "<cls>"
+    mask_token: str = "<mask>"
+    additional_special_tokens: List[str] = None
+    pad_to_multiple_of: int = 0
+    dropout: float = None
+    continuing_subword_prefix: str = ""
+    replacement: str = "_"
+    add_prefix_space: bool = True
+    end_of_word_suffix: str = ""
+    fuse_unk: bool = False
+    vocab_size: int = 8000
+    min_frequency: int = 2
+    limit_alphabet: int = 1000
+    initial_alphabet: list = field(default_factory=list)
+    show_progress: bool = True
+
+
+@register_preprocessor("sentencepiece_unigram_tokenizer", config_class=SentencePieceUnigramConfig)
+class SentencePieceUnigramTokenizer(Tokenizer):
+    """
+    A standard SentencePiece Unigram tokenizer using 🤗HuggingFace Tokenizers
+
+    Args:
+        config: Preprocessor config for the tokenizer
+        **kwargs: Extra/manual config parameters
+    """
+
+    required_backends = _required_backends
+
+    tokenizer_filename = DEFAULT_TOKENIZER_FILE
+    tokenizer_config_filename = DEFAULT_TOKENIZER_CONFIG_FILE
+    token_ids_name = "token_ids"
+
+    def __init__(self, config, tokenizer_file=None, **kwargs):
+        super().__init__(config, tokenizer_file=tokenizer_file, **kwargs)
+
+    def build(self):
+        tokenizer = HFTokenizer(models.Unigram())  # noqa
+        tokenizer.normalizer = normalizers.Sequence(  # noqa
+            [
+                normalizers.Nmt(),
+                normalizers.NFKC(),
+                normalizers.Replace(Regex(" {2,}"), " "),
+            ]
+        )
+        tokenizer.pre_tokenizer = pre_tokenizers.Metaspace(  # noqa
+            replacement=self.config.replacement, add_prefix_space=self.config.add_prefix_space
+        )
+        tokenizer.decoder = decoders.Metaspace(  # noqa
+            replacement=self.config.replacement, add_prefix_space=self.config.add_prefix_space
+        )
+
+        return tokenizer
+
+    def train(self, files: List[str], **train_kwargs):
+        """Train the model using the given files"""
+        self.config.update(train_kwargs)
+
+        trainer = trainers.UnigramTrainer(
+            vocab_size=self.config.vocab_size,  # noqa
+            special_tokens=self.config.special_tokens,  # noqa
+            show_progress=self.config.show_progress,  # noqa
+            initial_alphabet=self.config.initial_alphabet,  # noqa
+            unk_token=self.config.unk_token,  # noqa
+        )
+        if isinstance(files, str):
+            files = [files]
+        self._tokenizer.train(files, trainer=trainer)
+
+    def train_from_iterator(self, dataset: List[str], **train_kwargs):
+        """Train the model using the given dataset (an iterable of raw text strings)"""
+        self.config.update(train_kwargs)
+
+        trainer = trainers.UnigramTrainer(
+            vocab_size=self.config.vocab_size,  # noqa
+            special_tokens=self.config.special_tokens,  # noqa
+            show_progress=self.config.show_progress,  # noqa
+            initial_alphabet=self.config.initial_alphabet,  # noqa
+            unk_token=self.config.unk_token,  # noqa
+        )
+        self._tokenizer.train_from_iterator(dataset, trainer=trainer, length=len(dataset))
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/tokenizers/tokenizer.html b/_modules/hezar/preprocessors/tokenizers/tokenizer.html
new file mode 100644
index 00000000..85172023
--- /dev/null
+++ b/_modules/hezar/preprocessors/tokenizers/tokenizer.html
@@ -0,0 +1,1338 @@

Source code for hezar.preprocessors.tokenizers.tokenizer

+from __future__ import annotations
+
+import os
+import tempfile
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Dict, List, Mapping, Optional, Tuple
+
+import numpy as np
+import torch
+from huggingface_hub import create_repo, hf_hub_download, upload_file
+
+from ...builders import build_preprocessor
+from ...configs import PreprocessorConfig
+from ...constants import (
+    DEFAULT_TOKENIZER_CONFIG_FILE,
+    DEFAULT_TOKENIZER_FILE,
+    HEZAR_CACHE_DIR,
+    Backends,
+    PaddingType,
+)
+from ...utils import Logger, convert_batch_dict_dtype, is_backend_available, pad_batch_items
+from ..preprocessor import Preprocessor
+
+
+if is_backend_available(Backends.TOKENIZERS):
+    from tokenizers import Tokenizer as HFTokenizer
+    from tokenizers.decoders import Decoder
+    from tokenizers.models import Model
+
+logger = Logger(__name__)
+
+
+
+@dataclass
+class TokenizerConfig(PreprocessorConfig):
+    """
+    Configuration for the Tokenizer.
+
+    Args:
+        max_length (int): Maximum length of the tokenized sequences.
+        truncation_strategy (str): Truncation strategy for tokenization.
+        truncation_direction (str): Truncation direction for tokenization.
+        stride (int): Stride for tokenization.
+        padding_strategy (str): Padding strategy for tokenization.
+        padding_direction (str): Padding direction for tokenization.
+        pad_to_multiple_of (int): Pad to a multiple of this value.
+        pad_token_type_id (int): ID of the padding token type.
+        bos_token (str): Beginning of sequence token.
+        eos_token (str): End of sequence token.
+        unk_token (str): Unknown token.
+        sep_token (str): Separator token.
+        pad_token (str): Padding token.
+        cls_token (str): Classification token.
+        mask_token (str): Mask token.
+        additional_special_tokens (List[str]): Additional special tokens.
+    """
+
+    name = "tokenizer"
+    max_length: int = None
+    truncation_strategy: str = None
+    truncation_direction: str = None
+    stride: int = None
+    padding_strategy: str = None
+    padding_direction: str = None
+    pad_to_multiple_of: int = None
+    pad_token_type_id: int = 0
+    bos_token: str = None
+    eos_token: str = None
+    unk_token: str = None
+    sep_token: str = None
+    pad_token: str = None
+    cls_token: str = None
+    mask_token: str = None
+    additional_special_tokens: List[str] = None
+
+
+class Tokenizer(Preprocessor):
+    """
+    Base tokenizer class. Mostly copied from :class:`~tokenizers.implementations.BaseTokenizer`.
+
+    Args:
+        config: A TokenizerConfig instance.
+        tokenizer_file (str): A tokenizer.json file to load the whole tokenizer from.
+        **kwargs: Extra config parameters that merge into the main config.
+    """
+
+    required_backends: List[str | Backends] = []
+
+    tokenizer_filename = DEFAULT_TOKENIZER_FILE
+    tokenizer_config_filename = DEFAULT_TOKENIZER_CONFIG_FILE
+    token_ids_name = "token_ids"
+    uncastable_keys = ["word_ids", "tokens", "offsets_mapping"]
+
+    def __init__(self, config: TokenizerConfig, tokenizer_file=None, **kwargs):
+        super().__init__(config, **kwargs)
+        self._tokenizer = self.from_file(tokenizer_file) if tokenizer_file is not None else self.build()
+        self.special_tokens = self._get_all_special_tokens()
+
+    def _get_all_special_tokens(self):
+        """
+        Get a list of all special tokens.
+
+        Returns:
+            List[str]: List of special tokens.
+        """
+        _special_tokens = [
+            self.config.bos_token,
+            self.config.eos_token,
+            self.config.unk_token,
+            self.config.sep_token,
+            self.config.pad_token,
+            self.config.cls_token,
+            self.config.mask_token,
+        ]
+        _special_tokens = [token for token in _special_tokens if token in self.vocab]
+
+        if self.config.additional_special_tokens is not None:
+            for token in self.config.additional_special_tokens:
+                if token not in _special_tokens:
+                    _special_tokens.append(token)
+
+        valid_tokens = [token for token in _special_tokens if token is not None]
+        return valid_tokens
+
+    @staticmethod
+    def from_file(path):
+        """
+        Create a tokenizer from a file.
+
+        Args:
+            path (str): Path to the tokenizer file.
+
+        Returns:
+            HFTokenizer: The created tokenizer.
+        """
+        tokenizer = HFTokenizer.from_file(path)
+        return tokenizer
+
+    def build(self):
+        """
+        Build the tokenizer.
+
+        Returns:
+            HFTokenizer: The built tokenizer.
+        """
+        raise NotImplementedError
+
+    def encode(self, inputs, is_pretokenized: bool = False, add_special_tokens: bool = True, **kwargs):
+        """
+        Tokenize a list of inputs (could be raw or tokenized inputs).
+
+        Args:
+            inputs: List of inputs.
+            is_pretokenized: Whether the inputs are already tokenized.
+            add_special_tokens: Whether to add special tokens to the inputs. Defaults to True.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            List[Dict]: List of dictionaries containing tokenized inputs.
+        """
+        if isinstance(inputs, str):
+            inputs = [inputs]
+        elif isinstance(inputs, list) and is_pretokenized:
+            if isinstance(inputs[0], str):
+                inputs = [inputs]
+        return self._tokenizer.encode_batch(inputs, is_pretokenized, add_special_tokens)
+
+    def decode(self, ids: List[int], skip_special_tokens: bool = True, **kwargs):
+        """
+        Decode a list of token IDs.
+
+        Args:
+            ids (List[int]): List of token IDs.
+            skip_special_tokens (bool): Whether to skip special tokens during decoding.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            List[str]: List of decoded strings.
+        """
+        if isinstance(ids[0], int):
+            ids = [ids]
+        if isinstance(ids, torch.Tensor):
+            ids = ids.cpu().numpy().tolist()
+        if isinstance(ids, np.ndarray):
+            ids = ids.tolist()
+        return self._tokenizer.decode_batch(ids, skip_special_tokens=skip_special_tokens)
+
+    def pad_encoded_batch(
+        self,
+        inputs,
+        padding: str | PaddingType = None,
+        max_length: Optional[int] = None,
+        truncation: bool = True,
+        return_tensors: Optional[str] = None,
+        include_keys: Optional[List[str]] = None,
+        exclude_keys: List = None,
+    ):
+        """
+        Pad a batch of encoded inputs.
+
+        Args:
+            inputs: Input batch of encoded tokens.
+            padding (str | PaddingType): Padding type.
+            max_length (Optional[int]): Max input length (only if padding is set to "max_length").
+            truncation (bool): Whether to allow truncation.
+            return_tensors (Optional[str]): The type of tensors to return.
+            include_keys (Optional[List[str]]): Only pad this given set of keys.
+            exclude_keys (List): A list of keys to exclude when padding.
+
+        Returns:
+            Dict: Padded inputs.
+        """
+        if isinstance(inputs, (list, tuple)) and isinstance(inputs[0], Mapping):
+            inputs = {key: [example[key] for example in inputs] for key in inputs[0].keys()}
+
+        exclude_keys = exclude_keys or []
+        exclude_keys += self.uncastable_keys  # avoid possible errors
+        inputs = convert_batch_dict_dtype(inputs, dtype="list", skip_keys=exclude_keys)
+
+        include_keys = include_keys or list(inputs.keys())
+
+        for key, batch in inputs.items():
+            if key in exclude_keys:
+                continue
+            if key in include_keys:
+                pad_id = 0 if key == "attention_mask" else self.pad_token_id
+                padded_ids = pad_batch_items(
+                    inputs[key],
+                    padding_type=padding,
+                    padding_side=self.config.padding_direction,
+                    pad_id=pad_id,
+                    max_length=max_length,
+                    truncation=truncation,
+                )
+                inputs[key] = padded_ids
+
+        inputs = convert_batch_dict_dtype(inputs, dtype=return_tensors, skip_keys=exclude_keys)
+
+        return inputs
+
+    def __call__(
+        self,
+        inputs: List[str] | List[Tuple[str, str]],
+        device: str | torch.device = None,
+        add_special_tokens: bool = True,
+        padding_strategy=None,
+        truncation_strategy=None,
+        max_length: int = None,
+        return_tensors: str = "list",
+        stride: int = 0,
+        is_split_into_words: bool = False,
+        pad_to_multiple_of: int = None,
+        return_tokens: bool = None,
+        return_token_type_ids: bool = None,
+        return_attention_mask: bool = True,
+        return_overflowing_tokens: bool = False,
+        return_special_tokens_mask: bool = False,
+        return_offsets_mapping: bool = False,
+        return_length: bool = False,
+        return_word_ids: bool = False,
+        verbose: bool = True,
+        **kwargs,
+    ):
+        """
+        Tokenize a batch of string inputs and return the relevant properties, e.g. token ids, attention mask, etc.
+
+        Args:
+            inputs: A list of string inputs to tokenize
+            add_special_tokens: Whether to add special tokens or not
+            padding_strategy: Determines how to pad inputs
+            truncation_strategy: Determines how to truncate inputs
+            max_length: Max input length of the sequences
+            return_tensors: The type of the returning tensors in the batch, e.g. pt, np, list
+            stride: Stride level
+            is_split_into_words: Whether inputs are pre-tokenized or raw string inputs
+            pad_to_multiple_of: Pad inputs by a factor of this value
+            return_tokens: Whether to return tokens lists
+            return_token_type_ids: Whether to return token type ids
+            return_attention_mask: Whether to return attention masks
+            return_overflowing_tokens: Whether to return overflowing tokens
+            return_special_tokens_mask: Whether to return special tokens mask
+            return_offsets_mapping: Whether to return offsets
+            return_length: Whether to return input lengths
+            **kwargs: Extra arguments reside here and are therefore ignored
+
+        Returns:
+            A dictionary of encoded inputs like
+            {"token_ids": [batch_size x input_len], "attention_mask": [batch_size x input_len], ...}
+        """
+        if isinstance(inputs, list) and not len(inputs):
+            raise ValueError("Tokenizer cannot process an empty list!")
+
+        if isinstance(inputs, str):
+            inputs = [inputs]
+
+        padding_strategy = padding_strategy or self.config.padding_strategy
+        truncation_strategy = truncation_strategy or self.config.truncation_strategy
+        max_length = max_length or self.config.max_length
+        pad_to_multiple_of = pad_to_multiple_of or self.config.pad_to_multiple_of
+
+        self.set_truncation_and_padding(
+            padding_strategy=padding_strategy,
+            truncation_strategy=truncation_strategy,
+            padding_side=self.config.padding_direction,
+            truncation_side=self.config.truncation_direction,
+            max_length=max_length,
+            stride=self.config.stride,
+            pad_to_multiple_of=pad_to_multiple_of,
+        )
+        encodings = self.encode(
+            inputs,
+            add_special_tokens=add_special_tokens,
+            is_pretokenized=is_split_into_words,
+        )
+        encodings_dict = [
+            self._convert_encodings(
+                encoding=encoding,
+                return_tokens=return_tokens,
+                return_token_type_ids=return_token_type_ids,
+                return_attention_mask=return_attention_mask,
+                return_overflowing_tokens=return_overflowing_tokens,
+                return_special_tokens_mask=return_special_tokens_mask,
+                return_offsets_mapping=return_offsets_mapping,
+                return_length=return_length,
+                return_word_ids=return_word_ids,
+            )
+            for encoding in encodings
+        ]
+        # Permute output dict from [batch_0: Dict[key, value], ...] to Dict[key, [batch_0, batch_1, ...], ...]
+        sanitized_outputs = {}
+        for key in encodings_dict[0].keys():
+            stack = [e for item in encodings_dict for e in item[key]]
+            sanitized_outputs[key] = stack
+
+        # If returning overflowing tokens, we need to return a mapping
+        # from the batch idx to the original sample
+        if return_overflowing_tokens:
+            overflow_to_sample_mapping = []
+            for i, encodings_ in enumerate(encodings_dict):
+                overflow_to_sample_mapping += [i] * len(encodings_["input_ids"])
+            sanitized_outputs["overflow_to_sample_mapping"] = overflow_to_sample_mapping
+
+        if return_tensors == "list" or return_tensors is None:
+            sanitized_outputs = {
+                key: value[0] if len(value) > 0 and isinstance(value[0], list) else value
+                for key, value in sanitized_outputs.items()
+            }
+
+        outputs = convert_batch_dict_dtype(sanitized_outputs, dtype=return_tensors, skip_keys=self.uncastable_keys)
+        if device and return_tensors == "pt":
+            outputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in outputs.items()}
+
+        return outputs
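A sketch of what calling a tokenizer looks like end to end (the repo id is a placeholder, and `Tokenizer.load` is assumed to behave like the other preprocessor loaders in this package):

    from hezar.preprocessors import Tokenizer

    tokenizer = Tokenizer.load("hezarai/bert-base-fa")  # hypothetical repo id
    batch = tokenizer(
        ["a short sentence", "a slightly longer second sentence"],
        padding_strategy="longest",
        return_tensors="pt",
    )
    print(batch["token_ids"].shape, batch["attention_mask"].shape)  # ids live under `token_ids` here, not `input_ids`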
+
+    def set_truncation_and_padding(
+        self,
+        padding_strategy=None,
+        truncation_strategy=None,
+        padding_side=None,
+        truncation_side=None,
+        max_length: int = None,
+        stride: int = None,
+        pad_to_multiple_of: int = None,
+    ):
+        # Set truncation and padding on the backend tokenizer
+        if truncation_strategy == "no_truncation":
+            if self.truncation is not None:
+                self.no_truncation()
+        else:
+            target = {
+                "max_length": max_length,
+                "stride": stride,
+                "strategy": truncation_strategy,
+                "direction": truncation_side,
+            }
+            if self.truncation is None:
+                current = None
+            else:
+                current = {k: self.truncation.get(k, None) for k in target}
+
+            if current != target:
+                self.enable_truncation(**target)
+
+        if padding_strategy == "no_padding":
+            if self.padding is not None:
+                self.no_padding()
+        else:
+            length = max_length if self.config.padding_strategy == "max_length" else None
+            target = {
+                "length": length,
+                "direction": padding_side,
+                "pad_id": self.token_to_id(self.pad_token),
+                "pad_token": self.pad_token,
+                "pad_type_id": self.config.pad_token_type_id,
+                "pad_to_multiple_of": pad_to_multiple_of,
+            }
+            if self.padding != target:
+                self.enable_padding(**target)
+
+    def _convert_encodings(
+        self,
+        encoding,
+        return_tokens: bool = None,
+        return_token_type_ids: bool = None,
+        return_attention_mask: bool = None,
+        return_overflowing_tokens: bool = False,
+        return_special_tokens_mask: bool = False,
+        return_offsets_mapping: bool = False,
+        return_length: bool = False,
+        return_word_ids: bool = False,
+    ):
+        if return_overflowing_tokens and encoding.overflowing is not None:
+            encodings = [encoding] + encoding.overflowing
+        else:
+            encodings = [encoding]
+
+        encoding_dict = defaultdict(list)
+        for e in encodings:
+            encoding_dict[self.token_ids_name].append(e.ids)
+
+            if return_token_type_ids:
+                encoding_dict["token_type_ids"].append(e.type_ids)
+            if return_attention_mask:
+                encoding_dict["attention_mask"].append(e.attention_mask)
+            if return_special_tokens_mask:
+                encoding_dict["special_tokens_mask"].append(e.special_tokens_mask)
+            if return_offsets_mapping:
+                encoding_dict["offsets_mapping"].append(e.offsets)
+            if return_length:
+                encoding_dict["length"].append(len(e.ids))
+            if return_tokens:
+                text = self._tokenizer.decode(e.ids)
+                tokens = self.get_tokens_from_offsets(text, e.ids, e.offsets)
+                encoding_dict["tokens"].append(tokens)
+            if return_word_ids:
+                encoding_dict["word_ids"].append(e.word_ids)
+
+        return encoding_dict
+[docs] + def convert_tokens_to_ids(self, tokens: str | List[str]) -> int | List[int]: + if isinstance(tokens, str): + tokens = [tokens] + + return [self._tokenizer.token_to_id(token) for token in tokens]
+ + +
+[docs] + def convert_ids_to_tokens(self, ids: int | List[int], skip_special_tokens: bool = False): + if isinstance(ids, int): + ids = [ids] + tokens = [] + for index in ids: + index = int(index) + if skip_special_tokens and index in self.special_ids: + continue + tokens.append(self._tokenizer.id_to_token(index)) + return tokens
+ + +
+[docs] + def num_special_tokens_to_add(self, is_pair: bool) -> int: + return self._tokenizer.num_special_tokens_to_add(is_pair)
+ + +
+[docs] + def get_vocab(self, with_added_tokens: bool = True) -> Dict[str, int]: + return self._tokenizer.get_vocab(with_added_tokens=with_added_tokens)
+ + +
+[docs] + def get_vocab_size(self, with_added_tokens: bool = True) -> int: + return self._tokenizer.get_vocab_size(with_added_tokens=with_added_tokens)
+ + +
+[docs] + def enable_padding( + self, + direction: str = "right", + pad_to_multiple_of: int = None, + pad_id: int = 0, + pad_type_id: int = 0, + pad_token: str = "[PAD]", + length: int = None, + ): + return self._tokenizer.enable_padding( + direction=direction, + pad_to_multiple_of=pad_to_multiple_of, + pad_id=pad_id, + pad_type_id=pad_type_id, + pad_token=pad_token, + length=length, + )
+ + +
+[docs] + def no_padding(self): + return self._tokenizer.no_padding()
+ + +
+[docs] + def enable_truncation(self, max_length, stride=0, strategy="longest_first", direction="right"): + return self._tokenizer.enable_truncation(max_length, stride=stride, strategy=strategy, direction=direction)
+ + +
+[docs] + def no_truncation(self): + return self._tokenizer.no_truncation()
+ + +
+[docs] + def add_tokens(self, tokens) -> int: + return self._tokenizer.add_tokens(tokens)
+ + +
+[docs] + def add_special_tokens(self, special_tokens) -> int: + return self._tokenizer.add_special_tokens(special_tokens)
+ + +
+[docs] + def token_to_id(self, token: str) -> int: + return self._tokenizer.token_to_id(token)
+ + +
+[docs] + def id_to_token(self, id: int) -> str: + return self._tokenizer.id_to_token(id)
+ + +
+[docs] + def get_added_vocab(self) -> Dict[str, int]: + """ + Returns the added tokens in the vocabulary as a dictionary of token to index. + + Returns: + `Dict[str, int]`: The added tokens. + """ + base_vocab = self._tokenizer.get_vocab(with_added_tokens=False) + full_vocab = self._tokenizer.get_vocab(with_added_tokens=True) + added_vocab = {token: index for token, index in full_vocab.items() if token not in base_vocab} + return added_vocab
+ + + def __len__(self) -> int: + """ + Size of the full vocabulary with the added tokens. + """ + return self._tokenizer.get_vocab_size(with_added_tokens=True) + +
+[docs] + def get_tokens_from_offsets( + self, + text: str | List[str], + ids: List[int], + offsets_mapping: List[Tuple[int, int]], + ): + """ + Extract human-readable tokens using the original text and offsets mapping + Args: + text: Raw string text + ids: Token ids + offsets_mapping: A list of tuples representing offsets + + Returns: + A list of tokens + """ + if not isinstance(text, str): + raise ValueError(f"Expected str type for `text`, got `{type(text)}({text})`") + if isinstance(offsets_mapping, list) and not isinstance(offsets_mapping[0], Tuple): + raise ValueError(f"Expected a list of tuples for `offsets_mapping`, got List[{type(offsets_mapping[0])}]") + tokens = [] + for offset in offsets_mapping: + offset_start, offset_end = offset + tokens.append(text[offset_start:offset_end]) + for i, token in enumerate(tokens): + if ids[i] in self.special_ids: + tokens[i] = self._tokenizer.id_to_token(ids[i]) + return tokens
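A quick plain-Python illustration of how offset slicing recovers tokens from the raw text (only special-token ids need the vocabulary lookup):

```python
text = "hello world"
offsets_mapping = [(0, 5), (6, 11)]  # (start, end) character spans per token

tokens = [text[start:end] for start, end in offsets_mapping]
# tokens == ["hello", "world"]
```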
+ + +
+[docs] + @classmethod + def load( + cls, + hub_or_local_path, + subfolder=None, + config_filename=None, + tokenizer_filename=None, + cache_dir=None, + **kwargs, + ) -> "Tokenizer": + """ + Load a tokenizer from a specified path or Hub repository. + + Args: + cls: Class reference. + hub_or_local_path: Path or Hub repository ID. + subfolder: Subfolder containing tokenizer files. + config_filename: Tokenizer config filename. + tokenizer_filename: Tokenizer filename. + cache_dir: Path to cache directory + **kwargs: Additional arguments. + + Returns: + Tokenizer: Loaded tokenizer. + + """ + tokenizer_filename = tokenizer_filename or cls.tokenizer_filename + config_filename = config_filename or cls.tokenizer_config_filename + subfolder = subfolder or cls.preprocessor_subfolder + cache_dir = cache_dir or HEZAR_CACHE_DIR + + config = TokenizerConfig.load( + hub_or_local_path, + filename=config_filename, + subfolder=subfolder, + cache_dir=cache_dir, + ) + + if os.path.isdir(hub_or_local_path): + tokenizer_path = os.path.join(hub_or_local_path, subfolder, tokenizer_filename) + else: + tokenizer_path = hf_hub_download( + hub_or_local_path, + filename=tokenizer_filename, + subfolder=subfolder, + cache_dir=cache_dir, + resume_download=True, + ) + tokenizer = build_preprocessor(config.name, config, tokenizer_file=tokenizer_path, **kwargs) + return tokenizer
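As a usage sketch (the hub path is a placeholder):

```python
from hezar.preprocessors import Tokenizer

# From a Hub repo id (placeholder) -- files are fetched and cached automatically
tokenizer = Tokenizer.load("hezarai/some-model")

# Or from a local directory previously populated by `save()`
tokenizer = Tokenizer.load("./saved_model")
```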
+ + +
+[docs] + def save(self, path, save_config=True, pretty=True): + """ + Save the tokenizer and its configuration. + + Args: + path (str): Path to save the tokenizer. + save_config (bool): Whether to save the configuration. + pretty (bool): Whether to format the saved JSON file with indentation. + + """ + os.makedirs(path, exist_ok=True) + # save config + if save_config: + self.config.vocab_size = self.get_vocab_size(with_added_tokens=True) + self.config.save(path, filename=self.tokenizer_config_filename, subfolder=self.preprocessor_subfolder) + # save tokenizer.json + save_path = os.path.join(path, self.preprocessor_subfolder, self.tokenizer_filename) + self._tokenizer.save(save_path, pretty=pretty)
+ + +
+[docs] + def push_to_hub( + self, + repo_id, + commit_message=None, + subfolder=None, + tokenizer_filename=None, + config_filename=None, + private=False, + ): + """ + Push tokenizer and config to the Hub + + Args: + repo_id: The path (id or repo name) on the hub + commit_message: Commit message for this push + subfolder: subfolder to save the files + tokenizer_filename: tokenizer filename + config_filename: tokenizer config filename + private: If the repo should be private (ignored if the repo exists) + """ + subfolder = subfolder or self.preprocessor_subfolder + tokenizer_filename = tokenizer_filename or self.tokenizer_filename + config_filename = config_filename or self.tokenizer_config_filename + + # create remote repo + create_repo(repo_id, exist_ok=True, private=private) + # save to tmp and prepare for push + cache_path = tempfile.mkdtemp() + # save tokenizer.json + tokenizer_save_path = os.path.join(cache_path, subfolder, tokenizer_filename) + self.save(cache_path, pretty=True) + + if commit_message is None: + commit_message = "Hezar: Upload tokenizer and config" + + # upload config + self.config.push_to_hub( + repo_id=repo_id, + filename=config_filename, + subfolder=subfolder, + commit_message=commit_message, + ) + # upload tokenizer + upload_file( + repo_id=repo_id, + path_or_fileobj=tokenizer_save_path, + repo_type="model", + path_in_repo=f"{subfolder}/{tokenizer_filename}", + commit_message=commit_message, + ) + logger.log_upload_success( + name=f"{self.__class__.__name__}(name={self.config.name})", + target_path=os.path.join(repo_id, subfolder, tokenizer_filename), + )
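And the reverse direction, saving locally and then pushing to the Hub (the repo id is a placeholder):

```python
# Writes the tokenizer config and the tokenizer file under the preprocessor subfolder
tokenizer.save("./saved_model")

# Creates the repo if needed, then uploads both the config and the tokenizer file
tokenizer.push_to_hub("my-username/my-tokenizer", commit_message="Add tokenizer")
```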
+ + + @property + def model(self) -> "Model": + return self._tokenizer.model + + @model.setter + def model(self, model: "Model"): + self._tokenizer.model = model # noqa + + @property + def decoder(self) -> "Decoder": + return self._tokenizer.decoder + + @decoder.setter + def decoder(self, decoder: "Decoder"): + self._tokenizer.decoder = decoder # noqa + + @property + def padding(self): + return self._tokenizer.padding + + @property + def truncation(self) -> dict: + return self._tokenizer.truncation + + @property + def vocab(self): + return self._tokenizer.get_vocab(with_added_tokens=True) + + @property + def vocab_size(self) -> int: + """ + `int`: Size of the base vocabulary (without the added tokens). + """ + return self._tokenizer.get_vocab_size(with_added_tokens=False) + + @property + def special_ids(self): + return [self.token_to_id(t) for t in self.special_tokens] + + @property + def pad_token(self): + return self.config.pad_token + + @property + def bos_token(self): + return self.config.bos_token + + @property + def eos_token(self): + return self.config.eos_token + + @property + def unk_token(self): + return self.config.unk_token + + @property + def mask_token(self): + return self.config.mask_token + + @property + def cls_token(self): + return self.config.cls_token + + @property + def sep_token(self): + return self.config.sep_token + + @property + def pad_token_id(self): + return self.token_to_id(self.config.pad_token) + + @property + def bos_token_id(self): + return self.token_to_id(self.config.bos_token) + + @property + def eos_token_id(self): + return self.token_to_id(self.config.eos_token) + + @property + def unk_token_id(self): + return self.token_to_id(self.config.unk_token) + + @property + def mask_token_id(self): + return self.token_to_id(self.config.mask_token) + + @property + def cls_token_id(self): + return self.token_to_id(self.config.cls_token) + + @property + def sep_token_id(self): + return self.token_to_id(self.config.sep_token)
\ No newline at end of file
diff --git a/_modules/hezar/preprocessors/tokenizers/wordpiece.html b/_modules/hezar/preprocessors/tokenizers/wordpiece.html
new file mode 100644
index 00000000..466d1a84
--- /dev/null
+++ b/_modules/hezar/preprocessors/tokenizers/wordpiece.html
@@ -0,0 +1,604 @@

Source code for hezar.preprocessors.tokenizers.wordpiece

+from dataclasses import dataclass, field
+from typing import List
+
+from ...constants import DEFAULT_TOKENIZER_CONFIG_FILE, DEFAULT_TOKENIZER_FILE, Backends
+from ...registry import register_preprocessor
+from ...utils import is_backend_available
+from .tokenizer import Tokenizer, TokenizerConfig
+
+
+if is_backend_available(Backends.TOKENIZERS):
+    from tokenizers import Tokenizer as HFTokenizer
+    from tokenizers import decoders, models, trainers
+
+_required_backends = [
+    Backends.TOKENIZERS,
+]
+
+
+
+[docs] +@dataclass +class WordPieceConfig(TokenizerConfig): + name = "wordpiece_tokenizer" + max_length: int = 512 + truncation_strategy: str = "longest_first" + truncation_direction: str = "right" + stride: int = 0 + padding_strategy: str = "longest" + padding_direction: str = "right" + pad_to_multiple_of: int = 0 + pad_token: str = "[PAD]" + unk_token: str = "[UNK]" + sep_token: str = "[SEP]" + cls_token: str = "[CLS]" + mask_token: str = "[MASK]" + pad_token_type_id: int = 0 + additional_special_tokens: List[str] = None + wordpieces_prefix: str = "##" + vocab_size: int = 30000 + min_frequency: int = 2 + limit_alphabet: int = 1000 + initial_alphabet: list = field(default_factory=list) + show_progress: bool = True
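Because `WordPieceConfig` is a dataclass, any field can be overridden at construction time; for example (values are illustrative):

```python
config = WordPieceConfig(
    vocab_size=50000,   # target vocabulary size when training
    min_frequency=3,    # drop tokens rarer than this during training
    max_length=256,     # truncation length when tokenizing
)
```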
+ + + +
+[docs] +@register_preprocessor("wordpiece_tokenizer", config_class=WordPieceConfig) +class WordPieceTokenizer(Tokenizer): + """ + A standard WordPiece tokenizer using 🤗HuggingFace Tokenizers + + Args: + config: Preprocessor config for the tokenizer + **kwargs: Extra/manual config parameters + """ + + required_backends = _required_backends + + tokenizer_filename = DEFAULT_TOKENIZER_FILE + tokenizer_config_filename = DEFAULT_TOKENIZER_CONFIG_FILE + token_ids_name = "token_ids" + + def __init__(self, config, tokenizer_file=None, **kwargs): + super().__init__(config, tokenizer_file=tokenizer_file, **kwargs) + +
+[docs] + def build(self): + tokenizer = HFTokenizer(models.WordPiece(unk_token=self.config.unk_token)) # noqa + tokenizer.decoder = decoders.WordPiece(self.config.wordpieces_prefix) # noqa + return tokenizer
+ + +
+[docs] + def train(self, files: List[str], **train_kwargs): + """Train the model using the given files""" + self.config.update(train_kwargs) + + trainer = trainers.WordPieceTrainer( + vocab_size=self.config.vocab_size, + min_frequency=self.config.min_frequency, + limit_alphabet=self.config.limit_alphabet, + initial_alphabet=self.config.initial_alphabet, + special_tokens=self.config.special_tokens, + show_progress=self.config.show_progress, + continuing_subword_prefix=self.config.wordpieces_prefix, + ) + if isinstance(files, str): + files = [files] + self._tokenizer.train(files, trainer=trainer)
+ + +
+[docs]
+    def train_from_iterator(self, dataset: List[str], **train_kwargs):
+        """Train the model using the given dataset iterator"""
+        self.config.update(train_kwargs)
+
+        trainer = trainers.WordPieceTrainer(
+            vocab_size=self.config.vocab_size,
+            min_frequency=self.config.min_frequency,
+            limit_alphabet=self.config.limit_alphabet,
+            initial_alphabet=self.config.initial_alphabet,
+            special_tokens=self.config.special_tokens,
+            show_progress=self.config.show_progress,
+            continuing_subword_prefix=self.config.wordpieces_prefix,
+        )
+        self._tokenizer.train_from_iterator(dataset, trainer=trainer, length=len(dataset))
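A minimal end-to-end training sketch, assuming plain-text corpus files on disk (paths are placeholders) and that `special_tokens` is supplied by the base `TokenizerConfig`:

```python
config = WordPieceConfig(vocab_size=32000)
tokenizer = WordPieceTokenizer(config)

# Train from raw text files on disk...
tokenizer.train(files=["corpus_part1.txt", "corpus_part2.txt"])

# ...or from an in-memory list of strings
tokenizer.train_from_iterator(["first sentence", "second sentence"])

tokenizer.save("./trained_tokenizer")
```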
+
\ No newline at end of file
diff --git a/_modules/hezar/registry.html b/_modules/hezar/registry.html
new file mode 100644
index 00000000..6777f732
--- /dev/null
+++ b/_modules/hezar/registry.html
@@ -0,0 +1,699 @@

Source code for hezar.registry

+r"""
+Hezar uses a registry system in a way that for any core module like model, dataset, etc. there is an entry in its
+specific registry. These registries are simple python dictionaries that map a module's name to its class and its config
+class. These registries are initialized here and filled automatically when you import hezar or a registry itself.
+
+Examples:
+    >>> # read models registry
+    >>> from hezar.registry import models_registry
+    >>> print(models_registry)
+    {'distilbert_lm': {'module_class': <class 'hezar.models.language_modeling.distilbert.distilbert_lm.DistilBertLM'>,
+    'config_class': <class 'hezar.models.language_modeling.distilbert.distilbert_lm_config.DistilBertLMConfig'>,
+    'description': 'Optional model description here...'}}
+
+    >>> # add a model class to models_registry
+    >>> from hezar.models import Model, register_model
+    >>> @register_model(name="my_awesome_model", config_class=MyAwesomeModelConfig, description="My Awesome Model!")
+    ... class MyAwesomeModel(Model):
+    ...    def __init__(self, config: MyAwesomeModelConfig):
+    ...        ...
+
+Keep in mind that registries usually don't need to be used directly. There are a number of helper functions for
+building modules from a module's registry name in the `hezar.builders` module. See the file `builders.py` for more info.
+
+Note: In case of adding a new registry container, make sure to add to `__all__` below!
+"""
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Dict, Optional, Type
+
+
+if TYPE_CHECKING:
+    from .configs import (
+        Config,
+        DatasetConfig,
+        EmbeddingConfig,
+        MetricConfig,
+        ModelConfig,
+        PreprocessorConfig,
+    )
+
+from .utils import Logger
+
+
+__all__ = [
+    "register_model",
+    "register_preprocessor",
+    "register_dataset",
+    "register_embedding",
+    "register_metric",
+    "Registry",
+    "models_registry",
+    "preprocessors_registry",
+    "datasets_registry",
+    "embeddings_registry",
+    "metrics_registry",
+]
+
+logger = Logger(__name__)
+
+
+
+[docs] +@dataclass +class Registry: + module_class: type + config_class: type = None + description: Optional[str] = None
+ + + +models_registry: Dict[str, Registry] = {} +preprocessors_registry: Dict[str, Registry] = {} +datasets_registry: Dict[str, Registry] = {} +embeddings_registry: Dict[str, Registry] = {} +metrics_registry: Dict[str, Registry] = {} + + +def _register_module( + cls: Type, + registry: Dict[str, Registry], + module_name: str, + config_class: Type["Config"], + description: str = None +): + """ + Add module to the registry. + + Args: + cls: The module class + registry: Module's registry container + module_name: Module's registry name (key) + config_class: Module's config class + description: Optional description for the module + """ + if module_name in registry: + logger.warning(f"`{module_name}` is already registered. Overwriting...") + + if config_class.name != module_name: + raise ValueError( + f"Module's registry name and `config.name` are not compatible for `{cls.__name__}`\n" + f"Registry name: {module_name}\n" + f"{config_class.__name__}.name: {config_class.name}" + ) + registry[module_name] = Registry(module_class=cls, config_class=config_class, description=description) + + +
+[docs] +def register_model(model_name: str, config_class: Type["ModelConfig"], description: str = None): + """ + A class decorator that adds the model class and the config class to the `models_registry` + + Args: + model_name: Model's registry name e.g, `bert_sequence_labeling` + config_class: Model's config class e.g, `BertSequenceLabelingConfig`. This parameter must be the config class + itself not a config instance! + description: Optional model description + """ + + def register(cls): + _register_module(cls, models_registry, model_name, config_class, description) + return cls + + return register
+ + + +
+[docs] +def register_dataset(dataset_name: str, config_class: Type["DatasetConfig"], description: str = None): + """ + A class decorator that adds the dataset class and the config class to the `datasets_registry` + + Args: + dataset_name: Dataset's registry name e.g, `text_classification`. + config_class: Dataset's config class e.g, `TextClassificationDatasetConfig`. This parameter must be the config + class itself not a config instance! + description: Optional dataset description + """ + + def register(cls): + _register_module(cls, datasets_registry, dataset_name, config_class, description) + return cls + + return register
+ + + +
+[docs] +def register_preprocessor(preprocessor_name: str, config_class: Type["PreprocessorConfig"], description: str = None): + """ + A class decorator that adds the preprocessor class and the config class to the `preprocessors_registry` + + Args: + preprocessor_name: Preprocessor's registry name e.g, `bpe_tokenizer`. + config_class: Preprocessor's config class e.g, BPEConfig. This parameter must be the config + class itself not a config instance! + description: Optional preprocessor description + """ + + def register(cls): + _register_module(cls, preprocessors_registry, preprocessor_name, config_class, description) + return cls + + return register
+ + + +
+[docs] +def register_embedding(embedding_name: str, config_class: Type["EmbeddingConfig"], description: str = None): + """ + A class decorator that adds the embedding class and the config class to the `embeddings_registry` + + Args: + embedding_name: Embedding's registry name e.g, `word2vec_cbow`. + config_class: Embedding's config class e.g, Word2VecCBOWConfig. This parameter must be the config + class itself not a config instance! + description: Optional embedding description + """ + + def register(cls): + _register_module(cls, embeddings_registry, embedding_name, config_class, description) + return cls + + return register
+ + + +
+[docs] +def register_metric(metric_name: str, config_class: Type["MetricConfig"], description: str = None): + """ + A class decorator that adds the metric class and the config class to the `metrics_registry` + + Args: + metric_name: Metric registry name e.g, `f1` + config_class: Metric config class + description: Optional metric description + """ + + def register(cls): + _register_module(cls, metrics_registry, metric_name, config_class, description) + return cls + + return register
+ +
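Putting the decorators together, registering a custom metric and building it back from its registry name might look like the sketch below. The class, config, and the `compute` signature of the `Metric` base class are hypothetical illustrations, not part of this module:

```python
from dataclasses import dataclass

from hezar.builders import build_metric
from hezar.configs import MetricConfig
from hezar.metrics import Metric
from hezar.registry import register_metric


@dataclass
class MyMetricConfig(MetricConfig):
    name = "my_metric"  # must match the registry name used below


@register_metric("my_metric", config_class=MyMetricConfig, description="A toy metric")
class MyMetric(Metric):
    def __init__(self, config: MyMetricConfig, **kwargs):
        super().__init__(config, **kwargs)

    def compute(self, predictions, targets):
        # Toy accuracy-style score; signature assumed for illustration
        return {"my_metric": sum(p == t for p, t in zip(predictions, targets)) / len(targets)}


metric = build_metric("my_metric")  # instantiated via the registry
```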
\ No newline at end of file
diff --git a/_modules/hezar/trainer/metrics_handlers.html b/_modules/hezar/trainer/metrics_handlers.html
new file mode 100644
index 00000000..b0c14666
--- /dev/null
+++ b/_modules/hezar/trainer/metrics_handlers.html
@@ -0,0 +1,715 @@

Source code for hezar.trainer.metrics_handlers

+from __future__ import annotations
+
+from typing import List
+
+import numpy as np
+import torch
+
+from ..builders import build_metric
+from ..configs import MetricConfig
+from ..constants import MetricType
+from ..metrics import Metric
+from .trainer_utils import MetricsTracker
+
+
+__all__ = [
+    "MetricsHandler",
+    "TextClassificationMetricsHandler",
+    "SequenceLabelingMetricsHandler",
+    "SpeechRecognitionMetricsHandler",
+    "AudioClassificationMetricsHandler",
+    "Image2TextMetricHandler",
+    "TextGenerationMetricsHandler",
+]
+
+
+
+[docs]
+class MetricsHandler:
+    """
+    Base metrics handler class for computing metrics. Subclasses must implement `compute_metrics` method based on
+    their specific task.
+
+    Args:
+        metrics: A list of metrics (metric name, `MetricType` or `MetricConfig`)
+        trainer: Optional Trainer instance that owns this handler
+    """
+    valid_metrics: List[MetricType] = []
+
+    def __init__(self, metrics: List[str | MetricType | Metric | MetricConfig], trainer=None, **kwargs):
+        self.metrics = self._setup_metrics(metrics)
+        self.trainer = trainer
+        self.tracker = MetricsTracker(self.metrics)
+        self.objective = self._configure_objective()
+
+    def _configure_objective(self):
+        target_metric = self.trainer.config.metric_for_best_model
+        objective_metric = target_metric.split(".")[1] if "." in target_metric else target_metric
+        if "loss" in objective_metric:
+            objective = "minimize"
+        else:
+            if objective_metric not in self.metrics:
+                raise ValueError(
+                    f"{objective_metric} is not a valid metric for this task, "
+                    f"available metrics: {list(self.tracker.trackers.keys())}"
+                )
+            objective = self.metrics[objective_metric].config.objective
+        return objective
+
+    def _setup_metrics(self, metrics):
+        metrics_dict = {}
+        metrics = metrics or []
+        if not len(metrics):
+            return metrics_dict
+        for metric in metrics:
+            if isinstance(metric, str):
+                if metric not in self.valid_metrics:
+                    raise ValueError(f"Invalid metric `{metric}`! Available metrics: {self.valid_metrics}")
+                metrics_dict[metric] = build_metric(metric)
+            elif isinstance(metric, MetricConfig):
+                metrics_dict[metric.name] = build_metric(metric.name, config=metric)
+            else:
+                raise ValueError(f"Invalid metric type `{type(metric)}`! Available metrics: {self.valid_metrics}")
+        return metrics_dict
+
+[docs] + def compute_metrics(self, predictions, labels, **kwargs): + """ + Given a batch of predictions and a batch of labels, compute all metrics + + Args: + predictions: Predictions batch usually containing logits + labels: Ground truth labels batch + """ + raise NotImplementedError
+
+ + + +
+[docs] +class TextClassificationMetricsHandler(MetricsHandler): + valid_metrics = [ + MetricType.ACCURACY, + MetricType.RECALL, + MetricType.PRECISION, + MetricType.F1, + ] + + def __init__(self, metrics: List[str | MetricType | Metric | MetricConfig], trainer=None): + super().__init__(metrics=metrics, trainer=trainer) + +
+[docs] + def compute_metrics(self, predictions, labels, **kwargs): + predictions = np.array(predictions).argmax(1).flatten() + labels = np.array(labels).flatten() + results = {} + for metric_name, metric in self.metrics.items(): + results.update(metric.compute(predictions, labels)) + return results
+
+ + + +
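The reduction above is just an argmax over the class dimension followed by flattening; as a standalone numpy sketch:

```python
import numpy as np

logits = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])  # (batch, num_classes)
labels = np.array([1, 0, 0])

predictions = logits.argmax(1).flatten()   # -> array([1, 0, 1])
accuracy = float((predictions == labels).mean())  # 2/3 correct
```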
+[docs] +class SequenceLabelingMetricsHandler(MetricsHandler): + valid_metrics = [MetricType.SEQEVAL] + + def __init__(self, metrics: List[str | MetricType | Metric | MetricConfig], trainer=None): + super().__init__(metrics=metrics, trainer=trainer) + +
+[docs] + def compute_metrics(self, predictions, labels, **kwargs): + predictions = np.array(predictions).argmax(2).squeeze() + labels = np.array(labels).squeeze() + + # Remove ignored index (special tokens) and append `B-` in the beginning for seqeval + prefix = "" if self.trainer.train_dataset.config.is_iob_schema else "B-" + true_predictions = [ + [f"{prefix}{self.trainer.model.config.id2label[p]}" for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + true_labels = [ + [f"{prefix}{self.trainer.model.config.id2label[l]}" for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + + results = {} + for metric_name, metric in self.metrics.items(): + x = metric.compute(true_predictions, true_labels) + results.update(x) + return results
+
+ + + +
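Schematically, the `-100` filtering pairs each prediction with its label, drops ignored positions (special tokens), and prepends `B-` so seqeval sees valid tags (ids and labels below are made up):

```python
id2label = {0: "O", 1: "PER", 2: "LOC"}

predictions = [[1, 0, 2, 0]]    # token-level predictions after argmax
labels = [[1, -100, 2, -100]]   # -100 marks positions to ignore

true_predictions = [
    [f"B-{id2label[p]}" for p, l in zip(pred, lab) if l != -100]
    for pred, lab in zip(predictions, labels)
]
# true_predictions == [["B-PER", "B-LOC"]]
```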
+[docs] +class Image2TextMetricHandler(MetricsHandler): + valid_metrics = [MetricType.CER, MetricType.WER] + + def __init__(self, metrics: List[str | MetricType | Metric | MetricConfig], trainer=None): + super().__init__(metrics=metrics, trainer=trainer) + +
+[docs] + def compute_metrics(self, predictions, labels, **kwargs): + predictions = self.trainer.model.post_process(torch.tensor(predictions)) + labels = self.trainer.model.post_process(torch.tensor(labels)) + predictions = [x["text"] for x in predictions] + labels = [x["text"] for x in labels] + results = {} + for metric_name, metric in self.metrics.items(): + x = metric.compute(predictions, labels) + results.update(x) + return results
+
+ + + +
+[docs] +class SpeechRecognitionMetricsHandler(MetricsHandler): + def __init__(self, metrics: List[str | MetricType | Metric | MetricConfig], trainer=None): + super().__init__(metrics=metrics, trainer=trainer) + +
+[docs] + def compute_metrics(self, predictions, labels, **kwargs): + return {}
+
+ + + +
+[docs] +class TextGenerationMetricsHandler(MetricsHandler): + valid_metrics = [MetricType.ROUGE, MetricType.BLEU] + + def __init__(self, metrics: List[str | MetricType | Metric | MetricConfig], trainer=None): + super().__init__(metrics=metrics, trainer=trainer) + +
+[docs] + def compute_metrics(self, predictions, labels, **kwargs): + predictions = self.trainer.model.post_process(torch.tensor(predictions)) + labels = self.trainer.model.post_process(torch.tensor(labels)) + predictions = [x["text"] for x in predictions] + labels = [x["text"] for x in labels] + results = {} + for metric_name, metric in self.metrics.items(): + x = metric.compute(predictions, labels) + results.update(x) + return results
+
+ + + +
+[docs] +class AudioClassificationMetricsHandler(MetricsHandler): + def __init__(self, metrics: List[str | MetricType | Metric | MetricConfig], trainer=None): + super().__init__(metrics=metrics, trainer=trainer) + +
+[docs] + def compute_metrics(self, predictions, labels, **kwargs): + return {}
+
+ +
\ No newline at end of file
diff --git a/_modules/hezar/trainer/trainer.html b/_modules/hezar/trainer/trainer.html
new file mode 100644
index 00000000..6c8f78d6
--- /dev/null
+++ b/_modules/hezar/trainer/trainer.html
@@ -0,0 +1,1282 @@

Source code for hezar.trainer.trainer

+from __future__ import annotations
+
+import os
+import random
+from typing import TYPE_CHECKING, Any, Callable, Dict, Tuple
+
+import numpy as np
+import pandas as pd
+import torch
+from huggingface_hub import create_repo, hf_hub_download, upload_file
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard import SummaryWriter
+from tqdm import tqdm
+
+from ..configs import TrainerConfig
+from ..constants import (
+    DEFAULT_DATASET_CONFIG_FILE,
+    DEFAULT_TRAINER_CONFIG_FILE,
+    DEFAULT_TRAINER_CSV_LOG_FILE,
+    DEFAULT_TRAINER_STATE_FILE,
+    DEFAULT_TRAINER_SUBFOLDER,
+    HEZAR_CACHE_DIR,
+    TQDM_BAR_FORMAT,
+    Backends,
+    LRSchedulerType,
+    OptimizerType,
+    TaskType,
+)
+from ..data.datasets import Dataset
+from ..models import Model
+from ..preprocessors import Preprocessor, PreprocessorsContainer
+from ..utils import Logger, colorize_text, is_backend_available, sanitize_function_parameters
+from .metrics_handlers import (
+    AudioClassificationMetricsHandler,
+    Image2TextMetricHandler,
+    MetricsHandler,
+    SequenceLabelingMetricsHandler,
+    SpeechRecognitionMetricsHandler,
+    TextClassificationMetricsHandler,
+    TextGenerationMetricsHandler,
+)
+from .trainer_utils import CSVLogger, TrainerState, resolve_logdir, write_to_tensorboard
+
+
+if TYPE_CHECKING:
+    from accelerate import Accelerator
+
+logger = Logger(__name__)
+
+optimizers = {
+    OptimizerType.ADAM: torch.optim.Adam,
+    OptimizerType.ADAMW: torch.optim.AdamW,
+    OptimizerType.SDG: torch.optim.SGD,
+}
+lr_schedulers = {
+    LRSchedulerType.REDUCE_LR_ON_PLATEAU: torch.optim.lr_scheduler.ReduceLROnPlateau,
+    LRSchedulerType.COSINE_LR: torch.optim.lr_scheduler.CosineAnnealingLR,
+}
+
+task_to_metrics_handlers_mapping = {
+    TaskType.TEXT_CLASSIFICATION: TextClassificationMetricsHandler,
+    TaskType.SEQUENCE_LABELING: SequenceLabelingMetricsHandler,
+    TaskType.IMAGE2TEXT: Image2TextMetricHandler,
+    TaskType.SPEECH_RECOGNITION: SpeechRecognitionMetricsHandler,
+    TaskType.AUDIO_CLASSIFICATION: AudioClassificationMetricsHandler,
+    TaskType.TEXT_GENERATION: TextGenerationMetricsHandler,
+}
+
+
+
+[docs] +class Trainer: + """ + Base trainer class for training all Hezar models and all tasks. Usually you can use this class as-is, but for special + cases you can also override any of the core methods in your own custom Trainer. + + Args: + model ([`Model`] or `torch.nn.Module`): The main model to train and evaluate + config (TrainerConfig): Training configuration and parameters + train_dataset (Dataset): Train dataset + eval_dataset (Dataset): Evaluation dataset + data_collator: Collate function, usually included in the dataset object itself + preprocessor: Preprocessor object(s) + metrics_handler: Optional metrics handler + optimizer (optim.Optimizer): Model optimizer + lr_scheduler: Optional learning-rate scheduler + accelerator (Accelerator) : Accelerator object for a customized distributed environment + """ + + trainer_subfolder = DEFAULT_TRAINER_SUBFOLDER + trainer_config_file = DEFAULT_TRAINER_CONFIG_FILE + trainer_csv_log_file = DEFAULT_TRAINER_CSV_LOG_FILE + dataset_config_file = DEFAULT_DATASET_CONFIG_FILE + default_trainer_state_file = DEFAULT_TRAINER_STATE_FILE + default_optimizer = OptimizerType.ADAM + default_lr_scheduler = LRSchedulerType.REDUCE_LR_ON_PLATEAU + + def __init__( + self, + model: Model = None, + config: TrainerConfig = None, + train_dataset: Dataset = None, + eval_dataset: Dataset = None, + data_collator: Callable = None, + preprocessor: Preprocessor | PreprocessorsContainer = None, + metrics_handler: MetricsHandler = None, + optimizer: torch.optim.Optimizer = None, + lr_scheduler=None, + accelerator: "Accelerator" = None, + ): + self.config = config + self.device = "cuda" if torch.cuda.is_available() and not self.config.use_cpu else "cpu" + + # Set determinism + self._set_seed(self.config.seed) + + # Setup model and preprocessor(s) + self.model = self._setup_model(model) + if self.model.preprocessor is None: + if preprocessor is not None: + self.model.preprocessor = preprocessor + else: + raise ValueError( + "You must set a preprocessor for the model or pass the preprocessor parameter to the Trainer!" 
+ ) + + # Configure datasets and data loaders + self.train_dataset = train_dataset + self.eval_dataset = eval_dataset + self.data_collator = data_collator or self.train_dataset.data_collator + self.train_dataloader, self.eval_dataloader = self._setup_dataloaders() + + # Setup optimizer and (optionally) lr scheduler + self.optimizer, self.lr_scheduler = self._setup_optimizers(optimizer, lr_scheduler) + + # Setup accelerated objects if possible + if accelerator is None and self.config.distributed and not self.config.use_cpu: + self.accelerator = self._setup_accelerator() + self.scaler = self.accelerator.scaler + self.device = self.accelerator.device + else: + self.accelerator = accelerator + enabled = True if ( + self.config.mixed_precision is not None and not (self.config.use_cpu or self.device == "cpu") + ) else False + self.scaler = torch.cuda.amp.GradScaler(enabled=enabled) + + # Setup metrics handler and inner trackers for the trainer + self.metrics_handler = metrics_handler or self._setup_metrics_handler() + + # Configure checkpoints and logging directories + self.logs_dir = os.path.join(self.config.output_dir, self.config.logs_dir) + self.checkpoints_dir = os.path.join(self.config.output_dir, self.config.checkpoints_dir) + + # Setup logging properties + self.tensorboard = SummaryWriter(log_dir=resolve_logdir(self.logs_dir)) + self.csv_logger = CSVLogger(logs_dir=self.logs_dir, csv_filename=self.trainer_csv_log_file) + + self.current_epoch = 1 + + # Configure trainer state + self.state = TrainerState( + epoch=self.current_epoch, + total_epochs=self.config.num_epochs, + metric_for_best_checkpoint=self.config.metric_for_best_model, + ) + + @staticmethod + def _set_seed(seed): + torch.manual_seed(seed) + np.random.seed(seed) + random.seed(seed) + + def _setup_model(self, model: Model) -> Model: + """ + Download the model from HuggingFace Hub if `init_weights_from` is given in the config. Load the model to the + device and return it. + """ + if model is None: + raise ValueError("`model` must be given to the Trainer!") + hub_path = self.config.init_weights_from + if hub_path is not None: + if os.path.isdir(hub_path): + model_path = os.path.join(hub_path, model.model_filename) + else: + model_path = hf_hub_download( + hub_path, + filename=model.model_filename, + cache_dir=HEZAR_CACHE_DIR, + resume_download=True, + ) + model.load_state_dict(torch.load(model_path, map_location="cpu")) + model.to(self.device) + return model + + def _setup_dataloaders(self) -> Tuple[DataLoader, DataLoader | None]: + """ + Set up data loaders (train/eval) and return them. + + Returns: + A tuple of train and eval dataloaders + """ + if self.train_dataset is not None: + train_dataloader = DataLoader( + dataset=self.train_dataset, + batch_size=self.config.batch_size, + collate_fn=self.data_collator, + num_workers=self.config.num_dataloader_workers, + drop_last=True, + shuffle=True, + ) + else: + raise ValueError("Cannot create train dataloader because `train_dataset` is not given!") + if self.eval_dataset is not None: + eval_dataloader = DataLoader( + dataset=self.eval_dataset, + batch_size=self.config.eval_batch_size or self.config.batch_size, + collate_fn=self.data_collator, + num_workers=self.config.num_dataloader_workers, + drop_last=True, + shuffle=True, + ) + else: + logger.warning( + "Cannot create eval dataloader because `eval_dataset` is not given to the Trainer! " + "Setting eval_dataloader to None..." 
+ ) + eval_dataloader = None + + return train_dataloader, eval_dataloader + + def _setup_optimizers(self, optimizer: torch.optim.Optimizer = None, lr_scheduler=None): + """ + Set up the optimizer and lr lr_scheduler if they're not already given + + Args: + optimizer: If None do nothing and return it, otherwise build it using the train config + lr_scheduler: If None do nothing and return it, otherwise build it using the train config + + Returns: + Optimizer and lr_scheduler + """ + if optimizer is None: + optimizer_type = self.config.optimizer or self.default_optimizer + optimizer = optimizers[optimizer_type]( + self.model.parameters(), + lr=self.config.learning_rate, + weight_decay=self.config.weight_decay, + ) + + if lr_scheduler is None: + scheduler_name = self.config.lr_scheduler or self.default_lr_scheduler + if scheduler_name is None: + lr_scheduler = None + else: + lr_scheduler = lr_schedulers[scheduler_name](optimizer, verbose=True) + return optimizer, lr_scheduler + + def _setup_accelerator(self): + if is_backend_available(Backends.ACCELERATE): + from accelerate import Accelerator + + accelerator = Accelerator( + mixed_precision=self.config.mixed_precision, + step_scheduler_with_optimizer=True if self.lr_scheduler is not None else False, + ) + self.model, self.optimizer, self.lr_scheduler, self.train_dataloader, self.eval_dataloader = ( + accelerator.prepare( + self.model, + self.optimizer, + self.lr_scheduler, + self.train_dataloader, + self.eval_dataloader, + ) + ) + else: + raise ValueError( + "The configuration for this trainer requires the package `accelerate` to be installed! " + "(config.distributed=True)" + ) + + return accelerator + + def _setup_metrics_handler(self): + """ + Setup MetricsHandler instance for the trainer + + Returns: + A MetricsHandler subclass instance based on self.config.task + """ + metrics_handler_cls = task_to_metrics_handlers_mapping[self.config.task] + metrics_handler = metrics_handler_cls(metrics=self.config.metrics, trainer=self) # noqa + return metrics_handler + +
+[docs] + def load_from_checkpoint(self, checkpoint: str | bool = True, load_best: bool = False): + """ + Load trainer states like model weights, optimizer, etc. from a checkpoint + + Args: + checkpoint: Path to checkpoint directory + load_best: Whether to load the best checkpoint or not, if False, loads the latest checkpoint + """ + if os.path.isdir(checkpoint) and load_best: + logger.warning("The `load_best` parameter has no effect when `checkpoint` is a path!") + + self.state = TrainerState.load(os.path.join(self.checkpoints_dir, self.default_trainer_state_file)) + if isinstance(checkpoint, bool): + if load_best: + checkpoint = os.path.join(self.checkpoints_dir, str(self.state.best_checkpoint)) + else: + checkpoint = os.path.join(self.checkpoints_dir, str(self.state.epoch)) + if os.path.isdir(checkpoint): + # Figure out the epoch number + epoch = os.path.basename(checkpoint) if os.path.basename(checkpoint).isdigit() else self.state.epoch + if str(epoch).isdigit(): + self.state.epoch = int(epoch) + # Load model's state dict + model_path = os.path.join(checkpoint, self.model.model_filename) + if os.path.isfile(model_path): + self.model.load_state_dict(torch.load(model_path)) + if self.accelerator is not None: + self.model = self.accelerator.prepare(self.model) + logger.info(f"Successfully loaded checkpoint from `{checkpoint}` ") + else: + raise FileNotFoundError( + f"Could not find `{self.model.model_filename}` at `{os.path.dirname(model_path)}`!\n" + ) + else: + logger.warning( + f"{checkpoint} does not seem to be a valid checkpoint!" + )
+ + +
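Usage sketch, assuming `trainer` is an already-constructed `Trainer` whose previous run saved a state file under the checkpoints directory (paths are placeholders):

```python
# Resume from the checkpoint of the last recorded epoch
trainer.load_from_checkpoint(True)

# Or pick the best checkpoint tracked in the trainer state
trainer.load_from_checkpoint(True, load_best=True)

# Or point at an explicit checkpoint directory
trainer.load_from_checkpoint("outputs/checkpoints/5")
```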
+[docs] + def load_csv_logs(self, logs_dir=None): + """ + Load the CSV log file + Args: + logs_dir: Path to logs directory, defaults to self.config.logs_dir + + Returns: + Logs dictionary + """ + logs_dir = logs_dir or self.config.logs_dir + csv_path = os.path.join(logs_dir, self.trainer_csv_log_file) + logs = pd.read_csv(csv_path) + return logs.to_dict()
+ + +
+[docs] + def prepare_input_batch(self, input_batch) -> Dict[str, torch.Tensor]: + """ + Every operation required to prepare the inputs for model forward like moving to device, permutations, etc. + + Args: + input_batch: Raw input batch from the dataloader + + Returns: + The proper input batch required by model forward + """ + # Put inputs on device manually if accelerator is not available, otherwise it's taken care of by the accelerator + if self.accelerator is None: + input_batch = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v for k, v in input_batch.items()} + + return input_batch
+ + +
+[docs]
+    def amp_context_manager(self):
+        """
+        An autocast context manager for mixed precision.
+
+        Returns:
+            A torch autocast context manager
+        """
+        if self.accelerator is not None:
+            context_manager = self.accelerator.autocast()
+        else:
+            device_type = "cuda" if "cuda" in self.device else "cpu"
+            dtype = torch.bfloat16 if self.config.mixed_precision == "bf16" or device_type == "cpu" else torch.float16
+            # Autocast is only enabled when mixed precision is explicitly set to a real precision mode
+            enabled = self.config.mixed_precision is not None and self.config.mixed_precision != "no"
+            context_manager = torch.autocast(
+                device_type=device_type,
+                dtype=dtype,
+                enabled=enabled,
+            )
+        return context_manager
+ + +
+[docs] + def forward(self, input_batch): + """ + Perform model forward on the input batch + + In special cases, one can override this method in their desired trainer. + + Args: + input_batch: Input batch + + Returns: + Model outputs + """ + if isinstance(input_batch, torch.Tensor): + outputs = self.model(input_batch) + elif isinstance(input_batch, Dict): + forward_inputs = sanitize_function_parameters(self.model.forward, input_batch) + outputs = self.model(**forward_inputs) + else: + raise ValueError( + f"`input_batch` must be a tensor or a dict-like object containing key/value pairs of tensors, " + f"but got {type(input_batch)}" + ) + if not isinstance(outputs, Dict): + raise ValueError(f"Model outputs must be dict-like not `{type(outputs)}`") + + return outputs
+ + +
+[docs] + def compute_loss(self, model_outputs: Dict, labels: torch.Tensor, **kwargs) -> torch.Tensor: + """ + Compute loss from model outputs + + Args: + model_outputs: Logits from model outputs + labels: Ground truth labels + + Returns: + The loss tensor + """ + compute_loss_inputs = sanitize_function_parameters(self.model.compute_loss, model_outputs, **kwargs) + compute_loss_inputs["labels"] = labels + + loss = self.model.compute_loss(**compute_loss_inputs) + + return loss
+ + +
+[docs] + def training_step(self, input_batch: Dict[str, torch.Tensor]) -> Dict[str, Any]: + """ + Train one batch of data and return loss and model outputs + + Args: + input_batch: A batch of inputs to train + + Returns: + Train step outputs including loss, logits, etc. + """ + with self.amp_context_manager(): + outputs = self.forward(input_batch) + + loss = self.compute_loss(outputs, **input_batch) + + if self.accelerator is not None: + self.accelerator.backward(loss) + self.optimizer.step() + else: + self.scaler.scale(loss).backward() + self.scaler.step(self.optimizer) + self.scaler.update() + + self.optimizer.zero_grad() + + outputs["loss"] = loss.item() if isinstance(loss, torch.Tensor) else loss + + return outputs
+ + +
+[docs] + def evaluation_step(self, input_batch: Dict[str, torch.Tensor]) -> Dict[str, Any]: + """ + Evaluate one batch of data and return loss and model outputs + + Args: + input_batch: A batch of inputs to evaluate + + Returns: + Evaluation step outputs including loss, logits, etc. + """ + with self.amp_context_manager(): + outputs = self.forward(input_batch) + + loss = self.compute_loss(outputs, **input_batch) + + if self.model.is_generative and self.config.evaluate_with_generate: + generate_inputs = sanitize_function_parameters(self.model.generate, input_batch) + generated_ids = self.model.generate(**generate_inputs) + outputs["logits"] = generated_ids["generated_ids"] if isinstance(generated_ids, dict) else generated_ids + + outputs["loss"] = loss.item() if isinstance(loss, torch.Tensor) else loss + + return outputs
+ + +
+[docs]
+    def inner_training_loop(self, epoch_num: int):
+        """
+        Train the model for one epoch on the whole train dataset and display live metric values in the progress bar
+
+        Args:
+            epoch_num: Number of the current epoch
+
+        Returns:
+            Metrics averages through the full iteration
+        """
+        losses_sum = 0.0
+        avg_loss = 0.0  # stays 0.0 if the dataloader yields no batches
+        self.model.train()
+        with tqdm(
+            self.train_dataloader,
+            unit="batch",
+            desc=f"Epoch: {epoch_num}/{self.config.num_epochs} ",
+            bar_format=TQDM_BAR_FORMAT,
+            ascii=" #",
+        ) as iterator:
+            for step, input_batch in enumerate(iterator):
+                input_batch = self.prepare_input_batch(input_batch)
+                # Training on one batch
+                outputs = self.training_step(input_batch)
+                losses_sum += outputs["loss"]
+                # Gather outputs for metrics
+                avg_loss = losses_sum / (step + 1)
+                iterator.set_postfix(loss=avg_loss)
+                self.state.global_step += 1
+
+        return {"loss": avg_loss}
+ + +
+[docs]
+    def evaluate(self):
+        """
+        Evaluate the model on the whole eval dataset and display live metric values in the progress bar
+
+        Returns:
+            Evaluation results
+        """
+        self.metrics_handler.tracker.reset()
+        self.model.eval()
+        with tqdm(
+            self.eval_dataloader,
+            unit="batch",
+            desc="Evaluating... ",
+            bar_format=TQDM_BAR_FORMAT,
+            ascii=" #",
+        ) as iterator:
+            with torch.inference_mode():
+                for step, input_batch in enumerate(iterator):
+                    input_batch = self.prepare_input_batch(input_batch)
+                    # Evaluation on one batch
+                    outputs = self.evaluation_step(input_batch)
+                    logits = outputs["logits"].detach().cpu().numpy()
+                    labels = input_batch["labels"].detach().cpu().numpy()
+                    # Compute metrics
+                    evaluation_results = self.metrics_handler.compute_metrics(logits, labels)
+                    evaluation_results["loss"] = outputs["loss"]
+                    # Gather outputs for metrics
+                    self.metrics_handler.tracker.update(evaluation_results)
+                    iterator.set_postfix(**self.metrics_handler.tracker.avg())
+
+        return self.metrics_handler.tracker.avg()
+ + +
+[docs] + def print_info(self): + """ + Print training info + """ + + def _print_info_line(key, value): + line = f" {colorize_text(key, 'bold')}: `{colorize_text(str(value), 'italic')}`" + print(line) + + header = f"{'*' * 20} Training Info {'*' * 20}" + footer = "*" * len(header) + + # Header + print(f"\n{colorize_text(header, 'bold')}\n") + # Info + _print_info_line("Output Directory", self.config.output_dir) + _print_info_line("Task", self.config.task) + _print_info_line("Model", type(self.model).__name__) + _print_info_line("Init Weights", self.config.init_weights_from or "N/A") + _print_info_line("Device(s)", self.device) + _print_info_line("Training Dataset", self.train_dataset) + _print_info_line("Evaluation Dataset", self.eval_dataset) + _print_info_line("Optimizer", self.config.optimizer or self.default_optimizer) + _print_info_line("Initial Learning Rate", self.config.learning_rate) + _print_info_line("Learning Rate Decay", self.config.weight_decay) + _print_info_line("Epochs", self.config.num_epochs) + _print_info_line("Batch Size", self.config.batch_size) + _print_info_line("Number of Parameters", self.model.num_parameters) + _print_info_line("Number of Trainable Parameters", self.model.num_trainable_parameters) + _print_info_line("Mixed Precision", self.config.mixed_precision or "Full (fp32)") + _print_info_line("Metrics", list(self.metrics_handler.metrics.keys())) + _print_info_line("Checkpoints Path", self.checkpoints_dir) + _print_info_line("Logs Path", self.logs_dir) + # Footer + print(f"\n{colorize_text(footer, 'bold')}\n")
+ + +
+[docs] + def train(self, resume_from_checkpoint: str | bool = None): + """ + The full training process like training, evaluation, logging and saving model checkpoints. + + Args: + resume_from_checkpoint: Resume from checkpoint path (if value is a path) or automatically load from the + latest checkpoint (if value is True) + """ + if resume_from_checkpoint: + self.load_from_checkpoint(resume_from_checkpoint) + if self.current_epoch >= self.config.num_epochs: + logger.info( + f"Unable to resume from `{os.path.join(self.checkpoints_dir, str(self.state.epoch))}` " + f"since it belongs to the ending epoch!" + ) + self.current_epoch = self.state.epoch + 1 + + self.print_info() + + for epoch in range(self.current_epoch, self.config.num_epochs + 1): + print() + training_results = self.inner_training_loop(epoch) + evaluation_results = self.evaluate() + if self.lr_scheduler is not None: + self.lr_scheduler.step(evaluation_results["loss"]) + + train_logs = {f"train.{metric_name}": value for metric_name, value in training_results.items()} + evaluation_logs = {f"evaluation.{metric_name}": value for metric_name, value in evaluation_results.items()} + all_logs = {**train_logs, **evaluation_logs} + + self.state.epoch = epoch + self.state.update_best_results( + metric_value=all_logs[self.config.metric_for_best_model], + objective=self.metrics_handler.objective, + step=epoch, + ) + + # maybe save checkpoint + if epoch % self.config.save_freq == 0: + ckpt_save_path = os.path.join(self.checkpoints_dir, str(epoch)) + self.save(ckpt_save_path) + + self.log(train_logs, evaluation_logs, epoch) + + logger.info("Training done!")
+ + +
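Tying the pieces together, a typical end-to-end run might look like the sketch below. The hub paths are placeholders, `Model.load`/`Dataset.load` are assumed loader entry points not shown in this module, and only the `TrainerConfig` fields that this module actually reads are set:

```python
from hezar.configs import TrainerConfig
from hezar.data.datasets import Dataset
from hezar.models import Model
from hezar.trainer import Trainer

model = Model.load("hezarai/some-text-classification-model")   # placeholder repo id
train_dataset = Dataset.load("hezarai/some-dataset", split="train")
eval_dataset = Dataset.load("hezarai/some-dataset", split="test")

config = TrainerConfig(
    task="text_classification",
    output_dir="outputs",
    num_epochs=3,
    batch_size=16,
    learning_rate=2e-5,
    metrics=["f1"],
)

trainer = Trainer(
    model=model,
    config=config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)
trainer.train()
```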
+[docs] + def log(self, train_logs: Dict[str, Any], evaluation_logs: Dict[str, Any], step: int): + """ + Log metrics results + """ + # Log to tensorboard + write_to_tensorboard(self.tensorboard, train_logs, step) + write_to_tensorboard(self.tensorboard, evaluation_logs, step) + + # Log to CSV + self.csv_logger.write({**train_logs, **evaluation_logs}, step) + + # Save trainer state + self.state.save( + os.path.join( + self.checkpoints_dir, + self.default_trainer_state_file, + ) + )
+ + +
+[docs] + def save( + self, + path: str, + config_filename=None, + model_filename=None, + model_config_filename=None, + subfolder=None, + dataset_config_file=None, + ): + """ + Save the trainer and relevant files to a path. + + Files to save are train config, model weights, model config, preprocessor files and preprocessor config. + + Args: + path: A directory to save everything + config_filename: Config filename + model_filename: Model file name + model_config_filename: Model config file name + subfolder: Optional sub-folder + dataset_config_file: Dataset config filename + """ + config_filename = config_filename or self.trainer_config_file + subfolder = subfolder or self.trainer_subfolder + dataset_config_file = dataset_config_file or self.dataset_config_file + + self.config.save(path, filename=config_filename, subfolder=subfolder) + self.model.save(path, filename=model_filename, config_filename=model_config_filename) + self.train_dataset.config.save(path, filename=dataset_config_file, subfolder=subfolder)
+ + +
+[docs] + def push_to_hub( + self, + repo_id: str, + config_filename: str = None, + push_model: bool = True, + push_logs: bool = True, + model_filename: str = None, + model_config_filename: str = None, + subfolder: str = None, + dataset_config_filename: str = None, + commit_message: str = None, + private: bool = False, + ): + """ + Push everything to the Hub + + Args: + repo_id: Path to hub + config_filename: Trainer config file name + push_model: Whether to push the model or not + push_logs: Whether to push training logs or not + model_filename: Model file name + model_config_filename: Model config file name + subfolder: Path to Trainer files + dataset_config_filename: Dataset config file name + commit_message: Commit message for the push + private: Whether to create a private repo if it doesn't exist already + """ + config_filename = config_filename or self.trainer_config_file + subfolder = subfolder or self.trainer_subfolder + dataset_config_file = dataset_config_filename or self.dataset_config_file + + # create remote repo + create_repo(repo_id, repo_type="model", exist_ok=True, private=private) + + if not commit_message: + commit_message = "Hezar: Upload training files" + + # upload train files + self.config.push_to_hub( + repo_id, + filename=config_filename, + subfolder=subfolder, + private=private, + commit_message=commit_message, + ) + self.train_dataset.config.push_to_hub( + repo_id, + filename=dataset_config_file, + subfolder=subfolder, + private=private, + commit_message=commit_message + ) + + # upload model + if push_model: + self.model.push_to_hub( + repo_id, + filename=model_filename, + config_filename=model_config_filename, + commit_message=commit_message, + private=private, + ) + + if push_logs: + upload_file( + path_or_fileobj=self.csv_logger.save_path, + path_in_repo=os.path.join(self.trainer_subfolder, self.trainer_csv_log_file), + repo_id=repo_id, + commit_message=commit_message, + )
+
\ No newline at end of file
diff --git a/_modules/hezar/trainer/trainer_utils.html b/_modules/hezar/trainer/trainer_utils.html
new file mode 100644
index 00000000..a2bc2923
--- /dev/null
+++ b/_modules/hezar/trainer/trainer_utils.html
@@ -0,0 +1,702 @@

Source code for hezar.trainer.trainer_utils

+import os
+from dataclasses import asdict, dataclass
+
+import numpy as np
+import pandas as pd
+from omegaconf import OmegaConf
+from torch.utils.tensorboard import SummaryWriter
+
+
+__all__ = [
+    "TrainerState",
+    "AverageMeter",
+    "MetricsTracker",
+    "CSVLogger",
+    "write_to_tensorboard",
+    "resolve_logdir",
+]
+
+
+
+[docs] +@dataclass +class TrainerState: + """ + A Trainer state is a container for holding specific updating values in the training process and is saved when + checkpointing. + + Args: + epoch: Current epoch number + total_epochs: Total epochs to train the model + global_step: Number of the update steps so far, one step is a full training step (one batch) + metric_for_best_checkpoint: The metric key for choosing the best checkpoint (Also given in the TrainerConfig) + best_metric_value: The value of the best checkpoint saved so far + best_checkpoint: Path to the best model checkpoint so far + """ + epoch: int = 1 + total_epochs: int = None + global_step: int = 0 + metric_for_best_checkpoint: str = None + best_metric_value: float = None + best_checkpoint: str = None + +
+[docs] + def update(self, items: dict, **kwargs): + items.update(kwargs) + for k, v in items.items(): + if hasattr(self, k): + setattr(self, k, v)
+ + +
+[docs] + def update_best_results(self, metric_value, objective, step): + if objective == "maximize": + operator = np.greater + elif objective == "minimize": + operator = np.less + else: + raise ValueError(f"`objective` must be either `maximize` or `minimize`, got `{objective}`!") + + if self.best_metric_value is None: + self.best_metric_value = metric_value + self.best_checkpoint = step + + elif operator(metric_value, self.best_metric_value): + self.best_metric_value = metric_value + self.best_checkpoint = step
+ + +
+[docs] + def save(self, path, drop_none: bool = False): + """ + Save the state to a .yaml file at `path` + """ + state = asdict(self) + if drop_none: + state = {k: v for k, v in state.items() if v is not None} + os.makedirs(os.path.dirname(path), exist_ok=True) + OmegaConf.save(state, path)
+ + +
+[docs] + @classmethod + def load(cls, path): + """ + Load a trainer state from `path` + """ + state_file = OmegaConf.load(path) + state_dict = OmegaConf.to_container(state_file) + state = cls(**state_dict) + return state
+
+ + + +
+[docs] +class AverageMeter: + """Compute and store the average and current value""" + + def __init__(self, name, fmt=":f"): + self.name = name + self.fmt = fmt + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + self.reset() + +
+[docs] + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0
+ + +
+[docs] + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count
+ + + def __str__(self): + fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" + return fmtstr.format(**self.__dict__)
+ + + +
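A quick self-contained demonstration of the meter's bookkeeping:

```python
meter = AverageMeter("loss", fmt=":.4f")
meter.update(0.75)       # one batch with loss 0.75
meter.update(0.25, n=3)  # three batches averaging 0.25
print(meter.avg)         # (0.75 + 0.25 * 3) / 4 == 0.375
print(meter)             # "loss 0.2500 (0.3750)"
```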
+[docs] +class MetricsTracker: + def __init__(self, metrics): + self.metrics = metrics or [] + self.trackers = {} + if len(self.metrics): + for m in self.metrics.values(): + for metric_key in m.config.output_keys: + self.trackers[metric_key] = AverageMeter(metric_key) + if "loss" not in self.trackers: + self.trackers["loss"] = AverageMeter("loss") + +
+[docs] + def update(self, results): + for metric_name, tracker in self.trackers.items(): + tracker.update(results[metric_name])
+ + +
+[docs] + def reset(self): + for tracker in self.trackers.values(): + tracker.reset()
+ + +
+[docs] + def avg(self): + avg_results = {} + for metric_name, tracker in self.trackers.items(): + avg_results[metric_name] = tracker.avg + + return avg_results
+
+ + + +
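When constructed with no metrics, only the implicit `loss` tracker is created, which allows a quick sketch (illustrative values):

    tracker = MetricsTracker(metrics=None)
    tracker.update({"loss": 0.7})
    tracker.update({"loss": 0.5})
    print(tracker.avg())  # -> {'loss': 0.6}
    tracker.reset()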
+[docs] +class CSVLogger: + def __init__(self, logs_dir: str, csv_filename: str): + self.save_path = os.path.join(logs_dir, csv_filename) + self.df = pd.DataFrame({}) + +
+[docs] + def write(self, logs: dict, step: int): + all_logs = {"step": step} + all_logs.update({k: [v] for k, v in logs.items()}) + row = pd.DataFrame(all_logs) + self.df = pd.concat([self.df, row]) + self.df.to_csv(self.save_path, index=False)
+
+ + + +
+[docs] +def write_to_tensorboard(writer: SummaryWriter, logs: dict, step: int): + for metric_name, value in logs.items(): + writer.add_scalar(metric_name, value, step)
+ + + +
+[docs] +def resolve_logdir(log_dir) -> str: + import socket + from datetime import datetime + + current_time = datetime.now().strftime("%b%d_%H-%M-%S") + return os.path.join(log_dir, current_time + "_" + socket.gethostname())
+ +
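The logging helpers above are typically combined per training run. A sketch (note that `CSVLogger` does not create the logs directory itself, so it is created explicitly here; names and values are illustrative):

    logs_dir = resolve_logdir("runs")  # e.g. runs/<timestamp>_<hostname>
    os.makedirs(logs_dir, exist_ok=True)
    csv_logger = CSVLogger(logs_dir=logs_dir, csv_filename="training_logs.csv")
    writer = SummaryWriter(log_dir=logs_dir)
    results = {"loss": 0.42, "accuracy": 0.91}
    csv_logger.write(results, step=100)
    write_to_tensorboard(writer, results, step=100)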
\ No newline at end of file
diff --git a/_modules/hezar/utils/audio_utils.html b/_modules/hezar/utils/audio_utils.html
new file mode 100644
index 00000000..aedcd5e2
--- /dev/null
+++ b/_modules/hezar/utils/audio_utils.html
@@ -0,0 +1,1066 @@
+hezar.utils.audio_utils - Hezar Documentation

Source code for hezar.utils.audio_utils

+"""
+Common audio utils taken from `transformers.audio_utils`
+"""
+from __future__ import annotations
+
+from typing import List, Optional
+
+import numpy as np
+
+from ..constants import Backends
+from .integration_utils import is_backend_available
+from .logging import Logger
+
+
+__all__ = [
+    "load_audio_files",
+    "spectrogram",
+    "amplitude_to_db",
+    "power_to_db",
+    "window_function",
+    "mel_filter_bank",
+    "hertz_to_mel",
+    "mel_to_hertz",
+]
+
+logger = Logger(__name__)
+
+
+
+[docs] +def load_audio_files(paths: str | List[str], sampling_rate: int = 16000): + if is_backend_available(Backends.LIBROSA): + import librosa + if isinstance(paths, str): + paths = [paths] + inputs = [librosa.load(x, sr=sampling_rate)[0] for x in paths] + else: + raise ImportError("`librosa` must be installed to load audio files!") + return inputs
+ + + +
+[docs] +def spectrogram( + waveform: np.ndarray, + window: np.ndarray, + frame_length: int, + hop_length: int, + fft_length: Optional[int] = None, + power: Optional[float] = 1.0, + center: bool = True, + pad_mode: str = "reflect", + onesided: bool = True, + preemphasis: Optional[float] = None, + mel_filters: Optional[np.ndarray] = None, + mel_floor: float = 1e-10, + log_mel: Optional[str] = None, + reference: float = 1.0, + min_value: float = 1e-10, + db_range: Optional[float] = None, + dtype: np.dtype = np.float32, +) -> np.ndarray: + """ + Calculates a spectrogram over one waveform using the Short-Time Fourier Transform. + + This function can create the following kinds of spectrograms: + + - amplitude spectrogram (`power = 1.0`) + - power spectrogram (`power = 2.0`) + - complex-valued spectrogram (`power = None`) + - log spectrogram (use `log_mel` argument) + - mel spectrogram (provide `mel_filters`) + - log-mel spectrogram (provide `mel_filters` and `log_mel`) + + How this works: + + 1. The input waveform is split into frames of size `frame_length` that are partially overlapping by `frame_length + - hop_length` samples. + 2. Each frame is multiplied by the window and placed into a buffer of size `fft_length`. + 3. The DFT is taken of each windowed frame. + 4. The results are stacked into a spectrogram. + + We make a distinction between the following "blocks" of sample data, each of which may have a different lengths: + + - The analysis frame. This is the size of the time slices that the input waveform is split into. + - The window. Each analysis frame is multiplied by the window to avoid spectral leakage. + - The FFT input buffer. The length of this determines how many frequency bins are in the spectrogram. + + In this implementation, the window is assumed to be zero-padded to have the same size as the analysis frame. A + padded window can be obtained from `window_function()`. The FFT input buffer may be larger than the analysis frame, + typically the next power of two. + + Note: This function is not optimized for speed yet. It should be mostly compatible with `librosa.stft` and + `torchaudio.functional.transforms.Spectrogram`, although it is more flexible due to the different ways spectrograms + can be constructed. + + Args: + waveform (`np.ndarray` of shape `(length,)`): + The input waveform. This must be a single real-valued, mono waveform. + window (`np.ndarray` of shape `(frame_length,)`): + The windowing function to apply, including zero-padding if necessary. The actual window length may be + shorter than `frame_length`, but we're assuming the array has already been zero-padded. + frame_length (`int`): + The length of the analysis frames in samples. With librosa this is always equal to `fft_length` but we also + allow smaller sizes. + hop_length (`int`): + The stride between successive analysis frames in samples. + fft_length (`int`, *optional*): + The size of the FFT buffer in samples. This determines how many frequency bins the spectrogram will have. + For optimal speed, this should be a power of two. If `None`, uses `frame_length`. + power (`float`, *optional*, defaults to 1.0): + If 1.0, returns the amplitude spectrogram. If 2.0, returns the power spectrogram. If `None`, returns + complex numbers. + center (`bool`, *optional*, defaults to `True`): + Whether to pad the waveform so that frame `t` is centered around time `t * hop_length`. If `False`, frame + `t` will start at time `t * hop_length`. 
+        pad_mode (`str`, *optional*, defaults to `"reflect"`):
+            Padding mode used when `center` is `True`. Possible values are: `"constant"` (pad with zeros), `"edge"`
+            (pad with edge values), `"reflect"` (pads with mirrored values).
+        onesided (`bool`, *optional*, defaults to `True`):
+            If True, only computes the positive frequencies and returns a spectrogram containing `fft_length // 2 + 1`
+            frequency bins. If False, also computes the negative frequencies and returns `fft_length` frequency bins.
+        preemphasis (`float`, *optional*):
+            Coefficient for a low-pass filter that applies pre-emphasis before the DFT.
+        mel_filters (`np.ndarray` of shape `(num_freq_bins, num_mel_filters)`, *optional*):
+            The mel filter bank. If supplied, applies this filter bank to create a mel spectrogram.
+        mel_floor (`float`, *optional*, defaults to 1e-10):
+            Minimum value of mel frequency banks.
+        log_mel (`str`, *optional*):
+            How to convert the spectrogram to log scale. Possible options are: `None` (don't convert), `"log"` (take
+            the natural logarithm), `"log10"` (take the base-10 logarithm), `"dB"` (convert to decibels). Can only be
+            used when `power` is not `None`.
+        reference (`float`, *optional*, defaults to 1.0):
+            Sets the input spectrogram value that corresponds to 0 dB. For example, use `np.max(spectrogram)` to set
+            the loudest part to 0 dB. Must be greater than zero.
+        min_value (`float`, *optional*, defaults to `1e-10`):
+            The spectrogram will be clipped to this minimum value before conversion to decibels, to avoid taking
+            `log(0)`. For a power spectrogram, the default of `1e-10` corresponds to a minimum of -100 dB. For an
+            amplitude spectrogram, the value `1e-5` corresponds to -100 dB. Must be greater than zero.
+        db_range (`float`, *optional*):
+            Sets the maximum dynamic range in decibels. For example, if `db_range = 80`, the difference between the
+            peak value and the smallest value will never be more than 80 dB. Must be greater than zero.
+        dtype (`np.dtype`, *optional*, defaults to `np.float32`):
+            Data type of the spectrogram tensor. If `power` is None, this argument is ignored and the dtype will be
+            `np.complex64`.
+
+    Returns:
+        `np.ndarray` containing a spectrogram of shape `(num_frequency_bins, length)` for a regular spectrogram or
+        shape `(num_mel_filters, length)` for a mel spectrogram.
+ """ + window_length = len(window) + + if fft_length is None: + fft_length = frame_length + + if frame_length > fft_length: + raise ValueError(f"frame_length ({frame_length}) may not be larger than fft_length ({fft_length})") + + if window_length != frame_length: + raise ValueError(f"Length of the window ({window_length}) must equal frame_length ({frame_length})") + + if hop_length <= 0: + raise ValueError("hop_length must be greater than zero") + + if waveform.ndim != 1: + raise ValueError(f"Input waveform must have only one dimension, shape is {waveform.shape}") + + if np.iscomplexobj(waveform): + raise ValueError("Complex-valued input waveforms are not currently supported") + + # center pad the waveform + if center: + padding = [(int(frame_length // 2), int(frame_length // 2))] + waveform = np.pad(waveform, padding, mode=pad_mode) + + # promote to float64, since np.fft uses float64 internally + waveform = waveform.astype(np.float64) + window = window.astype(np.float64) + + # split waveform into frames of frame_length size + num_frames = int(1 + np.floor((waveform.size - frame_length) / hop_length)) + + num_frequency_bins = (fft_length // 2) + 1 if onesided else fft_length + spectrogram = np.empty((num_frames, num_frequency_bins), dtype=np.complex64) + + # rfft is faster than fft + fft_func = np.fft.rfft if onesided else np.fft.fft + buffer = np.zeros(fft_length) + + timestep = 0 + for frame_idx in range(num_frames): + buffer[:frame_length] = waveform[timestep:timestep + frame_length] + + if preemphasis is not None: + buffer[1:frame_length] -= preemphasis * buffer[: frame_length - 1] + buffer[0] *= 1 - preemphasis + + buffer[:frame_length] *= window + + spectrogram[frame_idx] = fft_func(buffer) + timestep += hop_length + + # note: ** is much faster than np.power + if power is not None: + spectrogram = np.abs(spectrogram, dtype=np.float64) ** power + + spectrogram = spectrogram.T + + if mel_filters is not None: + spectrogram = np.maximum(mel_floor, np.dot(mel_filters.T, spectrogram)) + + if power is not None and log_mel is not None: + if log_mel == "log": + spectrogram = np.log(spectrogram) + elif log_mel == "log10": + spectrogram = np.log10(spectrogram) + elif log_mel == "dB": + if power == 1.0: + spectrogram = amplitude_to_db(spectrogram, reference, min_value, db_range) + elif power == 2.0: + spectrogram = power_to_db(spectrogram, reference, min_value, db_range) + else: + raise ValueError(f"Cannot use log_mel option '{log_mel}' with power {power}") + else: + raise ValueError(f"Unknown log_mel option: {log_mel}") + + spectrogram = np.asarray(spectrogram, dtype) + + return spectrogram
+ + + +
+[docs] +def amplitude_to_db( + spectrogram: np.ndarray, + reference: float = 1.0, + min_value: float = 1e-5, + db_range: Optional[float] = None, +) -> np.ndarray: + """ + Converts an amplitude spectrogram to the decibel scale. This computes `20 * log10(spectrogram / reference)`, using + basic logarithm properties for numerical stability. + + The motivation behind applying the log function on the (mel) spectrogram is that humans do not hear loudness on a + linear scale. Generally to double the perceived volume of a sound we need to put 8 times as much energy into it. + This means that large variations in energy may not sound all that different if the sound is loud to begin with. + This compression operation makes the (mel) spectrogram features match more closely what humans actually hear. + + Args: + spectrogram (`np.ndarray`): + The input amplitude (mel) spectrogram. + reference (`float`, *optional*, defaults to 1.0): + Sets the input spectrogram value that corresponds to 0 dB. For example, use `np.max(spectrogram)` to set + the loudest part to 0 dB. Must be greater than zero. + min_value (`float`, *optional*, defaults to `1e-5`): + The spectrogram will be clipped to this minimum value before conversion to decibels, to avoid taking + `log(0)`. The default of `1e-5` corresponds to a minimum of -100 dB. Must be greater than zero. + db_range (`float`, *optional*): + Sets the maximum dynamic range in decibels. For example, if `db_range = 80`, the difference between the + peak value and the smallest value will never be more than 80 dB. Must be greater than zero. + + Returns: + `np.ndarray`: the spectrogram in decibels + """ + if reference <= 0.0: + raise ValueError("reference must be greater than zero") + if min_value <= 0.0: + raise ValueError("min_value must be greater than zero") + + reference = max(min_value, reference) + + spectrogram = np.clip(spectrogram, a_min=min_value, a_max=None) + spectrogram = 20.0 * (np.log10(spectrogram) - np.log10(reference)) + + if db_range is not None: + if db_range <= 0.0: + raise ValueError("db_range must be greater than zero") + spectrogram = np.clip(spectrogram, a_min=spectrogram.max() - db_range, a_max=None) + + return spectrogram
+ + + +
+[docs] +def power_to_db( + spectrogram: np.ndarray, + reference: float = 1.0, + min_value: float = 1e-10, + db_range: Optional[float] = None, +) -> np.ndarray: + """ + Converts a power spectrogram to the decibel scale. This computes `10 * log10(spectrogram / reference)`, using basic + logarithm properties for numerical stability. + + The motivation behind applying the log function on the (mel) spectrogram is that humans do not hear loudness on a + linear scale. Generally to double the perceived volume of a sound we need to put 8 times as much energy into it. + This means that large variations in energy may not sound all that different if the sound is loud to begin with. + This compression operation makes the (mel) spectrogram features match more closely what humans actually hear. + + Based on the implementation of `librosa.power_to_db`. + + Args: + spectrogram (`np.ndarray`): + The input power (mel) spectrogram. Note that a power spectrogram has the amplitudes squared! + reference (`float`, *optional*, defaults to 1.0): + Sets the input spectrogram value that corresponds to 0 dB. For example, use `np.max(spectrogram)` to set + the loudest part to 0 dB. Must be greater than zero. + min_value (`float`, *optional*, defaults to `1e-10`): + The spectrogram will be clipped to this minimum value before conversion to decibels, to avoid taking + `log(0)`. The default of `1e-10` corresponds to a minimum of -100 dB. Must be greater than zero. + db_range (`float`, *optional*): + Sets the maximum dynamic range in decibels. For example, if `db_range = 80`, the difference between the + peak value and the smallest value will never be more than 80 dB. Must be greater than zero. + + Returns: + `np.ndarray`: the spectrogram in decibels + """ + if reference <= 0.0: + raise ValueError("reference must be greater than zero") + if min_value <= 0.0: + raise ValueError("min_value must be greater than zero") + + reference = max(min_value, reference) + + spectrogram = np.clip(spectrogram, a_min=min_value, a_max=None) + spectrogram = 10.0 * (np.log10(spectrogram) - np.log10(reference)) + + if db_range is not None: + if db_range <= 0.0: + raise ValueError("db_range must be greater than zero") + spectrogram = np.clip(spectrogram, a_min=spectrogram.max() - db_range, a_max=None) + + return spectrogram
+ + + +
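A quick numeric check of the dB conversion (10 * log10 of the ratio to `reference`):

    power_spec = np.array([[1.0, 0.1, 0.001]])
    power_to_db(power_spec)               # -> [[  0., -10., -30.]]
    power_to_db(power_spec, db_range=20)  # -> [[  0., -10., -20.]] (dynamic range clipped)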
+[docs] +def window_function( + window_length: int, + name: str = "hann", + periodic: bool = True, + frame_length: Optional[int] = None, + center: bool = True, +) -> np.ndarray: + """ + Returns an array containing the specified window. This window is intended to be used with `stft`. + + The following window types are supported: + + - `"boxcar"`: a rectangular window + - `"hamming"`: the Hamming window + - `"hann"`: the Hann window + + Args: + window_length (`int`): + The length of the window in samples. + name (`str`, *optional*, defaults to `"hann"`): + The name of the window function. + periodic (`bool`, *optional*, defaults to `True`): + Whether the window is periodic or symmetric. + frame_length (`int`, *optional*): + The length of the analysis frames in samples. Provide a value for `frame_length` if the window is smaller + than the frame length, so that it will be zero-padded. + center (`bool`, *optional*, defaults to `True`): + Whether to center the window inside the FFT buffer. Only used when `frame_length` is provided. + + Returns: + `np.ndarray` of shape `(window_length,)` or `(frame_length,)` containing the window. + """ + length = window_length + 1 if periodic else window_length + + if name == "boxcar": + window = np.ones(length) + elif name in ["hamming", "hamming_window"]: + window = np.hamming(length) + elif name in ["hann", "hann_window"]: + window = np.hanning(length) + else: + raise ValueError(f"Unknown window function '{name}'") + + if periodic: + window = window[:-1] + + if frame_length is None: + return window + + if window_length > frame_length: + raise ValueError(f"Length of the window ({window_length}) may not be larger than frame_length ({frame_length})") + + padded_window = np.zeros(frame_length) + offset = (frame_length - window_length) // 2 if center else 0 + padded_window[offset:offset + window_length] = window + return padded_window
+ + + +
+[docs] +def mel_filter_bank( + num_frequency_bins: int, + num_mel_filters: int, + min_frequency: float, + max_frequency: float, + sampling_rate: int, + norm: Optional[str] = None, + mel_scale: str = "htk", +) -> np.ndarray: + """ + Creates a frequency bin conversion matrix used to obtain a mel spectrogram. This is called a *mel filter bank*, and + various implementation exist, which differ in the number of filters, the shape of the filters, the way the filters + are spaced, the bandwidth of the filters, and the manner in which the spectrum is warped. The goal of these + features is to approximate the non-linear human perception of the variation in pitch with respect to the frequency. + + Different banks of mel filters were introduced in the literature. The following variations are supported: + + - MFCC FB-20: introduced in 1980 by Davis and Mermelstein, it assumes a sampling frequency of 10 kHz and a speech + bandwidth of `[0, 4600]` Hz. + - MFCC FB-24 HTK: from the Cambridge HMM Toolkit (HTK) (1995) uses a filter bank of 24 filters for a speech + bandwidth of `[0, 8000]` Hz. This assumes sampling rate ≥ 16 kHz. + - MFCC FB-40: from the Auditory Toolbox for MATLAB written by Slaney in 1998, assumes a sampling rate of 16 kHz and + speech bandwidth of `[133, 6854]` Hz. This version also includes area normalization. + - HFCC-E FB-29 (Human Factor Cepstral Coefficients) of Skowronski and Harris (2004), assumes a sampling rate of + 12.5 kHz and speech bandwidth of `[0, 6250]` Hz. + + This code is adapted from *torchaudio* and *librosa*. Note that the default parameters of torchaudio's + `melscale_fbanks` implement the `"htk"` filters while librosa uses the `"slaney"` implementation. + + Args: + num_frequency_bins (`int`): + Number of frequencies used to compute the spectrogram (should be the same as in `stft`). + num_mel_filters (`int`): + Number of mel filters to generate. + min_frequency (`float`): + Lowest frequency of interest in Hz. + max_frequency (`float`): + Highest frequency of interest in Hz. This should not exceed `sampling_rate / 2`. + sampling_rate (`int`): + Sample rate of the audio waveform. + norm (`str`, *optional*): + If `"slaney"`, divide the triangular mel weights by the width of the mel band (area normalization). + mel_scale (`str`, *optional*, defaults to `"htk"`): + The mel frequency scale to use, `"htk"` or `"slaney"`. + + Returns: + `np.ndarray` of shape (`num_frequency_bins`, `num_mel_filters`): Triangular filter bank matrix. This is a + projection matrix to go from a spectrogram to a mel spectrogram. + """ + if norm is not None and norm != "slaney": + raise ValueError('norm must be one of None or "slaney"') + + # frequencies of FFT bins in Hz + fft_freqs = np.linspace(0, sampling_rate // 2, num_frequency_bins) + + # center points of the triangular mel filters + mel_min = hertz_to_mel(min_frequency, mel_scale=mel_scale) + mel_max = hertz_to_mel(max_frequency, mel_scale=mel_scale) + mel_freqs = np.linspace(mel_min, mel_max, num_mel_filters + 2) + filter_freqs = mel_to_hertz(mel_freqs, mel_scale=mel_scale) + + mel_filters = _create_triangular_filter_bank(fft_freqs, filter_freqs) + + if norm is not None and norm == "slaney": + # Slaney-style mel is scaled to be approx constant energy per channel + enorm = 2.0 / (filter_freqs[2:num_mel_filters + 2] - filter_freqs[:num_mel_filters]) + mel_filters *= np.expand_dims(enorm, 0) + + if (mel_filters.max(axis=0) == 0.0).any(): + logger.warn( + "At least one mel filter has all zero values. 
" + f"The value for `num_mel_filters` ({num_mel_filters}) may be set too high. " + f"Or, the value for `num_frequency_bins` ({num_frequency_bins}) may be set too low." + ) + + return mel_filters
+ + + +
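Putting the helpers above together, a log-mel spectrogram can be computed roughly as follows (a sketch with illustrative parameters, not a snippet from the Hezar sources):

    sampling_rate = 16000
    waveform = np.random.randn(sampling_rate).astype(np.float64)  # stand-in for 1 second of audio

    window = window_function(window_length=400, name="hann", frame_length=512)
    mel_filters = mel_filter_bank(
        num_frequency_bins=257,  # fft_length // 2 + 1
        num_mel_filters=80,
        min_frequency=0.0,
        max_frequency=8000.0,
        sampling_rate=sampling_rate,
        norm="slaney",
        mel_scale="slaney",
    )
    log_mel = spectrogram(
        waveform,
        window,
        frame_length=512,
        hop_length=160,
        fft_length=512,
        power=2.0,
        mel_filters=mel_filters,
        log_mel="dB",
    )  # shape: (80, num_frames)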
+[docs] +def hertz_to_mel(freq: float | np.ndarray, mel_scale: str = "htk") -> float | np.ndarray: + """ + Convert frequency from hertz to mels. + + Args: + freq (`float` or `np.ndarray`): + The frequency, or multiple frequencies, in hertz (Hz). + mel_scale (`str`, *optional*, defaults to `"htk"`): + The mel frequency scale to use, `"htk"` or `"slaney"`. + + Returns: + `float` or `np.ndarray`: The frequencies on the mel scale. + """ + + if mel_scale not in ["slaney", "htk"]: + raise ValueError('mel_scale should be one of "htk" or "slaney".') + + if mel_scale == "htk": + return 2595.0 * np.log10(1.0 + (freq / 700.0)) + + min_log_hertz = 1000.0 + min_log_mel = 15.0 + logstep = 27.0 / np.log(6.4) + mels = 3.0 * freq / 200.0 + + if isinstance(freq, np.ndarray): + log_region = freq >= min_log_hertz + mels[log_region] = min_log_mel + np.log(freq[log_region] / min_log_hertz) * logstep + elif freq >= min_log_hertz: + mels = min_log_mel + np.log(freq / min_log_hertz) * logstep + + return mels
+ + + +
+[docs] +def mel_to_hertz(mels: float | np.ndarray, mel_scale: str = "htk") -> float | np.ndarray: + """ + Convert frequency from mels to hertz. + + Args: + mels (`float` or `np.ndarray`): + The frequency, or multiple frequencies, in mels. + mel_scale (`str`, *optional*, `"htk"`): + The mel frequency scale to use, `"htk"` or `"slaney"`. + + Returns: + `float` or `np.ndarray`: The frequencies in hertz. + """ + + if mel_scale not in ["slaney", "htk"]: + raise ValueError('mel_scale should be one of "htk" or "slaney".') + + if mel_scale == "htk": + return 700.0 * (10.0 ** (mels / 2595.0) - 1.0) + + min_log_hertz = 1000.0 + min_log_mel = 15.0 + logstep = np.log(6.4) / 27.0 + freq = 200.0 * mels / 3.0 + + if isinstance(mels, np.ndarray): + log_region = mels >= min_log_mel + freq[log_region] = min_log_hertz * np.exp(logstep * (mels[log_region] - min_log_mel)) + elif mels >= min_log_mel: + freq = min_log_hertz * np.exp(logstep * (mels - min_log_mel)) + + return freq
+ + + +def _create_triangular_filter_bank(fft_freqs: np.ndarray, filter_freqs: np.ndarray) -> np.ndarray: + """ + Creates a triangular filter bank. + + Adapted from *torchaudio* and *librosa*. + + Args: + fft_freqs (`np.ndarray` of shape `(num_frequency_bins,)`): + Discrete frequencies of the FFT bins in Hz. + filter_freqs (`np.ndarray` of shape `(num_mel_filters,)`): + Center frequencies of the triangular filters to create, in Hz. + + Returns: + `np.ndarray` of shape `(num_frequency_bins, num_mel_filters)` + """ + filter_diff = np.diff(filter_freqs) + slopes = np.expand_dims(filter_freqs, 0) - np.expand_dims(fft_freqs, 1) + down_slopes = -slopes[:, :-2] / filter_diff[:-1] + up_slopes = slopes[:, 2:] / filter_diff[1:] + return np.maximum(np.zeros(1), np.minimum(down_slopes, up_slopes)) +
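As a quick sanity check of the two conversions, 1000 Hz maps to roughly 1000 mels on the HTK scale, and the functions invert each other:

    hertz_to_mel(1000.0)               # ~ 1000.0
    mel_to_hertz(hertz_to_mel(440.0))  # ~ 440.0 (round trip)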
\ No newline at end of file
diff --git a/_modules/hezar/utils/common_utils.html b/_modules/hezar/utils/common_utils.html
new file mode 100644
index 00000000..4fe6f1d2
--- /dev/null
+++ b/_modules/hezar/utils/common_utils.html
@@ -0,0 +1,652 @@
+hezar.utils.common_utils - Hezar Documentation

Source code for hezar.utils.common_utils

+from __future__ import annotations
+
+import inspect
+import re
+from time import perf_counter
+from typing import Callable, Dict, List, Mapping
+
+from ..constants import Color
+
+
+__all__ = [
+    "exec_timer",
+    "snake_case",
+    "reverse_string_digits",
+    "is_text_valid",
+    "is_url",
+    "colorize_text",
+    "permute_dict_list",
+    "sanitize_function_parameters",
+]
+
+
+
+[docs]
+class exec_timer:
+    """
+    A context manager that captures the execution time of all the operations inside it
+
+    Examples:
+        >>> with exec_timer() as timer:
+        >>>     ...  # operations here
+        >>> print(timer.time)  # elapsed seconds, available once the block exits
+    """
+
+    def __enter__(self):
+        self.time = perf_counter()
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self.time = perf_counter() - self.time
+ + + +
+[docs] +def snake_case(s): + return "_".join(re.sub("([A-Z][a-z]+)", r" \1", re.sub("([A-Z]+)", r" \1", s.replace("-", " "))).split()).lower()
+ + + +
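For instance (behavior inferred from the regexes above):

    snake_case("TextClassificationModel")      # -> "text_classification_model"
    snake_case("roberta-text-classification")  # -> "roberta_text_classification"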
+[docs] +def reverse_string_digits(text): + """ + Reverse all digit segments in a given text + """ + # Capture one or more digits followed by any number of non-digits followed by another digit + pattern = r"(\d+(?:\D\d+)*)" + + def reverse_match(match): + return match.group(1)[::-1] # Reverse the matched digits and special characters + + return re.sub(pattern, reverse_match, text)
+ + + +
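Each contiguous digit segment (including separators between digits) is reversed as a whole, e.g.:

    reverse_string_digits("date 2024/01/15")  # -> "date 51/10/4202"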
+[docs] +def is_text_valid(text, valid_characters): + """ + Given a list of valid characters, check if only those are included in the text + """ + pattern = re.compile(f'^[{re.escape("".join(valid_characters))}]+$') + return bool(pattern.match(text))
+ + + +
+[docs] +def is_url(text): + url_pattern = re.compile( + r'^(https?|ftp)://' # Protocol (http, https, ftp) + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # Domain + r'localhost|' # localhost + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|' # IPv4 + r'\[?[A-F0-9]*:[A-F0-9:]+]?)' # IPv6 + r'(?::\d+)?' # Port + r'(?:/?\S*)?$', re.IGNORECASE + ) + return bool(re.match(url_pattern, text))
+ + + +
+[docs] +def colorize_text(text: str, color: str | Color): + """ + Add colorization codes to the text. The output is the text with surrounding color codes and the colors are applied + on the console/terminal output like when using `print()` + """ + color_mapping = { + "header": Color.HEADER, + "normal": Color.NORMAL, + "bold": Color.BOLD, + "underline": Color.UNDERLINE, + "italic": Color.ITALIC, + "blue": Color.BLUE, + "cyan": Color.CYAN, + "green": Color.GREEN, + "yellow": Color.YELLOW, + "red": Color.RED, + "grey": Color.GREY, + } + if isinstance(color, str) and not hasattr(color, "value"): + color = color_mapping.get(color.lower(), Color.NORMAL) + return color + text + Color.NORMAL
+ + + +
+[docs]
+def permute_dict_list(dict_list: List[Dict]) -> Dict[str, List]:
+    """
+    Convert a list of dictionaries to a dictionary of lists
+
+    Args:
+        dict_list: Input list of dicts
+
+    Returns:
+        A dictionary mapping each key (taken from the first dict) to the list of its values across `dict_list`
+    """
+    if not len(dict_list):
+        return {}
+    d = {key: [x[key] for x in dict_list] for key in dict_list[0]}
+    return d
+ + + +
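For example:

    permute_dict_list([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
    # -> {"a": [1, 3], "b": [2, 4]}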
+[docs] +def sanitize_function_parameters(func: Callable, params: Dict | Mapping, **kwargs): + """ + Given a dict of parameters or kwargs, you can figure out which ones must be passed to the `func` based on its + signature. + + Args: + func: The function object + params: A dict of parameters with values + kwargs: Keyword arguments that are merged with `params` + + Returns: + The proper dict of parameters keys and values + """ + params.update(**kwargs) + params_signature = dict(inspect.signature(func).parameters) + fn_parameters = {p for p, v in params_signature.items() if v.kind not in (v.VAR_KEYWORD, v.VAR_POSITIONAL)} + fn_params_names = set(fn_parameters) + input_params = {p: params[p] for p in fn_params_names if p in params} + return input_params
+ + +
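A short sketch of the filtering behavior (the function and keys here are hypothetical):

    def forward(input_ids, attention_mask=None):
        ...

    params = {"input_ids": [1, 2, 3], "labels": [0], "attention_mask": None}
    sanitize_function_parameters(forward, params)
    # -> {"input_ids": [1, 2, 3], "attention_mask": None}  ("labels" is dropped)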
\ No newline at end of file
diff --git a/_modules/hezar/utils/data_utils.html b/_modules/hezar/utils/data_utils.html
new file mode 100644
index 00000000..534f509a
--- /dev/null
+++ b/_modules/hezar/utils/data_utils.html
@@ -0,0 +1,724 @@
+hezar.utils.data_utils - Hezar Documentation

Source code for hezar.utils.data_utils

+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional
+
+from omegaconf import DictConfig
+
+from ..constants import PaddingType
+from .logging import Logger
+
+
+if TYPE_CHECKING:
+    import torch
+
+
+__all__ = [
+    "convert_batch_dict_dtype",
+    "resolve_inputs_length_for_padding",
+    "pad_batch_items",
+    "shift_tokens_right",
+    "get_non_numeric_keys",
+    "flatten_dict",
+]
+
+logger = Logger(__name__)
+
+
+# TODO: This code might be able to be written in a cleaner way, but be careful, any change might break a lot of things!
+
+[docs] +def convert_batch_dict_dtype(batch_dict: Dict[str, Any], dtype: str = None, skip_keys: list = None): + """ + Convert data dtypes of the values in a batch dict + + Args: + batch_dict: The batched dictionary. Each key in the dict has a batch of data as its value. + dtype: Target data type to convert to + skip_keys: A list of key names to skip conversion + + Returns: + The same dict with cast values + """ + import numpy as np + import torch + + dtype = dtype or "list" + skip_keys = skip_keys or [] + + if dtype == "list": + for k, v in batch_dict.items(): + if isinstance(v, np.ndarray): + batch_dict[k] = v.tolist() + elif isinstance(v, torch.Tensor): + batch_dict[k] = v.cpu().numpy().tolist() + return batch_dict + + if dtype in ["np", "numpy"]: + caster = np.asarray + cast_type = np.ndarray + elif dtype in ["pt", "torch", "pytorch"]: + caster = torch.tensor + cast_type = torch.Tensor + else: + raise ValueError(f"Invalid `dtype`: {dtype}") + + for k, v in batch_dict.items(): + if k not in skip_keys: + try: + if not isinstance(v, cast_type): + batch_dict[k] = caster(v) + except Exception as e: # noqa + logger.warning(f"Could not convert values of `{k}` to type `{dtype}`\n" f"Error: {e}") + return batch_dict
+ + + +
+[docs]
+def resolve_inputs_length_for_padding(
+    inputs: List[List[Any]],
+    padding_type: str | PaddingType = None,
+    max_length: Optional[bool | int] = None,
+    truncation: Optional[bool] = True,
+):
+    """
+    Resolve the final inputs length based on the `padding_type` and `max_length` values
+    """
+    inputs_max_length = max([len(x) for x in inputs])
+
+    # Resolve the padding strategy first: an explicit `max_length` implies max-length padding,
+    # otherwise fall back to padding to the longest input
+    padding = padding_type
+    if padding is None:
+        padding = "max_length" if max_length is not None else "longest"
+
+    # Now let's resolve any conflicts
+    if padding == "longest":
+        if max_length is not None:
+            logger.warning(
+                "Setting padding='longest' and max_length is not valid. You must set one of them"
+                " and leave the other as None. Falling back to padding='longest'"
+            )
+        inputs_length = inputs_max_length
+
+    elif padding == "max_length":
+        if max_length is None:
+            logger.warning(
+                "Setting padding='max_length' but no max_length value is provided! Falling back to padding='longest'"
+            )
+            inputs_length = inputs_max_length
+        else:
+            if max_length < inputs_max_length and not truncation:
+                logger.warning(
+                    f"Cannot set max_length to {max_length} "
+                    f"while max input length is {inputs_max_length} and `truncation` is `False`! "
+                    f"Either set `truncation=True` or increase `max_length`"
+                )
+                inputs_length = inputs_max_length
+            else:
+                inputs_length = max_length
+    else:
+        raise ValueError(f"Invalid padding value `{padding}`, expected either `max_length` or `longest`")
+
+    return inputs_length
+ + + +
+[docs] +def pad_batch_items( + inputs: List[List[int | float]], + padding_type: str | PaddingType = None, + padding_side: Literal["right", "left"] = "right", + pad_id: int = 0, + max_length: Optional[bool | int] = None, + truncation: Optional[bool] = True, +): + """ + Given a nested container of unequal sized iterables e.g, batch of token ids, pad them based on padding strategy + Args: + inputs: A nested iterable of unequal sized iterables (e.g, list of lists) + padding_type: Padding strategy, either max_length or longest + padding_side: Where to add padding ids, `left` or `right`, defaults to `right` + pad_id: Pad token id, defaults to `0` + max_length: Max input length after padding, only applicable when padding_strategy == "max_length" + truncation: Whether to truncate if an input in the batch is longer than max_length + + Returns: + A list of equal sized lists + """ + + inputs_length = resolve_inputs_length_for_padding(inputs, padding_type=padding_type, max_length=max_length, + truncation=truncation) + + padded_inputs = [] + for ids in inputs: + difference = inputs_length - len(ids) + if difference > 0: + paddings = [pad_id] * difference + padded_ids = ids + paddings if padding_side == "right" else paddings + ids + padded_inputs.append(padded_ids) + else: + padded_inputs.append(ids[:inputs_length]) + + return padded_inputs
+ + + +
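For example (pad id and token values are illustrative):

    batch = [[5, 6, 7], [1], [8, 9]]
    pad_batch_items(batch)  # defaults to padding to the longest input
    # -> [[5, 6, 7], [1, 0, 0], [8, 9, 0]]
    pad_batch_items(batch, padding_type="max_length", max_length=2, truncation=True)
    # -> [[5, 6], [1, 0], [8, 9]]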
+[docs] +def shift_tokens_right(input_ids: "torch.Tensor", pad_token_id: int, decoder_start_token_id: int): + """ + Shift input ids one token to the right. + """ + shifted_input_ids = input_ids.new_zeros(input_ids.shape) + shifted_input_ids[:, 1:] = input_ids[:, :-1].clone() + shifted_input_ids[:, 0] = decoder_start_token_id + + # replace possible -100 values in labels by `pad_token_id` + shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id) + + return shifted_input_ids
+ + + +
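For example, preparing decoder inputs from labels (token ids are illustrative):

    import torch

    input_ids = torch.tensor([[11, 12, 13, -100]])
    shift_tokens_right(input_ids, pad_token_id=0, decoder_start_token_id=2)
    # -> tensor([[ 2, 11, 12, 13]])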
+[docs] +def get_non_numeric_keys(d: Dict, batched=True): + """ + Get keys that have string values in a dictionary + + Args: + d: The dict + batched: Are the input dict values batched or not + + Returns: + A list of string-valued keys + """ + keys = [] + for k, v in d.items(): + if len(v) and isinstance(v[0], list): + if batched and not isinstance(v[0][0], (int, float, complex)) and not isinstance(v[0][0], bool): + keys.append(k) + elif isinstance(v[0], str): + keys.append(k) + return keys
+ + + +
+[docs] +def flatten_dict(dict_config: Dict | DictConfig) -> DictConfig: + """ + Flatten a nested Dict/DictConfig object + + Args: + dict_config: A Dict/DictConfig object + + Returns: + The flattened version of the dict-like object + """ + + config = DictConfig({}) + for k, v in dict_config.items(): + if isinstance(v, (Dict, DictConfig)): + config.update(flatten_dict(v)) + else: + config[k] = v + + return config
+ +
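For example (note that duplicate leaf keys across nesting levels would overwrite each other):

    nested = DictConfig({"train": {"lr": 2e-5, "optimizer": {"name": "adamw"}}, "seed": 42})
    flatten_dict(nested)
    # -> {"lr": 2e-05, "name": "adamw", "seed": 42}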
\ No newline at end of file
diff --git a/_modules/hezar/utils/file_utils.html b/_modules/hezar/utils/file_utils.html
new file mode 100644
index 00000000..09e4bee4
--- /dev/null
+++ b/_modules/hezar/utils/file_utils.html
@@ -0,0 +1,552 @@
+hezar.utils.file_utils - Hezar Documentation

Source code for hezar.utils.file_utils

+from __future__ import annotations
+
+import gzip
+import json
+import os
+import shutil
+
+import omegaconf
+
+from .logging import Logger
+
+
+logger = Logger(__name__)
+
+__all__ = [
+    "gunzip",
+    "load_yaml_config",
+    "load_json_config",
+]
+
+
+
+[docs] +def load_yaml_config(path: str | os.PathLike): + """ + Load yaml file using omegaconf + """ + config = omegaconf.OmegaConf.load(path) + return config
+ + + +
+[docs] +def load_json_config(path: str | os.PathLike): + """ + Load json config file + """ + with open(path) as f: + config = json.load(f) + return config
+ + + +
+[docs] +def gunzip(src_path, dest_path): + """ + Unzip a .gz file from `src_path` and extract to `dest_path` + Args: + src_path: Path to .gz file + dest_path: Path to the destination file + + Returns: + + """ + with gzip.open(src_path, "rb") as f_in: + with open(dest_path, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + logger.debug(f"Extracted {src_path} to {dest_path}")
+ +
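Typical usage of these helpers (the file paths here are hypothetical and assume the destination directory exists):

    config = load_yaml_config("model_config.yaml")
    labels = load_json_config("id2label.json")
    gunzip("embeddings/fasttext.bin.gz", "embeddings/fasttext.bin")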
\ No newline at end of file
diff --git a/_modules/hezar/utils/hub_utils.html b/_modules/hezar/utils/hub_utils.html
new file mode 100644
index 00000000..176270bc
--- /dev/null
+++ b/_modules/hezar/utils/hub_utils.html
@@ -0,0 +1,672 @@
+hezar.utils.hub_utils - Hezar Documentation

Source code for hezar.utils.hub_utils

+import os.path
+
+from huggingface_hub import HfApi, Repository
+
+from ..constants import HEZAR_CACHE_DIR, HEZAR_HUB_ID, RepoType
+from ..utils.logging import Logger
+
+
+__all__ = [
+    "resolve_pretrained_path",
+    "get_local_cache_path",
+    "exists_in_cache",
+    "exists_on_hub",
+    "clone_repo",
+    "list_repo_files",
+    "get_state_dict_from_hub",
+]
+
+logger = Logger(__name__)
+
+
+
+[docs] +def resolve_pretrained_path(hub_or_local_path): + """ + **DEPRECATED** + + Resolve a local or Hub path. If path exists locally it just returns the input, otherwise tries to resolve + hub_or_local_path. If it contains the namespace (author/org) leave it as is, otherwise change to hezarai/{hub_path} + + Args: + hub_or_local_path: Repo name or id + + Returns: + A proper pretrained path + """ + logger.warning("`resolve_pretrained_path` is deprecated! Use the raw `hub_or_local_path`!") + if os.path.isdir(hub_or_local_path): + return hub_or_local_path + repo_id = f"{HEZAR_HUB_ID}/{hub_or_local_path}" if "/" not in hub_or_local_path else hub_or_local_path + return repo_id
+ + + +
+[docs] +def get_local_cache_path(repo_id, repo_type): + """ + Given the hub path and repo type, configure the local path to save everything e.g, ~/.hezar/models/<repo_name> + + Args: + repo_id: Repo name or id + repo_type: Repo type e.g, model, dataset, etc + + Returns: + Path to local cache directory + """ + repo_owner, repo_name = repo_id.split("/") + cache_path = f"{HEZAR_CACHE_DIR}/{repo_type}s--{repo_owner}--{repo_name}" + return cache_path
+ + + +
+[docs] +def exists_in_cache(hub_path, repo_type="model"): + cache_path = get_local_cache_path(hub_path, repo_type) + return os.path.exists(cache_path)
+ + + +
+[docs] +def exists_on_hub(hub_path: str, repo_type="model"): + """ + Determine whether the repo exists on the hub or not + + Args: + hub_path: Repo name or id + repo_type: Repo type like model, dataset, etc. + + Returns: + True or False + """ + author, repo_name = hub_path.split("/") + api = HfApi() + if repo_type == "model": + paths = list(iter(api.list_models(author=author))) + elif repo_type == "dataset": + paths = list(iter(api.list_datasets(author=author))) + elif repo_type == "space": + paths = list(iter(api.list_spaces(author=author))) + else: + raise ValueError(f"Unknown type: {repo_type}! Use `model`, `dataset`, `space`, etc.") + + return hub_path in [path.id for path in paths]
+ + + +
+[docs] +def clone_repo(repo_id: str, save_path: str, **kwargs): + """ + Clone a repo on the hub to local directory + + Args: + repo_id: Repo name or id + save_path: Path to clone the repo to + + Returns: + the local path to the repo + """ + repo = Repository(local_dir=save_path, clone_from=repo_id, **kwargs) + return repo.local_dir
+ + + +
+[docs] +def list_repo_files(hub_or_local_path: str, subfolder: str = None): + """ + List all files in a Hub or local model repo + + Args: + hub_or_local_path: Path to hub or local repo + subfolder: Optional subfolder path + + Returns: + A list of all file names + """ + if os.path.isdir(hub_or_local_path): + files_itr = os.walk(hub_or_local_path) + files = [] + for r, d, f in files_itr: + if r == hub_or_local_path: + files.append(f) + else: + for x in f: + files.append(f"{r.replace(f'{hub_or_local_path}/', '')}/{x}") + else: + files = HfApi().list_repo_files(hub_or_local_path, repo_type=str(RepoType.MODEL)) + + if subfolder is not None: + files = [x.replace(f"{subfolder}/", "") for x in files if subfolder in x] + + return files
+ + + +
+[docs] +def get_state_dict_from_hub(hub_id, filename, subfolder=None): + """ + Load a state dict from a repo on the HF Hub. Works on any repo no matter the library. + + Args: + hub_id: Path to repo id + filename: Weights file name + subfolder: Optional subfolder in the repo + + Returns: + A PyTorch state dict obj + """ + import torch + + api = HfApi() + + subfolder = subfolder or "" + + # Download or load the cached file + weights_file = api.hf_hub_download( + repo_id=hub_id, + filename=filename, + subfolder=subfolder, + cache_dir=HEZAR_CACHE_DIR, + ) + + state_dict = torch.load(weights_file) + + return state_dict
+ +
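A sketch of combining these helpers (`hezarai/bert-base-fa` and `model.pt` are used here only as illustrative repo and file names):

    if exists_on_hub("hezarai/bert-base-fa", repo_type="model"):
        files = list_repo_files("hezarai/bert-base-fa")
        if "model.pt" in files:
            state_dict = get_state_dict_from_hub("hezarai/bert-base-fa", filename="model.pt")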
\ No newline at end of file
diff --git a/_modules/hezar/utils/image_utils.html b/_modules/hezar/utils/image_utils.html
new file mode 100644
index 00000000..ccc72228
--- /dev/null
+++ b/_modules/hezar/utils/image_utils.html
@@ -0,0 +1,757 @@
+hezar.utils.image_utils - Hezar Documentation

Source code for hezar.utils.image_utils

+from __future__ import annotations
+
+from io import BytesIO
+from typing import Iterable, Tuple
+
+import numpy as np
+import requests
+import torch
+
+from ..constants import Backends, ChannelsAxisSide, ImageType
+from .common_utils import is_url
+from .integration_utils import is_backend_available
+from .logging import Logger
+
+
+logger = Logger(__name__)
+
+if is_backend_available(Backends.PILLOW):
+    from PIL import Image
+
+__all__ = [
+    "convert_image_type",
+    "normalize_image",
+    "load_image",
+    "show_image",
+    "rescale_image",
+    "resize_image",
+    "mirror_image",
+    "gray_scale_image",
+    "find_channels_axis_side",
+    "transpose_channels_axis_side",
+]
+
+
+def verify_image_dims(image: np.ndarray):
+    if len(image.shape) not in (2, 3):
+        raise ValueError(f"Image input must be a numpy array of size 2 or 3! Got {image.shape}")
+
+
+
+[docs] +def convert_image_type( + image: np.ndarray | "Image" | torch.Tensor, + target_type: str | ImageType = ImageType.NUMPY, +): + """ + Convert image lib type. Supports numpy array, pillow image and torch tensor. + """ + if isinstance(image, Image.Image): + if image.mode == "L": + image = np.asarray(image) + image = np.expand_dims(image, 0) + else: + image = np.asarray(image) + elif isinstance(image, torch.Tensor): + image = image.cpu().numpy() + + verify_image_dims(image) + + if target_type == ImageType.PILLOW: + # transpose channels to the last axis since pillow cannot handle it otherwise + if find_channels_axis_side(image) == ChannelsAxisSide.FIRST: + image = transpose_channels_axis_side( + image, + axis_side=ChannelsAxisSide.LAST, + src_axis_side=ChannelsAxisSide.FIRST, + ) + num_channels = image.shape[0] if find_channels_axis_side(image) == ChannelsAxisSide.FIRST else image.shape[-1] + if num_channels == 1: + image = image[:, :, -1] + image = Image.fromarray(image, "L") + else: + image = Image.fromarray(image) + elif target_type == ImageType.TORCH: + image = torch.tensor(image) + + return image
+ + + +
+[docs] +def load_image(path, return_type: str | ImageType = ImageType.PILLOW): + """ + Load an image file to a desired return format + + Args: + path: Path to image file + return_type: Image output type ("pillow", "numpy", "torch") + + Returns: + The desired output image of type `PIL.Image` or `numpy.ndarray` or `torch.Tensor` + """ + if is_url(path): + pil_image = Image.open(BytesIO(requests.get(path).content)).convert("RGB") + else: + pil_image = Image.open(path).convert("RGB") + converted_image = convert_image_type(pil_image, return_type) + return converted_image
+ + + +
+[docs] +def show_image(image: "Image" | torch.Tensor | np.ndarray, title: str = "Image"): + """ + Given any type of input image (PIL, numpy, torch), show the image in a window + + Args: + image: Input image of types PIL.Image, numpy.ndarray or torch.Tensor + title: Optional title for the preview window + """ + pil_image = convert_image_type(image, ImageType.PILLOW) + pil_image.show(title=title)
+ + + +
+[docs] +def rescale_image(image: np.ndarray, scale: float): + verify_image_dims(image) + image = image * scale + return image
+ + + +
+[docs] +def resize_image( + image: np.ndarray, + size: Tuple[int, int], + resample=None, + reducing_gap: float = None, + return_type: ImageType = ImageType.NUMPY, +): + """ + Resize a numpy array image (actually uses pillow PIL.Image.resize(...)) + + Args: + image: Numpy image + size: A tuple of (width, height) + resample: Resampling filter (refer to PIL.Image.Resampling) for possible values + reducing_gap: Optimization method for resizing based on reducing times + return_type: Return type of the image (numpy, torch, pillow) + + Returns: + The resized image + """ + verify_image_dims(image) + if len(size) != 2: + raise ValueError(f"The value of `size` must be a 2-sized tuple! Got length {len(size)}(`{size}`)") + pil_image = convert_image_type(image, ImageType.PILLOW) + pil_image = pil_image.resize(size, resample=resample, reducing_gap=reducing_gap) + np_image = convert_image_type(pil_image, return_type) + return np_image
+ + + +
+[docs] +def mirror_image(image: np.ndarray, return_type: str | ImageType = ImageType.NUMPY): + if not isinstance(image, np.ndarray): + raise ValueError("image must be a numpy array") + + verify_image_dims(image) + + pil_image = convert_image_type(image, ImageType.PILLOW) + pil_image = pil_image.transpose(Image.FLIP_LEFT_RIGHT) + final_image = convert_image_type(pil_image, return_type) + return final_image
+ + + +
+[docs] +def gray_scale_image(image: np.ndarray, return_type: str | ImageType = ImageType.NUMPY): + if not isinstance(image, np.ndarray): + raise ValueError("image must be a numpy array") + + verify_image_dims(image) + + pil_image = convert_image_type(image, ImageType.PILLOW) + pil_image = pil_image.convert("L") + np_image = convert_image_type(pil_image, ImageType.NUMPY) + final_image = convert_image_type(np_image, target_type=return_type) + return final_image
+ + + +
+[docs] +def normalize_image( + image: np.ndarray, + mean: float | Iterable[float], + std: float | Iterable[float], + channel_axis: str | ChannelsAxisSide = "first", +): + verify_image_dims(image) + + if not isinstance(image, np.ndarray): + raise ValueError("image must be a numpy array") + + num_channels = image.shape[0 if channel_axis == ChannelsAxisSide.FIRST else -1] + + if not isinstance(mean, Iterable): + mean = [mean] * num_channels + mean = np.array(mean, dtype=image.dtype) + + if not isinstance(std, Iterable): + std = [std] * num_channels + std = np.array(std, dtype=image.dtype) + + if channel_axis == ChannelsAxisSide.LAST: + image = (image - mean) / std + else: + image = ((image.T - mean) / std).T + + return image
+ + + +
+[docs] +def find_channels_axis_side(image: np.ndarray, num_channels: int = None): + valid_num_channels = (num_channels,) if num_channels is not None else (1, 2, 3) + if image.shape[0] in valid_num_channels: + return ChannelsAxisSide.FIRST + else: + return ChannelsAxisSide.LAST
+ + + +
+[docs] +def transpose_channels_axis_side( + image: np.ndarray, + axis_side: str | ChannelsAxisSide, + num_channels: int = None, + src_axis_side: str | ChannelsAxisSide = None, +): + """ + Convert an image channels axis side from (channels, ...) to (..., channels) or vise versa. + + Args: + image: Input image + axis_side: The desired axis side (can be "first" or "last") + num_channels: The number of channels in the input image + src_axis_side: The image initial channels axis side (can be "first" or "last") + + Returns: + The image with the converted channels axis + """ + if src_axis_side is None: + src_axis_side = find_channels_axis_side(image, num_channels=num_channels) + + # If input's channels axis side and output channels axis side are the same return the same image + if src_axis_side == axis_side: + return image + + if axis_side == ChannelsAxisSide.FIRST: + image = image.transpose((2, 0, 1)) + elif axis_side == ChannelsAxisSide.LAST: + image = image.transpose((1, 2, 0)) + + return image
+ +
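A typical channels-last preprocessing chain built from the helpers above (a sketch; the file name and normalization statistics are illustrative):

    image = load_image("sample.jpg", return_type="numpy")  # (H, W, 3), channels-last
    image = resize_image(image, size=(224, 224))
    image = rescale_image(image, scale=1 / 255.0)
    image = normalize_image(image, mean=0.5, std=0.5, channel_axis="last")
    image = transpose_channels_axis_side(image, axis_side="first", src_axis_side="last")  # (3, 224, 224)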
\ No newline at end of file
diff --git a/_modules/hezar/utils/integration_utils.html b/_modules/hezar/utils/integration_utils.html
new file mode 100644
index 00000000..0a418593
--- /dev/null
+++ b/_modules/hezar/utils/integration_utils.html
@@ -0,0 +1,556 @@
+hezar.utils.integration_utils - Hezar Documentation

Source code for hezar.utils.integration_utils

+from __future__ import annotations
+
+import importlib.util
+from importlib.metadata import version
+from typing import List
+
+from ..constants import Backends
+
+
+__all__ = [
+    "is_backend_available",
+    "verify_dependencies",
+    "get_lib_version",
+]
+
+
+
+[docs] +def is_backend_available(backend: Backends): + """ + Check if the backend package is installed or not + + Args: + backend: Package name + + Returns: + Whether the package is available or not + """ + return importlib.util.find_spec(backend) is not None
+ + + +
+[docs] +def verify_dependencies(obj, backends: List[Backends | str] = None): + """ + Check if all the required dependencies are installed or not. + + Args: + obj: The target object to check. (Usually `self`) + backends: A list of dependency names of type `str` or `Backends` + + Raises: + ModuleNotFoundError + """ + if backends is None: + return + unavailable = [] + for backend in backends: + if not is_backend_available(backend): + unavailable.append(backend) + if len(unavailable): + raise ModuleNotFoundError( + f"`{obj.__class__.__name__}` requires " + f"{f'`{unavailable[0]}`' if len(unavailable) == 1 else unavailable} " + f"which {'is' if len(unavailable) == 1 else 'are'} not installed!" + )
+ + + +
+[docs] +def get_lib_version(lib: str): + return version(lib)
+ +
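A minimal sketch of how a module can guard its optional dependencies (the class and backend names here are illustrative):

    class AudioFeatureExtractor:
        required_backends = ["librosa", "soundfile"]

        def __init__(self):
            # Raises ModuleNotFoundError if any dependency is missing
            verify_dependencies(self, self.required_backends)

    print(get_lib_version("numpy"))  # e.g. '1.26.4'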
\ No newline at end of file
diff --git a/_modules/hezar/utils/logging.html b/_modules/hezar/utils/logging.html
new file mode 100644
index 00000000..dda4b7a5
--- /dev/null
+++ b/_modules/hezar/utils/logging.html
@@ -0,0 +1,518 @@
+hezar.utils.logging - Hezar Documentation

Source code for hezar.utils.logging

+import logging
+
+
+__all__ = ["Logger"]
+
+
+
+[docs] +class Logger(logging.Logger): + def __init__(self, name: str, level=None, fmt=None): + fmt = fmt or "Hezar (%(levelname)s): %(message)s" + level = level or "INFO" + super().__init__(name, level) + handler = logging.StreamHandler() + formatter = logging.Formatter(fmt) + handler.setFormatter(formatter) + self.addHandler(handler) + +
+[docs] + def log_upload_success(self, name, target_path: str): + """ + Log (info) success info when the file(s) upload is done. + """ + self.info(f"Uploaded: `{name}` --> `{target_path}`")
+
+ +
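For example (the file and repo names are illustrative):

    logger = Logger(__name__)
    logger.info("Training started")  # -> Hezar (INFO): Training started
    logger.log_upload_success("model.pt", "hezarai/my-model")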
\ No newline at end of file
diff --git a/_modules/hezar/utils/registry_utils.html b/_modules/hezar/utils/registry_utils.html
new file mode 100644
index 00000000..97077c1c
--- /dev/null
+++ b/_modules/hezar/utils/registry_utils.html
@@ -0,0 +1,679 @@
+hezar.utils.registry_utils - Hezar Documentation

Source code for hezar.utils.registry_utils

+from ..constants import RegistryType
+from .common_utils import snake_case
+
+
+__all__ = [
+    "list_available_models",
+    "list_available_preprocessors",
+    "list_available_datasets",
+    "list_available_metrics",
+    "list_available_embeddings",
+    "get_registry_point",
+    "get_module_class",
+    "get_module_config_class",
+    "get_registry_key_by_module_class",
+]
+
+
+
+[docs] +def list_available_models(): + registry = _get_registry_from_type(RegistryType.MODEL) + + return sorted(registry.keys())
+ + + +
+[docs] +def list_available_preprocessors(): + registry = _get_registry_from_type(RegistryType.PREPROCESSOR) + + return sorted(registry.keys())
+ + + +
+[docs] +def list_available_datasets(): + registry = _get_registry_from_type(RegistryType.DATASET) + + return sorted(registry.keys())
+ + + +
+[docs] +def list_available_metrics(): + registry = _get_registry_from_type(RegistryType.METRIC) + + return sorted(registry.keys())
+ + + +
+[docs] +def list_available_embeddings(): + registry = _get_registry_from_type(RegistryType.EMBEDDING) + + return sorted(registry.keys())
+ + + +def _get_registry_from_type(registry_type: RegistryType): + if registry_type == RegistryType.MODEL: + from ..models import Model # noqa + from ..registry import models_registry # noqa + + registry = models_registry + + elif registry_type == RegistryType.PREPROCESSOR: + from ..preprocessors import Preprocessor # noqa + # Also import models since some preprocessors are in their own model module + from ..models import Model # noqa + from ..registry import preprocessors_registry # noqa + + registry = preprocessors_registry + + elif registry_type == RegistryType.DATASET: + from ..data import Dataset # noqa + from ..registry import datasets_registry # noqa + + registry = datasets_registry + + elif registry_type == RegistryType.EMBEDDING: + from ..embeddings import Embedding # noqa + from ..registry import embeddings_registry # noqa + + registry = embeddings_registry + + elif registry_type == RegistryType.METRIC: + from ..metrics import Metric # noqa + from ..registry import metrics_registry # noqa + + registry = metrics_registry + + else: + raise ValueError(f"Invalid `registry_type`: {registry_type}!") + + return registry + + +
+
+
+def get_registry_point(registry_key: str, registry_type: RegistryType):
+    """
+    Get the registry item by registry key name in a specific registry
+
+    Args:
+        registry_key: Module's name in the registry
+        registry_type: Module's registry container type
+
+    Returns:
+        A Registry object
+    """
+    registry = _get_registry_from_type(registry_type)
+
+    registry = registry[registry_key]
+    return registry
+
+
+def get_module_config_class(name: str, registry_type: RegistryType):
+    """
+    Get the config class for a given module based on its registry name.
+
+    Args:
+        name (str): Module's registry name
+        registry_type (RegistryType): Registry type
+
+    Returns:
+        A class of type :class:`hezar.Config`
+    """
+    registry = _get_registry_from_type(registry_type)
+
+    if name not in registry:
+        return None
+
+    config_cls = registry[name].config_class
+    return config_cls
+
+
+def get_module_class(name: str, registry_type: RegistryType):
+    """
+    Get module class based on registry name
+
+    Args:
+        name: Module's key name in its registry
+        registry_type: Type of the module e.g., model, dataset, preprocessor, embedding, etc.
+
+    Returns:
+        A class corresponding to the given module
+    """
+    registry = _get_registry_from_type(registry_type)
+
+    name = snake_case(name)
+    module_cls = registry[name].module_class
+    return module_cls
+
+
+def get_registry_key_by_module_class(module_class: type, registry_type: RegistryType):
+    """
+    Given the module class, return the registry key if it exists
+
+    Args:
+        module_class: The module class (the raw class, not its name or an instance)
+        registry_type: The registry type
+
+    Returns:
+        The corresponding key for the class in its registry
+    """
+    registry = _get_registry_from_type(registry_type)
+    key_values = {v.module_class.__name__: k for k, v in registry.items()}
+    module_class_name = module_class.__name__
+    if module_class_name not in key_values:
+        raise KeyError(
+            f"The requested {registry_type} class `{module_class_name}` does not exist "
+            f"in the {registry_type}s registry!"
+        )
+    return key_values[module_class_name]
\ No newline at end of file
diff --git a/_modules/index.html b/_modules/index.html
new file mode 100644
index 00000000..0184e51d
--- /dev/null
+++ b/_modules/index.html
@@ -0,0 +1,577 @@
+Overview: module code - Hezar Documentation

All modules for which code is available

\ No newline at end of file
diff --git a/_sources/contributing.md.txt b/_sources/contributing.md.txt
new file mode 100644
index 00000000..642eb3a3
--- /dev/null
+++ b/_sources/contributing.md.txt
@@ -0,0 +1,102 @@
+# Contributing to Hezar
+Welcome to Hezar! We greatly appreciate your interest in contributing to this project and helping us make it even more
+valuable to the Persian community. Whether you're a developer, researcher, or enthusiast, your contributions are
+invaluable in helping us grow and improve Hezar.
+
+Before you start contributing, please take a moment to review the following guidelines.
+
+## Code of Conduct
+
+This project and its community adhere to
+the [Contributor Code of Conduct](https://github.com/hezarai/hezar/blob/main/CODE_OF_CONDUCT.md).
+
+## How to Contribute
+
+### Reporting Bugs
+
+If you come across a bug or unexpected behavior, please help us by reporting it.
+Use the [GitHub Issue Tracker](https://github.com/hezarai/hezar/issues) to create a detailed bug report.
+Include information such as:
+
+- A clear and descriptive title.
+- Steps to reproduce the bug.
+- Expected behavior.
+- Actual behavior.
+- Your operating system and Python version.
+
+### Adding features
+
+Have a great idea for a new feature or improvement? We'd love to hear it. You can open an issue describing your
+suggestion, with a clear description and any thoughts on how it can be implemented. If you can implement it
+yourself, just follow the instructions below on how to send a PR.
+
+### Adding/Improving documents
+
+Have a suggestion to enhance our documentation or want to contribute entirely new sections? We welcome your input!
+Here's how you can get involved:
+The docs website is deployed at [https://hezarai.github.io/hezar](https://hezarai.github.io/hezar) and the source for the
+docs is located in the [docs](https://github.com/hezarai/hezar/tree/main/docs) folder in the root of the repo. Feel
+free to apply your changes or add new docs to this section. Note that docs are written in Markdown format. In case you have
+added new files to this section, you must include them in the `index.md` file in the same folder. For example, if you've
+added the file `new_doc.md` to the `get_started` folder, you have to modify `get_started/index.md` and put your file
+name there.
+
+### Commit guidelines
+
+#### Functional best practices
+
+- Ensure only one "logical change" per commit for efficient review and flaw identification.
+- Smaller code changes facilitate quicker reviews and easier troubleshooting using Git's bisect capability.
+- Avoid mixing whitespace changes with functional code changes.
+- Avoid mixing two unrelated functional changes.
+- Refrain from sending large new features in a single giant commit.
+
+#### Styling best practices
+
+- Use the imperative mood in the subject (e.g., "Add support for ...", not "Adding support" or "Added support").
+- Keep the subject line short and concise, preferably less than 50 characters.
+- Capitalize the subject line and do not end it with a period.
+- Wrap body lines at 72 characters.
+- Use the body to explain what and why a change was made.
+- Do not explain the "how" in the commit message; reserve it for documentation or code.
+- For commits referencing an issue or pull request, write the proper commit subject followed by the reference in
+  parentheses (e.g., "Add NFKC normalizer (#9999)").
+- Reference code & paths in back quotes (e.g., `variable`, `method()`, `Class()`, `file.py`).
+- Preferably use the following [gitmoji](https://gitmoji.dev/)-compatible codes at the beginning of your commit message:
+
+| Emoji Code           | Emoji | Description                                  | Example Commit                                                 |
+|----------------------|-------|----------------------------------------------|----------------------------------------------------------------|
+| `:bug:`              | 🐛    | Fix a bug or issue                           | `:bug: Fix issue with image loading in DataLoader`             |
+| `:sparkles:`         | ✨    | Add feature or improvements                  | `:sparkles: Introduce support for text summarization`          |
+| `:recycle:`          | ♻️    | Refactor code (backward compatible refactor) | `:recycle: Refactor data preprocessing utilities`              |
+| `:memo:`             | 📝    | Add or change docs                           | `:memo: Update documentation for text classification`          |
+| `:pencil2:`          | ✏️    | Minor change or improvement                  | `:pencil2: Improve logging in Trainer`                         |
+| `:fire:`             | 🔥    | Remove code or file                          | `:fire: Remove outdated utility function`                      |
+| `:boom:`             | 💥    | Introduce breaking changes                   | `:boom: Update API, requires modification in existing scripts` |
+| `:test_tube:`        | 🧪    | Test-related changes                         | `:test_tube: Add unit tests for data loading functions`        |
+| `:bookmark:`         | 🔖    | Version release                              | `:bookmark: Release v1.0.0`                                    |
+| `:adhesive_bandage:` | 🩹    | Non-critical fix                             | `:adhesive_bandage: Fix minor issue in BPE tokenizer`          |
+
+## Sending a PR
+
+In order to apply any change to the repo, you have to follow these steps:
+
+1. Fork the Hezar repository.
+2. Create a new branch for your feature, bug fix, etc.
+3. Make your changes.
+4. Update the documentation to reflect your changes.
+5. Ensure your code adheres to the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html).
+6. Format the code using `ruff` (`ruff check --fix .`)
+7. Write tests to ensure the functionality, if needed.
+8. Run tests and make sure all of them pass. (Skip this step if your changes do not involve code.)
+9. Open a pull request from your fork; the PR template will be loaded automatically to help you do the rest.
+10. Be responsive to feedback and comments during the review process.
+11. Thanks for contributing to the Hezar project.😉❤️
+
+## License
+
+By contributing to Hezar, you agree that your contributions will be licensed under
+the [Apache 2.0 License](https://github.com/hezarai/hezar/blob/main/LICENSE).
+
+We look forward to your contributions and appreciate your efforts in making Hezar a powerful AI tool for the Persian
+community!
\ No newline at end of file
diff --git a/_sources/get_started/index.md.txt b/_sources/get_started/index.md.txt
new file mode 100644
index 00000000..df4c6cf6
--- /dev/null
+++ b/_sources/get_started/index.md.txt
@@ -0,0 +1,8 @@
+# Get Started
+```{toctree}
+:maxdepth: 1
+
+overview.md
+installation.md
+quick_tour.md
+```
diff --git a/_sources/get_started/installation.md.txt b/_sources/get_started/installation.md.txt
new file mode 100644
index 00000000..d04666f9
--- /dev/null
+++ b/_sources/get_started/installation.md.txt
@@ -0,0 +1,41 @@
+# Installation
+
+## Install from PyPI
+Installing Hezar is as easy as any other Python library! Most of the requirements are cross-platform and installing
+them on any machine is a piece of cake!
+
+```
+pip install hezar
+```
+### Installation variations
+Hezar is packed with a lot of tools that depend on other packages. Most of the
+time you might not want everything to be installed, hence, we provide multiple variations of
+Hezar so that the installation is light and fast for general use.
+
+You can install optional dependencies for each mode like so:
+```
+pip install hezar[nlp]         # For natural language processing
+pip install hezar[vision]      # For computer vision and image processing
+pip install hezar[audio]       # For audio and speech processing
+pip install hezar[embeddings]  # For word embeddings
+```
+Or you can also install everything using:
+```
+pip install hezar[all]
+```
+## Install from source
+You can also install the dev version of the library from source:
+```
+pip install git+https://github.com/hezarai/hezar.git
+```
+
+## Test installation
+From a Python console or the CLI, just import `hezar` and check the version:
+```python
+import hezar
+
+print(hezar.__version__)
+```
+```
+0.23.1
+```
diff --git a/_sources/get_started/overview.md.txt b/_sources/get_started/overview.md.txt
new file mode 100644
index 00000000..5154e93d
--- /dev/null
+++ b/_sources/get_started/overview.md.txt
@@ -0,0 +1,20 @@
+# Overview
+
+Welcome to Hezar! A library that makes state-of-the-art machine learning as easy as possible, aimed at the Persian
+language and built by the Persian community!
+
+In Hezar, the primary goal is to provide plug-and-play AI/ML utilities so that you don't need to know much about what's
+going on under the hood. Hezar is not just a model library; it's packed with every aspect you need for any
+ML pipeline, like datasets, trainers, preprocessors, feature extractors, etc.
+
+Hezar is a library that:
+- brings together all the best works in AI for Persian
+- makes using AI models as easy as a couple of lines of code
+- seamlessly integrates with Hugging Face Hub for all of its models
+- has a highly developer-friendly interface
+- has a task-based model interface which is more convenient for general users
+- is packed with additional tools like word embeddings, tokenizers, feature extractors, etc.
+- comes with a lot of supplementary ML tools for deployment, benchmarking, optimization, etc.
+- and more!
+
+To find out more, just take the [quick tour](quick_tour.md)!
diff --git a/_sources/get_started/quick_tour.md.txt b/_sources/get_started/quick_tour.md.txt
new file mode 100644
index 00000000..0891aebe
--- /dev/null
+++ b/_sources/get_started/quick_tour.md.txt
@@ -0,0 +1,214 @@
+# Quick Tour
+## Models
+There's a bunch of ready-to-use trained models for different tasks on the Hub!
+
+**🤗Hugging Face Hub Page**: [https://huggingface.co/hezarai](https://huggingface.co/hezarai)
+
+Let's walk you through some examples!
+
+- **Text Classification (sentiment analysis, categorization, etc.)**
+```python
+from hezar.models import Model
+
+example = ["هزار، کتابخانه‌ای کامل برای به کارگیری آسان هوش مصنوعی"]
+model = Model.load("hezarai/bert-fa-sentiment-dksf")
+outputs = model.predict(example)
+print(outputs)
+```
+```
+[[{'label': 'positive', 'score': 0.812910258769989}]]
+```
+- **Sequence Labeling (POS, NER, etc.)**
+```python
+from hezar.models import Model
+
+pos_model = Model.load("hezarai/bert-fa-pos-lscp-500k")  # Part-of-speech
+ner_model = Model.load("hezarai/bert-fa-ner-arman")  # Named entity recognition
+inputs = ["شرکت هوش مصنوعی هزار"]
+pos_outputs = pos_model.predict(inputs)
+ner_outputs = ner_model.predict(inputs)
+print(f"POS: {pos_outputs}")
+print(f"NER: {ner_outputs}")
+```
+```
+POS: [[{'token': 'شرکت', 'label': 'Ne'}, {'token': 'هوش', 'label': 'Ne'}, {'token': 'مصنوعی', 'label': 'AJe'}, {'token': 'هزار', 'label': 'NUM'}]]
+NER: [[{'token': 'شرکت', 'label': 'B-org'}, {'token': 'هوش', 'label': 'I-org'}, {'token': 'مصنوعی', 'label': 'I-org'}, {'token': 'هزار', 'label': 'I-org'}]]
+```
+- **Language Modeling (Mask Filling)**
+```python
+from hezar.models import Model
+
+roberta_mask_filling = Model.load("hezarai/roberta-fa-mask-filling")
+inputs = ["سلام بچه ها حالتون "]
+outputs = roberta_mask_filling.predict(inputs, top_k=1)
+print(outputs)
+```
+```
+[[{'token': 'چطوره', 'sequence': 'سلام بچه ها حالتون چطوره', 'token_id': 34505, 'score': 0.2230483442544937}]]
+```
+- **Speech Recognition**
+```python
+from hezar.models import Model
+
+whisper = Model.load("hezarai/whisper-small-fa")
+transcripts = whisper.predict("examples/assets/speech_example.mp3")
+print(transcripts)
+```
+```
+[{'text': 'و این تنها محدود به محیط کار نیست'}]
+```
+- **Image to Text (OCR)**
+```python
+from hezar.models import Model
+
+# OCR with TrOCR
+model = Model.load("hezarai/trocr-base-fa-v2")
+texts = model.predict(["examples/assets/ocr_example.jpg"])
+print(f"TrOCR Output: {texts}")
+
+# OCR with CRNN
+model = Model.load("hezarai/crnn-fa-printed-96-long")
+texts = model.predict("examples/assets/ocr_example.jpg")
+print(f"CRNN Output: {texts}")
+```
+```
+TrOCR Output: [{'text': 'چه میشه کرد، باید صبر کنیم'}]
+CRNN Output: [{'text': 'چه میشه کرد، باید صبر کنیم'}]
+```
+![](https://raw.githubusercontent.com/hezarai/hezar/main/examples/assets/ocr_example.jpg)
+
+- **Image to Text (License Plate Recognition)**
+```python
+from hezar.models import Model
+
+model = Model.load("hezarai/crnn-fa-64x256-license-plate-recognition")
+plate_text = model.predict("assets/license_plate_ocr_example.jpg")
+print(plate_text)  # Persian text of mixed numbers and characters might not show correctly in the console
+```
+```
+[{'text': '۵۷س۷۷۹۷۷'}]
+```
+![](https://raw.githubusercontent.com/hezarai/hezar/main/examples/assets/license_plate_ocr_example.jpg)
+
+- **Image to Text (Image Captioning)**
+```python
+from hezar.models import Model
+
+model = Model.load("hezarai/vit-roberta-fa-image-captioning-flickr30k")
+texts = model.predict("examples/assets/image_captioning_example.jpg")
+print(texts)
+```
+```
+[{'text': 'سگی با توپ تنیس در دهانش می دود.'}]
+```
+![](https://raw.githubusercontent.com/hezarai/hezar/main/examples/assets/image_captioning_example.jpg)
+
+We constantly keep working on adding and training new models, and this section will hopefully keep expanding over time ;)
+## Word Embeddings
+- **FastText**
+```python
+from hezar.embeddings import Embedding
+
+fasttext = Embedding.load("hezarai/fasttext-fa-300")
+most_similar = fasttext.most_similar("هزار")
+print(most_similar)
+```
+```
+[{'score': 0.7579, 'word': 'میلیون'},
+ {'score': 0.6943, 'word': '21هزار'},
+ {'score': 0.6861, 'word': 'میلیارد'},
+ {'score': 0.6825, 'word': '26هزار'},
+ {'score': 0.6803, 'word': '٣هزار'}]
+```
+- **Word2Vec (Skip-gram)**
+```python
+from hezar.embeddings import Embedding
+
+word2vec = Embedding.load("hezarai/word2vec-skipgram-fa-wikipedia")
+most_similar = word2vec.most_similar("هزار")
+print(most_similar)
+```
+```
+[{'score': 0.7885, 'word': 'چهارهزار'},
+ {'score': 0.7788, 'word': '۱۰هزار'},
+ {'score': 0.7727, 'word': 'دویست'},
+ {'score': 0.7679, 'word': 'میلیون'},
+ {'score': 0.7602, 'word': 'پانصد'}]
+```
+- **Word2Vec (CBOW)**
+```python
+from hezar.embeddings import Embedding
+
+word2vec = Embedding.load("hezarai/word2vec-cbow-fa-wikipedia")
+most_similar = word2vec.most_similar("هزار")
+print(most_similar)
+```
+```
+[{'score': 0.7407, 'word': 'دویست'},
+ {'score': 0.7400, 'word': 'میلیون'},
+ {'score': 0.7326, 'word': 'صد'},
+ {'score': 0.7276, 'word': 'پانصد'},
+ {'score': 0.7011, 'word': 'سیصد'}]
+```
+For a full guide on the embeddings module, see the [embeddings tutorial](https://hezarai.github.io/hezar/tutorial/embeddings.html).
+## Datasets
+You can load any of the datasets on the [Hub](https://huggingface.co/hezarai) like below:
+```python
+from hezar.data import Dataset
+
+sentiment_dataset = Dataset.load("hezarai/sentiment-dksf")  # A TextClassificationDataset instance
+lscp_dataset = Dataset.load("hezarai/lscp-pos-500k")  # A SequenceLabelingDataset instance
+xlsum_dataset = Dataset.load("hezarai/xlsum-fa")  # A TextSummarizationDataset instance
+alpr_ocr_dataset = Dataset.load("hezarai/persian-license-plate-v1")  # An OCRDataset instance
+...
+```
+The returned dataset objects from `load()` are PyTorch Dataset wrappers for specific tasks and can be used by a data loader out-of-the-box!
+
+You can also load Hezar's datasets using 🤗Datasets:
+```python
+from datasets import load_dataset
+
+dataset = load_dataset("hezarai/sentiment-dksf")
+```
+For a full guide on Hezar's datasets, see the [datasets tutorial](https://hezarai.github.io/hezar/tutorial/datasets.html).
+## Training
+Hezar makes it super easy to train models using out-of-the-box models and datasets provided in the library.
+
+```python
+from hezar.models import BertSequenceLabeling, BertSequenceLabelingConfig
+from hezar.data import Dataset
+from hezar.trainer import Trainer, TrainerConfig
+from hezar.preprocessors import Preprocessor
+
+base_model_path = "hezarai/bert-base-fa"
+dataset_path = "hezarai/lscp-pos-500k"
+
+train_dataset = Dataset.load(dataset_path, split="train", tokenizer_path=base_model_path)
+eval_dataset = Dataset.load(dataset_path, split="test", tokenizer_path=base_model_path)
+
+model = BertSequenceLabeling(BertSequenceLabelingConfig(id2label=train_dataset.config.id2label))
+preprocessor = Preprocessor.load(base_model_path)
+
+train_config = TrainerConfig(
+    output_dir="bert-fa-pos-lscp-500k",
+    task="sequence_labeling",
+    device="cuda",
+    init_weights_from=base_model_path,
+    batch_size=8,
+    num_epochs=5,
+    metrics=["seqeval"],
+)
+
+trainer = Trainer(
+    config=train_config,
+    model=model,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    data_collator=train_dataset.data_collator,
+    preprocessor=preprocessor,
+)
+trainer.train()
+
+trainer.push_to_hub("bert-fa-pos-lscp-500k")  # push model, config, preprocessor, trainer files and configs
+```
+
+Want to go deeper? Check out the [guides](../guide/index.md).
diff --git a/_sources/guide/advanced_training.md.txt b/_sources/guide/advanced_training.md.txt
new file mode 100644
index 00000000..0da1a2cd
--- /dev/null
+++ b/_sources/guide/advanced_training.md.txt
@@ -0,0 +1,2 @@
+# Advanced Training
+Docs coming soon, stay tuned!
diff --git a/_sources/guide/hezar_architecture.md.txt b/_sources/guide/hezar_architecture.md.txt
new file mode 100644
index 00000000..cd6a7e71
--- /dev/null
+++ b/_sources/guide/hezar_architecture.md.txt
@@ -0,0 +1,289 @@
+# Hezar's Architecture
+
+Right from the first lines of code, Hezar was built with **simplicity**, **modularity** and **extensibility** in mind.
+Hezar has a simple yet flexible design pattern that can be seen among most of its main modules. In this guide we
+demonstrate the main ideas behind the design.
+
+Going forward, by the term _module_, we mean any main class
+like `Model`, `Dataset`, `Metric`, `Trainer`, `Preprocessor`,
+etc.
+
+## Concept 1: Configurable Modules
+
+Every single module object in Hezar can be constructed from a key-value container. This container is the module's config,
+which contains everything needed to build an object from that module. In order to have a portable, serializable config
+that can also be converted to Python code, there lies Hezar's most important class, called `Config`. The `Config` class
+is a simple Python dataclass that is equipped with extra methods for importing, exporting, pushing to the Hub, etc.
+The `Config` class is defined in `hezar/configs.py` among other config derivatives.
+Right now the config derivatives are:
+
+- `ModelConfig`
+- `DatasetConfig`
+- `PreprocessorConfig`
+- `TrainerConfig`
+- `EmbeddingConfig`
+- `MetricConfig`
+
+So every module must have its own config inherited from `Config`. When defining a new config dataclass, one must define
+a unique name (as the parameter `name`), responsible for identifying the module type that uses that config class. We'll
+discuss why this `name` parameter is necessary in the registry section.
+
+To give some examples:
+
+Let's assume you want to write a new model class called `AwesomeModel`.
+The first step is to provide a config dataclass:
+
+```python
+from dataclasses import dataclass
+from hezar.models import ModelConfig, Model
+
+
+@dataclass
+class MyAwesomeModelConfig(ModelConfig):
+    name = "my_awesome_model"  # this has to be a unique name among all model configs
+    my_param: str = "awesome"
+    other_param: str = "more_awesome"
+
+
+class MyAwesomeModel(Model):
+    def __init__(self, config, **kwargs):
+        super().__init__(config, **kwargs)
+        # Define the layers or any other stuff here
+        ...
+
+    def forward(self, inputs, **kwargs):
+        # Usual PyTorch forward method
+        ...
+```
+
+Wait, what's that name for? Why would you need to define a name for everything? The short answer is
+_Hezar's registry system_. So let's dive into it!
+
+## Concept 2: Modules' Registries
+
+There are lots of base modules in Hezar, many of which might have dozens of subclasses, but as you might have seen by
+now, almost every module can be loaded using the same base class in a single line. Take a look at the below
+snippets:
+
+```python
+# Load a model
+from hezar.models import Model
+
+roberta_tc = Model.load("hezarai/roberta-fa-sentiment-dksf")  # roberta_tc is a RobertaTextClassification instance
+bert_pos = Model.load("hezarai/bert-fa-pos-lscp-500k")  # bert_pos is a BertSequenceLabeling instance
+whisper_speech = Model.load("hezarai/whisper-small-fa")  # whisper_speech is a WhisperSpeechRecognition instance
+...
+# Load a dataset
+from hezar.data import Dataset
+
+sentiment_dataset = Dataset.load("hezarai/sentiment-dksf")  # A TextClassificationDataset instance
+lscp_dataset = Dataset.load("hezarai/lscp-pos-500k")  # A SequenceLabelingDataset instance
+xlsum_dataset = Dataset.load("hezarai/xlsum-fa")  # A TextSummarizationDataset instance
+...
+# Load preprocessors
+from hezar.preprocessors import Preprocessor
+
+wordpiece = Preprocessor.load("hezarai/bert-base-fa")  # A WordPieceTokenizer instance
+whisper_bpe = Preprocessor.load("hezarai/whisper-small-fa")  # A WhisperBPETokenizer instance
+sp_unigram_bpe = Preprocessor.load("hezarai/t5-base-fa")  # A SentencePieceUnigramTokenizer instance
+...
+# Load embedding
+from hezar.embeddings import Embedding
+
+fasttext = Embedding.load("hezarai/fasttext-fa-300")  # A FastText instance
+word2vec = Embedding.load("hezarai/word2vec-skipgram-fa-wikipedia")  # A Word2Vec instance
+...
+```
+
+So, what's going on under the hood that handles module loading and instantiation?
+
+**Registry System**
+
+Well, there are ways to tackle this challenge, but Hezar manages this by using _a global registry_ for every module
+type. These registries are simple Python dictionaries that hold the properties for every module class, module config,
+etc.
+The general structure is like below:
+
+```python
+# Models registry for example
+models_registry = {
+    "bert_text_classification": Registry(
+        module_class=hezar.models.text_classification.bert.bert_text_classification.BertTextClassification,
+        config_class=hezar.models.text_classification.bert.bert_text_classification_config.BertTextClassificationConfig,
+        description="SOME MODEL DESCRIPTION ..."
+    ),
+    "AND SO ON...": Registry(...)
+}
+```
+Each registry value is a `Registry` (data)class that has 3 properties: `config_class`, `module_class` and `description`.
+- `module_class`: Holds the class object for the module. Using this property you can actually create the module object.
+- `config_class`: Holds the config class and can be passed to the module class so that the module can be created.
+- `description`: Holds the description of the module if given.
+
+But how are the modules inserted into the registries? The answer is _registry class decorators_.
+
+**`register_*()` Class Decorators**
+
+In the file `hezar/registry.py`, there are a bunch of decorator functions that fulfill the task of registering any module
+into the right registry automagically!
+These decorators take three parameters:
+- `name`: A string name that has to be the same as the one in the config
+- `config_class`: The config class
+- `description`: Optional description for the module
+
+The example below demonstrates registering a model:
+```python
+from dataclasses import dataclass
+
+from hezar.models import Model, ModelConfig, register_model
+
+@dataclass
+class MyBertConfig(ModelConfig):
+    name = "my_bert"
+    vocab_size: int = 1000
+    hidden_size: int = 768
+
+# Below line is all you need to add `my_bert` to `models_registry`
+@register_model("my_bert", config_class=MyBertConfig)
+class MyBert(Model):
+    def __init__(self, config: MyBertConfig, **kwargs):
+        super().__init__(config, **kwargs)
+
+    def forward(self, inputs, **kwargs):
+        ...
+
+```
+Registry decorators currently include:
+- `register_model`
+- `register_preprocessor`
+- `register_dataset`
+- `register_embedding`
+- `register_metric`
+- `register_trainer`
+
+**Getting Available Modules**
+
+To figure out what modules are available in a registry, there are also utils for that:
+```python
+from hezar import utils
+
+print(utils.list_available_models())
+print(utils.list_available_preprocessors())
+print(utils.list_available_datasets())
+print(utils.list_available_metrics())
+print(utils.list_available_embeddings())
+...
+```
+**Creating Modules from Registry Names**
+
+So now it's pretty easy to create module objects using their `name`! Let's say you want to create a
+BPE tokenizer. You can do it this way:
+```python
+from hezar.registry import preprocessors_registry
+
+module_cls = preprocessors_registry["bpe_tokenizer"].module_class
+config_cls = preprocessors_registry["bpe_tokenizer"].config_class
+
+bpe = module_cls(config_cls())
+```
+However, this is not how it's actually done in Hezar, because it's long and ugly! To handle this properly we use another
+internal feature of Hezar called the _builders_!
+
+
+**Builders**
+
+Using builders you can build modules from their registry names in a single line of code.
+This family of functions takes 3 main parameters:
+- `name`: A registry key name representing that module. This name has to be present in the corresponding registry!
+- `config`: Optionally you can pass a config object to control how the module is built. The config has to be of a type that the module accepts.
+- `**kwargs`: Optionally you can pass config parameters as keyword arguments to override the default config. (The override priority is `kwargs` > `config` > default config)
+```python
+from hezar import builders
+
+bert = builders.build_model("bert_mask_filling", hidden_size=768, vocab_size=50000)
+sp_bpe = builders.build_preprocessor("sentencepiece_bpe_tokenizer")
+tc_dataset = builders.build_dataset("text_classification", path="hezarai/sentiment-dksf", tokenizer_path="hezarai/bert-base-fa")
+...
+```
+Available builders include:
+- `build_model`
+- `build_dataset`
+- `build_preprocessor`
+- `build_embedding`
+- `build_metric`
+
+So why would you need to use builders or registries when you can import everything normally,
+like below:
+```python
+from hezar.models import WhisperSpeechRecognition, WhisperSpeechRecognitionConfig
+
+whisper = WhisperSpeechRecognition(WhisperSpeechRecognitionConfig(max_new_tokens=400))
+```
+The answer is that if you want to do it in a straightforward way, you can always use the classes directly. But the fact is that everything works with
+configs, and a config must have at least some identifiers so that a module can be initialized from it. The main usage of
+the registries is to be able to create everything from the configs! So let's slide into the next section, the Hub!
+
+## Concept 3: Hugging Face Hub Integration
+In Hezar, EVERY module can be uploaded to or downloaded from the Hugging Face Hub with ease! Modules have 3 main methods
+to do so:
+- `load`: A method implemented in any type of base class that loads the module from the Hub or local disk automagically!
+- `save`: A method to save all the necessary files and configurations to a path on the local disk.
+- `push_to_hub`: A method implemented in any type of base class that pushes all the necessary files and configurations to the Hub so that the module can be loaded from the Hub again.
+
+**Loading**
+
+All base modules implement their own `load` method based on their characteristics. But the first step in every load
+process is loading the configuration, as all the info lies there; then any other file is loaded.
+For example, the class `Model` first loads its config and builds the model using `build_model` and the config parameters.
+Then the state dict is loaded into the model. If the path contains preprocessor files and configs, it loads them too.
+On the other hand, some simple modules, like metrics, might just load the config to create an instance.
+One important feature of any `load` method is that, like builders, it accepts config parameters as keyword arguments so
+that you can override config properties.
+
+**Saving**
+
+Almost every module has the `save` method implemented, which is responsible for saving the config and other related files to the
+disk. This method takes a `path` parameter which is just the base folder path; any necessary subfolder will be created
+automatically based on the module type. For example, if you save a tokenizer at path `my_tokenizer/`, the `Tokenizer`'s
+`save` method will create a `preprocessor` folder and save the `tokenizer.json` and `tokenizer_config.yaml` in that
+folder. You can control the `subfolder` parameter and other file/path names if the base class gives you the option.
+
+**Pushing to the Hub**
+
+Pushing to the Hugging Face Hub is much like the save method. The only difference is that the files are uploaded
+to the Hub after saving.
+
+
+## Concept 4: Task-based Modeling & Training
+Hezar is a practical library, not a framework (though it can be!). That's why we decided to categorize models, trainers,
+datasets, etc. under task names e.g., `speech_recognition`, `language_modeling`, etc. If you've worked with other
+libraries, this might somewhat seem irrational, but trust us! For most users and usages this fits better!
+
+Currently, all models, trainers and datasets are categorized by task name, but this does not mean that for every task,
+there exists a model, trainer, dataset, etc.
+
+## Concept 5: Integration with Other Tools
+Re-inventing the wheel has no place in Hezar. If something we need already exists somewhere, we'd rather
+bring it into the codebase than rewrite it from scratch!
+In terms of backbone frameworks and libraries, we carefully research the existing tools and choose the one that is the simplest
+yet most popular.
+
+More specifically, here's a simple summary of the core modules in Hezar:
+- **Models**: Every model is a `hezar.models.Model` instance which is, in fact, a PyTorch `nn.Module` wrapper with extra features for saving, loading, exporting, etc.
+- **Datasets**: Every dataset is a `hezar.data.Dataset` instance which is a PyTorch Dataset implemented specifically for each task that can load the data files from the Hugging Face Hub.
+- **Preprocessors**: All preprocessors are preferably backed by a robust library like Tokenizers, pillow, etc.
+- **Embeddings**: All embeddings are developed on top of Gensim and can be easily loaded from the Hub and used in just 2 lines of code!
+- **Trainer**: Trainer is the base class for training almost any model in Hezar, or even your own custom models backed by Hezar. The Trainer comes with a lot of features and is also exportable to the Hub!
+- **Metrics**: Metrics are another family of configurable and portable modules backed by Scikit-learn, seqeval, etc. and can be easily used in the trainers!
+
+
+## Concept 6: Our Inspirations
+Hezar was built using the best practices we've learned from working with dozens of industry-leading open source
+software projects in the AI world. Our biggest inspirations are:
+
+- [Transformers](https://github.com/huggingface/transformers) by Hugging Face
+- [Fairseq](https://github.com/facebookresearch/fairseq) by Meta AI
+- [Flair](https://github.com/flairNLP/flair) by FlairAI
+- [ZenML](https://github.com/zenml-io/zenml)
+- [Ludwig](https://github.com/ludwig-ai/ludwig) by Ludwig AI
+- [UniLM](https://github.com/microsoft/unilm) by Microsoft
+- [PyTorch Ignite](https://github.com/pytorch/ignite) by PyTorch
+- [Lightning](https://github.com/Lightning-AI/lightning) by Lightning AI
+- [Hazm](https://github.com/roshan-research/hazm) by Roshan
diff --git a/_sources/guide/index.md.txt b/_sources/guide/index.md.txt
new file mode 100644
index 00000000..e0368c9a
--- /dev/null
+++ b/_sources/guide/index.md.txt
@@ -0,0 +1,12 @@
+# Developer Guides
+
+Welcome to the developer guide section, where you can take a deeper dive into the internals of Hezar!
+
+```{toctree}
+:maxdepth: 1
+
+hezar_architecture.md
+models_advanced.md
+trainer_in_depth.md
+advanced_training.md
+```
diff --git a/_sources/guide/models_advanced.md.txt b/_sources/guide/models_advanced.md.txt
new file mode 100644
index 00000000..091e479d
--- /dev/null
+++ b/_sources/guide/models_advanced.md.txt
@@ -0,0 +1,267 @@
+# Advanced Guide on Models
+
+Models (under `hezar.models`) is the most used module in Hezar. In this section, we'll take a deeper tour of this
+module.
+
+Note that this section assumes you already know the basics of Hezar and, in particular, the models module; if not,
+you can check out the introduction guide on models [here](../tutorial/models.md).
+
+## Building Models
+As you probably know at this point, any subclass of Model is a regular PyTorch nn.Module, so creating any model
+is straightforward. But what makes it different?
+
+The first difference is in the `__init__` method. Every model has to take in a `config` parameter that contains all the
+necessary parameters needed for the model to be created and initialized. This `config` parameter is a
+dataclass of type `ModelConfig` derived from the base config class, which is `Config`. The `Config` class is the
+base config container for all configs in Hezar.
+Find out more about
+configs [here](hezar_architecture.md/#concept-1-configurable-modules).
+
+Take a look at the snippets below:
+
+- **Regular PyTorch**
+```python
+import torch
+import torch.nn as nn
+
+
+class SampleCNN(nn.Module):
+    def __init__(self, num_channels=3, num_classes=10):
+        super().__init__()
+        self.conv1 = nn.Conv2d(num_channels, 6, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = nn.Conv2d(6, 16, 5)
+        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+        self.fc2 = nn.Linear(120, 84)
+        self.fc3 = nn.Linear(84, num_classes)
+
+    def forward(self, x):
+        x = self.pool(nn.functional.relu(self.conv1(x)))
+        x = self.pool(nn.functional.relu(self.conv2(x)))
+        x = torch.flatten(x, 1)  # flatten all dimensions except batch
+        x = nn.functional.relu(self.fc1(x))
+        x = nn.functional.relu(self.fc2(x))
+        x = self.fc3(x)
+        return x
+```
+
+- **Hezar Model**
+```python
+from dataclasses import dataclass
+
+import torch
+import torch.nn as nn
+from hezar.models import Model, ModelConfig, register_model
+
+@dataclass
+class SampleNetConfig(ModelConfig):
+    name = "sample_net"
+    num_channels: int = 3
+    num_classes: int = 10
+
+@register_model("sample_net", config_class=SampleNetConfig, description="My simple CNN network")
+class SampleNet(Model):
+    def __init__(self, config: SampleNetConfig):
+        super().__init__(config=config)
+        self.conv1 = nn.Conv2d(self.config.num_channels, 6, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = nn.Conv2d(6, 16, 5)
+        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+        self.fc2 = nn.Linear(120, 84)
+        self.fc3 = nn.Linear(84, self.config.num_classes)
+
+    def forward(self, x):
+        x = self.pool(nn.functional.relu(self.conv1(x)))
+        x = self.pool(nn.functional.relu(self.conv2(x)))
+        x = torch.flatten(x, 1)  # flatten all dimensions except batch
+        x = nn.functional.relu(self.fc1(x))
+        x = nn.functional.relu(self.fc2(x))
+        x = self.fc3(x)
+        return x
+```
+So what you actually need to do to make your PyTorch model compatible with Hezar is:
+1. Move all the required arguments of the model to a new dataclass config by deriving the `ModelConfig` class
+2. Implement your model by inheriting from `Model` instead of `nn.Module` and construct your model architecture by using config parameters
+3. Optionally you can register your model by using the `register_model` decorator under the same `name` parameter in the config. This step makes your model importable/exportable (compatible with `save`, `load`, `push_to_hub` methods)
+
+
+## Models Registry System
+The registry system is not specific to models; it applies to all modules in Hezar. For more info on registries check out [this section](hezar_architecture.md/#concept-2-modules-registries).
+
+Registries are required for finding the right class when trying to load a model from a path (local or Hub). Each model must
+have a name which must be the same as the one in its config under the `name` parameter (take a look at the example above).
+
+To see all the available models use:
+```python
+from hezar.utils import list_available_models
+
+print(list_available_models())
+```
+
+### Models Registry and `build_model`
+The `models_registry` (like all registry containers in Hezar) is a dictionary of model names mapped to their module classes
+and config classes. So one can easily build a model with default parameters by its registry key.
+
+```python
+from hezar.registry import models_registry
+
+bert = models_registry["bert"].module_class(models_registry["bert"].config_class())
+```
+Obviously, this is ugly and long, so let's use the build method `build_model`.
+This method takes in 3 parameters:
+- `name`: The model name, which must be present in `models_registry` keys
+- `config`: Optional model config
+- `**kwargs`: Extra config parameters as keyword arguments that overwrite the default config parameters.
+```python
+from hezar.builders import build_model
+
+bert = build_model("bert")
+```
+You can also pass config parameters to the `build_model` method as kwargs to overwrite default config parameters:
+```python
+from hezar.builders import build_model
+
+bert = build_model("bert", hidden_size=768)
+```
+Or pass in the whole config to the build function:
+```python
+from hezar.builders import build_model
+from hezar.models import BERTConfig
+
+bert = build_model("bert", BERTConfig(hidden_act="gelu", hidden_size=840))
+```
+
+## Inference & Prediction
+The end-to-end prediction for any model is done by calling the `predict()` method on raw inputs.
+The `predict()` method itself calls three main methods in order:
+- `preprocess()`
+- `forward()`/`generate()`*
+- `post_process()`
+
+*based on model type; regular or generative
+### Preprocessing/Post-processing
+These steps are performed by two methods:
+- `preprocess()`: takes in raw inputs and processes them to create direct model inputs, returning a dictionary of named
+inputs that is unpacked for the model's `forward`/`generate` method. Each model can handle raw inputs however necessary,
+but ready-to-use models in Hezar all use preprocessor modules. Preprocessor modules can be tokenizers, feature extractors,
+normalizers, etc. The `Model` class has a `preprocessor` property that stores a dictionary of the required preprocessors
+for the model. These preprocessors are named after their original name in the config or registry, like `bpe_tokenizer`, `image_processor`, etc.
+- `post_process()`: responsible for converting model forward/generate outputs, which are usually tensors, to a human-readable
+format. You might also use the `preprocessor` property at this stage, e.g., for decoding.
+
+#### The `preprocessor` property
+The preprocessor property can be directly set on a model. This preprocessor must be of type `Preprocessor`. If a model
+needs multiple preprocessors you can pass in a dictionary of preprocessors by their name (preferably registry name).
+You can use the preprocessor property like below:
+```python
+class TextClassificationModel(Model):
+    def __init__(self):
+        ...
+
+    def forward(self, inputs):
+        ...
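+
+    # NOTE: `self.preprocessor` is the model's preprocessor container
+    # (a `PreprocessorsContainer`), keyed by each preprocessor's registry name,
+    # which is why the tokenizer is fetched as "bpe_tokenizer" below.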
+    def preprocess(self, raw_texts):
+        tokenizer = self.preprocessor["bpe_tokenizer"]
+        model_inputs = tokenizer(raw_texts, return_tensors="pt")
+        return model_inputs
+
+    def post_process(self, model_outputs):
+        logits = model_outputs["logits"]
+        label_ids = logits.argmax(1)
+        labels_str = [self.config.id2label[label_id] for label_id in label_ids]
+        return labels_str
+```
+You can inspect the preprocessor for any model like below:
+```python
+from hezar.models import Model
+
+whisper = Model.load("hezarai/whisper-small-fa")
+whisper_preprocessors = whisper.preprocessor
+print(whisper_preprocessors)
+```
+```
+PreprocessorsContainer(
+    [
+        ('whisper_feature_extractor',
+         <hezar.preprocessors.feature_extractors.audio.whisper_feature_extractor.WhisperFeatureExtractor at 0x7f6316fdcbb0>),
+        ('whisper_bpe_tokenizer',
+         <hezar.preprocessors.tokenizers.whisper_bpe.WhisperBPETokenizer at 0x7f643cb13f40>)
+    ]
+)
+```
+### Passing kwargs to `predict()`
+You can also pass in additional parameters corresponding to any of the methods, and the `predict()` method will figure out
+how each argument should be passed to the right method (`preprocess`, `forward` or `post_process`).
+
+Suppose your model's methods take parameters like below:
+- `preprocess(raw_inputs, return_attention_mask=False)`
+- `post_process(model_inputs, output_all_scores=False)`
+
+You can pass in parameters for such a model like below:
+```python
+model.predict(raw_inputs, return_attention_mask=True, output_all_scores=True)
+```
+The predict method knows which parameter corresponds to which method. (see [issue #96](https://github.com/hezarai/hezar/issues/96))
+
+
+## Saving, Loading & Pushing to Hub
+All Hezar models can be easily saved, loaded and pushed to the Hub in the same way.
+
+### Loading Models
+Loading models is done by using the `.load()` method. This method takes in the path to the desired model, which can be
+a path on the Hub or a path on your local disk.
+```python
+from hezar.models import Model
+
+whisper = Model.load("hezarai/whisper-small-fa")
+whisper.save("my-whisper")
+whisper_2 = Model.load("my-whisper")
+whisper_2.push_to_hub("arxyzan/whisper-small-fa")
+```
+Note that the preprocessors of the model will also be loaded, if available, when using `Model.load()`. However, you can
+disable this behavior with `Model.load(path, load_preprocessor=False)`.
+#### `load()` Parameters
+`Model.load()` takes these parameters:
+- `hub_or_local_path`: Path to a Hub repo or a folder on your local disk
+- `load_locally`: Force this method to look for the path locally
+- `load_preprocessor`: Whether to load the preprocessor(s) or not (defaults to True)
+- `model_filename`: Optionally specify the model's weights file name (defaults to `model.pt`)
+- `config_filename`: Optionally specify the model's config file name (defaults to `model_config.yaml`)
+- `save_path`: Optionally save the loaded model to a custom path
+- `**kwargs`: Additional config parameters to overwrite the loaded config parameters
+
+#### Loading State Dicts
+Although Hezar models are regular PyTorch `nn.Module`s, for convenience we overrode `load_state_dict` in a way
+that the user can load backbone models on a model for fine-tuning purposes. Also, our method can safely ignore mismatching
+keys if the values are compatible. So if you receive a warning like the one below when fine-tuning a model:
+```
+Hezar (WARNING): Partially loading the weights as the model architecture and the given state dict are incompatible!
+Ignore this warning in case you plan on fine-tuning this model
+Incompatible keys: []
+Missing keys: ['classifier.weight', 'classifier.bias']
+```
+you are good to go with your training, because only the last classifier weights are missing and they are newly initialized for training.
+
+### Saving Models
+Saving models to a path is pretty simple. Note that this method takes a **folder** path, not a file path, because it saves
+all the files for the model, config and preprocessors to this path; you can also control the behavior of this method.
+#### `save()` Parameters
+`Model.save()` takes these parameters:
+- `path`: A path to a local folder
+- `filename`: Model's file name (defaults to `model.pt`)
+- `save_preprocessor`: Whether to save the preprocessor or not
+- `config_filename`: Model's config file name (defaults to `model_config.yaml`)
+
+### Pushing to the Hub
+Pushing Hezar models to the Hub (just like other modules in Hezar) is done by using the `push_to_hub` method.
+#### `push_to_hub()` Parameters
+This method is actually the save method followed by the upload operation, so its parameters are similar to `save`.
+- `repo_id`: Path to the repo id on the Hugging Face Hub
+- `filename`: Model's file name (defaults to `model.pt`)
+- `config_filename`: Optionally specify the model's config file name (defaults to `model_config.yaml`)
+- `push_preprocessor`: Whether to push the preprocessor or not
+- `commit_message`: Commit message for this push
+- `private`: Specify if the repo should be private or not. Only applicable if the repo does not already exist.
+
+## Wrap Up
+In this guide, we walked through the details and internals of the models in Hezar. Hezar models are PyTorch Modules equipped
+with extra functionalities for better integration and exportability.
diff --git a/_sources/guide/trainer_in_depth.md.txt b/_sources/guide/trainer_in_depth.md.txt
new file mode 100644
index 00000000..ad6b6c3d
--- /dev/null
+++ b/_sources/guide/trainer_in_depth.md.txt
@@ -0,0 +1,167 @@
+# Trainer In-depth Guide
+The `Trainer` is the base class for training all the models in Hezar, no matter the task, dataset, model architecture, etc.
+In this guide, we'll demonstrate all the internals and details of this powerful class and how you can customize it based
+on your needs.
+
+We'll do this step by step, in the exact order that takes place when training a model.
+
+## Initialization
+In order to initialize the Trainer you need to have some objects ready.
+
+- **Trainer Config**: A Trainer config is a config dataclass (of type `TrainerConfig`) with a bunch of attributes that configure
+how the Trainer behaves. The most important ones being:
+  - `output_dir` (required): Path to the directory to save trainer outputs (checkpoints, logs, etc.)
+  - `task` (required): Specify the task of the training e.g., `text_classification`, `speech_recognition`, etc.
+  - `num_epochs` (required): Number of training epochs
+  - `init_weights_from` (optional): If the model is randomly initialized or you want to finetune it from a different set of
+    weights, you can provide a path to a pretrained model using this parameter to load the model weights from.
+  - `batch_size` (required): Training batch size
+  - `eval_batch_size` (optional): Evaluation batch size (defaults to `batch_size` if not set)
+  - `mixed_precision` (optional): Type of mixed precision e.g., `fp16`, `bf16`, etc. (Disabled by default)
+  - `metrics` (optional): A set of metrics for model evaluation. Available metrics can be obtained using `utils.list_available_metrics()`
Available metrics can be obtained using `utils.list_available_metrics()` + - and etc. +- **Model**: A Hezar Model instance +- **Train & Eval Datasets**: Train and evaluation datasets (Hezar Dataset instances) +- **Preprocessor(s)**: Model's preprocessor if it's not already in the model (`model.preprocessor == None`) + +As an example, here is how you can initialize a trainer for text classification using BERT: + +```python +from hezar.models import BertTextClassification, BertTextClassificationConfig +from hezar.data import Dataset +from hezar.preprocessors import Preprocessor +from hezar.trainer import Trainer, TrainerConfig + + +dataset_path = "hezarai/sentiment-dksf" +base_model_path = "hezarai/bert-base-fa" + +train_dataset = Dataset.load(dataset_path, split="train", tokenizer_path=base_model_path) +eval_dataset = Dataset.load(dataset_path, split="test", tokenizer_path=base_model_path) + +model = BertTextClassification(BertTextClassificationConfig(id2label=train_dataset.config.id2label)) +preprocessor = Preprocessor.load(base_model_path) + +train_config = TrainerConfig( + output_dir="bert-fa-sentiment-analysis-dksf", + task="text_classification", + device="cuda", + init_weights_from=base_model_path, + batch_size=8, + num_epochs=5, + metrics=["f1", "precision", "accuracy", "recall"], +) + +trainer = Trainer( + config=train_config, + model=model, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + data_collator=train_dataset.data_collator, + preprocessor=preprocessor, +) +``` + +### `Trainer.__init__()` +So what does exactly happen in the Trainer's `__init__`? + +* Config sanitization +* Configure determinism (controlled by `config.seed`) +* Configure device(s) +* Setup model and +* preprocessor (load pretrained weights if configured) +* Setup data loaders +* Setup optimizer and LR scheduler +* Configure precision if set +* Setup metrics handler (chosen based on the trainer's task, type `MetricsHandler`) +* Configure paths and loggers +* Setup trainer state + +Now the trainer has all the objects ready (almost!). + +## Training process +The training process starts right when you call `trainer.train()`. This simple method does all the heavy lifting needed +during a full training process. We'll go through each of them one by one. + +In a nutshell, the training process is simply a repeating loop of training the model on the full train data and then +evaluating it on the evaluation data followed by calculating the metrics and saving logs and results. + +### 1. Training info +Right before the trainer starts the main training process, it simply outputs some info about the run. 
+This info looks something like this:
+```
+******************** Training Info ********************
+
+  Output Directory: `bert-fa-sentiment-analysis-dksf`
+  Task: `text_classification`
+  Model: `BertTextClassification`
+  Init Weights: `hezarai/bert-base-fa`
+  Device(s): `cuda`
+  Training Dataset: `TextClassificationDataset(path=hezarai/sentiment-dksf['train'], size=28602)`
+  Evaluation Dataset: `TextClassificationDataset(path=hezarai/sentiment-dksf['test'], size=2315)`
+  Optimizer: `adam`
+  Initial Learning Rate: `2e-05`
+  Learning Rate Decay: `0.0`
+  Epochs: `5`
+  Batch Size: `8`
+  Number of Parameters: `118299651`
+  Number of Trainable Parameters: `118299651`
+  Mixed Precision: `Full (fp32)`
+  Metrics: `['f1', 'precision', 'accuracy', 'recall']`
+  Checkpoints Path: `bert-fa-sentiment-analysis-dksf/checkpoints`
+  Logs Path: `bert-fa-sentiment-analysis-dksf/logs`
+
+*******************************************************
+```
+
+### 2. Inner training loop
+The inner training loop, which is invoked by `.inner_training_loop(epoch)`, trains the model on one full iteration of the
+training data. This iteration itself is a repeating loop of the below processes:
+1. Preparation of the input batch using `.prepare_input_batch(input_batch)`, which performs all the necessary validations and
+checks on the input batch, like casting data type, device, etc.
+2. Performing one training step using `.training_step(input_batch)`, which is the usual PyTorch forward pass followed by
+loss computation and backward pass, and finally an optimizer step. This method outputs the loss value along with the model outputs.
+3. Aggregation of loss values through the training loop, with a live report of the average loss up until that point in the progress bar.
+
+### 3. Evaluation loop
+The evaluation loop is the same as the training loop, but it does everything in eval mode (`torch.inference_mode`).
+1. Preparation of the input batch using `.prepare_input_batch(input_batch)`, which performs all the necessary validations and
+checks on the input batch, like casting data type, device, etc.
+2. Performing one evaluation step using `.evaluation_step(input_batch)`:
+    - Perform one forward pass on the inputs
+    - Calculate loss on the inputs
+    - Calculate generation outputs if the model is generative (`model.is_generative`)
+3. Calculating metrics on the outputs, which is handled by the metrics handler (`self.metrics_handler`)
+4. Aggregation of metric values through the evaluation loop, with a live report of the averages in the progress bar (the
+final value is the total average value)
+
+### 4. Saving & logging
+When a full training and evaluation loop is done, the trainer saves some properties:
+- Save the trainer state in the `trainer_state.yaml` file at `config.output_dir/config.checkpoints_dir`.
+- Save the current checkpoint (model weights, configs, etc.) at `config.checkpoints_dir/epoch`.
+- Save all the new training and evaluation results to the CSV file and TensorBoard at `config.logs_dir`
+
+All the above steps are repeated `config.num_epochs` times.
+
+
+## Save & Push to Hub
+Like all other components in Hezar, you can also save or push the trainer to the Hub, as sketched below.
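+
+For instance, here's a minimal sketch (reusing the `trainer` object from the initialization example above; the Hub repo id is hypothetical):
+```python
+# Save the model, preprocessor, dataset config and trainer config to a local folder
+trainer.save("bert-fa-sentiment-analysis-dksf")
+
+# Or push the same set of files to a repo on the Hugging Face Hub
+trainer.push_to_hub("arxyzan/bert-fa-sentiment-analysis-dksf")  # hypothetical repo id
+```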
+
+### Save Trainer
+In order to save the trainer, you can simply call `trainer.save()`, which accepts the following:
+- `path`: The target directory to save the objects in the trainer
+- `config_filename`
+- `model_filename`
+- `model_config_filename`
+- `subfolder`
+- `dataset_config_file`
+
+Overall, the `save` method saves the model, preprocessor, dataset config and the trainer config.
+
+### Push to Hub
+You can also push the trainer to the Hub. This method just calls the `push_to_hub` method on the model, preprocessor, and configs.
+
+
+## Advanced Training & Customization
+The Trainer is implemented in a really flexible and customizable way, so that any change can be done by simply overriding
+your desired method. You can learn more about how you can do such things [here](advanced_training.md).
\ No newline at end of file
diff --git a/_sources/index.md.txt b/_sources/index.md.txt
new file mode 100644
index 00000000..dbe736fc
--- /dev/null
+++ b/_sources/index.md.txt
@@ -0,0 +1,17 @@
+# Welcome to Hezar's documentation!
+![](https://github.com/hezarai/hezar/raw/main/hezar.png)
+Welcome to Hezar's official documentation!
+
+Here you can find every piece of info about all the aspects of Hezar.
+
+Navigate to the desired section using this table of contents:
+
+```{toctree}
+:maxdepth: 2
+
+get_started/index
+tutorial/index
+guide/index
+source/index
+contributing
+```
diff --git a/_sources/source/hezar.builders.rst.txt b/_sources/source/hezar.builders.rst.txt
new file mode 100644
index 00000000..6eb0aa72
--- /dev/null
+++ b/_sources/source/hezar.builders.rst.txt
@@ -0,0 +1,7 @@
+hezar.builders module
+=====================
+
+.. automodule:: hezar.builders
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/_sources/source/hezar.configs.rst.txt b/_sources/source/hezar.configs.rst.txt
new file mode 100644
index 00000000..02b7dc5d
--- /dev/null
+++ b/_sources/source/hezar.configs.rst.txt
@@ -0,0 +1,7 @@
+hezar.configs module
+====================
+
+.. automodule:: hezar.configs
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/_sources/source/hezar.constants.rst.txt b/_sources/source/hezar.constants.rst.txt
new file mode 100644
index 00000000..441ee5d5
--- /dev/null
+++ b/_sources/source/hezar.constants.rst.txt
@@ -0,0 +1,7 @@
+hezar.constants module
+======================
+
+.. automodule:: hezar.constants
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/_sources/source/hezar.data.data_collators.rst.txt b/_sources/source/hezar.data.data_collators.rst.txt
new file mode 100644
index 00000000..0e743e71
--- /dev/null
+++ b/_sources/source/hezar.data.data_collators.rst.txt
@@ -0,0 +1,7 @@
+hezar.data.data\_collators module
+=================================
+
+.. automodule:: hezar.data.data_collators
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/_sources/source/hezar.data.datasets.dataset.rst.txt b/_sources/source/hezar.data.datasets.dataset.rst.txt
new file mode 100644
index 00000000..6ebde446
--- /dev/null
+++ b/_sources/source/hezar.data.datasets.dataset.rst.txt
@@ -0,0 +1,7 @@
+hezar.data.datasets.dataset module
+==================================
+
automodule:: hezar.data.datasets.dataset + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.data.datasets.image_captioning_dataset.rst.txt b/_sources/source/hezar.data.datasets.image_captioning_dataset.rst.txt new file mode 100644 index 00000000..96b25524 --- /dev/null +++ b/_sources/source/hezar.data.datasets.image_captioning_dataset.rst.txt @@ -0,0 +1,7 @@ +hezar.data.datasets.image\_captioning\_dataset module +===================================================== + +.. automodule:: hezar.data.datasets.image_captioning_dataset + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.data.datasets.ocr_dataset.rst.txt b/_sources/source/hezar.data.datasets.ocr_dataset.rst.txt new file mode 100644 index 00000000..3af0f3ad --- /dev/null +++ b/_sources/source/hezar.data.datasets.ocr_dataset.rst.txt @@ -0,0 +1,7 @@ +hezar.data.datasets.ocr\_dataset module +======================================= + +.. automodule:: hezar.data.datasets.ocr_dataset + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.data.datasets.rst.txt b/_sources/source/hezar.data.datasets.rst.txt new file mode 100644 index 00000000..1e9678f0 --- /dev/null +++ b/_sources/source/hezar.data.datasets.rst.txt @@ -0,0 +1,23 @@ +hezar.data.datasets package +=========================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.data.datasets.dataset + hezar.data.datasets.image_captioning_dataset + hezar.data.datasets.ocr_dataset + hezar.data.datasets.sequence_labeling_dataset + hezar.data.datasets.text_classification_dataset + hezar.data.datasets.text_summarization_dataset + +Module contents +--------------- + +.. automodule:: hezar.data.datasets + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.data.datasets.sequence_labeling_dataset.rst.txt b/_sources/source/hezar.data.datasets.sequence_labeling_dataset.rst.txt new file mode 100644 index 00000000..ecedebcc --- /dev/null +++ b/_sources/source/hezar.data.datasets.sequence_labeling_dataset.rst.txt @@ -0,0 +1,7 @@ +hezar.data.datasets.sequence\_labeling\_dataset module +====================================================== + +.. automodule:: hezar.data.datasets.sequence_labeling_dataset + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.data.datasets.text_classification_dataset.rst.txt b/_sources/source/hezar.data.datasets.text_classification_dataset.rst.txt new file mode 100644 index 00000000..adcc0b32 --- /dev/null +++ b/_sources/source/hezar.data.datasets.text_classification_dataset.rst.txt @@ -0,0 +1,7 @@ +hezar.data.datasets.text\_classification\_dataset module +======================================================== + +.. automodule:: hezar.data.datasets.text_classification_dataset + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.data.datasets.text_summarization_dataset.rst.txt b/_sources/source/hezar.data.datasets.text_summarization_dataset.rst.txt new file mode 100644 index 00000000..36565686 --- /dev/null +++ b/_sources/source/hezar.data.datasets.text_summarization_dataset.rst.txt @@ -0,0 +1,7 @@ +hezar.data.datasets.text\_summarization\_dataset module +======================================================= + +.. 
automodule:: hezar.data.datasets.text_summarization_dataset + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.data.rst.txt b/_sources/source/hezar.data.rst.txt new file mode 100644 index 00000000..97106494 --- /dev/null +++ b/_sources/source/hezar.data.rst.txt @@ -0,0 +1,26 @@ +hezar.data package +================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.data.datasets + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.data.data_collators + +Module contents +--------------- + +.. automodule:: hezar.data + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.embeddings.embedding.rst.txt b/_sources/source/hezar.embeddings.embedding.rst.txt new file mode 100644 index 00000000..4a546f29 --- /dev/null +++ b/_sources/source/hezar.embeddings.embedding.rst.txt @@ -0,0 +1,7 @@ +hezar.embeddings.embedding module +================================= + +.. automodule:: hezar.embeddings.embedding + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.embeddings.fasttext.rst.txt b/_sources/source/hezar.embeddings.fasttext.rst.txt new file mode 100644 index 00000000..75658d64 --- /dev/null +++ b/_sources/source/hezar.embeddings.fasttext.rst.txt @@ -0,0 +1,7 @@ +hezar.embeddings.fasttext module +================================ + +.. automodule:: hezar.embeddings.fasttext + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.embeddings.rst.txt b/_sources/source/hezar.embeddings.rst.txt new file mode 100644 index 00000000..368acdb4 --- /dev/null +++ b/_sources/source/hezar.embeddings.rst.txt @@ -0,0 +1,20 @@ +hezar.embeddings package +======================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.embeddings.embedding + hezar.embeddings.fasttext + hezar.embeddings.word2vec + +Module contents +--------------- + +.. automodule:: hezar.embeddings + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.embeddings.word2vec.rst.txt b/_sources/source/hezar.embeddings.word2vec.rst.txt new file mode 100644 index 00000000..822452bb --- /dev/null +++ b/_sources/source/hezar.embeddings.word2vec.rst.txt @@ -0,0 +1,7 @@ +hezar.embeddings.word2vec module +================================ + +.. automodule:: hezar.embeddings.word2vec + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.accuracy.rst.txt b/_sources/source/hezar.metrics.accuracy.rst.txt new file mode 100644 index 00000000..5bef0716 --- /dev/null +++ b/_sources/source/hezar.metrics.accuracy.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.accuracy module +============================= + +.. automodule:: hezar.metrics.accuracy + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.bleu.rst.txt b/_sources/source/hezar.metrics.bleu.rst.txt new file mode 100644 index 00000000..6dabf497 --- /dev/null +++ b/_sources/source/hezar.metrics.bleu.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.bleu module +========================= + +.. automodule:: hezar.metrics.bleu + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.cer.rst.txt b/_sources/source/hezar.metrics.cer.rst.txt new file mode 100644 index 00000000..f1adc019 --- /dev/null +++ b/_sources/source/hezar.metrics.cer.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.cer module +======================== + +.. 
automodule:: hezar.metrics.cer + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.f1.rst.txt b/_sources/source/hezar.metrics.f1.rst.txt new file mode 100644 index 00000000..25117c91 --- /dev/null +++ b/_sources/source/hezar.metrics.f1.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.f1 module +======================= + +.. automodule:: hezar.metrics.f1 + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.metric.rst.txt b/_sources/source/hezar.metrics.metric.rst.txt new file mode 100644 index 00000000..3f0a1818 --- /dev/null +++ b/_sources/source/hezar.metrics.metric.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.metric module +=========================== + +.. automodule:: hezar.metrics.metric + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.precision.rst.txt b/_sources/source/hezar.metrics.precision.rst.txt new file mode 100644 index 00000000..45919a9a --- /dev/null +++ b/_sources/source/hezar.metrics.precision.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.precision module +============================== + +.. automodule:: hezar.metrics.precision + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.recall.rst.txt b/_sources/source/hezar.metrics.recall.rst.txt new file mode 100644 index 00000000..daafce17 --- /dev/null +++ b/_sources/source/hezar.metrics.recall.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.recall module +=========================== + +.. automodule:: hezar.metrics.recall + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.rouge.rst.txt b/_sources/source/hezar.metrics.rouge.rst.txt new file mode 100644 index 00000000..debbe8c1 --- /dev/null +++ b/_sources/source/hezar.metrics.rouge.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.rouge module +========================== + +.. automodule:: hezar.metrics.rouge + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.rst.txt b/_sources/source/hezar.metrics.rst.txt new file mode 100644 index 00000000..25144d29 --- /dev/null +++ b/_sources/source/hezar.metrics.rst.txt @@ -0,0 +1,27 @@ +hezar.metrics package +===================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.metrics.accuracy + hezar.metrics.bleu + hezar.metrics.cer + hezar.metrics.f1 + hezar.metrics.metric + hezar.metrics.precision + hezar.metrics.recall + hezar.metrics.rouge + hezar.metrics.seqeval + hezar.metrics.wer + +Module contents +--------------- + +.. automodule:: hezar.metrics + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.seqeval.rst.txt b/_sources/source/hezar.metrics.seqeval.rst.txt new file mode 100644 index 00000000..8cfb10f3 --- /dev/null +++ b/_sources/source/hezar.metrics.seqeval.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.seqeval module +============================ + +.. automodule:: hezar.metrics.seqeval + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.metrics.wer.rst.txt b/_sources/source/hezar.metrics.wer.rst.txt new file mode 100644 index 00000000..208bae55 --- /dev/null +++ b/_sources/source/hezar.metrics.wer.rst.txt @@ -0,0 +1,7 @@ +hezar.metrics.wer module +======================== + +.. 
automodule:: hezar.metrics.wer + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.bert.bert.rst.txt b/_sources/source/hezar.models.backbone.bert.bert.rst.txt new file mode 100644 index 00000000..9b519b02 --- /dev/null +++ b/_sources/source/hezar.models.backbone.bert.bert.rst.txt @@ -0,0 +1,7 @@ +hezar.models.backbone.bert.bert module +====================================== + +.. automodule:: hezar.models.backbone.bert.bert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.bert.bert_config.rst.txt b/_sources/source/hezar.models.backbone.bert.bert_config.rst.txt new file mode 100644 index 00000000..9ca103f8 --- /dev/null +++ b/_sources/source/hezar.models.backbone.bert.bert_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.backbone.bert.bert\_config module +============================================== + +.. automodule:: hezar.models.backbone.bert.bert_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.bert.rst.txt b/_sources/source/hezar.models.backbone.bert.rst.txt new file mode 100644 index 00000000..0b056778 --- /dev/null +++ b/_sources/source/hezar.models.backbone.bert.rst.txt @@ -0,0 +1,19 @@ +hezar.models.backbone.bert package +================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.backbone.bert.bert + hezar.models.backbone.bert.bert_config + +Module contents +--------------- + +.. automodule:: hezar.models.backbone.bert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.distilbert.distilbert.rst.txt b/_sources/source/hezar.models.backbone.distilbert.distilbert.rst.txt new file mode 100644 index 00000000..13320b45 --- /dev/null +++ b/_sources/source/hezar.models.backbone.distilbert.distilbert.rst.txt @@ -0,0 +1,7 @@ +hezar.models.backbone.distilbert.distilbert module +================================================== + +.. automodule:: hezar.models.backbone.distilbert.distilbert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.distilbert.distilbert_config.rst.txt b/_sources/source/hezar.models.backbone.distilbert.distilbert_config.rst.txt new file mode 100644 index 00000000..38be2504 --- /dev/null +++ b/_sources/source/hezar.models.backbone.distilbert.distilbert_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.backbone.distilbert.distilbert\_config module +========================================================== + +.. automodule:: hezar.models.backbone.distilbert.distilbert_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.distilbert.rst.txt b/_sources/source/hezar.models.backbone.distilbert.rst.txt new file mode 100644 index 00000000..5a14af8a --- /dev/null +++ b/_sources/source/hezar.models.backbone.distilbert.rst.txt @@ -0,0 +1,19 @@ +hezar.models.backbone.distilbert package +======================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.backbone.distilbert.distilbert + hezar.models.backbone.distilbert.distilbert_config + +Module contents +--------------- + +.. 
automodule:: hezar.models.backbone.distilbert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.roberta.roberta.rst.txt b/_sources/source/hezar.models.backbone.roberta.roberta.rst.txt new file mode 100644 index 00000000..bbc5d68d --- /dev/null +++ b/_sources/source/hezar.models.backbone.roberta.roberta.rst.txt @@ -0,0 +1,7 @@ +hezar.models.backbone.roberta.roberta module +============================================ + +.. automodule:: hezar.models.backbone.roberta.roberta + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.roberta.roberta_config.rst.txt b/_sources/source/hezar.models.backbone.roberta.roberta_config.rst.txt new file mode 100644 index 00000000..c00461ee --- /dev/null +++ b/_sources/source/hezar.models.backbone.roberta.roberta_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.backbone.roberta.roberta\_config module +==================================================== + +.. automodule:: hezar.models.backbone.roberta.roberta_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.roberta.rst.txt b/_sources/source/hezar.models.backbone.roberta.rst.txt new file mode 100644 index 00000000..02839ecd --- /dev/null +++ b/_sources/source/hezar.models.backbone.roberta.rst.txt @@ -0,0 +1,19 @@ +hezar.models.backbone.roberta package +===================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.backbone.roberta.roberta + hezar.models.backbone.roberta.roberta_config + +Module contents +--------------- + +.. automodule:: hezar.models.backbone.roberta + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.rst.txt b/_sources/source/hezar.models.backbone.rst.txt new file mode 100644 index 00000000..28549a49 --- /dev/null +++ b/_sources/source/hezar.models.backbone.rst.txt @@ -0,0 +1,21 @@ +hezar.models.backbone package +============================= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.backbone.bert + hezar.models.backbone.distilbert + hezar.models.backbone.roberta + hezar.models.backbone.vit + +Module contents +--------------- + +.. automodule:: hezar.models.backbone + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.vit.rst.txt b/_sources/source/hezar.models.backbone.vit.rst.txt new file mode 100644 index 00000000..dc8dadee --- /dev/null +++ b/_sources/source/hezar.models.backbone.vit.rst.txt @@ -0,0 +1,19 @@ +hezar.models.backbone.vit package +================================= + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.backbone.vit.vit + hezar.models.backbone.vit.vit_config + +Module contents +--------------- + +.. automodule:: hezar.models.backbone.vit + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.vit.vit.rst.txt b/_sources/source/hezar.models.backbone.vit.vit.rst.txt new file mode 100644 index 00000000..42bebcca --- /dev/null +++ b/_sources/source/hezar.models.backbone.vit.vit.rst.txt @@ -0,0 +1,7 @@ +hezar.models.backbone.vit.vit module +==================================== + +.. 
automodule:: hezar.models.backbone.vit.vit + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.backbone.vit.vit_config.rst.txt b/_sources/source/hezar.models.backbone.vit.vit_config.rst.txt new file mode 100644 index 00000000..b5b6087b --- /dev/null +++ b/_sources/source/hezar.models.backbone.vit.vit_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.backbone.vit.vit\_config module +============================================ + +.. automodule:: hezar.models.backbone.vit.vit_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text.rst.txt b/_sources/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text.rst.txt new file mode 100644 index 00000000..effb428e --- /dev/null +++ b/_sources/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.beit\_roberta.beit\_roberta\_image2text module +====================================================================== + +.. automodule:: hezar.models.image2text.beit_roberta.beit_roberta_image2text + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text_config.rst.txt b/_sources/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text_config.rst.txt new file mode 100644 index 00000000..2e097b82 --- /dev/null +++ b/_sources/source/hezar.models.image2text.beit_roberta.beit_roberta_image2text_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.beit\_roberta.beit\_roberta\_image2text\_config module +============================================================================== + +.. automodule:: hezar.models.image2text.beit_roberta.beit_roberta_image2text_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.beit_roberta.rst.txt b/_sources/source/hezar.models.image2text.beit_roberta.rst.txt new file mode 100644 index 00000000..4ddcbcf0 --- /dev/null +++ b/_sources/source/hezar.models.image2text.beit_roberta.rst.txt @@ -0,0 +1,19 @@ +hezar.models.image2text.beit\_roberta package +============================================= + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.image2text.beit_roberta.beit_roberta_image2text + hezar.models.image2text.beit_roberta.beit_roberta_image2text_config + +Module contents +--------------- + +.. automodule:: hezar.models.image2text.beit_roberta + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.crnn.crnn_decode_utils.rst.txt b/_sources/source/hezar.models.image2text.crnn.crnn_decode_utils.rst.txt new file mode 100644 index 00000000..59c44cc2 --- /dev/null +++ b/_sources/source/hezar.models.image2text.crnn.crnn_decode_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.crnn.crnn\_decode\_utils module +======================================================= + +.. automodule:: hezar.models.image2text.crnn.crnn_decode_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.crnn.crnn_image2text.rst.txt b/_sources/source/hezar.models.image2text.crnn.crnn_image2text.rst.txt new file mode 100644 index 00000000..537a0ce6 --- /dev/null +++ b/_sources/source/hezar.models.image2text.crnn.crnn_image2text.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.crnn.crnn\_image2text module +==================================================== + +.. 
automodule:: hezar.models.image2text.crnn.crnn_image2text + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.crnn.crnn_image2text_config.rst.txt b/_sources/source/hezar.models.image2text.crnn.crnn_image2text_config.rst.txt new file mode 100644 index 00000000..709ae605 --- /dev/null +++ b/_sources/source/hezar.models.image2text.crnn.crnn_image2text_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.crnn.crnn\_image2text\_config module +============================================================ + +.. automodule:: hezar.models.image2text.crnn.crnn_image2text_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.crnn.rst.txt b/_sources/source/hezar.models.image2text.crnn.rst.txt new file mode 100644 index 00000000..6ed55675 --- /dev/null +++ b/_sources/source/hezar.models.image2text.crnn.rst.txt @@ -0,0 +1,20 @@ +hezar.models.image2text.crnn package +==================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.image2text.crnn.crnn_decode_utils + hezar.models.image2text.crnn.crnn_image2text + hezar.models.image2text.crnn.crnn_image2text_config + +Module contents +--------------- + +.. automodule:: hezar.models.image2text.crnn + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.rst.txt b/_sources/source/hezar.models.image2text.rst.txt new file mode 100644 index 00000000..ad4af596 --- /dev/null +++ b/_sources/source/hezar.models.image2text.rst.txt @@ -0,0 +1,22 @@ +hezar.models.image2text package +=============================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.image2text.beit_roberta + hezar.models.image2text.crnn + hezar.models.image2text.trocr + hezar.models.image2text.vit_gpt2 + hezar.models.image2text.vit_roberta + +Module contents +--------------- + +.. automodule:: hezar.models.image2text + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.trocr.rst.txt b/_sources/source/hezar.models.image2text.trocr.rst.txt new file mode 100644 index 00000000..2b05cfc7 --- /dev/null +++ b/_sources/source/hezar.models.image2text.trocr.rst.txt @@ -0,0 +1,19 @@ +hezar.models.image2text.trocr package +===================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.image2text.trocr.trocr_image2text + hezar.models.image2text.trocr.trocr_image2text_config + +Module contents +--------------- + +.. automodule:: hezar.models.image2text.trocr + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.trocr.trocr_image2text.rst.txt b/_sources/source/hezar.models.image2text.trocr.trocr_image2text.rst.txt new file mode 100644 index 00000000..490e8289 --- /dev/null +++ b/_sources/source/hezar.models.image2text.trocr.trocr_image2text.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.trocr.trocr\_image2text module +====================================================== + +.. 
automodule:: hezar.models.image2text.trocr.trocr_image2text + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.trocr.trocr_image2text_config.rst.txt b/_sources/source/hezar.models.image2text.trocr.trocr_image2text_config.rst.txt new file mode 100644 index 00000000..f692f016 --- /dev/null +++ b/_sources/source/hezar.models.image2text.trocr.trocr_image2text_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.trocr.trocr\_image2text\_config module +============================================================== + +.. automodule:: hezar.models.image2text.trocr.trocr_image2text_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.vit_gpt2.rst.txt b/_sources/source/hezar.models.image2text.vit_gpt2.rst.txt new file mode 100644 index 00000000..108f0f6f --- /dev/null +++ b/_sources/source/hezar.models.image2text.vit_gpt2.rst.txt @@ -0,0 +1,19 @@ +hezar.models.image2text.vit\_gpt2 package +========================================= + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.image2text.vit_gpt2.vit_gpt2_image2text + hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config + +Module contents +--------------- + +.. automodule:: hezar.models.image2text.vit_gpt2 + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text.rst.txt b/_sources/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text.rst.txt new file mode 100644 index 00000000..2f95efc1 --- /dev/null +++ b/_sources/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.vit\_gpt2.vit\_gpt2\_image2text module +============================================================== + +.. automodule:: hezar.models.image2text.vit_gpt2.vit_gpt2_image2text + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config.rst.txt b/_sources/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config.rst.txt new file mode 100644 index 00000000..cdc13a59 --- /dev/null +++ b/_sources/source/hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.vit\_gpt2.vit\_gpt2\_image2text\_config module +====================================================================== + +.. automodule:: hezar.models.image2text.vit_gpt2.vit_gpt2_image2text_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.vit_roberta.rst.txt b/_sources/source/hezar.models.image2text.vit_roberta.rst.txt new file mode 100644 index 00000000..cb71cb77 --- /dev/null +++ b/_sources/source/hezar.models.image2text.vit_roberta.rst.txt @@ -0,0 +1,19 @@ +hezar.models.image2text.vit\_roberta package +============================================ + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.image2text.vit_roberta.vit_roberta_image2text + hezar.models.image2text.vit_roberta.vit_roberta_image2text_config + +Module contents +--------------- + +.. 
automodule:: hezar.models.image2text.vit_roberta + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text.rst.txt b/_sources/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text.rst.txt new file mode 100644 index 00000000..826c0579 --- /dev/null +++ b/_sources/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.vit\_roberta.vit\_roberta\_image2text module +==================================================================== + +.. automodule:: hezar.models.image2text.vit_roberta.vit_roberta_image2text + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text_config.rst.txt b/_sources/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text_config.rst.txt new file mode 100644 index 00000000..39fef145 --- /dev/null +++ b/_sources/source/hezar.models.image2text.vit_roberta.vit_roberta_image2text_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.image2text.vit\_roberta.vit\_roberta\_image2text\_config module +============================================================================ + +.. automodule:: hezar.models.image2text.vit_roberta.vit_roberta_image2text_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.bert.bert_mask_filling.rst.txt b/_sources/source/hezar.models.mask_filling.bert.bert_mask_filling.rst.txt new file mode 100644 index 00000000..b9ad0d11 --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.bert.bert_mask_filling.rst.txt @@ -0,0 +1,7 @@ +hezar.models.mask\_filling.bert.bert\_mask\_filling module +========================================================== + +.. automodule:: hezar.models.mask_filling.bert.bert_mask_filling + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.bert.bert_mask_filling_config.rst.txt b/_sources/source/hezar.models.mask_filling.bert.bert_mask_filling_config.rst.txt new file mode 100644 index 00000000..be3dfef2 --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.bert.bert_mask_filling_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.mask\_filling.bert.bert\_mask\_filling\_config module +================================================================== + +.. automodule:: hezar.models.mask_filling.bert.bert_mask_filling_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.bert.rst.txt b/_sources/source/hezar.models.mask_filling.bert.rst.txt new file mode 100644 index 00000000..345f0036 --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.bert.rst.txt @@ -0,0 +1,19 @@ +hezar.models.mask\_filling.bert package +======================================= + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.mask_filling.bert.bert_mask_filling + hezar.models.mask_filling.bert.bert_mask_filling_config + +Module contents +--------------- + +.. 
automodule:: hezar.models.mask_filling.bert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling.rst.txt b/_sources/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling.rst.txt new file mode 100644 index 00000000..e11bc317 --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling.rst.txt @@ -0,0 +1,7 @@ +hezar.models.mask\_filling.distilbert.distilbert\_mask\_filling module +====================================================================== + +.. automodule:: hezar.models.mask_filling.distilbert.distilbert_mask_filling + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling_config.rst.txt b/_sources/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling_config.rst.txt new file mode 100644 index 00000000..c71af978 --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.distilbert.distilbert_mask_filling_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.mask\_filling.distilbert.distilbert\_mask\_filling\_config module +============================================================================== + +.. automodule:: hezar.models.mask_filling.distilbert.distilbert_mask_filling_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.distilbert.rst.txt b/_sources/source/hezar.models.mask_filling.distilbert.rst.txt new file mode 100644 index 00000000..15b0235a --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.distilbert.rst.txt @@ -0,0 +1,19 @@ +hezar.models.mask\_filling.distilbert package +============================================= + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.mask_filling.distilbert.distilbert_mask_filling + hezar.models.mask_filling.distilbert.distilbert_mask_filling_config + +Module contents +--------------- + +.. automodule:: hezar.models.mask_filling.distilbert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.roberta.roberta_mask_filling.rst.txt b/_sources/source/hezar.models.mask_filling.roberta.roberta_mask_filling.rst.txt new file mode 100644 index 00000000..324595d7 --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.roberta.roberta_mask_filling.rst.txt @@ -0,0 +1,7 @@ +hezar.models.mask\_filling.roberta.roberta\_mask\_filling module +================================================================ + +.. automodule:: hezar.models.mask_filling.roberta.roberta_mask_filling + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.roberta.roberta_mask_filling_config.rst.txt b/_sources/source/hezar.models.mask_filling.roberta.roberta_mask_filling_config.rst.txt new file mode 100644 index 00000000..d0a9f1fe --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.roberta.roberta_mask_filling_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.mask\_filling.roberta.roberta\_mask\_filling\_config module +======================================================================== + +.. 
automodule:: hezar.models.mask_filling.roberta.roberta_mask_filling_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.roberta.rst.txt b/_sources/source/hezar.models.mask_filling.roberta.rst.txt new file mode 100644 index 00000000..f245751c --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.roberta.rst.txt @@ -0,0 +1,19 @@ +hezar.models.mask\_filling.roberta package +========================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.mask_filling.roberta.roberta_mask_filling + hezar.models.mask_filling.roberta.roberta_mask_filling_config + +Module contents +--------------- + +.. automodule:: hezar.models.mask_filling.roberta + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.mask_filling.rst.txt b/_sources/source/hezar.models.mask_filling.rst.txt new file mode 100644 index 00000000..bf6c3106 --- /dev/null +++ b/_sources/source/hezar.models.mask_filling.rst.txt @@ -0,0 +1,20 @@ +hezar.models.mask\_filling package +================================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.mask_filling.bert + hezar.models.mask_filling.distilbert + hezar.models.mask_filling.roberta + +Module contents +--------------- + +.. automodule:: hezar.models.mask_filling + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.model.rst.txt b/_sources/source/hezar.models.model.rst.txt new file mode 100644 index 00000000..c5b23b97 --- /dev/null +++ b/_sources/source/hezar.models.model.rst.txt @@ -0,0 +1,7 @@ +hezar.models.model module +========================= + +.. automodule:: hezar.models.model + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.model_outputs.rst.txt b/_sources/source/hezar.models.model_outputs.rst.txt new file mode 100644 index 00000000..cc43a7b8 --- /dev/null +++ b/_sources/source/hezar.models.model_outputs.rst.txt @@ -0,0 +1,7 @@ +hezar.models.model\_outputs module +================================== + +.. automodule:: hezar.models.model_outputs + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.rst.txt b/_sources/source/hezar.models.rst.txt new file mode 100644 index 00000000..ac7553a0 --- /dev/null +++ b/_sources/source/hezar.models.rst.txt @@ -0,0 +1,34 @@ +hezar.models package +==================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.backbone + hezar.models.image2text + hezar.models.mask_filling + hezar.models.sequence_labeling + hezar.models.speech_recognition + hezar.models.text_classification + hezar.models.text_embedding + hezar.models.text_generation + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.model + hezar.models.model_outputs + +Module contents +--------------- + +.. automodule:: hezar.models + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling.rst.txt b/_sources/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling.rst.txt new file mode 100644 index 00000000..358123cb --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling.rst.txt @@ -0,0 +1,7 @@ +hezar.models.sequence\_labeling.bert.bert\_sequence\_labeling module +==================================================================== + +.. 
automodule:: hezar.models.sequence_labeling.bert.bert_sequence_labeling + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling_config.rst.txt b/_sources/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling_config.rst.txt new file mode 100644 index 00000000..a0e74aa7 --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.bert.bert_sequence_labeling_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.sequence\_labeling.bert.bert\_sequence\_labeling\_config module +============================================================================ + +.. automodule:: hezar.models.sequence_labeling.bert.bert_sequence_labeling_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.bert.rst.txt b/_sources/source/hezar.models.sequence_labeling.bert.rst.txt new file mode 100644 index 00000000..153264bf --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.bert.rst.txt @@ -0,0 +1,19 @@ +hezar.models.sequence\_labeling.bert package +============================================ + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.sequence_labeling.bert.bert_sequence_labeling + hezar.models.sequence_labeling.bert.bert_sequence_labeling_config + +Module contents +--------------- + +.. automodule:: hezar.models.sequence_labeling.bert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling.rst.txt b/_sources/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling.rst.txt new file mode 100644 index 00000000..7024f982 --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling.rst.txt @@ -0,0 +1,7 @@ +hezar.models.sequence\_labeling.distilbert.distilbert\_sequence\_labeling module +================================================================================ + +.. automodule:: hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config.rst.txt b/_sources/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config.rst.txt new file mode 100644 index 00000000..250d149e --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.sequence\_labeling.distilbert.distilbert\_sequence\_labeling\_config module +======================================================================================== + +.. automodule:: hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.distilbert.rst.txt b/_sources/source/hezar.models.sequence_labeling.distilbert.rst.txt new file mode 100644 index 00000000..2c9c5194 --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.distilbert.rst.txt @@ -0,0 +1,19 @@ +hezar.models.sequence\_labeling.distilbert package +================================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling + hezar.models.sequence_labeling.distilbert.distilbert_sequence_labeling_config + +Module contents +--------------- + +.. 
automodule:: hezar.models.sequence_labeling.distilbert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling.rst.txt b/_sources/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling.rst.txt new file mode 100644 index 00000000..665a7e9c --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling.rst.txt @@ -0,0 +1,7 @@ +hezar.models.sequence\_labeling.roberta.roberta\_sequence\_labeling module +========================================================================== + +.. automodule:: hezar.models.sequence_labeling.roberta.roberta_sequence_labeling + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config.rst.txt b/_sources/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config.rst.txt new file mode 100644 index 00000000..9bad047f --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.sequence\_labeling.roberta.roberta\_sequence\_labeling\_config module +================================================================================== + +.. automodule:: hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.roberta.rst.txt b/_sources/source/hezar.models.sequence_labeling.roberta.rst.txt new file mode 100644 index 00000000..183b9ac1 --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.roberta.rst.txt @@ -0,0 +1,19 @@ +hezar.models.sequence\_labeling.roberta package +=============================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.sequence_labeling.roberta.roberta_sequence_labeling + hezar.models.sequence_labeling.roberta.roberta_sequence_labeling_config + +Module contents +--------------- + +.. automodule:: hezar.models.sequence_labeling.roberta + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.sequence_labeling.rst.txt b/_sources/source/hezar.models.sequence_labeling.rst.txt new file mode 100644 index 00000000..5149a573 --- /dev/null +++ b/_sources/source/hezar.models.sequence_labeling.rst.txt @@ -0,0 +1,20 @@ +hezar.models.sequence\_labeling package +======================================= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.sequence_labeling.bert + hezar.models.sequence_labeling.distilbert + hezar.models.sequence_labeling.roberta + +Module contents +--------------- + +.. automodule:: hezar.models.sequence_labeling + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.speech_recognition.rst.txt b/_sources/source/hezar.models.speech_recognition.rst.txt new file mode 100644 index 00000000..5133759d --- /dev/null +++ b/_sources/source/hezar.models.speech_recognition.rst.txt @@ -0,0 +1,18 @@ +hezar.models.speech\_recognition package +======================================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.speech_recognition.whisper + +Module contents +--------------- + +.. 
automodule:: hezar.models.speech_recognition + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.speech_recognition.whisper.rst.txt b/_sources/source/hezar.models.speech_recognition.whisper.rst.txt new file mode 100644 index 00000000..481aec9a --- /dev/null +++ b/_sources/source/hezar.models.speech_recognition.whisper.rst.txt @@ -0,0 +1,21 @@ +hezar.models.speech\_recognition.whisper package +================================================ + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.speech_recognition.whisper.whisper_feature_extractor + hezar.models.speech_recognition.whisper.whisper_speech_recognition + hezar.models.speech_recognition.whisper.whisper_speech_recognition_config + hezar.models.speech_recognition.whisper.whisper_tokenizer + +Module contents +--------------- + +.. automodule:: hezar.models.speech_recognition.whisper + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.speech_recognition.whisper.whisper_feature_extractor.rst.txt b/_sources/source/hezar.models.speech_recognition.whisper.whisper_feature_extractor.rst.txt new file mode 100644 index 00000000..29462dc4 --- /dev/null +++ b/_sources/source/hezar.models.speech_recognition.whisper.whisper_feature_extractor.rst.txt @@ -0,0 +1,7 @@ +hezar.models.speech\_recognition.whisper.whisper\_feature\_extractor module +=========================================================================== + +.. automodule:: hezar.models.speech_recognition.whisper.whisper_feature_extractor + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition.rst.txt b/_sources/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition.rst.txt new file mode 100644 index 00000000..7673fd03 --- /dev/null +++ b/_sources/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition.rst.txt @@ -0,0 +1,7 @@ +hezar.models.speech\_recognition.whisper.whisper\_speech\_recognition module +============================================================================ + +.. automodule:: hezar.models.speech_recognition.whisper.whisper_speech_recognition + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.rst.txt b/_sources/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.rst.txt new file mode 100644 index 00000000..063d4729 --- /dev/null +++ b/_sources/source/hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.speech\_recognition.whisper.whisper\_speech\_recognition\_config module +==================================================================================== + +.. automodule:: hezar.models.speech_recognition.whisper.whisper_speech_recognition_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.speech_recognition.whisper.whisper_tokenizer.rst.txt b/_sources/source/hezar.models.speech_recognition.whisper.whisper_tokenizer.rst.txt new file mode 100644 index 00000000..88f92c65 --- /dev/null +++ b/_sources/source/hezar.models.speech_recognition.whisper.whisper_tokenizer.rst.txt @@ -0,0 +1,7 @@ +hezar.models.speech\_recognition.whisper.whisper\_tokenizer module +================================================================== + +.. 
automodule:: hezar.models.speech_recognition.whisper.whisper_tokenizer + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.bert.bert_text_classification.rst.txt b/_sources/source/hezar.models.text_classification.bert.bert_text_classification.rst.txt new file mode 100644 index 00000000..4da38eb5 --- /dev/null +++ b/_sources/source/hezar.models.text_classification.bert.bert_text_classification.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_classification.bert.bert\_text\_classification module +======================================================================== + +.. automodule:: hezar.models.text_classification.bert.bert_text_classification + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.bert.bert_text_classification_config.rst.txt b/_sources/source/hezar.models.text_classification.bert.bert_text_classification_config.rst.txt new file mode 100644 index 00000000..32cadb21 --- /dev/null +++ b/_sources/source/hezar.models.text_classification.bert.bert_text_classification_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_classification.bert.bert\_text\_classification\_config module +================================================================================ + +.. automodule:: hezar.models.text_classification.bert.bert_text_classification_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.bert.rst.txt b/_sources/source/hezar.models.text_classification.bert.rst.txt new file mode 100644 index 00000000..bcffc558 --- /dev/null +++ b/_sources/source/hezar.models.text_classification.bert.rst.txt @@ -0,0 +1,19 @@ +hezar.models.text\_classification.bert package +============================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.text_classification.bert.bert_text_classification + hezar.models.text_classification.bert.bert_text_classification_config + +Module contents +--------------- + +.. automodule:: hezar.models.text_classification.bert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.distilbert.distilbert_text_classification.rst.txt b/_sources/source/hezar.models.text_classification.distilbert.distilbert_text_classification.rst.txt new file mode 100644 index 00000000..60be019a --- /dev/null +++ b/_sources/source/hezar.models.text_classification.distilbert.distilbert_text_classification.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_classification.distilbert.distilbert\_text\_classification module +==================================================================================== + +.. automodule:: hezar.models.text_classification.distilbert.distilbert_text_classification + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.distilbert.distilbert_text_classification_config.rst.txt b/_sources/source/hezar.models.text_classification.distilbert.distilbert_text_classification_config.rst.txt new file mode 100644 index 00000000..5b532939 --- /dev/null +++ b/_sources/source/hezar.models.text_classification.distilbert.distilbert_text_classification_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_classification.distilbert.distilbert\_text\_classification\_config module +============================================================================================ + +.. 
automodule:: hezar.models.text_classification.distilbert.distilbert_text_classification_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.distilbert.rst.txt b/_sources/source/hezar.models.text_classification.distilbert.rst.txt new file mode 100644 index 00000000..d33274fb --- /dev/null +++ b/_sources/source/hezar.models.text_classification.distilbert.rst.txt @@ -0,0 +1,19 @@ +hezar.models.text\_classification.distilbert package +==================================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.text_classification.distilbert.distilbert_text_classification + hezar.models.text_classification.distilbert.distilbert_text_classification_config + +Module contents +--------------- + +.. automodule:: hezar.models.text_classification.distilbert + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.roberta.roberta_text_classification.rst.txt b/_sources/source/hezar.models.text_classification.roberta.roberta_text_classification.rst.txt new file mode 100644 index 00000000..80e28d89 --- /dev/null +++ b/_sources/source/hezar.models.text_classification.roberta.roberta_text_classification.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_classification.roberta.roberta\_text\_classification module +============================================================================== + +.. automodule:: hezar.models.text_classification.roberta.roberta_text_classification + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.roberta.roberta_text_classification_config.rst.txt b/_sources/source/hezar.models.text_classification.roberta.roberta_text_classification_config.rst.txt new file mode 100644 index 00000000..8541010d --- /dev/null +++ b/_sources/source/hezar.models.text_classification.roberta.roberta_text_classification_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_classification.roberta.roberta\_text\_classification\_config module +====================================================================================== + +.. automodule:: hezar.models.text_classification.roberta.roberta_text_classification_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.roberta.rst.txt b/_sources/source/hezar.models.text_classification.roberta.rst.txt new file mode 100644 index 00000000..d05f0dd7 --- /dev/null +++ b/_sources/source/hezar.models.text_classification.roberta.rst.txt @@ -0,0 +1,19 @@ +hezar.models.text\_classification.roberta package +================================================= + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.text_classification.roberta.roberta_text_classification + hezar.models.text_classification.roberta.roberta_text_classification_config + +Module contents +--------------- + +.. automodule:: hezar.models.text_classification.roberta + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_classification.rst.txt b/_sources/source/hezar.models.text_classification.rst.txt new file mode 100644 index 00000000..0c4fc19c --- /dev/null +++ b/_sources/source/hezar.models.text_classification.rst.txt @@ -0,0 +1,20 @@ +hezar.models.text\_classification package +========================================= + +Subpackages +----------- + +.. 
toctree:: + :maxdepth: 4 + + hezar.models.text_classification.bert + hezar.models.text_classification.distilbert + hezar.models.text_classification.roberta + +Module contents +--------------- + +.. automodule:: hezar.models.text_classification + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_embedding.rst.txt b/_sources/source/hezar.models.text_embedding.rst.txt new file mode 100644 index 00000000..0ba6ba4c --- /dev/null +++ b/_sources/source/hezar.models.text_embedding.rst.txt @@ -0,0 +1,10 @@ +hezar.models.text\_embedding package +==================================== + +Module contents +--------------- + +.. automodule:: hezar.models.text_embedding + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_generation.gpt2.gpt2_text_generation.rst.txt b/_sources/source/hezar.models.text_generation.gpt2.gpt2_text_generation.rst.txt new file mode 100644 index 00000000..5dee9bfe --- /dev/null +++ b/_sources/source/hezar.models.text_generation.gpt2.gpt2_text_generation.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_generation.gpt2.gpt2\_text\_generation module +================================================================ + +.. automodule:: hezar.models.text_generation.gpt2.gpt2_text_generation + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_generation.gpt2.gpt2_text_generation_config.rst.txt b/_sources/source/hezar.models.text_generation.gpt2.gpt2_text_generation_config.rst.txt new file mode 100644 index 00000000..f149635f --- /dev/null +++ b/_sources/source/hezar.models.text_generation.gpt2.gpt2_text_generation_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_generation.gpt2.gpt2\_text\_generation\_config module +======================================================================== + +.. automodule:: hezar.models.text_generation.gpt2.gpt2_text_generation_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_generation.gpt2.rst.txt b/_sources/source/hezar.models.text_generation.gpt2.rst.txt new file mode 100644 index 00000000..35269fd0 --- /dev/null +++ b/_sources/source/hezar.models.text_generation.gpt2.rst.txt @@ -0,0 +1,19 @@ +hezar.models.text\_generation.gpt2 package +========================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.text_generation.gpt2.gpt2_text_generation + hezar.models.text_generation.gpt2.gpt2_text_generation_config + +Module contents +--------------- + +.. automodule:: hezar.models.text_generation.gpt2 + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_generation.rst.txt b/_sources/source/hezar.models.text_generation.rst.txt new file mode 100644 index 00000000..89a04a68 --- /dev/null +++ b/_sources/source/hezar.models.text_generation.rst.txt @@ -0,0 +1,19 @@ +hezar.models.text\_generation package +===================================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.text_generation.gpt2 + hezar.models.text_generation.t5 + +Module contents +--------------- + +.. 
automodule:: hezar.models.text_generation + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_generation.t5.rst.txt b/_sources/source/hezar.models.text_generation.t5.rst.txt new file mode 100644 index 00000000..f798f702 --- /dev/null +++ b/_sources/source/hezar.models.text_generation.t5.rst.txt @@ -0,0 +1,19 @@ +hezar.models.text\_generation.t5 package +======================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.models.text_generation.t5.t5_text_generation + hezar.models.text_generation.t5.t5_text_generation_config + +Module contents +--------------- + +.. automodule:: hezar.models.text_generation.t5 + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_generation.t5.t5_text_generation.rst.txt b/_sources/source/hezar.models.text_generation.t5.t5_text_generation.rst.txt new file mode 100644 index 00000000..0567751f --- /dev/null +++ b/_sources/source/hezar.models.text_generation.t5.t5_text_generation.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_generation.t5.t5\_text\_generation module +============================================================ + +.. automodule:: hezar.models.text_generation.t5.t5_text_generation + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.models.text_generation.t5.t5_text_generation_config.rst.txt b/_sources/source/hezar.models.text_generation.t5.t5_text_generation_config.rst.txt new file mode 100644 index 00000000..e5a83769 --- /dev/null +++ b/_sources/source/hezar.models.text_generation.t5.t5_text_generation_config.rst.txt @@ -0,0 +1,7 @@ +hezar.models.text\_generation.t5.t5\_text\_generation\_config module +==================================================================== + +.. automodule:: hezar.models.text_generation.t5.t5_text_generation_config + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.audio_feature_extractor.rst.txt b/_sources/source/hezar.preprocessors.audio_feature_extractor.rst.txt new file mode 100644 index 00000000..8c4dbccc --- /dev/null +++ b/_sources/source/hezar.preprocessors.audio_feature_extractor.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.audio\_feature\_extractor module +==================================================== + +.. automodule:: hezar.preprocessors.audio_feature_extractor + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.image_processor.rst.txt b/_sources/source/hezar.preprocessors.image_processor.rst.txt new file mode 100644 index 00000000..705e7f9a --- /dev/null +++ b/_sources/source/hezar.preprocessors.image_processor.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.image\_processor module +=========================================== + +.. automodule:: hezar.preprocessors.image_processor + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.preprocessor.rst.txt b/_sources/source/hezar.preprocessors.preprocessor.rst.txt new file mode 100644 index 00000000..e9db0d27 --- /dev/null +++ b/_sources/source/hezar.preprocessors.preprocessor.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.preprocessor module +======================================= + +.. 
automodule:: hezar.preprocessors.preprocessor + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.rst.txt b/_sources/source/hezar.preprocessors.rst.txt new file mode 100644 index 00000000..e2db5f82 --- /dev/null +++ b/_sources/source/hezar.preprocessors.rst.txt @@ -0,0 +1,29 @@ +hezar.preprocessors package +=========================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.preprocessors.tokenizers + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.preprocessors.audio_feature_extractor + hezar.preprocessors.image_processor + hezar.preprocessors.preprocessor + hezar.preprocessors.text_normalizer + +Module contents +--------------- + +.. automodule:: hezar.preprocessors + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.text_normalizer.rst.txt b/_sources/source/hezar.preprocessors.text_normalizer.rst.txt new file mode 100644 index 00000000..5f8f2801 --- /dev/null +++ b/_sources/source/hezar.preprocessors.text_normalizer.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.text\_normalizer module +=========================================== + +.. automodule:: hezar.preprocessors.text_normalizer + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.tokenizers.bpe.rst.txt b/_sources/source/hezar.preprocessors.tokenizers.bpe.rst.txt new file mode 100644 index 00000000..8f8353f0 --- /dev/null +++ b/_sources/source/hezar.preprocessors.tokenizers.bpe.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.tokenizers.bpe module +========================================= + +.. automodule:: hezar.preprocessors.tokenizers.bpe + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.tokenizers.rst.txt b/_sources/source/hezar.preprocessors.tokenizers.rst.txt new file mode 100644 index 00000000..8c32edb2 --- /dev/null +++ b/_sources/source/hezar.preprocessors.tokenizers.rst.txt @@ -0,0 +1,22 @@ +hezar.preprocessors.tokenizers package +====================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.preprocessors.tokenizers.bpe + hezar.preprocessors.tokenizers.sentencepiece_bpe + hezar.preprocessors.tokenizers.sentencepiece_unigram + hezar.preprocessors.tokenizers.tokenizer + hezar.preprocessors.tokenizers.wordpiece + +Module contents +--------------- + +.. automodule:: hezar.preprocessors.tokenizers + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.tokenizers.sentencepiece_bpe.rst.txt b/_sources/source/hezar.preprocessors.tokenizers.sentencepiece_bpe.rst.txt new file mode 100644 index 00000000..547ae296 --- /dev/null +++ b/_sources/source/hezar.preprocessors.tokenizers.sentencepiece_bpe.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.tokenizers.sentencepiece\_bpe module +======================================================== + +.. automodule:: hezar.preprocessors.tokenizers.sentencepiece_bpe + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.tokenizers.sentencepiece_unigram.rst.txt b/_sources/source/hezar.preprocessors.tokenizers.sentencepiece_unigram.rst.txt new file mode 100644 index 00000000..20d06611 --- /dev/null +++ b/_sources/source/hezar.preprocessors.tokenizers.sentencepiece_unigram.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.tokenizers.sentencepiece\_unigram module +============================================================ + +.. 
automodule:: hezar.preprocessors.tokenizers.sentencepiece_unigram + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.tokenizers.tokenizer.rst.txt b/_sources/source/hezar.preprocessors.tokenizers.tokenizer.rst.txt new file mode 100644 index 00000000..0d7bcdf8 --- /dev/null +++ b/_sources/source/hezar.preprocessors.tokenizers.tokenizer.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.tokenizers.tokenizer module +=============================================== + +.. automodule:: hezar.preprocessors.tokenizers.tokenizer + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.preprocessors.tokenizers.wordpiece.rst.txt b/_sources/source/hezar.preprocessors.tokenizers.wordpiece.rst.txt new file mode 100644 index 00000000..09196220 --- /dev/null +++ b/_sources/source/hezar.preprocessors.tokenizers.wordpiece.rst.txt @@ -0,0 +1,7 @@ +hezar.preprocessors.tokenizers.wordpiece module +=============================================== + +.. automodule:: hezar.preprocessors.tokenizers.wordpiece + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.registry.rst.txt b/_sources/source/hezar.registry.rst.txt new file mode 100644 index 00000000..57ce1713 --- /dev/null +++ b/_sources/source/hezar.registry.rst.txt @@ -0,0 +1,7 @@ +hezar.registry module +===================== + +.. automodule:: hezar.registry + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.rst.txt b/_sources/source/hezar.rst.txt new file mode 100644 index 00000000..9b54c33d --- /dev/null +++ b/_sources/source/hezar.rst.txt @@ -0,0 +1,35 @@ +hezar package +============= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + hezar.data + hezar.embeddings + hezar.metrics + hezar.models + hezar.preprocessors + hezar.trainer + hezar.utils + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.builders + hezar.configs + hezar.constants + hezar.registry + +Module contents +--------------- + +.. automodule:: hezar + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.trainer.metrics_handlers.rst.txt b/_sources/source/hezar.trainer.metrics_handlers.rst.txt new file mode 100644 index 00000000..b9a5a835 --- /dev/null +++ b/_sources/source/hezar.trainer.metrics_handlers.rst.txt @@ -0,0 +1,7 @@ +hezar.trainer.metrics\_handlers module +====================================== + +.. automodule:: hezar.trainer.metrics_handlers + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.trainer.rst.txt b/_sources/source/hezar.trainer.rst.txt new file mode 100644 index 00000000..271eb1ff --- /dev/null +++ b/_sources/source/hezar.trainer.rst.txt @@ -0,0 +1,20 @@ +hezar.trainer package +===================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + hezar.trainer.metrics_handlers + hezar.trainer.trainer + hezar.trainer.trainer_utils + +Module contents +--------------- + +.. automodule:: hezar.trainer + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.trainer.trainer.rst.txt b/_sources/source/hezar.trainer.trainer.rst.txt new file mode 100644 index 00000000..e5ae7f8f --- /dev/null +++ b/_sources/source/hezar.trainer.trainer.rst.txt @@ -0,0 +1,7 @@ +hezar.trainer.trainer module +============================ + +.. 
automodule:: hezar.trainer.trainer + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.trainer.trainer_utils.rst.txt b/_sources/source/hezar.trainer.trainer_utils.rst.txt new file mode 100644 index 00000000..90b1c358 --- /dev/null +++ b/_sources/source/hezar.trainer.trainer_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.trainer.trainer\_utils module +=================================== + +.. automodule:: hezar.trainer.trainer_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.utils.audio_utils.rst.txt b/_sources/source/hezar.utils.audio_utils.rst.txt new file mode 100644 index 00000000..a2a9d931 --- /dev/null +++ b/_sources/source/hezar.utils.audio_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.utils.audio\_utils module +=============================== + +.. automodule:: hezar.utils.audio_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.utils.common_utils.rst.txt b/_sources/source/hezar.utils.common_utils.rst.txt new file mode 100644 index 00000000..ed9244c4 --- /dev/null +++ b/_sources/source/hezar.utils.common_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.utils.common\_utils module +================================ + +.. automodule:: hezar.utils.common_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.utils.data_utils.rst.txt b/_sources/source/hezar.utils.data_utils.rst.txt new file mode 100644 index 00000000..9f1e3bb5 --- /dev/null +++ b/_sources/source/hezar.utils.data_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.utils.data\_utils module +============================== + +.. automodule:: hezar.utils.data_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.utils.file_utils.rst.txt b/_sources/source/hezar.utils.file_utils.rst.txt new file mode 100644 index 00000000..a68e4d9b --- /dev/null +++ b/_sources/source/hezar.utils.file_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.utils.file\_utils module +============================== + +.. automodule:: hezar.utils.file_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.utils.hub_utils.rst.txt b/_sources/source/hezar.utils.hub_utils.rst.txt new file mode 100644 index 00000000..77284a45 --- /dev/null +++ b/_sources/source/hezar.utils.hub_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.utils.hub\_utils module +============================= + +.. automodule:: hezar.utils.hub_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.utils.image_utils.rst.txt b/_sources/source/hezar.utils.image_utils.rst.txt new file mode 100644 index 00000000..3b199808 --- /dev/null +++ b/_sources/source/hezar.utils.image_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.utils.image\_utils module +=============================== + +.. automodule:: hezar.utils.image_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/_sources/source/hezar.utils.integration_utils.rst.txt b/_sources/source/hezar.utils.integration_utils.rst.txt new file mode 100644 index 00000000..812aba78 --- /dev/null +++ b/_sources/source/hezar.utils.integration_utils.rst.txt @@ -0,0 +1,7 @@ +hezar.utils.integration\_utils module +===================================== + +.. 
automodule:: hezar.utils.integration_utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/_sources/source/hezar.utils.logging.rst.txt b/_sources/source/hezar.utils.logging.rst.txt
new file mode 100644
index 00000000..eebc5780
--- /dev/null
+++ b/_sources/source/hezar.utils.logging.rst.txt
@@ -0,0 +1,7 @@
+hezar.utils.logging module
+==========================
+
+.. automodule:: hezar.utils.logging
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/_sources/source/hezar.utils.registry_utils.rst.txt b/_sources/source/hezar.utils.registry_utils.rst.txt
new file mode 100644
index 00000000..d4020418
--- /dev/null
+++ b/_sources/source/hezar.utils.registry_utils.rst.txt
@@ -0,0 +1,7 @@
+hezar.utils.registry\_utils module
+==================================
+
+.. automodule:: hezar.utils.registry_utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/_sources/source/hezar.utils.rst.txt b/_sources/source/hezar.utils.rst.txt
new file mode 100644
index 00000000..0d48f115
--- /dev/null
+++ b/_sources/source/hezar.utils.rst.txt
@@ -0,0 +1,26 @@
+hezar.utils package
+===================
+
+Submodules
+----------
+
+.. toctree::
+   :maxdepth: 4
+
+   hezar.utils.audio_utils
+   hezar.utils.common_utils
+   hezar.utils.data_utils
+   hezar.utils.file_utils
+   hezar.utils.hub_utils
+   hezar.utils.image_utils
+   hezar.utils.integration_utils
+   hezar.utils.logging
+   hezar.utils.registry_utils
+
+Module contents
+---------------
+
+.. automodule:: hezar.utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/_sources/source/index.md.txt b/_sources/source/index.md.txt
new file mode 100644
index 00000000..b9ea22ca
--- /dev/null
+++ b/_sources/source/index.md.txt
@@ -0,0 +1,9 @@
+# Reference API
+
+The docs here are mostly generated from the official source code on the main branch.
+
+
+```{toctree}
+:maxdepth: 2
+hezar
+```
diff --git a/_sources/source/modules.rst.txt b/_sources/source/modules.rst.txt
new file mode 100644
index 00000000..945e205a
--- /dev/null
+++ b/_sources/source/modules.rst.txt
@@ -0,0 +1,7 @@
+hezar
+=====
+
+.. toctree::
+   :maxdepth: 4
+
+   hezar
diff --git a/_sources/tutorial/datasets.md.txt b/_sources/tutorial/datasets.md.txt
new file mode 100644
index 00000000..8ddf6a65
--- /dev/null
+++ b/_sources/tutorial/datasets.md.txt
@@ -0,0 +1,59 @@
+# Datasets
+Hezar provides both dataset class implementations and ready-to-use data files for the community.
+
+## Hub Datasets
+Hezar datasets are all hosted on the Hugging Face Hub and can be loaded just like any dataset on the Hub.
+
+### Load using Hugging Face datasets
+```python
+from datasets import load_dataset
+
+sentiment_dataset = load_dataset("hezarai/sentiment-dksf")
+lscp_dataset = load_dataset("hezarai/lscp-pos-500k")
+xlsum_dataset = load_dataset("hezarai/xlsum-fa")
+...
+```
+
+### Load using Hezar Dataset
+```python
+from hezar.data import Dataset
+
+sentiment_dataset = Dataset.load("hezarai/sentiment-dksf")  # A TextClassificationDataset instance
+lscp_dataset = Dataset.load("hezarai/lscp-pos-500k")  # A SequenceLabelingDataset instance
+xlsum_dataset = Dataset.load("hezarai/xlsum-fa")  # A TextSummarizationDataset instance
+...
+```
+
+The difference between loading a dataset with Hezar vs. Hugging Face datasets is the output class. In Hezar, when you load
+a dataset using the `Dataset` class, it automatically finds the proper class for that dataset and creates a
+PyTorch `Dataset` instance that can be easily passed straight to a PyTorch `DataLoader`.
+```python
+from torch.utils.data import DataLoader
+
+from hezar.data.datasets import Dataset
+
+dataset = Dataset.load(
+    "hezarai/lscp-pos-500k",
+    tokenizer_path="hezarai/distilbert-base-fa",  # tokenizer_path is necessary for the data collator
+)
+
+loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=dataset.data_collator)
+itr = iter(loader)
+print(next(itr))
+```
+But when loading using Hugging Face datasets, the output is an HF Dataset instance.
+
+So in a nutshell, any Hezar dataset can be loaded using HF datasets, but not vice versa!
+(That's because Hezar looks for a `dataset_config.yaml` file in the dataset repo, so non-Hezar datasets cannot be
+loaded using Hezar's `Dataset` class.)
+
+## Dataset classes
+Hezar categorizes datasets based on their target task. The dataset classes all inherit from the base `Dataset` class,
+which is a PyTorch `Dataset` subclass (hence implementing the `__getitem__` and `__len__` methods); a short sketch follows below.
+
+Some examples of the dataset classes are `TextClassificationDataset`, `TextSummarizationDataset`, `SequenceLabelingDataset`, etc.
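+Since every dataset class implements `__len__` and `__getitem__`, you can inspect a loaded dataset directly. Here's a minimal
+sketch (the exact fields of a sample depend on the dataset class and its config; the repo and tokenizer paths are the same
+ones used elsewhere in this tutorial):
+```python
+from hezar.data import Dataset
+
+dataset = Dataset.load(
+    "hezarai/sentiment-dksf",
+    split="train",
+    tokenizer_path="hezarai/distilbert-base-fa",
+)
+
+print(len(dataset))  # number of samples, via __len__
+print(dataset[0])    # one preprocessed sample, via __getitem__
+```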
+## Dataset Templates
+We try to have a simple yet practical pattern for all datasets on the Hub. Every dataset on the Hub needs to have
+a dataset loading script. Some ready-to-use templates are located in the [templates/dataset_scripts](https://github.com/hezarai/hezar/tree/main/templates/dataset_scripts) folder.
+To add a new Hezar-compatible dataset to the Hub, you can follow the guide provided there.
diff --git a/_sources/tutorial/embeddings.md.txt b/_sources/tutorial/embeddings.md.txt
new file mode 100644
index 00000000..02007c37
--- /dev/null
+++ b/_sources/tutorial/embeddings.md.txt
@@ -0,0 +1,128 @@
+# Embeddings
+In Hezar, embeddings serve as fundamental components for various natural language processing tasks. The Embedding class
+provides a flexible and extensible foundation for working with word embeddings. Currently, Hezar has two embedding models,
+both backed by Gensim. This tutorial will guide you through the essential aspects of using and customizing embeddings in Hezar.
+
+## Load an Embedding from Hub
+Loading a pretrained embedding, either from the Hub or locally, is as straightforward as loading any other module in Hezar.
+You can choose your desired model from our Hub and load it like below:
+```python
+from hezar.embeddings import Embedding
+
+word2vec = Embedding.load("hezarai/word2vec-cbow-fa-wikipedia")
+```
+Now let's just run a simple similarity test between two given words:
+```python
+word2vec.similarity("هزار", "میلیون")
+```
+```
+0.7400991
+```
+
+## Embedding Methods
+
+### Similarity
+To get the similarity score between two words, use the following:
+```python
+similarity_score = word2vec.similarity("سلام", "درود")
+print(similarity_score)
+```
+```
+0.6196184
+```
+### Get Top-n Similar Words
+Find the top-n most similar words to a given word like this:
+```python
+from pprint import pprint
+
+most_similar = word2vec.most_similar("هزار", topn=5)
+pprint(most_similar)
+```
+```
+[{'score': '0.7407', 'word': 'دویست'},
+ {'score': '0.7401', 'word': 'میلیون'},
+ {'score': '0.7326', 'word': 'صد'},
+ {'score': '0.7277', 'word': 'پانصد'},
+ {'score': '0.7011', 'word': 'سیصد'}]
+```
+
+### Least Similar in a List
+To get the least similar word in a list, i.e. the word that does not match the other words in the list, use the following:
+```python
+least_similar = word2vec.doesnt_match(["خانه", "اتاق", "ماشین"])
+```
+```
+'ماشین'
+```
+
+### Get Word's Vector
+Get the vector for a word by calling the embedding on it:
+```python
+vector = word2vec("سلام")
+```
+You can also give the model a list of words to get vectors for each of them:
+```python
+vectors = word2vec(["هوش", "مصنوعی"])
+```
+
+### Get the Vocabulary
+Get the dictionary of the whole vocabulary in the embedding model:
+```python
+vocab = word2vec.vocab
+```
+#### Vocabulary words and indexes
+You can also get the index of a word in the vocabulary, or vice versa:
+```python
+index = word2vec.word2index("هوش")
+word = word2vec.index2word(index)
+print(word)
+```
+```
+'هوش'
+```
+### Converting to a PyTorch nn.Embedding
+You can also get a PyTorch embedding layer from the embedding model:
+```python
+embedding_layer = word2vec.torch_embedding()
+print(embedding_layer)
+```
+```
+Embedding(240547, 200)
+```
+## Training an Embedding Model
+To train an embedding model, first choose and build your embedding. For this example, we'll train a Word2Vec model using
+the CBOW algorithm with a vector dimension of 200.
+```python
+from hezar.embeddings import Word2Vec, Word2VecConfig
+
+model = Word2Vec(
+    Word2VecConfig(
+        vector_size=200,
+        window=5,
+        train_algorithm="cbow",
+        alpha=0.025,
+        min_count=1,
+        seed=1,
+        workers=4,
+        min_alpha=0.0001,
+    )
+)
+```
+Now, given a list of sentences as the dataset, run the training process:
+```python
+with open("data.txt") as f:
+    sentences = f.readlines()
+
+sentences = [s.replace("\n", "") for s in sentences]
+
+model.train(sentences, epochs=5)
+```
+## Saving and Pushing to the Hub
+Now you can save and push your model to the Hub:
+```python
+model.save("word2vec-cbow-200")
+
+model.push_to_hub("/word2vec-cbow-200-fa")
+```
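+Once saved, the local folder can be loaded back exactly like a Hub path. A quick sketch, reusing the save path from above:
+```python
+from hezar.embeddings import Embedding
+
+word2vec = Embedding.load("word2vec-cbow-200")
+```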
diff --git a/_sources/tutorial/models.md.txt b/_sources/tutorial/models.md.txt
new file mode 100644
index 00000000..04834308
--- /dev/null
+++ b/_sources/tutorial/models.md.txt
@@ -0,0 +1,138 @@
+# Models
+In Hezar, models are typical PyTorch modules with some extra features for loading, saving, exporting, etc.
+Let's dive into some of the most important ones!
+
+## Models Basics
+### Building Models
+Like any other package, you can import any model you want from `hezar.models`.
+
+```python
+from hezar.models import BertMaskFilling, BertMaskFillingConfig
+
+bert = BertMaskFilling(BertMaskFillingConfig())
+```
+You can also configure the architecture by changing the properties in a model's config like so:
+```python
+config = BertMaskFillingConfig(num_hidden_layers=8, num_attention_heads=8)
+bert = BertMaskFilling(config)
+```
+
+Every model in Hezar can be pushed to or downloaded from the Hub.
+
+### Loading pre-trained models
+Loading a model from the Hub is as easy as:
+```python
+from hezar.models import Model
+
+bert = Model.load("hezarai/bert-base-fa")
+```
+The `load` method takes the following steps to build the model:
+
+1. Load the config file `model_config.yaml` and figure out the model's class using the `name` config parameter (`bert_mask_filling` in this snippet).
+2. Build the model with random weights from the corresponding class (`BertMaskFilling` in this snippet).
+3. Download the weights file (`model.pt`) and load the state dict into the model.
+4. If the path contains any preprocessor, the preprocessor (`WordPieceTokenizer` in this snippet) will be loaded too.
+(You can disable loading preprocessors by setting `Model.load(path, load_preprocessor=False)`.)
+
+### Inference & Prediction
+Now that you have loaded a model along with its preprocessors, feature extractors, etc., you can perform end-to-end
+inference in a single line of code using the `Model.predict` method.
+
+A sequence labeling example would be like this:
+```python
+from hezar.models import Model
+
+pos_model = Model.load("hezarai/bert-fa-pos-lscp-500k")  # Part-of-speech
+inputs = ["شرکت هوش مصنوعی هزار"]
+pos_outputs = pos_model.predict(inputs)
+print(f"POS: {pos_outputs}")
+```
+```
+POS: [[{'token': 'شرکت', 'tag': 'Ne'}, {'token': 'هوش', 'tag': 'Ne'}, {'token': 'مصنوعی', 'tag': 'AJe'}, {'token': 'هزار', 'tag': 'NUM'}]]
+```
+
+### Saving Models
+You can save any model, along with its config, preprocessor, and other files, to disk like this:
+
+```python
+from hezar.models import RobertaMaskFilling, RobertaMaskFillingConfig
+
+roberta = RobertaMaskFilling(RobertaMaskFillingConfig(vocab_size=60000))
+roberta.save("my-roberta")
+```
+
+### Pushing to the Hub
+Every model can be pushed to the Hub.
+```python
+from hezar.models import RobertaTextClassification, RobertaTextClassificationConfig
+
+roberta = RobertaTextClassification(RobertaTextClassificationConfig(num_labels=2))
+roberta.push_to_hub("arxyzan/roberta-sentiment")
+```
+```
+INFO: Uploaded:`RobertaTextClassificationConfig(name=roberta_text_classification)` --> `arxyzan/roberta-sentiment/model_config.yaml`
+INFO: Uploaded: `RobertaTextClassification(name=roberta_text_classification)` --> `arxyzan/roberta-sentiment/model.pt`
+```
+## Custom Models
+Every Hezar model is a subclass of the base model class `Model`, and `Model` itself is a subclass of PyTorch's `nn.Module`
+with some extra features. So if you're familiar with PyTorch, this should feel like home!
+
+### A Sample Perceptron
+```python
+from dataclasses import dataclass
+
+from torch import Tensor, nn
+
+from hezar.models import Model, ModelConfig
+from hezar.registry import register_model
+
+
+@dataclass
+class PerceptronConfig(ModelConfig):
+    name = "perceptron"
+    input_shape: int = 4
+    output_shape: int = 2
+
+
+@register_model("perceptron", config_class=PerceptronConfig)
+class Perceptron(Model):
+    """
+    A simple single layer network
+    """
+
+    def __init__(self, config, **kwargs):
+        super().__init__(config, **kwargs)
+        # Read all architecture properties from the config
+        self.nn = nn.Linear(
+            in_features=self.config.input_shape,
+            out_features=self.config.output_shape,
+        )
+
+    def forward(self, inputs: list, **kwargs):
+        # Raw inputs are converted to a tensor with a batch dimension
+        inputs = Tensor(inputs).reshape(1, -1)
+        x = self.nn(inputs)
+        return x
+
+    def post_process(self, model_outputs, **kwargs):
+        # Called by `predict` to turn raw outputs into the final result
+        return model_outputs.numpy()
+
+```
+The only requirement here is that you pass a `ModelConfig` to your model and read everything from the config; the
+rest is just typical PyTorch code.
+
+Now you have access to all the features of a Hezar model:
+```python
+model = Perceptron(PerceptronConfig())
+inputs = [1, 2, 3, 4]
+outputs = model.predict(inputs)
+print(outputs)
+model.save("my-perceptron")
+model.push_to_hub("hezarai/perceptron")
+```
+```
+[[-1.0953112 -1.9854667]]
+INFO: Uploaded:`PerceptronConfig(name=perceptron)` --> `hezarai/perceptron/model_config.yaml`
+INFO: Uploaded: `Perceptron(name=perceptron)` --> `hezarai/perceptron/model.pt`
+```
+
+
+To learn more about the internals of the models in Hezar, take a look at [the models in-depth guide](../guide/models_advanced.md)
diff --git a/_sources/tutorial/preprocessors.md.txt b/_sources/tutorial/preprocessors.md.txt
new file mode 100644
index 00000000..d992a946
--- /dev/null
+++ b/_sources/tutorial/preprocessors.md.txt
@@ -0,0 +1,82 @@
+# Preprocessors
+A really important group of modules in Hezar is the preprocessors. Preprocessors are responsible for every processing
+step that takes inputs from their rawest form to the point where they're ready to be fed to the model.
+
+Preprocessors include all the tokenizers, feature extractors, normalizers, etc., and each of them is considered a
+preprocessor type.
+
+## Loading preprocessors
+Following the common pattern among all modules in Hezar, preprocessors can also be loaded in the same way.
+
+**Loading with the corresponding module**
+You can load any preprocessor of any type with its base class like `Tokenizer`, `AudioFeatureExtractor`, etc.
+```python
+from hezar.preprocessors import Tokenizer, AudioFeatureExtractor, TextNormalizer
+
+tokenizer = Tokenizer.load("hezarai/bert-base-fa")
+normalizer = TextNormalizer.load("hezarai/roberta-base-fa")
+feature_extractor = AudioFeatureExtractor.load("hezarai/whisper-small-fa")
+...
+```
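+Once loaded, these preprocessors are directly callable. As a minimal sketch (hedged: the exact structure of the output
+depends on each preprocessor's config), you can run the tokenizer on a batch of texts and inspect the encoded output:
+```python
+encoded = tokenizer(["سلام دنیا"])
+print(encoded)
+```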
+**Loading with the Preprocessor module**
+Some models might need multiple types of preprocessors. For example, encoder-decoder multimodal models (like image
+captioning models) or audio models need both a feature extractor and a text tokenizer, and possibly a text normalizer.
+To load all the preprocessors in a path, you can use `Preprocessor.load`. The output of this method depends on whether
+the path contains a single preprocessor or multiple ones.
+- If the path contains only one preprocessor, the output is a preprocessor object of the right type.
+- If the path contains multiple preprocessors, the output is a `PreprocessorsContainer`, which is a dict-like object that
+holds each preprocessor by its registry name.
+```python
+from hezar.preprocessors import Tokenizer
+
+tokenizer = Tokenizer.load("hezarai/bert-base-fa")
+print(tokenizer)
+```
+```
+< hezar.preprocessors.tokenizers.wordpiece.WordPieceTokenizer at 0x... >
+```
+
+```python
+from hezar.preprocessors import Preprocessor
+
+whisper_preprocessors = Preprocessor.load("hezarai/whisper-small-fa")
+print(whisper_preprocessors)
+```
+```
+PreprocessorsContainer(
+    [
+        ('whisper_feature_extractor',
+         < hezar.preprocessors.feature_extractors.audio.whisper_feature_extractor.WhisperFeatureExtractor at 0x7f6316fdcbb0 >),
+        ('whisper_bpe_tokenizer',
+         < hezar.preprocessors.tokenizers.whisper_bpe.WhisperBPETokenizer at 0x7f643cb13f40 >)
+    ]
+)
+```
+
+## Saving & Pushing to the Hub
+Although preprocessors each have their own type, they all implement the `load`, `save`, and `push_to_hub` methods.
+```python
+from hezar.preprocessors import TextNormalizer, TextNormalizerConfig
+
+normalizer = TextNormalizer(TextNormalizerConfig(nfkc=False))
+normalizer.save("my-normalizer")
+normalizer.push_to_hub("arxyzan/my-normalizer")
+```
+### Folder structure of the preprocessors
+All preprocessors are saved under the `preprocessor` subfolder by default. Changing this behaviour is possible from all
+three methods (see the short sketch after the folder tree below):
+- `load(..., subfolder="SUBFOLDER")`
+- `save(..., subfolder="SUBFOLDER")`
+- `push_to_hub(..., subfolder="SUBFOLDER")`
+
+The folder structure of the preprocessors for any saved model (locally or in a repo) looks something like below:
+```
+hezarai/whisper-small-fa
+├── model_config.yaml
+├── model.pt
+└── preprocessor
+    ├── feature_extractor_config.yaml
+    ├── tokenizer_config.yaml
+    └── tokenizer.json
+
+```
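+As referenced above, here's a quick sketch of overriding the default subfolder (the folder names are hypothetical;
+`subfolder` is the argument listed above):
+```python
+from hezar.preprocessors import TextNormalizer, TextNormalizerConfig
+
+normalizer = TextNormalizer(TextNormalizerConfig())
+normalizer.save("my-normalizer", subfolder="my_preprocessor")
+
+# Loading must point at the same subfolder
+normalizer = TextNormalizer.load("my-normalizer", subfolder="my_preprocessor")
+```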
diff --git a/_sources/tutorial/training.md.txt b/_sources/tutorial/training.md.txt
new file mode 100644
index 00000000..e737352f
--- /dev/null
+++ b/_sources/tutorial/training.md.txt
@@ -0,0 +1,112 @@
+# Training & Fine-tuning
+
+Training a model in Hezar is pretty much like in any other library, or even simpler! As mentioned before, any model in Hezar
+is also a PyTorch module. So training a model is actually training a PyTorch model, with some more cool features!
+Let's dive in.
+
+## Setup
+In this example, we're going to train a sentiment analysis model based on DistilBERT on a dataset containing
+text and sentiment pairs collected from SnappFood/Digikala user comments.
+### Import everything needed
+First things first, let's import the required stuff.
+
+```python
+from hezar.models import DistilBertTextClassification, DistilBertTextClassificationConfig
+from hezar.data import Dataset
+from hezar.trainer import Trainer, TrainerConfig
+from hezar.preprocessors import Preprocessor
+```
+### Define paths
+Let's define our paths to the datasets, tokenizer, etc.
+```python
+DATASET_PATH = "hezarai/sentiment-dksf"  # dataset path on the Hub
+BASE_MODEL_PATH = "hezarai/distilbert-base-fa"  # used as model backbone weights and tokenizer
+```
+## Datasets
+We can easily load our desired datasets from the Hub.
+```python
+train_dataset = Dataset.load(DATASET_PATH, split="train", tokenizer_path=BASE_MODEL_PATH)
+eval_dataset = Dataset.load(DATASET_PATH, split="test", tokenizer_path=BASE_MODEL_PATH)
+```
+
+## Model
+Let's build our model along with its tokenizer.
+### Build the model
+```python
+model = DistilBertTextClassification(DistilBertTextClassificationConfig(id2label=train_dataset.config.id2label))
+```
+### Load the tokenizer
+The tokenizer can be loaded from the base model path.
+```python
+tokenizer = Preprocessor.load(BASE_MODEL_PATH)
+```
+
+## Trainer
+Hezar has a general `Trainer` class that satisfies most of your needs. You can customize almost every single part of it,
+but for now we stick with the base class `Trainer`.
+### Trainer Config
+Define all the training properties in the trainer's config. As we're training a text classification model, we set the
+task to `text_classification` in our config. Other parameters are also customizable, like below:
+```python
+train_config = TrainerConfig(
+    output_dir="distilbert-fa-sentiment-analysis-dksf",
+    task="text_classification",
+    device="cuda",
+    init_weights_from=BASE_MODEL_PATH,
+    batch_size=8,
+    num_epochs=5,
+    metrics=["f1"],
+    num_dataloader_workers=0,
+    seed=42,
+    optimizer="adamw",
+    learning_rate=2e-5,
+    weight_decay=.0,
+    scheduler="reduce_on_plateau",
+    use_amp=False,
+    save_freq=1,
+)
+```
+### Setup the Trainer
+Now that we have our training config, we can set up the Trainer.
+```python
+trainer = Trainer(
+    config=train_config,
+    model=model,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    data_collator=train_dataset.data_collator,
+    preprocessor=tokenizer,
+)
+```
+### Start Training
+```python
+trainer.train()
+```
+```
+Epoch: 1/5 100%|####################################| 3576/3576 [07:07<00:00, 8.37batch/s, f1=0.732, loss=0.619]
+Evaluating... 100%|####################################| 290/290 [00:07<00:00, 38.64batch/s, f1=0.8, loss=0.473]
+Epoch: 2/5 100%|####################################| 3576/3576 [07:00<00:00, 8.50batch/s, f1=0.807, loss=0.47]
+Evaluating... 100%|####################################| 290/290 [00:07<00:00, 39.87batch/s, f1=0.838, loss=0.419]
+Epoch: 3/5 100%|####################################| 3576/3576 [07:01<00:00, 8.48batch/s, f1=0.864, loss=0.348]
+Evaluating... 100%|####################################| 290/290 [00:07<00:00, 39.97batch/s, f1=0.875, loss=0.346]
+Epoch: 4/5 100%|####################################| 3576/3576 [06:57<00:00, 8.56batch/s, f1=0.919, loss=0.227]
+Evaluating... 100%|####################################| 290/290 [00:07<00:00, 38.84batch/s, f1=0.875, loss=0.381]
+Epoch: 5/5 100%|####################################| 3576/3576 [07:02<00:00, 8.46batch/s, f1=0.943, loss=0.156]
+Evaluating... 100%|####################################| 290/290 [00:07<00:00, 39.71batch/s, f1=0.887, loss=0.446]
+```
+### Evaluate
+```python
+trainer.evaluate()
+```
+```
+Evaluating... 100%|####################################| 290/290 [00:07<00:00, 39.46batch/s, f1=0.887, loss=0.445]
+```
+## Push everything
+Now you can push your trained model to the Hub. The files pushed include the model weights, model config, preprocessor,
+trainer config, etc.
+```python +trainer.push_to_hub("arxyzan/distilbert-fa-sentiment-dksf") +``` + +## Advanced concepts +You can also explore the in-depth Trainer guide [here](../guide/trainer_in_depth.md). \ No newline at end of file diff --git a/_static/basic.css b/_static/basic.css new file mode 100644 index 00000000..30fee9d0 --- /dev/null +++ b/_static/basic.css @@ -0,0 +1,925 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 
3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 360px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, 
+blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + 
margin-bottom: 0; +} + +aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* 
Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/_static/check-solid.svg b/_static/check-solid.svg new file mode 100644 index 00000000..92fad4b5 --- /dev/null +++ b/_static/check-solid.svg @@ -0,0 +1,4 @@ + + + + diff --git a/_static/clipboard.min.js b/_static/clipboard.min.js new file mode 100644 index 00000000..54b3c463 --- /dev/null +++ b/_static/clipboard.min.js @@ -0,0 +1,7 @@ +/*! + * clipboard.js v2.0.8 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return o}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),c=n.n(e);function a(t){try{return document.execCommand(t)}catch(t){return}}var f=function(t){t=c()(t);return a("cut"),t};var l=function(t){var e,n,o,r=1 + + + + diff --git a/_static/copybutton.css b/_static/copybutton.css new file mode 100644 index 00000000..f1916ec7 --- /dev/null +++ b/_static/copybutton.css @@ -0,0 +1,94 @@ +/* Copy buttons */ +button.copybtn { + position: absolute; + display: flex; + top: .3em; + right: .3em; + width: 1.7em; + height: 1.7em; + opacity: 0; + transition: opacity 0.3s, border .3s, background-color .3s; + user-select: none; + padding: 0; + border: none; + outline: none; + border-radius: 0.4em; + /* The colors that GitHub uses */ + border: #1b1f2426 1px solid; + background-color: #f6f8fa; + color: #57606a; +} + +button.copybtn.success { + border-color: #22863a; + color: #22863a; +} + +button.copybtn svg { + stroke: currentColor; + width: 1.5em; + height: 1.5em; + padding: 0.1em; +} + +div.highlight { + position: relative; +} + +/* Show the copybutton */ +.highlight:hover button.copybtn, button.copybtn.success { + opacity: 1; +} + +.highlight button.copybtn:hover { + background-color: rgb(235, 235, 235); +} + +.highlight button.copybtn:active { + background-color: rgb(187, 187, 187); +} + +/** + * A minimal CSS-only tooltip copied from: + * https://codepen.io/mildrenben/pen/rVBrpK + * + * To use, write HTML like the following: + * + *

+ * <p class="o-tooltip--left" data-tooltip="Hey">Short</p>

+ */ + .o-tooltip--left { + position: relative; + } + + .o-tooltip--left:after { + opacity: 0; + visibility: hidden; + position: absolute; + content: attr(data-tooltip); + padding: .2em; + font-size: .8em; + left: -.2em; + background: grey; + color: white; + white-space: nowrap; + z-index: 2; + border-radius: 2px; + transform: translateX(-102%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); +} + +.o-tooltip--left:hover:after { + display: block; + opacity: 1; + visibility: visible; + transform: translateX(-100%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); + transition-delay: .5s; +} + +/* By default the copy button shouldn't show up when printing a page */ +@media print { + button.copybtn { + display: none; + } +} diff --git a/_static/copybutton.js b/_static/copybutton.js new file mode 100644 index 00000000..2ea7ff3e --- /dev/null +++ b/_static/copybutton.js @@ -0,0 +1,248 @@ +// Localization support +const messages = { + 'en': { + 'copy': 'Copy', + 'copy_to_clipboard': 'Copy to clipboard', + 'copy_success': 'Copied!', + 'copy_failure': 'Failed to copy', + }, + 'es' : { + 'copy': 'Copiar', + 'copy_to_clipboard': 'Copiar al portapapeles', + 'copy_success': '¡Copiado!', + 'copy_failure': 'Error al copiar', + }, + 'de' : { + 'copy': 'Kopieren', + 'copy_to_clipboard': 'In die Zwischenablage kopieren', + 'copy_success': 'Kopiert!', + 'copy_failure': 'Fehler beim Kopieren', + }, + 'fr' : { + 'copy': 'Copier', + 'copy_to_clipboard': 'Copier dans le presse-papier', + 'copy_success': 'Copié !', + 'copy_failure': 'Échec de la copie', + }, + 'ru': { + 'copy': 'Скопировать', + 'copy_to_clipboard': 'Скопировать в буфер', + 'copy_success': 'Скопировано!', + 'copy_failure': 'Не удалось скопировать', + }, + 'zh-CN': { + 'copy': '复制', + 'copy_to_clipboard': '复制到剪贴板', + 'copy_success': '复制成功!', + 'copy_failure': '复制失败', + }, + 'it' : { + 'copy': 'Copiare', + 'copy_to_clipboard': 'Copiato negli appunti', + 'copy_success': 'Copiato!', + 'copy_failure': 'Errore durante la copia', + } +} + +let locale = 'en' +if( document.documentElement.lang !== undefined + && messages[document.documentElement.lang] !== undefined ) { + locale = document.documentElement.lang +} + +let doc_url_root = DOCUMENTATION_OPTIONS.URL_ROOT; +if (doc_url_root == '#') { + doc_url_root = ''; +} + +/** + * SVG files for our copy buttons + */ +let iconCheck = ` + ${messages[locale]['copy_success']} + + +` + +// If the user specified their own SVG use that, otherwise use the default +let iconCopy = ``; +if (!iconCopy) { + iconCopy = ` + ${messages[locale]['copy_to_clipboard']} + + + +` +} + +/** + * Set up copy/paste for code blocks + */ + +const runWhenDOMLoaded = cb => { + if (document.readyState != 'loading') { + cb() + } else if (document.addEventListener) { + document.addEventListener('DOMContentLoaded', cb) + } else { + document.attachEvent('onreadystatechange', function() { + if (document.readyState == 'complete') cb() + }) + } +} + +const codeCellId = index => `codecell${index}` + +// Clears selected text since ClipboardJS will select the text when copying +const clearSelection = () => { + if (window.getSelection) { + window.getSelection().removeAllRanges() + } else if (document.selection) { + document.selection.empty() + } +} + +// Changes tooltip text for a moment, then changes it back +// We want the timeout of our `success` class to be a bit shorter than the +// tooltip 
and icon change, so that we can hide the icon before changing back. +var timeoutIcon = 2000; +var timeoutSuccessClass = 1500; + +const temporarilyChangeTooltip = (el, oldText, newText) => { + el.setAttribute('data-tooltip', newText) + el.classList.add('success') + // Remove success a little bit sooner than we change the tooltip + // So that we can use CSS to hide the copybutton first + setTimeout(() => el.classList.remove('success'), timeoutSuccessClass) + setTimeout(() => el.setAttribute('data-tooltip', oldText), timeoutIcon) +} + +// Changes the copy button icon for two seconds, then changes it back +const temporarilyChangeIcon = (el) => { + el.innerHTML = iconCheck; + setTimeout(() => {el.innerHTML = iconCopy}, timeoutIcon) +} + +const addCopyButtonToCodeCells = () => { + // If ClipboardJS hasn't loaded, wait a bit and try again. This + // happens because we load ClipboardJS asynchronously. + if (window.ClipboardJS === undefined) { + setTimeout(addCopyButtonToCodeCells, 250) + return + } + + // Add copybuttons to all of our code cells + const COPYBUTTON_SELECTOR = 'div.highlight pre'; + const codeCells = document.querySelectorAll(COPYBUTTON_SELECTOR) + codeCells.forEach((codeCell, index) => { + const id = codeCellId(index) + codeCell.setAttribute('id', id) + + const clipboardButton = id => + `` + codeCell.insertAdjacentHTML('afterend', clipboardButton(id)) + }) + +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? 
+ var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} + + +var copyTargetText = (trigger) => { + var target = document.querySelector(trigger.attributes['data-clipboard-target'].value); + + // get filtered text + let exclude = '.linenos'; + + let text = filterText(target, exclude); + return formatCopyText(text, '', false, true, true, true, '', '') +} + + // Initialize with a callback so we can modify the text before copy + const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText}) + + // Update UI with error/success messages + clipboard.on('success', event => { + clearSelection() + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_success']) + temporarilyChangeIcon(event.trigger) + }) + + clipboard.on('error', event => { + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_failure']) + }) +} + +runWhenDOMLoaded(addCopyButtonToCodeCells) \ No newline at end of file diff --git a/_static/copybutton_funcs.js b/_static/copybutton_funcs.js new file mode 100644 index 00000000..dbe1aaad --- /dev/null +++ b/_static/copybutton_funcs.js @@ -0,0 +1,73 @@ +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +export function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. 
Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +export function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? + var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} diff --git a/_static/debug.css b/_static/debug.css new file mode 100644 index 00000000..74d4aec3 --- /dev/null +++ b/_static/debug.css @@ -0,0 +1,69 @@ +/* + This CSS file should be overridden by the theme authors. It's + meant for debugging and developing the skeleton that this theme provides. +*/ +body { + font-family: -apple-system, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, + "Apple Color Emoji", "Segoe UI Emoji"; + background: lavender; +} +.sb-announcement { + background: rgb(131, 131, 131); +} +.sb-announcement__inner { + background: black; + color: white; +} +.sb-header { + background: lightskyblue; +} +.sb-header__inner { + background: royalblue; + color: white; +} +.sb-header-secondary { + background: lightcyan; +} +.sb-header-secondary__inner { + background: cornflowerblue; + color: white; +} +.sb-sidebar-primary { + background: lightgreen; +} +.sb-main { + background: blanchedalmond; +} +.sb-main__inner { + background: antiquewhite; +} +.sb-header-article { + background: lightsteelblue; +} +.sb-article-container { + background: snow; +} +.sb-article-main { + background: white; +} +.sb-footer-article { + background: lightpink; +} +.sb-sidebar-secondary { + background: lightgoldenrodyellow; +} +.sb-footer-content { + background: plum; +} +.sb-footer-content__inner { + background: palevioletred; +} +.sb-footer { + background: pink; +} +.sb-footer__inner { + background: salmon; +} +.sb-article { + background: white; +} diff --git a/_static/doctools.js b/_static/doctools.js new file mode 100644 index 00000000..d06a71d7 --- /dev/null +++ b/_static/doctools.js @@ -0,0 +1,156 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Base JavaScript utilities for all Sphinx HTML documentation. 
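With both copies of formatCopyText now in view, a usage sketch of its prompt handling; the ">>> " prompt is an assumed example (a typical Python console prompt), whereas copyTargetText above passes an empty prompt string. The doctools.js header resumes below:

// Keep only prompted lines, strip the prompt, drop interpreter output.
const sample = ">>> x = 1\n>>> x + 1\n2";
console.log(formatCopyText(sample, ">>> ", false, true, true, true, "", ""));
// -> "x = 1\nx + 1"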
+ * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); 
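The i18n helpers above are fed by a message catalog that Sphinx generates for non-English builds; a sketch with a hypothetical German catalog (the keydown handler resumes below):

// Hypothetical catalog; real ones are emitted by Sphinx (typically translations.js).
Documentation.addTranslations({
  messages: { "Search": "Suche", "result": ["Ergebnis", "Ergebnisse"] },
  plural_expr: "n === 1 ? 0 : 1",
  locale: "de",
});
console.log(Documentation.gettext("Search"));                // -> "Suche"
console.log(Documentation.ngettext("result", "results", 2)); // -> "Ergebnisse"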
+ if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/_static/documentation_options.js b/_static/documentation_options.js new file mode 100644 index 00000000..7e4c114f --- /dev/null +++ b/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/_static/file.png b/_static/file.png new file mode 100644 index 00000000..a858a410 Binary files /dev/null and b/_static/file.png differ diff --git a/_static/hezar_logo.svg b/_static/hezar_logo.svg new file mode 100644 index 00000000..167c26ce --- /dev/null +++ b/_static/hezar_logo.svg @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + diff --git a/_static/language_data.js b/_static/language_data.js new file mode 100644 index 00000000..250f5665 --- /dev/null +++ b/_static/language_data.js @@ -0,0 +1,199 @@ +/* + * language_data.js + * ~~~~~~~~~~~~~~~~ + * + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, is available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/_static/minus.png b/_static/minus.png new file mode 100644 index 00000000..d96755fd Binary files /dev/null and b/_static/minus.png differ diff --git a/_static/plus.png b/_static/plus.png new file mode 100644 index 00000000..7107cec9 Binary files /dev/null and b/_static/plus.png differ diff --git a/_static/pygments.css b/_static/pygments.css new file mode 100644 index 00000000..5ca2352f --- /dev/null +++ b/_static/pygments.css @@ -0,0 +1,247 @@ +.highlight pre { line-height: 125%; } +.highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +.highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +.highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight span.linenos.special { 
color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #008800; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #AA22FF; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { color: #008800; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #008800; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #008800 } /* Comment.Preproc */ +.highlight .cpf { color: #008800; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #008800; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #008800; font-weight: bold } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #AA22FF; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #AA22FF; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #AA22FF; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #AA22FF } /* Keyword.Pseudo */ +.highlight .kr { color: #AA22FF; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #00BB00; font-weight: bold } /* Keyword.Type */ +.highlight .m { color: #666666 } /* Literal.Number */ +.highlight .s { color: #BB4444 } /* Literal.String */ +.highlight .na { color: #BB4444 } /* Name.Attribute */ +.highlight .nb { color: #AA22FF } /* Name.Builtin */ +.highlight .nc { color: #0000FF } /* Name.Class */ +.highlight .no { color: #880000 } /* Name.Constant */ +.highlight .nd { color: #AA22FF } /* Name.Decorator */ +.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #00A000 } /* Name.Function */ +.highlight .nl { color: #A0A000 } /* Name.Label */ +.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #B8860B } /* Name.Variable */ +.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #666666 } /* Literal.Number.Bin */ +.highlight .mf { color: #666666 } /* Literal.Number.Float */ +.highlight .mh { color: #666666 } /* Literal.Number.Hex */ +.highlight .mi { color: #666666 } /* Literal.Number.Integer */ +.highlight .mo { color: #666666 } /* Literal.Number.Oct */ +.highlight .sa { color: #BB4444 } /* Literal.String.Affix */ +.highlight .sb { color: #BB4444 } /* Literal.String.Backtick */ +.highlight .sc { color: #BB4444 } /* Literal.String.Char */ +.highlight .dl { color: #BB4444 } /* Literal.String.Delimiter */ +.highlight 
.sd { color: #BB4444; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #BB4444 } /* Literal.String.Double */ +.highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #BB4444 } /* Literal.String.Heredoc */ +.highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +.highlight .sx { color: #008000 } /* Literal.String.Other */ +.highlight .sr { color: #BB6688 } /* Literal.String.Regex */ +.highlight .s1 { color: #BB4444 } /* Literal.String.Single */ +.highlight .ss { color: #B8860B } /* Literal.String.Symbol */ +.highlight .bp { color: #AA22FF } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #00A000 } /* Name.Function.Magic */ +.highlight .vc { color: #B8860B } /* Name.Variable.Class */ +.highlight .vg { color: #B8860B } /* Name.Variable.Global */ +.highlight .vi { color: #B8860B } /* Name.Variable.Instance */ +.highlight .vm { color: #B8860B } /* Name.Variable.Magic */ +.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ +@media not print { +body[data-theme="dark"] .highlight pre { line-height: 125%; } +body[data-theme="dark"] .highlight td.linenos .normal { color: #37474F; background-color: #263238; padding-left: 5px; padding-right: 5px; } +body[data-theme="dark"] .highlight span.linenos { color: #37474F; background-color: #263238; padding-left: 5px; padding-right: 5px; } +body[data-theme="dark"] .highlight td.linenos .special { color: #607A86; background-color: #263238; padding-left: 5px; padding-right: 5px; } +body[data-theme="dark"] .highlight span.linenos.special { color: #607A86; background-color: #263238; padding-left: 5px; padding-right: 5px; } +body[data-theme="dark"] .highlight .hll { background-color: #2C3B41 } +body[data-theme="dark"] .highlight { background: #263238; color: #EEFFFF } +body[data-theme="dark"] .highlight .c { color: #546E7A; font-style: italic } /* Comment */ +body[data-theme="dark"] .highlight .err { color: #FF5370 } /* Error */ +body[data-theme="dark"] .highlight .esc { color: #89DDFF } /* Escape */ +body[data-theme="dark"] .highlight .g { color: #EEFFFF } /* Generic */ +body[data-theme="dark"] .highlight .k { color: #BB80B3 } /* Keyword */ +body[data-theme="dark"] .highlight .l { color: #C3E88D } /* Literal */ +body[data-theme="dark"] .highlight .n { color: #EEFFFF } /* Name */ +body[data-theme="dark"] .highlight .o { color: #89DDFF } /* Operator */ +body[data-theme="dark"] .highlight .p { color: #89DDFF } /* Punctuation */ +body[data-theme="dark"] .highlight .ch { color: #546E7A; font-style: italic } /* Comment.Hashbang */ +body[data-theme="dark"] .highlight .cm { color: #546E7A; font-style: italic } /* Comment.Multiline */ +body[data-theme="dark"] .highlight .cp { color: #546E7A; font-style: italic } /* Comment.Preproc */ +body[data-theme="dark"] .highlight .cpf { color: #546E7A; font-style: italic } /* Comment.PreprocFile */ +body[data-theme="dark"] .highlight .c1 { color: #546E7A; font-style: italic } /* Comment.Single */ +body[data-theme="dark"] .highlight .cs { color: #546E7A; font-style: italic } /* Comment.Special */ +body[data-theme="dark"] .highlight .gd { color: #FF5370 } /* Generic.Deleted */ +body[data-theme="dark"] .highlight .ge { color: #89DDFF } /* Generic.Emph */ +body[data-theme="dark"] .highlight .ges { color: #FFCB6B } /* Generic.EmphStrong */ +body[data-theme="dark"] .highlight .gr { color: #FF5370 } /* Generic.Error */ +body[data-theme="dark"] .highlight .gh { color: #C3E88D } /* Generic.Heading */ +body[data-theme="dark"] 
.highlight .gi { color: #C3E88D } /* Generic.Inserted */ +body[data-theme="dark"] .highlight .go { color: #546E7A } /* Generic.Output */ +body[data-theme="dark"] .highlight .gp { color: #FFCB6B } /* Generic.Prompt */ +body[data-theme="dark"] .highlight .gs { color: #FF5370 } /* Generic.Strong */ +body[data-theme="dark"] .highlight .gu { color: #89DDFF } /* Generic.Subheading */ +body[data-theme="dark"] .highlight .gt { color: #FF5370 } /* Generic.Traceback */ +body[data-theme="dark"] .highlight .kc { color: #89DDFF } /* Keyword.Constant */ +body[data-theme="dark"] .highlight .kd { color: #BB80B3 } /* Keyword.Declaration */ +body[data-theme="dark"] .highlight .kn { color: #89DDFF; font-style: italic } /* Keyword.Namespace */ +body[data-theme="dark"] .highlight .kp { color: #89DDFF } /* Keyword.Pseudo */ +body[data-theme="dark"] .highlight .kr { color: #BB80B3 } /* Keyword.Reserved */ +body[data-theme="dark"] .highlight .kt { color: #BB80B3 } /* Keyword.Type */ +body[data-theme="dark"] .highlight .ld { color: #C3E88D } /* Literal.Date */ +body[data-theme="dark"] .highlight .m { color: #F78C6C } /* Literal.Number */ +body[data-theme="dark"] .highlight .s { color: #C3E88D } /* Literal.String */ +body[data-theme="dark"] .highlight .na { color: #BB80B3 } /* Name.Attribute */ +body[data-theme="dark"] .highlight .nb { color: #82AAFF } /* Name.Builtin */ +body[data-theme="dark"] .highlight .nc { color: #FFCB6B } /* Name.Class */ +body[data-theme="dark"] .highlight .no { color: #EEFFFF } /* Name.Constant */ +body[data-theme="dark"] .highlight .nd { color: #82AAFF } /* Name.Decorator */ +body[data-theme="dark"] .highlight .ni { color: #89DDFF } /* Name.Entity */ +body[data-theme="dark"] .highlight .ne { color: #FFCB6B } /* Name.Exception */ +body[data-theme="dark"] .highlight .nf { color: #82AAFF } /* Name.Function */ +body[data-theme="dark"] .highlight .nl { color: #82AAFF } /* Name.Label */ +body[data-theme="dark"] .highlight .nn { color: #FFCB6B } /* Name.Namespace */ +body[data-theme="dark"] .highlight .nx { color: #EEFFFF } /* Name.Other */ +body[data-theme="dark"] .highlight .py { color: #FFCB6B } /* Name.Property */ +body[data-theme="dark"] .highlight .nt { color: #FF5370 } /* Name.Tag */ +body[data-theme="dark"] .highlight .nv { color: #89DDFF } /* Name.Variable */ +body[data-theme="dark"] .highlight .ow { color: #89DDFF; font-style: italic } /* Operator.Word */ +body[data-theme="dark"] .highlight .pm { color: #89DDFF } /* Punctuation.Marker */ +body[data-theme="dark"] .highlight .w { color: #EEFFFF } /* Text.Whitespace */ +body[data-theme="dark"] .highlight .mb { color: #F78C6C } /* Literal.Number.Bin */ +body[data-theme="dark"] .highlight .mf { color: #F78C6C } /* Literal.Number.Float */ +body[data-theme="dark"] .highlight .mh { color: #F78C6C } /* Literal.Number.Hex */ +body[data-theme="dark"] .highlight .mi { color: #F78C6C } /* Literal.Number.Integer */ +body[data-theme="dark"] .highlight .mo { color: #F78C6C } /* Literal.Number.Oct */ +body[data-theme="dark"] .highlight .sa { color: #BB80B3 } /* Literal.String.Affix */ +body[data-theme="dark"] .highlight .sb { color: #C3E88D } /* Literal.String.Backtick */ +body[data-theme="dark"] .highlight .sc { color: #C3E88D } /* Literal.String.Char */ +body[data-theme="dark"] .highlight .dl { color: #EEFFFF } /* Literal.String.Delimiter */ +body[data-theme="dark"] .highlight .sd { color: #546E7A; font-style: italic } /* Literal.String.Doc */ +body[data-theme="dark"] .highlight .s2 { color: #C3E88D } /* Literal.String.Double */ 
+body[data-theme="dark"] .highlight .se { color: #EEFFFF } /* Literal.String.Escape */ +body[data-theme="dark"] .highlight .sh { color: #C3E88D } /* Literal.String.Heredoc */ +body[data-theme="dark"] .highlight .si { color: #89DDFF } /* Literal.String.Interpol */ +body[data-theme="dark"] .highlight .sx { color: #C3E88D } /* Literal.String.Other */ +body[data-theme="dark"] .highlight .sr { color: #89DDFF } /* Literal.String.Regex */ +body[data-theme="dark"] .highlight .s1 { color: #C3E88D } /* Literal.String.Single */ +body[data-theme="dark"] .highlight .ss { color: #89DDFF } /* Literal.String.Symbol */ +body[data-theme="dark"] .highlight .bp { color: #89DDFF } /* Name.Builtin.Pseudo */ +body[data-theme="dark"] .highlight .fm { color: #82AAFF } /* Name.Function.Magic */ +body[data-theme="dark"] .highlight .vc { color: #89DDFF } /* Name.Variable.Class */ +body[data-theme="dark"] .highlight .vg { color: #89DDFF } /* Name.Variable.Global */ +body[data-theme="dark"] .highlight .vi { color: #89DDFF } /* Name.Variable.Instance */ +body[data-theme="dark"] .highlight .vm { color: #82AAFF } /* Name.Variable.Magic */ +body[data-theme="dark"] .highlight .il { color: #F78C6C } /* Literal.Number.Integer.Long */ +@media (prefers-color-scheme: dark) { +body:not([data-theme="light"]) .highlight pre { line-height: 125%; } +body:not([data-theme="light"]) .highlight td.linenos .normal { color: #37474F; background-color: #263238; padding-left: 5px; padding-right: 5px; } +body:not([data-theme="light"]) .highlight span.linenos { color: #37474F; background-color: #263238; padding-left: 5px; padding-right: 5px; } +body:not([data-theme="light"]) .highlight td.linenos .special { color: #607A86; background-color: #263238; padding-left: 5px; padding-right: 5px; } +body:not([data-theme="light"]) .highlight span.linenos.special { color: #607A86; background-color: #263238; padding-left: 5px; padding-right: 5px; } +body:not([data-theme="light"]) .highlight .hll { background-color: #2C3B41 } +body:not([data-theme="light"]) .highlight { background: #263238; color: #EEFFFF } +body:not([data-theme="light"]) .highlight .c { color: #546E7A; font-style: italic } /* Comment */ +body:not([data-theme="light"]) .highlight .err { color: #FF5370 } /* Error */ +body:not([data-theme="light"]) .highlight .esc { color: #89DDFF } /* Escape */ +body:not([data-theme="light"]) .highlight .g { color: #EEFFFF } /* Generic */ +body:not([data-theme="light"]) .highlight .k { color: #BB80B3 } /* Keyword */ +body:not([data-theme="light"]) .highlight .l { color: #C3E88D } /* Literal */ +body:not([data-theme="light"]) .highlight .n { color: #EEFFFF } /* Name */ +body:not([data-theme="light"]) .highlight .o { color: #89DDFF } /* Operator */ +body:not([data-theme="light"]) .highlight .p { color: #89DDFF } /* Punctuation */ +body:not([data-theme="light"]) .highlight .ch { color: #546E7A; font-style: italic } /* Comment.Hashbang */ +body:not([data-theme="light"]) .highlight .cm { color: #546E7A; font-style: italic } /* Comment.Multiline */ +body:not([data-theme="light"]) .highlight .cp { color: #546E7A; font-style: italic } /* Comment.Preproc */ +body:not([data-theme="light"]) .highlight .cpf { color: #546E7A; font-style: italic } /* Comment.PreprocFile */ +body:not([data-theme="light"]) .highlight .c1 { color: #546E7A; font-style: italic } /* Comment.Single */ +body:not([data-theme="light"]) .highlight .cs { color: #546E7A; font-style: italic } /* Comment.Special */ +body:not([data-theme="light"]) .highlight .gd { color: #FF5370 } /* Generic.Deleted */ 
+body:not([data-theme="light"]) .highlight .ge { color: #89DDFF } /* Generic.Emph */ +body:not([data-theme="light"]) .highlight .ges { color: #FFCB6B } /* Generic.EmphStrong */ +body:not([data-theme="light"]) .highlight .gr { color: #FF5370 } /* Generic.Error */ +body:not([data-theme="light"]) .highlight .gh { color: #C3E88D } /* Generic.Heading */ +body:not([data-theme="light"]) .highlight .gi { color: #C3E88D } /* Generic.Inserted */ +body:not([data-theme="light"]) .highlight .go { color: #546E7A } /* Generic.Output */ +body:not([data-theme="light"]) .highlight .gp { color: #FFCB6B } /* Generic.Prompt */ +body:not([data-theme="light"]) .highlight .gs { color: #FF5370 } /* Generic.Strong */ +body:not([data-theme="light"]) .highlight .gu { color: #89DDFF } /* Generic.Subheading */ +body:not([data-theme="light"]) .highlight .gt { color: #FF5370 } /* Generic.Traceback */ +body:not([data-theme="light"]) .highlight .kc { color: #89DDFF } /* Keyword.Constant */ +body:not([data-theme="light"]) .highlight .kd { color: #BB80B3 } /* Keyword.Declaration */ +body:not([data-theme="light"]) .highlight .kn { color: #89DDFF; font-style: italic } /* Keyword.Namespace */ +body:not([data-theme="light"]) .highlight .kp { color: #89DDFF } /* Keyword.Pseudo */ +body:not([data-theme="light"]) .highlight .kr { color: #BB80B3 } /* Keyword.Reserved */ +body:not([data-theme="light"]) .highlight .kt { color: #BB80B3 } /* Keyword.Type */ +body:not([data-theme="light"]) .highlight .ld { color: #C3E88D } /* Literal.Date */ +body:not([data-theme="light"]) .highlight .m { color: #F78C6C } /* Literal.Number */ +body:not([data-theme="light"]) .highlight .s { color: #C3E88D } /* Literal.String */ +body:not([data-theme="light"]) .highlight .na { color: #BB80B3 } /* Name.Attribute */ +body:not([data-theme="light"]) .highlight .nb { color: #82AAFF } /* Name.Builtin */ +body:not([data-theme="light"]) .highlight .nc { color: #FFCB6B } /* Name.Class */ +body:not([data-theme="light"]) .highlight .no { color: #EEFFFF } /* Name.Constant */ +body:not([data-theme="light"]) .highlight .nd { color: #82AAFF } /* Name.Decorator */ +body:not([data-theme="light"]) .highlight .ni { color: #89DDFF } /* Name.Entity */ +body:not([data-theme="light"]) .highlight .ne { color: #FFCB6B } /* Name.Exception */ +body:not([data-theme="light"]) .highlight .nf { color: #82AAFF } /* Name.Function */ +body:not([data-theme="light"]) .highlight .nl { color: #82AAFF } /* Name.Label */ +body:not([data-theme="light"]) .highlight .nn { color: #FFCB6B } /* Name.Namespace */ +body:not([data-theme="light"]) .highlight .nx { color: #EEFFFF } /* Name.Other */ +body:not([data-theme="light"]) .highlight .py { color: #FFCB6B } /* Name.Property */ +body:not([data-theme="light"]) .highlight .nt { color: #FF5370 } /* Name.Tag */ +body:not([data-theme="light"]) .highlight .nv { color: #89DDFF } /* Name.Variable */ +body:not([data-theme="light"]) .highlight .ow { color: #89DDFF; font-style: italic } /* Operator.Word */ +body:not([data-theme="light"]) .highlight .pm { color: #89DDFF } /* Punctuation.Marker */ +body:not([data-theme="light"]) .highlight .w { color: #EEFFFF } /* Text.Whitespace */ +body:not([data-theme="light"]) .highlight .mb { color: #F78C6C } /* Literal.Number.Bin */ +body:not([data-theme="light"]) .highlight .mf { color: #F78C6C } /* Literal.Number.Float */ +body:not([data-theme="light"]) .highlight .mh { color: #F78C6C } /* Literal.Number.Hex */ +body:not([data-theme="light"]) .highlight .mi { color: #F78C6C } /* Literal.Number.Integer */ 
+body:not([data-theme="light"]) .highlight .mo { color: #F78C6C } /* Literal.Number.Oct */ +body:not([data-theme="light"]) .highlight .sa { color: #BB80B3 } /* Literal.String.Affix */ +body:not([data-theme="light"]) .highlight .sb { color: #C3E88D } /* Literal.String.Backtick */ +body:not([data-theme="light"]) .highlight .sc { color: #C3E88D } /* Literal.String.Char */ +body:not([data-theme="light"]) .highlight .dl { color: #EEFFFF } /* Literal.String.Delimiter */ +body:not([data-theme="light"]) .highlight .sd { color: #546E7A; font-style: italic } /* Literal.String.Doc */ +body:not([data-theme="light"]) .highlight .s2 { color: #C3E88D } /* Literal.String.Double */ +body:not([data-theme="light"]) .highlight .se { color: #EEFFFF } /* Literal.String.Escape */ +body:not([data-theme="light"]) .highlight .sh { color: #C3E88D } /* Literal.String.Heredoc */ +body:not([data-theme="light"]) .highlight .si { color: #89DDFF } /* Literal.String.Interpol */ +body:not([data-theme="light"]) .highlight .sx { color: #C3E88D } /* Literal.String.Other */ +body:not([data-theme="light"]) .highlight .sr { color: #89DDFF } /* Literal.String.Regex */ +body:not([data-theme="light"]) .highlight .s1 { color: #C3E88D } /* Literal.String.Single */ +body:not([data-theme="light"]) .highlight .ss { color: #89DDFF } /* Literal.String.Symbol */ +body:not([data-theme="light"]) .highlight .bp { color: #89DDFF } /* Name.Builtin.Pseudo */ +body:not([data-theme="light"]) .highlight .fm { color: #82AAFF } /* Name.Function.Magic */ +body:not([data-theme="light"]) .highlight .vc { color: #89DDFF } /* Name.Variable.Class */ +body:not([data-theme="light"]) .highlight .vg { color: #89DDFF } /* Name.Variable.Global */ +body:not([data-theme="light"]) .highlight .vi { color: #89DDFF } /* Name.Variable.Instance */ +body:not([data-theme="light"]) .highlight .vm { color: #82AAFF } /* Name.Variable.Magic */ +body:not([data-theme="light"]) .highlight .il { color: #F78C6C } /* Literal.Number.Integer.Long */ +} +} \ No newline at end of file diff --git a/_static/scripts/furo-extensions.js b/_static/scripts/furo-extensions.js new file mode 100644 index 00000000..e69de29b diff --git a/_static/scripts/furo.js b/_static/scripts/furo.js new file mode 100644 index 00000000..32e7c05b --- /dev/null +++ b/_static/scripts/furo.js @@ -0,0 +1,3 @@ +/*! 
For license information please see furo.js.LICENSE.txt */ +(()=>{var t={212:function(t,e,n){var o,r;r=void 0!==n.g?n.g:"undefined"!=typeof window?window:this,o=function(){return function(t){"use strict";var e={navClass:"active",contentClass:"active",nested:!1,nestedClass:"active",offset:0,reflow:!1,events:!0},n=function(t,e,n){if(n.settings.events){var o=new CustomEvent(t,{bubbles:!0,cancelable:!0,detail:n});e.dispatchEvent(o)}},o=function(t){var e=0;if(t.offsetParent)for(;t;)e+=t.offsetTop,t=t.offsetParent;return e>=0?e:0},r=function(t){t&&t.sort((function(t,e){return o(t.content)=Math.max(document.body.scrollHeight,document.documentElement.scrollHeight,document.body.offsetHeight,document.documentElement.offsetHeight,document.body.clientHeight,document.documentElement.clientHeight)},l=function(t,e){var n=t[t.length-1];if(function(t,e){return!(!s()||!c(t.content,e,!0))}(n,e))return n;for(var o=t.length-1;o>=0;o--)if(c(t[o].content,e))return t[o]},a=function(t,e){if(e.nested&&t.parentNode){var n=t.parentNode.closest("li");n&&(n.classList.remove(e.nestedClass),a(n,e))}},i=function(t,e){if(t){var o=t.nav.closest("li");o&&(o.classList.remove(e.navClass),t.content.classList.remove(e.contentClass),a(o,e),n("gumshoeDeactivate",o,{link:t.nav,content:t.content,settings:e}))}},u=function(t,e){if(e.nested){var n=t.parentNode.closest("li");n&&(n.classList.add(e.nestedClass),u(n,e))}};return function(o,c){var s,a,d,f,m,v={setup:function(){s=document.querySelectorAll(o),a=[],Array.prototype.forEach.call(s,(function(t){var e=document.getElementById(decodeURIComponent(t.hash.substr(1)));e&&a.push({nav:t,content:e})})),r(a)},detect:function(){var t=l(a,m);t?d&&t.content===d.content||(i(d,m),function(t,e){if(t){var o=t.nav.closest("li");o&&(o.classList.add(e.navClass),t.content.classList.add(e.contentClass),u(o,e),n("gumshoeActivate",o,{link:t.nav,content:t.content,settings:e}))}}(t,m),d=t):d&&(i(d,m),d=null)}},h=function(e){f&&t.cancelAnimationFrame(f),f=t.requestAnimationFrame(v.detect)},g=function(e){f&&t.cancelAnimationFrame(f),f=t.requestAnimationFrame((function(){r(a),v.detect()}))};return v.destroy=function(){d&&i(d,m),t.removeEventListener("scroll",h,!1),m.reflow&&t.removeEventListener("resize",g,!1),a=null,s=null,d=null,f=null,m=null},m=function(){var t={};return Array.prototype.forEach.call(arguments,(function(e){for(var n in e){if(!e.hasOwnProperty(n))return;t[n]=e[n]}})),t}(e,c||{}),v.setup(),v.detect(),t.addEventListener("scroll",h,!1),m.reflow&&t.addEventListener("resize",g,!1),v}}(r)}.apply(e,[]),void 0===o||(t.exports=o)}},e={};function n(o){var r=e[o];if(void 0!==r)return r.exports;var c=e[o]={exports:{}};return t[o].call(c.exports,c,c.exports,n),c.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var o in e)n.o(e,o)&&!n.o(t,o)&&Object.defineProperty(t,o,{enumerable:!0,get:e[o]})},n.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(t){if("object"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{"use strict";var t=n(212),e=n.n(t),o=null,r=null,c=window.pageYOffset||document.documentElement.scrollTop;const s=64;function l(){const t=localStorage.getItem("theme")||"auto";var e;"light"!==(e=window.matchMedia("(prefers-color-scheme: dark)").matches?"auto"===t?"light":"light"==t?"dark":"auto":"auto"===t?"dark":"dark"==t?"light":"auto")&&"dark"!==e&&"auto"!==e&&(console.error(`Got invalid theme mode: ${e}. 
Resetting to auto.`),e="auto"),document.body.dataset.theme=e,localStorage.setItem("theme",e),console.log(`Changed to ${e} mode.`)}function a(){!function(){const t=document.getElementsByClassName("theme-toggle");Array.from(t).forEach((t=>{t.addEventListener("click",l)}))}(),function(){let t=0,e=!1;window.addEventListener("scroll",(function(n){t=window.scrollY,e||(window.requestAnimationFrame((function(){var n;n=t,0==Math.floor(r.getBoundingClientRect().top)?r.classList.add("scrolled"):r.classList.remove("scrolled"),function(t){tc&&document.documentElement.classList.remove("show-back-to-top"),c=t}(n),function(t){null!==o&&(0==t?o.scrollTo(0,0):Math.ceil(t)>=Math.floor(document.documentElement.scrollHeight-window.innerHeight)?o.scrollTo(0,o.scrollHeight):document.querySelector(".scroll-current"))}(n),e=!1})),e=!0)})),window.scroll()}(),null!==o&&new(e())(".toc-tree a",{reflow:!0,recursive:!0,navClass:"scroll-current",offset:()=>{let t=parseFloat(getComputedStyle(document.documentElement).fontSize);return r.getBoundingClientRect().height+.5*t+1}})}document.addEventListener("DOMContentLoaded",(function(){document.body.parentNode.classList.remove("no-js"),r=document.querySelector("header"),o=document.querySelector(".toc-scroll"),a()}))})()})(); +//# sourceMappingURL=furo.js.map \ No newline at end of file diff --git a/_static/scripts/furo.js.LICENSE.txt b/_static/scripts/furo.js.LICENSE.txt new file mode 100644 index 00000000..1632189c --- /dev/null +++ b/_static/scripts/furo.js.LICENSE.txt @@ -0,0 +1,7 @@ +/*! + * gumshoejs v5.1.2 (patched by @pradyunsg) + * A simple, framework-agnostic scrollspy script. + * (c) 2019 Chris Ferdinandi + * MIT License + * http://github.com/cferdinandi/gumshoe + */ diff --git a/_static/scripts/furo.js.map b/_static/scripts/furo.js.map new file mode 100644 index 00000000..7b7ddb11 --- /dev/null +++ b/_static/scripts/furo.js.map @@ -0,0 +1 @@ 
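The minified furo.js above persists the selected theme in localStorage and mirrors it onto the body element, which is what the body[data-theme="dark"] selectors in pygments.css key off. A minimal sketch of that pattern, assuming the same "theme" storage key (the source-map payload follows below):

// Validate, persist, and apply a theme mode, as furo's setTheme does.
function applyTheme(mode) {
  if (!["light", "dark", "auto"].includes(mode)) {
    console.error(`Got invalid theme mode: ${mode}. Resetting to auto.`);
    mode = "auto";
  }
  document.body.dataset.theme = mode;  // drives the [data-theme] CSS above
  localStorage.setItem("theme", mode); // survives page reloads
}
applyTheme(localStorage.getItem("theme") || "auto");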
+{"version":3,"file":"scripts/furo.js","mappings":";iCAAA,MAQWA,SAWS,IAAX,EAAAC,EACH,EAAAA,EACkB,oBAAXC,OACPA,OACAC,KAbS,EAAF,WACP,OAaJ,SAAUD,GACR,aAMA,IAAIE,EAAW,CAEbC,SAAU,SACVC,aAAc,SAGdC,QAAQ,EACRC,YAAa,SAGbC,OAAQ,EACRC,QAAQ,EAGRC,QAAQ,GA6BNC,EAAY,SAAUC,EAAMC,EAAMC,GAEpC,GAAKA,EAAOC,SAASL,OAArB,CAGA,IAAIM,EAAQ,IAAIC,YAAYL,EAAM,CAChCM,SAAS,EACTC,YAAY,EACZL,OAAQA,IAIVD,EAAKO,cAAcJ,EAVgB,CAWrC,EAOIK,EAAe,SAAUR,GAC3B,IAAIS,EAAW,EACf,GAAIT,EAAKU,aACP,KAAOV,GACLS,GAAYT,EAAKW,UACjBX,EAAOA,EAAKU,aAGhB,OAAOD,GAAY,EAAIA,EAAW,CACpC,EAMIG,EAAe,SAAUC,GACvBA,GACFA,EAASC,MAAK,SAAUC,EAAOC,GAG7B,OAFcR,EAAaO,EAAME,SACnBT,EAAaQ,EAAMC,UACF,EACxB,CACT,GAEJ,EAwCIC,EAAW,SAAUlB,EAAME,EAAUiB,GACvC,IAAIC,EAASpB,EAAKqB,wBACd1B,EAnCU,SAAUO,GAExB,MAA+B,mBAApBA,EAASP,OACX2B,WAAWpB,EAASP,UAItB2B,WAAWpB,EAASP,OAC7B,CA2Be4B,CAAUrB,GACvB,OAAIiB,EAEAK,SAASJ,EAAOD,OAAQ,KACvB/B,EAAOqC,aAAeC,SAASC,gBAAgBC,cAG7CJ,SAASJ,EAAOS,IAAK,KAAOlC,CACrC,EAMImC,EAAa,WACf,OACEC,KAAKC,KAAK5C,EAAOqC,YAAcrC,EAAO6C,cAnCjCF,KAAKG,IACVR,SAASS,KAAKC,aACdV,SAASC,gBAAgBS,aACzBV,SAASS,KAAKE,aACdX,SAASC,gBAAgBU,aACzBX,SAASS,KAAKP,aACdF,SAASC,gBAAgBC,aAkC7B,EAmBIU,EAAY,SAAUzB,EAAUX,GAClC,IAAIqC,EAAO1B,EAASA,EAAS2B,OAAS,GACtC,GAbgB,SAAUC,EAAMvC,GAChC,SAAI4B,MAAgBZ,EAASuB,EAAKxB,QAASf,GAAU,GAEvD,CAUMwC,CAAYH,EAAMrC,GAAW,OAAOqC,EACxC,IAAK,IAAII,EAAI9B,EAAS2B,OAAS,EAAGG,GAAK,EAAGA,IACxC,GAAIzB,EAASL,EAAS8B,GAAG1B,QAASf,GAAW,OAAOW,EAAS8B,EAEjE,EAOIC,EAAmB,SAAUC,EAAK3C,GAEpC,GAAKA,EAAST,QAAWoD,EAAIC,WAA7B,CAGA,IAAIC,EAAKF,EAAIC,WAAWE,QAAQ,MAC3BD,IAGLA,EAAGE,UAAUC,OAAOhD,EAASR,aAG7BkD,EAAiBG,EAAI7C,GAV0B,CAWjD,EAOIiD,EAAa,SAAUC,EAAOlD,GAEhC,GAAKkD,EAAL,CAGA,IAAIL,EAAKK,EAAMP,IAAIG,QAAQ,MACtBD,IAGLA,EAAGE,UAAUC,OAAOhD,EAASX,UAC7B6D,EAAMnC,QAAQgC,UAAUC,OAAOhD,EAASV,cAGxCoD,EAAiBG,EAAI7C,GAGrBJ,EAAU,oBAAqBiD,EAAI,CACjCM,KAAMD,EAAMP,IACZ5B,QAASmC,EAAMnC,QACff,SAAUA,IAjBM,CAmBpB,EAOIoD,EAAiB,SAAUT,EAAK3C,GAElC,GAAKA,EAAST,OAAd,CAGA,IAAIsD,EAAKF,EAAIC,WAAWE,QAAQ,MAC3BD,IAGLA,EAAGE,UAAUM,IAAIrD,EAASR,aAG1B4D,EAAeP,EAAI7C,GAVS,CAW9B,EA6LA,OA1JkB,SAAUsD,EAAUC,GAKpC,IACIC,EAAU7C,EAAU8C,EAASC,EAAS1D,EADtC2D,EAAa,CAUjBA,MAAmB,WAEjBH,EAAWhC,SAASoC,iBAAiBN,GAGrC3C,EAAW,GAGXkD,MAAMC,UAAUC,QAAQC,KAAKR,GAAU,SAAUjB,GAE/C,IAAIxB,EAAUS,SAASyC,eACrBC,mBAAmB3B,EAAK4B,KAAKC,OAAO,KAEjCrD,GAGLJ,EAAS0D,KAAK,CACZ1B,IAAKJ,EACLxB,QAASA,GAEb,IAGAL,EAAaC,EACf,EAKAgD,OAAoB,WAElB,IAAIW,EAASlC,EAAUzB,EAAUX,GAG5BsE,EASDb,GAAWa,EAAOvD,UAAY0C,EAAQ1C,UAG1CkC,EAAWQ,EAASzD,GAzFT,SAAUkD,EAAOlD,GAE9B,GAAKkD,EAAL,CAGA,IAAIL,EAAKK,EAAMP,IAAIG,QAAQ,MACtBD,IAGLA,EAAGE,UAAUM,IAAIrD,EAASX,UAC1B6D,EAAMnC,QAAQgC,UAAUM,IAAIrD,EAASV,cAGrC8D,EAAeP,EAAI7C,GAGnBJ,EAAU,kBAAmBiD,EAAI,CAC/BM,KAAMD,EAAMP,IACZ5B,QAASmC,EAAMnC,QACff,SAAUA,IAjBM,CAmBpB,CAqEIuE,CAASD,EAAQtE,GAGjByD,EAAUa,GAfJb,IACFR,EAAWQ,EAASzD,GACpByD,EAAU,KAchB,GAMIe,EAAgB,SAAUvE,GAExByD,GACFxE,EAAOuF,qBAAqBf,GAI9BA,EAAUxE,EAAOwF,sBAAsBf,EAAWgB,OACpD,EAMIC,EAAgB,SAAU3E,GAExByD,GACFxE,EAAOuF,qBAAqBf,GAI9BA,EAAUxE,EAAOwF,uBAAsB,WACrChE,EAAaC,GACbgD,EAAWgB,QACb,GACF,EAkDA,OA7CAhB,EAAWkB,QAAU,WAEfpB,GACFR,EAAWQ,EAASzD,GAItBd,EAAO4F,oBAAoB,SAAUN,GAAe,GAChDxE,EAASN,QACXR,EAAO4F,oBAAoB,SAAUF,GAAe,GAItDjE,EAAW,KACX6C,EAAW,KACXC,EAAU,KACVC,EAAU,KACV1D,EAAW,IACb,EAOEA,EA3XS,WACX,IAAI+E,EAAS,CAAC,EAOd,OANAlB,MAAMC,UAAUC,QAAQC,KAAKgB,WAAW,SAAUC,GAChD,IAAK,IAAIC,KAAOD,EAAK,CACnB,IAAKA,EAAIE,eAAeD,GAAM,OAC9BH,EAAOG,GAAOD,EAAIC,EACpB,CACF,IACOH,CACT,CAkXeK,CAAOhG,EAAUmE,GAAW,CAAC,GAGxCI,EAAW0B,QAGX1B,EAAWgB,SAGXzF,EAAOoG,iBAAiB,SAAUd,GAAe,GAC7CxE,EAASN,QACXR,EAAOoG,iBAAiB,SAAUV,GAAe,GAS9CjB,CACT,CAOF,CArcW4B,CAAQvG,EAChB,UAFM,SAEN,uBCXDwG,EAA2B,CAAC,EAGhC,SAASC,E
AAoBC,GAE5B,IAAIC,EAAeH,EAAyBE,GAC5C,QAAqBE,IAAjBD,EACH,OAAOA,EAAaE,QAGrB,IAAIC,EAASN,EAAyBE,GAAY,CAGjDG,QAAS,CAAC,GAOX,OAHAE,EAAoBL,GAAU1B,KAAK8B,EAAOD,QAASC,EAAQA,EAAOD,QAASJ,GAGpEK,EAAOD,OACf,CCrBAJ,EAAoBO,EAAKF,IACxB,IAAIG,EAASH,GAAUA,EAAOI,WAC7B,IAAOJ,EAAiB,QACxB,IAAM,EAEP,OADAL,EAAoBU,EAAEF,EAAQ,CAAEG,EAAGH,IAC5BA,CAAM,ECLdR,EAAoBU,EAAI,CAACN,EAASQ,KACjC,IAAI,IAAInB,KAAOmB,EACXZ,EAAoBa,EAAED,EAAYnB,KAASO,EAAoBa,EAAET,EAASX,IAC5EqB,OAAOC,eAAeX,EAASX,EAAK,CAAEuB,YAAY,EAAMC,IAAKL,EAAWnB,IAE1E,ECNDO,EAAoBxG,EAAI,WACvB,GAA0B,iBAAf0H,WAAyB,OAAOA,WAC3C,IACC,OAAOxH,MAAQ,IAAIyH,SAAS,cAAb,EAChB,CAAE,MAAOC,GACR,GAAsB,iBAAX3H,OAAqB,OAAOA,MACxC,CACA,CAPuB,GCAxBuG,EAAoBa,EAAI,CAACrB,EAAK6B,IAAUP,OAAOzC,UAAUqB,eAAenB,KAAKiB,EAAK6B,4CCK9EC,EAAY,KACZC,EAAS,KACTC,EAAgB/H,OAAO6C,aAAeP,SAASC,gBAAgByF,UACnE,MAAMC,EAAmB,GA2EzB,SAASC,IACP,MAAMC,EAAeC,aAAaC,QAAQ,UAAY,OAZxD,IAAkBC,EACH,WADGA,EAaItI,OAAOuI,WAAW,gCAAgCC,QAI/C,SAAjBL,EACO,QACgB,SAAhBA,EACA,OAEA,OAIU,SAAjBA,EACO,OACgB,QAAhBA,EACA,QAEA,SA9BoB,SAATG,GAA4B,SAATA,IACzCG,QAAQC,MAAM,2BAA2BJ,yBACzCA,EAAO,QAGThG,SAASS,KAAK4F,QAAQC,MAAQN,EAC9BF,aAAaS,QAAQ,QAASP,GAC9BG,QAAQK,IAAI,cAAcR,UA0B5B,CAkDA,SAASnC,KART,WAEE,MAAM4C,EAAUzG,SAAS0G,uBAAuB,gBAChDrE,MAAMsE,KAAKF,GAASlE,SAASqE,IAC3BA,EAAI9C,iBAAiB,QAAS8B,EAAe,GAEjD,CAGEiB,GA9CF,WAEE,IAAIC,EAA6B,EAC7BC,GAAU,EAEdrJ,OAAOoG,iBAAiB,UAAU,SAAUuB,GAC1CyB,EAA6BpJ,OAAOsJ,QAE/BD,IACHrJ,OAAOwF,uBAAsB,WAzDnC,IAAuB+D,IA0DDH,EA9GkC,GAAlDzG,KAAK6G,MAAM1B,EAAO7F,wBAAwBQ,KAC5CqF,EAAOjE,UAAUM,IAAI,YAErB2D,EAAOjE,UAAUC,OAAO,YAI5B,SAAmCyF,GAC7BA,EAAYtB,EACd3F,SAASC,gBAAgBsB,UAAUC,OAAO,oBAEtCyF,EAAYxB,EACdzF,SAASC,gBAAgBsB,UAAUM,IAAI,oBAC9BoF,EAAYxB,GACrBzF,SAASC,gBAAgBsB,UAAUC,OAAO,oBAG9CiE,EAAgBwB,CAClB,CAoCEE,CAA0BF,GAlC5B,SAA6BA,GACT,OAAd1B,IAKa,GAAb0B,EACF1B,EAAU6B,SAAS,EAAG,GAGtB/G,KAAKC,KAAK2G,IACV5G,KAAK6G,MAAMlH,SAASC,gBAAgBS,aAAehD,OAAOqC,aAE1DwF,EAAU6B,SAAS,EAAG7B,EAAU7E,cAGhBV,SAASqH,cAAc,mBAc3C,CAKEC,CAAoBL,GAwDdF,GAAU,CACZ,IAEAA,GAAU,EAEd,IACArJ,OAAO6J,QACT,CA6BEC,GA1BkB,OAAdjC,GAKJ,IAAI,IAAJ,CAAY,cAAe,CACzBrH,QAAQ,EACRuJ,WAAW,EACX5J,SAAU,iBACVI,OAAQ,KACN,IAAIyJ,EAAM9H,WAAW+H,iBAAiB3H,SAASC,iBAAiB2H,UAChE,OAAOpC,EAAO7F,wBAAwBkI,OAAS,GAAMH,EAAM,CAAC,GAiBlE,CAcA1H,SAAS8D,iBAAiB,oBAT1B,WACE9D,SAASS,KAAKW,WAAWG,UAAUC,OAAO,SAE1CgE,EAASxF,SAASqH,cAAc,UAChC9B,EAAYvF,SAASqH,cAAc,eAEnCxD,GACF","sources":["webpack:///./src/furo/assets/scripts/gumshoe-patched.js","webpack:///webpack/bootstrap","webpack:///webpack/runtime/compat get default export","webpack:///webpack/runtime/define property getters","webpack:///webpack/runtime/global","webpack:///webpack/runtime/hasOwnProperty shorthand","webpack:///./src/furo/assets/scripts/furo.js"],"sourcesContent":["/*!\n * gumshoejs v5.1.2 (patched by @pradyunsg)\n * A simple, framework-agnostic scrollspy script.\n * (c) 2019 Chris Ferdinandi\n * MIT License\n * http://github.com/cferdinandi/gumshoe\n */\n\n(function (root, factory) {\n if (typeof define === \"function\" && define.amd) {\n define([], function () {\n return factory(root);\n });\n } else if (typeof exports === \"object\") {\n module.exports = factory(root);\n } else {\n root.Gumshoe = factory(root);\n }\n})(\n typeof global !== \"undefined\"\n ? global\n : typeof window !== \"undefined\"\n ? 
window\n : this,\n function (window) {\n \"use strict\";\n\n //\n // Defaults\n //\n\n var defaults = {\n // Active classes\n navClass: \"active\",\n contentClass: \"active\",\n\n // Nested navigation\n nested: false,\n nestedClass: \"active\",\n\n // Offset & reflow\n offset: 0,\n reflow: false,\n\n // Event support\n events: true,\n };\n\n //\n // Methods\n //\n\n /**\n * Merge two or more objects together.\n * @param {Object} objects The objects to merge together\n * @returns {Object} Merged values of defaults and options\n */\n var extend = function () {\n var merged = {};\n Array.prototype.forEach.call(arguments, function (obj) {\n for (var key in obj) {\n if (!obj.hasOwnProperty(key)) return;\n merged[key] = obj[key];\n }\n });\n return merged;\n };\n\n /**\n * Emit a custom event\n * @param {String} type The event type\n * @param {Node} elem The element to attach the event to\n * @param {Object} detail Any details to pass along with the event\n */\n var emitEvent = function (type, elem, detail) {\n // Make sure events are enabled\n if (!detail.settings.events) return;\n\n // Create a new event\n var event = new CustomEvent(type, {\n bubbles: true,\n cancelable: true,\n detail: detail,\n });\n\n // Dispatch the event\n elem.dispatchEvent(event);\n };\n\n /**\n * Get an element's distance from the top of the Document.\n * @param {Node} elem The element\n * @return {Number} Distance from the top in pixels\n */\n var getOffsetTop = function (elem) {\n var location = 0;\n if (elem.offsetParent) {\n while (elem) {\n location += elem.offsetTop;\n elem = elem.offsetParent;\n }\n }\n return location >= 0 ? location : 0;\n };\n\n /**\n * Sort content from first to last in the DOM\n * @param {Array} contents The content areas\n */\n var sortContents = function (contents) {\n if (contents) {\n contents.sort(function (item1, item2) {\n var offset1 = getOffsetTop(item1.content);\n var offset2 = getOffsetTop(item2.content);\n if (offset1 < offset2) return -1;\n return 1;\n });\n }\n };\n\n /**\n * Get the offset to use for calculating position\n * @param {Object} settings The settings for this instantiation\n * @return {Float} The number of pixels to offset the calculations\n */\n var getOffset = function (settings) {\n // if the offset is a function run it\n if (typeof settings.offset === \"function\") {\n return parseFloat(settings.offset());\n }\n\n // Otherwise, return it as-is\n return parseFloat(settings.offset);\n };\n\n /**\n * Get the document element's height\n * @private\n * @returns {Number}\n */\n var getDocumentHeight = function () {\n return Math.max(\n document.body.scrollHeight,\n document.documentElement.scrollHeight,\n document.body.offsetHeight,\n document.documentElement.offsetHeight,\n document.body.clientHeight,\n document.documentElement.clientHeight,\n );\n };\n\n /**\n * Determine if an element is in view\n * @param {Node} elem The element\n * @param {Object} settings The settings for this instantiation\n * @param {Boolean} bottom If true, check if element is above bottom of viewport instead\n * @return {Boolean} Returns true if element is in the viewport\n */\n var isInView = function (elem, settings, bottom) {\n var bounds = elem.getBoundingClientRect();\n var offset = getOffset(settings);\n if (bottom) {\n return (\n parseInt(bounds.bottom, 10) <\n (window.innerHeight || document.documentElement.clientHeight)\n );\n }\n return parseInt(bounds.top, 10) <= offset;\n };\n\n /**\n * Check if at the bottom of the viewport\n * @return {Boolean} If true, page is at the bottom 
of the viewport\n */\n var isAtBottom = function () {\n if (\n Math.ceil(window.innerHeight + window.pageYOffset) >=\n getDocumentHeight()\n )\n return true;\n return false;\n };\n\n /**\n * Check if the last item should be used (even if not at the top of the page)\n * @param {Object} item The last item\n * @param {Object} settings The settings for this instantiation\n * @return {Boolean} If true, use the last item\n */\n var useLastItem = function (item, settings) {\n if (isAtBottom() && isInView(item.content, settings, true)) return true;\n return false;\n };\n\n /**\n * Get the active content\n * @param {Array} contents The content areas\n * @param {Object} settings The settings for this instantiation\n * @return {Object} The content area and matching navigation link\n */\n var getActive = function (contents, settings) {\n var last = contents[contents.length - 1];\n if (useLastItem(last, settings)) return last;\n for (var i = contents.length - 1; i >= 0; i--) {\n if (isInView(contents[i].content, settings)) return contents[i];\n }\n };\n\n /**\n * Deactivate parent navs in a nested navigation\n * @param {Node} nav The starting navigation element\n * @param {Object} settings The settings for this instantiation\n */\n var deactivateNested = function (nav, settings) {\n // If nesting isn't activated, bail\n if (!settings.nested || !nav.parentNode) return;\n\n // Get the parent navigation\n var li = nav.parentNode.closest(\"li\");\n if (!li) return;\n\n // Remove the active class\n li.classList.remove(settings.nestedClass);\n\n // Apply recursively to any parent navigation elements\n deactivateNested(li, settings);\n };\n\n /**\n * Deactivate a nav and content area\n * @param {Object} items The nav item and content to deactivate\n * @param {Object} settings The settings for this instantiation\n */\n var deactivate = function (items, settings) {\n // Make sure there are items to deactivate\n if (!items) return;\n\n // Get the parent list item\n var li = items.nav.closest(\"li\");\n if (!li) return;\n\n // Remove the active class from the nav and content\n li.classList.remove(settings.navClass);\n items.content.classList.remove(settings.contentClass);\n\n // Deactivate any parent navs in a nested navigation\n deactivateNested(li, settings);\n\n // Emit a custom event\n emitEvent(\"gumshoeDeactivate\", li, {\n link: items.nav,\n content: items.content,\n settings: settings,\n });\n };\n\n /**\n * Activate parent navs in a nested navigation\n * @param {Node} nav The starting navigation element\n * @param {Object} settings The settings for this instantiation\n */\n var activateNested = function (nav, settings) {\n // If nesting isn't activated, bail\n if (!settings.nested) return;\n\n // Get the parent navigation\n var li = nav.parentNode.closest(\"li\");\n if (!li) return;\n\n // Add the active class\n li.classList.add(settings.nestedClass);\n\n // Apply recursively to any parent navigation elements\n activateNested(li, settings);\n };\n\n /**\n * Activate a nav and content area\n * @param {Object} items The nav item and content to activate\n * @param {Object} settings The settings for this instantiation\n */\n var activate = function (items, settings) {\n // Make sure there are items to activate\n if (!items) return;\n\n // Get the parent list item\n var li = items.nav.closest(\"li\");\n if (!li) return;\n\n // Add the active class to the nav and content\n li.classList.add(settings.navClass);\n items.content.classList.add(settings.contentClass);\n\n // Activate any parent navs in a nested 
navigation\n activateNested(li, settings);\n\n // Emit a custom event\n emitEvent(\"gumshoeActivate\", li, {\n link: items.nav,\n content: items.content,\n settings: settings,\n });\n };\n\n /**\n * Create the Constructor object\n * @param {String} selector The selector to use for navigation items\n * @param {Object} options User options and settings\n */\n var Constructor = function (selector, options) {\n //\n // Variables\n //\n\n var publicAPIs = {};\n var navItems, contents, current, timeout, settings;\n\n //\n // Methods\n //\n\n /**\n * Set variables from DOM elements\n */\n publicAPIs.setup = function () {\n // Get all nav items\n navItems = document.querySelectorAll(selector);\n\n // Create contents array\n contents = [];\n\n // Loop through each item, get it's matching content, and push to the array\n Array.prototype.forEach.call(navItems, function (item) {\n // Get the content for the nav item\n var content = document.getElementById(\n decodeURIComponent(item.hash.substr(1)),\n );\n if (!content) return;\n\n // Push to the contents array\n contents.push({\n nav: item,\n content: content,\n });\n });\n\n // Sort contents by the order they appear in the DOM\n sortContents(contents);\n };\n\n /**\n * Detect which content is currently active\n */\n publicAPIs.detect = function () {\n // Get the active content\n var active = getActive(contents, settings);\n\n // if there's no active content, deactivate and bail\n if (!active) {\n if (current) {\n deactivate(current, settings);\n current = null;\n }\n return;\n }\n\n // If the active content is the one currently active, do nothing\n if (current && active.content === current.content) return;\n\n // Deactivate the current content and activate the new content\n deactivate(current, settings);\n activate(active, settings);\n\n // Update the currently active content\n current = active;\n };\n\n /**\n * Detect the active content on scroll\n * Debounced for performance\n */\n var scrollHandler = function (event) {\n // If there's a timer, cancel it\n if (timeout) {\n window.cancelAnimationFrame(timeout);\n }\n\n // Setup debounce callback\n timeout = window.requestAnimationFrame(publicAPIs.detect);\n };\n\n /**\n * Update content sorting on resize\n * Debounced for performance\n */\n var resizeHandler = function (event) {\n // If there's a timer, cancel it\n if (timeout) {\n window.cancelAnimationFrame(timeout);\n }\n\n // Setup debounce callback\n timeout = window.requestAnimationFrame(function () {\n sortContents(contents);\n publicAPIs.detect();\n });\n };\n\n /**\n * Destroy the current instantiation\n */\n publicAPIs.destroy = function () {\n // Undo DOM changes\n if (current) {\n deactivate(current, settings);\n }\n\n // Remove event listeners\n window.removeEventListener(\"scroll\", scrollHandler, false);\n if (settings.reflow) {\n window.removeEventListener(\"resize\", resizeHandler, false);\n }\n\n // Reset variables\n contents = null;\n navItems = null;\n current = null;\n timeout = null;\n settings = null;\n };\n\n /**\n * Initialize the current instantiation\n */\n var init = function () {\n // Merge user options into defaults\n settings = extend(defaults, options || {});\n\n // Setup variables based on the current DOM\n publicAPIs.setup();\n\n // Find the currently active content\n publicAPIs.detect();\n\n // Setup event listeners\n window.addEventListener(\"scroll\", scrollHandler, false);\n if (settings.reflow) {\n window.addEventListener(\"resize\", resizeHandler, false);\n }\n };\n\n //\n // Initialize and return the public 
APIs\n //\n\n init();\n return publicAPIs;\n };\n\n //\n // Return the Constructor\n //\n\n return Constructor;\n },\n);\n","// The module cache\nvar __webpack_module_cache__ = {};\n\n// The require function\nfunction __webpack_require__(moduleId) {\n\t// Check if module is in cache\n\tvar cachedModule = __webpack_module_cache__[moduleId];\n\tif (cachedModule !== undefined) {\n\t\treturn cachedModule.exports;\n\t}\n\t// Create a new module (and put it into the cache)\n\tvar module = __webpack_module_cache__[moduleId] = {\n\t\t// no module.id needed\n\t\t// no module.loaded needed\n\t\texports: {}\n\t};\n\n\t// Execute the module function\n\t__webpack_modules__[moduleId].call(module.exports, module, module.exports, __webpack_require__);\n\n\t// Return the exports of the module\n\treturn module.exports;\n}\n\n","// getDefaultExport function for compatibility with non-harmony modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};","// define getter functions for harmony exports\n__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n\t\t}\n\t}\n};","__webpack_require__.g = (function() {\n\tif (typeof globalThis === 'object') return globalThis;\n\ttry {\n\t\treturn this || new Function('return this')();\n\t} catch (e) {\n\t\tif (typeof window === 'object') return window;\n\t}\n})();","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","import Gumshoe from \"./gumshoe-patched.js\";\n\n////////////////////////////////////////////////////////////////////////////////\n// Scroll Handling\n////////////////////////////////////////////////////////////////////////////////\nvar tocScroll = null;\nvar header = null;\nvar lastScrollTop = window.pageYOffset || document.documentElement.scrollTop;\nconst GO_TO_TOP_OFFSET = 64;\n\nfunction scrollHandlerForHeader() {\n if (Math.floor(header.getBoundingClientRect().top) == 0) {\n header.classList.add(\"scrolled\");\n } else {\n header.classList.remove(\"scrolled\");\n }\n}\n\nfunction scrollHandlerForBackToTop(positionY) {\n if (positionY < GO_TO_TOP_OFFSET) {\n document.documentElement.classList.remove(\"show-back-to-top\");\n } else {\n if (positionY < lastScrollTop) {\n document.documentElement.classList.add(\"show-back-to-top\");\n } else if (positionY > lastScrollTop) {\n document.documentElement.classList.remove(\"show-back-to-top\");\n }\n }\n lastScrollTop = positionY;\n}\n\nfunction scrollHandlerForTOC(positionY) {\n if (tocScroll === null) {\n return;\n }\n\n // top of page.\n if (positionY == 0) {\n tocScroll.scrollTo(0, 0);\n } else if (\n // bottom of page.\n Math.ceil(positionY) >=\n Math.floor(document.documentElement.scrollHeight - window.innerHeight)\n ) {\n tocScroll.scrollTo(0, tocScroll.scrollHeight);\n } else {\n // somewhere in the middle.\n const current = document.querySelector(\".scroll-current\");\n if (current == null) {\n return;\n }\n\n // https://github.com/pypa/pip/issues/9159 This breaks scroll behaviours.\n // // scroll the currently \"active\" heading in toc, into view.\n // const rect = current.getBoundingClientRect();\n // if (0 > rect.top) {\n // current.scrollIntoView(true); // the argument is \"alignTop\"\n // } else if 
(rect.bottom > window.innerHeight) {\n // current.scrollIntoView(false);\n // }\n }\n}\n\nfunction scrollHandler(positionY) {\n scrollHandlerForHeader();\n scrollHandlerForBackToTop(positionY);\n scrollHandlerForTOC(positionY);\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// Theme Toggle\n////////////////////////////////////////////////////////////////////////////////\nfunction setTheme(mode) {\n if (mode !== \"light\" && mode !== \"dark\" && mode !== \"auto\") {\n console.error(`Got invalid theme mode: ${mode}. Resetting to auto.`);\n mode = \"auto\";\n }\n\n document.body.dataset.theme = mode;\n localStorage.setItem(\"theme\", mode);\n console.log(`Changed to ${mode} mode.`);\n}\n\nfunction cycleThemeOnce() {\n const currentTheme = localStorage.getItem(\"theme\") || \"auto\";\n const prefersDark = window.matchMedia(\"(prefers-color-scheme: dark)\").matches;\n\n if (prefersDark) {\n // Auto (dark) -> Light -> Dark\n if (currentTheme === \"auto\") {\n setTheme(\"light\");\n } else if (currentTheme == \"light\") {\n setTheme(\"dark\");\n } else {\n setTheme(\"auto\");\n }\n } else {\n // Auto (light) -> Dark -> Light\n if (currentTheme === \"auto\") {\n setTheme(\"dark\");\n } else if (currentTheme == \"dark\") {\n setTheme(\"light\");\n } else {\n setTheme(\"auto\");\n }\n }\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// Setup\n////////////////////////////////////////////////////////////////////////////////\nfunction setupScrollHandler() {\n // Taken from https://developer.mozilla.org/en-US/docs/Web/API/Document/scroll_event\n let last_known_scroll_position = 0;\n let ticking = false;\n\n window.addEventListener(\"scroll\", function (e) {\n last_known_scroll_position = window.scrollY;\n\n if (!ticking) {\n window.requestAnimationFrame(function () {\n scrollHandler(last_known_scroll_position);\n ticking = false;\n });\n\n ticking = true;\n }\n });\n window.scroll();\n}\n\nfunction setupScrollSpy() {\n if (tocScroll === null) {\n return;\n }\n\n // Scrollspy -- highlight table on contents, based on scroll\n new Gumshoe(\".toc-tree a\", {\n reflow: true,\n recursive: true,\n navClass: \"scroll-current\",\n offset: () => {\n let rem = parseFloat(getComputedStyle(document.documentElement).fontSize);\n return header.getBoundingClientRect().height + 0.5 * rem + 1;\n },\n });\n}\n\nfunction setupTheme() {\n // Attach event handlers for toggling themes\n const buttons = document.getElementsByClassName(\"theme-toggle\");\n Array.from(buttons).forEach((btn) => {\n btn.addEventListener(\"click\", cycleThemeOnce);\n });\n}\n\nfunction setup() {\n setupTheme();\n setupScrollHandler();\n setupScrollSpy();\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// Main entrypoint\n////////////////////////////////////////////////////////////////////////////////\nfunction main() {\n document.body.parentNode.classList.remove(\"no-js\");\n\n header = document.querySelector(\"header\");\n tocScroll = document.querySelector(\".toc-scroll\");\n\n setup();\n}\n\ndocument.addEventListener(\"DOMContentLoaded\", 
main);\n"],"names":["root","g","window","this","defaults","navClass","contentClass","nested","nestedClass","offset","reflow","events","emitEvent","type","elem","detail","settings","event","CustomEvent","bubbles","cancelable","dispatchEvent","getOffsetTop","location","offsetParent","offsetTop","sortContents","contents","sort","item1","item2","content","isInView","bottom","bounds","getBoundingClientRect","parseFloat","getOffset","parseInt","innerHeight","document","documentElement","clientHeight","top","isAtBottom","Math","ceil","pageYOffset","max","body","scrollHeight","offsetHeight","getActive","last","length","item","useLastItem","i","deactivateNested","nav","parentNode","li","closest","classList","remove","deactivate","items","link","activateNested","add","selector","options","navItems","current","timeout","publicAPIs","querySelectorAll","Array","prototype","forEach","call","getElementById","decodeURIComponent","hash","substr","push","active","activate","scrollHandler","cancelAnimationFrame","requestAnimationFrame","detect","resizeHandler","destroy","removeEventListener","merged","arguments","obj","key","hasOwnProperty","extend","setup","addEventListener","factory","__webpack_module_cache__","__webpack_require__","moduleId","cachedModule","undefined","exports","module","__webpack_modules__","n","getter","__esModule","d","a","definition","o","Object","defineProperty","enumerable","get","globalThis","Function","e","prop","tocScroll","header","lastScrollTop","scrollTop","GO_TO_TOP_OFFSET","cycleThemeOnce","currentTheme","localStorage","getItem","mode","matchMedia","matches","console","error","dataset","theme","setItem","log","buttons","getElementsByClassName","from","btn","setupTheme","last_known_scroll_position","ticking","scrollY","positionY","floor","scrollHandlerForBackToTop","scrollTo","querySelector","scrollHandlerForTOC","scroll","setupScrollHandler","recursive","rem","getComputedStyle","fontSize","height"],"sourceRoot":""} \ No newline at end of file diff --git a/_static/searchtools.js b/_static/searchtools.js new file mode 100644 index 00000000..7918c3fa --- /dev/null +++ b/_static/searchtools.js @@ -0,0 +1,574 @@ +/* + * searchtools.js + * ~~~~~~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for the full-text search. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. 
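+    // Hypothetical illustration of the `score` hook described above (not part
+    // of the shipped file): uncommenting something like
+    //
+    //   score: (result) => {
+    //     const [docname, title, anchor, descr, score, filename] = result;
+    //     return docname.startsWith("source/") ? score - 3 : score;
+    //   },
+    //
+    // would down-rank auto-generated API pages relative to the guides.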
+ objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename] = item; + + let listItem = document.createElement("li"); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." + ); + else + Search.status.innerText = _( + `Search finished, found ${resultCount} page(s) matching the search query.` + ); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. 
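+ * For example, with the default implementation below,
+ * splitQuery("hezar.models-BERT v2") returns ["hezar", "models", "BERT", "v2"],
+ * since the dot, dash, and space are all treated as separators.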
+ * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() }); + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent) return docContent.textContent; + console.warn( + "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Check your theme or template." + ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + Search.startPulse(); + + // index already loaded, the browser was quick!
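+ // (Sketch of the loading handshake, as an aside: the page pulls in
+ // searchindex.js via Search.loadIndex(), and that generated file is a
+ // single call of the form `Search.setIndex({docnames: [...], ...})`,
+ // which flushes any query deferred here.)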
+ if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + /** + * execute search (requires search index to be loaded) + */ + query: (query) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + // array of [docname, title, anchor, descr, score, filename] + let results = []; + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + let score = Math.round(100 * queryLower.length / title.length) + results.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id] of foundEntries) { + let score = Math.round(100 * queryLower.length / entry.length) + results.push([ + docNames[file], + titles[file], + id ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // lookup as object + objectTerms.forEach((term) => + results.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + results.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item))); + + // now sort the results by score (in opposite order of appearance, since the + // display function below uses pop() to retrieve items) and then + // alphabetically + results.sort((a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 
1 : -1; + }); + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + results = results.reverse(); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add support for partial matches + if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord) && 
!terms[word]) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord) && !titleTerms[word]) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (fileMap.has(file) && fileMap.get(file).indexOf(word) === -1) + fileMap.get(file).push(word); + else fileMap.set(file, [word]); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + // (search terms with length < 3 are discarded, hence the filtered count) + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // skip this file if it contains any of the excluded terms + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + continue; + + // select one (max) score for the file. + const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords) => { + const text = Search.htmlToText(htmlText); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/_static/skeleton.css b/_static/skeleton.css new file mode 100644 index 00000000..467c878c --- /dev/null +++ b/_static/skeleton.css @@ -0,0 +1,296 @@ +/* Some sane resets. */ +html { + height: 100%; +} + +body { + margin: 0; + min-height: 100%; +} + +/* All the flexbox magic!
*/ +body, +.sb-announcement, +.sb-content, +.sb-main, +.sb-container, +.sb-container__inner, +.sb-article-container, +.sb-footer-content, +.sb-header, +.sb-header-secondary, +.sb-footer { + display: flex; +} + +/* These order things vertically */ +body, +.sb-main, +.sb-article-container { + flex-direction: column; +} + +/* Put elements in the center */ +.sb-header, +.sb-header-secondary, +.sb-container, +.sb-content, +.sb-footer, +.sb-footer-content { + justify-content: center; +} +/* Put elements at the ends */ +.sb-article-container { + justify-content: space-between; +} + +/* These elements grow. */ +.sb-main, +.sb-content, +.sb-container, +article { + flex-grow: 1; +} + +/* Because padding making this wider is not fun */ +article { + box-sizing: border-box; +} + +/* The announcements element should never be wider than the page. */ +.sb-announcement { + max-width: 100%; +} + +.sb-sidebar-primary, +.sb-sidebar-secondary { + flex-shrink: 0; + width: 17rem; +} + +.sb-announcement__inner { + justify-content: center; + + box-sizing: border-box; + height: 3rem; + + overflow-x: auto; + white-space: nowrap; +} + +/* Sidebars, with checkbox-based toggle */ +.sb-sidebar-primary, +.sb-sidebar-secondary { + position: fixed; + height: 100%; + top: 0; +} + +.sb-sidebar-primary { + left: -17rem; + transition: left 250ms ease-in-out; +} +.sb-sidebar-secondary { + right: -17rem; + transition: right 250ms ease-in-out; +} + +.sb-sidebar-toggle { + display: none; +} +.sb-sidebar-overlay { + position: fixed; + top: 0; + width: 0; + height: 0; + + transition: width 0ms ease 250ms, height 0ms ease 250ms, opacity 250ms ease; + + opacity: 0; + background-color: rgba(0, 0, 0, 0.54); +} + +#sb-sidebar-toggle--primary:checked + ~ .sb-sidebar-overlay[for="sb-sidebar-toggle--primary"], +#sb-sidebar-toggle--secondary:checked + ~ .sb-sidebar-overlay[for="sb-sidebar-toggle--secondary"] { + width: 100%; + height: 100%; + opacity: 1; + transition: width 0ms ease, height 0ms ease, opacity 250ms ease; +} + +#sb-sidebar-toggle--primary:checked ~ .sb-container .sb-sidebar-primary { + left: 0; +} +#sb-sidebar-toggle--secondary:checked ~ .sb-container .sb-sidebar-secondary { + right: 0; +} + +/* Full-width mode */ +.drop-secondary-sidebar-for-full-width-content + .hide-when-secondary-sidebar-shown { + display: none !important; +} +.drop-secondary-sidebar-for-full-width-content .sb-sidebar-secondary { + display: none !important; +} + +/* Mobile views */ +.sb-page-width { + width: 100%; +} + +.sb-article-container, +.sb-footer-content__inner, +.drop-secondary-sidebar-for-full-width-content .sb-article, +.drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 100vw; +} + +.sb-article, +.match-content-width { + padding: 0 1rem; + box-sizing: border-box; +} + +@media (min-width: 32rem) { + .sb-article, + .match-content-width { + padding: 0 2rem; + } +} + +/* Tablet views */ +@media (min-width: 42rem) { + .sb-article-container { + width: auto; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 42rem; + } + .sb-article, + .match-content-width { + width: 42rem; + } +} +@media (min-width: 46rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 46rem; + } + .sb-article, + .match-content-width { + width: 46rem; + } +} +@media (min-width: 50rem) { + 
.sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 50rem; + } + .sb-article, + .match-content-width { + width: 50rem; + } +} + +/* Tablet views */ +@media (min-width: 59rem) { + .sb-sidebar-secondary { + position: static; + } + .hide-when-secondary-sidebar-shown { + display: none !important; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 59rem; + } + .sb-article, + .match-content-width { + width: 42rem; + } +} +@media (min-width: 63rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 63rem; + } + .sb-article, + .match-content-width { + width: 46rem; + } +} +@media (min-width: 67rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 67rem; + } + .sb-article, + .match-content-width { + width: 50rem; + } +} + +/* Desktop views */ +@media (min-width: 76rem) { + .sb-sidebar-primary { + position: static; + } + .hide-when-primary-sidebar-shown { + display: none !important; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 59rem; + } + .sb-article, + .match-content-width { + width: 42rem; + } +} + +/* Full desktop views */ +@media (min-width: 80rem) { + .sb-article, + .match-content-width { + width: 46rem; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 63rem; + } +} + +@media (min-width: 84rem) { + .sb-article, + .match-content-width { + width: 50rem; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 67rem; + } +} + +@media (min-width: 88rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 67rem; + } + .sb-page-width { + width: 88rem; + } +} diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js new file mode 100644 index 00000000..8a96c69a --- /dev/null +++ b/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. 
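+ * Usage sketch (illustrative, using the helpers defined below):
+ *   _highlightText(document.body, "model", "highlighted")
+ * wraps each case-insensitive occurrence of "model" outside of
+ * button/select/textarea elements in <span class="highlighted">.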
+ */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. + */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. + */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? 
divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/_static/styles/furo-extensions.css b/_static/styles/furo-extensions.css new file mode 100644 index 00000000..bc447f22 --- /dev/null +++ b/_static/styles/furo-extensions.css @@ -0,0 +1,2 @@ +#furo-sidebar-ad-placement{padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)}#furo-sidebar-ad-placement .ethical-sidebar{background:var(--color-background-secondary);border:none;box-shadow:none}#furo-sidebar-ad-placement .ethical-sidebar:hover{background:var(--color-background-hover)}#furo-sidebar-ad-placement .ethical-sidebar a{color:var(--color-foreground-primary)}#furo-sidebar-ad-placement .ethical-callout a{color:var(--color-foreground-secondary)!important}#furo-readthedocs-versions{background:transparent;display:block;position:static;width:100%}#furo-readthedocs-versions .rst-versions{background:#1a1c1e}#furo-readthedocs-versions .rst-current-version{background:var(--color-sidebar-item-background);cursor:unset}#furo-readthedocs-versions .rst-current-version:hover{background:var(--color-sidebar-item-background)}#furo-readthedocs-versions .rst-current-version .fa-book{color:var(--color-foreground-primary)}#furo-readthedocs-versions>.rst-other-versions{padding:0}#furo-readthedocs-versions>.rst-other-versions small{opacity:1}#furo-readthedocs-versions .injected .rst-versions{position:unset}#furo-readthedocs-versions:focus-within,#furo-readthedocs-versions:hover{box-shadow:0 0 0 1px var(--color-sidebar-background-border)}#furo-readthedocs-versions:focus-within .rst-current-version,#furo-readthedocs-versions:hover .rst-current-version{background:#1a1c1e;font-size:inherit;height:auto;line-height:inherit;padding:12px;text-align:right}#furo-readthedocs-versions:focus-within .rst-current-version .fa-book,#furo-readthedocs-versions:hover .rst-current-version .fa-book{color:#fff;float:left}#furo-readthedocs-versions:focus-within .fa-caret-down,#furo-readthedocs-versions:hover 
.fa-caret-down{display:none}#furo-readthedocs-versions:focus-within .injected,#furo-readthedocs-versions:focus-within .rst-current-version,#furo-readthedocs-versions:focus-within .rst-other-versions,#furo-readthedocs-versions:hover .injected,#furo-readthedocs-versions:hover .rst-current-version,#furo-readthedocs-versions:hover .rst-other-versions{display:block}#furo-readthedocs-versions:focus-within>.rst-current-version,#furo-readthedocs-versions:hover>.rst-current-version{display:none}.highlight:hover button.copybtn{color:var(--color-code-foreground)}.highlight button.copybtn{align-items:center;background-color:var(--color-code-background);border:none;color:var(--color-background-item);cursor:pointer;height:1.25em;opacity:1;right:.5rem;top:.625rem;transition:color .3s,opacity .3s;width:1.25em}.highlight button.copybtn:hover{background-color:var(--color-code-background);color:var(--color-brand-content)}.highlight button.copybtn:after{background-color:transparent;color:var(--color-code-foreground);display:none}.highlight button.copybtn.success{color:#22863a;transition:color 0ms}.highlight button.copybtn.success:after{display:block}.highlight button.copybtn svg{padding:0}body{--sd-color-primary:var(--color-brand-primary);--sd-color-primary-highlight:var(--color-brand-content);--sd-color-primary-text:var(--color-background-primary);--sd-color-shadow:rgba(0,0,0,.05);--sd-color-card-border:var(--color-card-border);--sd-color-card-border-hover:var(--color-brand-content);--sd-color-card-background:var(--color-card-background);--sd-color-card-text:var(--color-foreground-primary);--sd-color-card-header:var(--color-card-marginals-background);--sd-color-card-footer:var(--color-card-marginals-background);--sd-color-tabs-label-active:var(--color-brand-content);--sd-color-tabs-label-hover:var(--color-foreground-muted);--sd-color-tabs-label-inactive:var(--color-foreground-muted);--sd-color-tabs-underline-active:var(--color-brand-content);--sd-color-tabs-underline-hover:var(--color-foreground-border);--sd-color-tabs-underline-inactive:var(--color-background-border);--sd-color-tabs-overline:var(--color-background-border);--sd-color-tabs-underline:var(--color-background-border)}.sd-tab-content{box-shadow:0 -2px var(--sd-color-tabs-overline),0 1px var(--sd-color-tabs-underline)}.sd-card{box-shadow:0 .1rem .25rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)}.sd-shadow-sm{box-shadow:0 .1rem .25rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-shadow-md{box-shadow:0 .3rem .75rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-shadow-lg{box-shadow:0 .6rem 1.5rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-card-hover:hover{transform:none}.sd-cards-carousel{gap:.25rem;padding:.25rem}body{--tabs--label-text:var(--color-foreground-muted);--tabs--label-text--hover:var(--color-foreground-muted);--tabs--label-text--active:var(--color-brand-content);--tabs--label-text--active--hover:var(--color-brand-content);--tabs--label-background:transparent;--tabs--label-background--hover:transparent;--tabs--label-background--active:transparent;--tabs--label-background--active--hover:transparent;--tabs--padding-x:0.25em;--tabs--margin-x:1em;--tabs--border:var(--color-background-border);--tabs--label-border:transparent;--tabs--label-border--hover:var(--color-foreground-muted);--tabs--label-border--active:var(--color-brand-content);--tabs--label-border--active--hover:var(--color-brand-content)}[role=main] 
.container{max-width:none;padding-left:0;padding-right:0}.shadow.docutils{border:none;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1)!important}.sphinx-bs .card{background-color:var(--color-background-secondary);color:var(--color-foreground)} +/*# sourceMappingURL=furo-extensions.css.map*/ \ No newline at end of file diff --git a/_static/styles/furo-extensions.css.map b/_static/styles/furo-extensions.css.map new file mode 100644 index 00000000..9ba5637f --- /dev/null +++ b/_static/styles/furo-extensions.css.map @@ -0,0 +1 @@ +{"version":3,"file":"styles/furo-extensions.css","mappings":"AAGA,2BACE,oFACA,4CAKE,6CAHA,YACA,eAEA,CACA,kDACE,yCAEF,8CACE,sCAEJ,8CACE,kDAEJ,2BAGE,uBACA,cAHA,gBACA,UAEA,CAGA,yCACE,mBAEF,gDAEE,gDADA,YACA,CACA,sDACE,gDACF,yDACE,sCAEJ,+CACE,UACA,qDACE,UAGF,mDACE,eAEJ,yEAEE,4DAEA,mHASE,mBAPA,kBAEA,YADA,oBAGA,aADA,gBAIA,CAEA,qIAEE,WADA,UACA,CAEJ,uGACE,aAEF,iUAGE,cAEF,mHACE,aC1EJ,gCACE,mCAEF,0BAKE,mBAUA,8CACA,YAFA,mCAKA,eAZA,cALA,UASA,YADA,YAYA,iCAdA,YAcA,CAEA,gCAEE,8CADA,gCACA,CAEF,gCAGE,6BADA,mCADA,YAEA,CAEF,kCAEE,cADA,oBACA,CACA,wCACE,cAEJ,8BACE,UC5CN,KAEE,6CAA8C,CAC9C,uDAAwD,CACxD,uDAAwD,CAGxD,iCAAsC,CAGtC,+CAAgD,CAChD,uDAAwD,CACxD,uDAAwD,CACxD,oDAAqD,CACrD,6DAA8D,CAC9D,6DAA8D,CAG9D,uDAAwD,CACxD,yDAA0D,CAC1D,4DAA6D,CAC7D,2DAA4D,CAC5D,8DAA+D,CAC/D,iEAAkE,CAClE,uDAAwD,CACxD,wDAAyD,CAG3D,gBACE,qFAGF,SACE,6EAEF,cACE,uFAEF,cACE,uFAEF,cACE,uFAGF,qBACE,eAEF,mBACE,WACA,eChDF,KACE,gDAAiD,CACjD,uDAAwD,CACxD,qDAAsD,CACtD,4DAA6D,CAC7D,oCAAqC,CACrC,2CAA4C,CAC5C,4CAA6C,CAC7C,mDAAoD,CACpD,wBAAyB,CACzB,oBAAqB,CACrB,6CAA8C,CAC9C,gCAAiC,CACjC,yDAA0D,CAC1D,uDAAwD,CACxD,8DAA+D,CCbjE,uBACE,eACA,eACA,gBAGF,iBACE,YACA,+EAGF,iBACE,mDACA","sources":["webpack:///./src/furo/assets/styles/extensions/_readthedocs.sass","webpack:///./src/furo/assets/styles/extensions/_copybutton.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-design.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-inline-tabs.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-panels.sass"],"sourcesContent":["// This file contains the styles used for tweaking how ReadTheDoc's embedded\n// contents would show up inside the theme.\n\n#furo-sidebar-ad-placement\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n .ethical-sidebar\n // Remove the border and box-shadow.\n border: none\n box-shadow: none\n // Manage the background colors.\n background: var(--color-background-secondary)\n &:hover\n background: var(--color-background-hover)\n // Ensure the text is legible.\n a\n color: var(--color-foreground-primary)\n\n .ethical-callout a\n color: var(--color-foreground-secondary) !important\n\n#furo-readthedocs-versions\n position: static\n width: 100%\n background: transparent\n display: block\n\n // Make the background color fit with the theme's aesthetic.\n .rst-versions\n background: rgb(26, 28, 30)\n\n .rst-current-version\n cursor: unset\n background: var(--color-sidebar-item-background)\n &:hover\n background: var(--color-sidebar-item-background)\n .fa-book\n color: var(--color-foreground-primary)\n\n > .rst-other-versions\n padding: 0\n small\n opacity: 1\n\n .injected\n .rst-versions\n position: unset\n\n &:hover,\n &:focus-within\n box-shadow: 0 0 0 1px var(--color-sidebar-background-border)\n\n .rst-current-version\n // Undo the tweaks done in RTD's CSS\n font-size: inherit\n line-height: inherit\n height: auto\n text-align: right\n padding: 12px\n\n // Match the rest of the body\n background: #1a1c1e\n\n .fa-book\n float: left\n 
color: white\n\n .fa-caret-down\n display: none\n\n .rst-current-version,\n .rst-other-versions,\n .injected\n display: block\n\n > .rst-current-version\n display: none\n",".highlight\n &:hover button.copybtn\n color: var(--color-code-foreground)\n\n button.copybtn\n // Make it visible\n opacity: 1\n\n // Align things correctly\n align-items: center\n\n height: 1.25em\n width: 1.25em\n\n top: 0.625rem // $code-spacing-vertical\n right: 0.5rem\n\n // Make it look better\n color: var(--color-background-item)\n background-color: var(--color-code-background)\n border: none\n\n // Change to cursor to make it obvious that you can click on it\n cursor: pointer\n\n // Transition smoothly, for aesthetics\n transition: color 300ms, opacity 300ms\n\n &:hover\n color: var(--color-brand-content)\n background-color: var(--color-code-background)\n\n &::after\n display: none\n color: var(--color-code-foreground)\n background-color: transparent\n\n &.success\n transition: color 0ms\n color: #22863a\n &::after\n display: block\n\n svg\n padding: 0\n","body\n // Colors\n --sd-color-primary: var(--color-brand-primary)\n --sd-color-primary-highlight: var(--color-brand-content)\n --sd-color-primary-text: var(--color-background-primary)\n\n // Shadows\n --sd-color-shadow: rgba(0, 0, 0, 0.05)\n\n // Cards\n --sd-color-card-border: var(--color-card-border)\n --sd-color-card-border-hover: var(--color-brand-content)\n --sd-color-card-background: var(--color-card-background)\n --sd-color-card-text: var(--color-foreground-primary)\n --sd-color-card-header: var(--color-card-marginals-background)\n --sd-color-card-footer: var(--color-card-marginals-background)\n\n // Tabs\n --sd-color-tabs-label-active: var(--color-brand-content)\n --sd-color-tabs-label-hover: var(--color-foreground-muted)\n --sd-color-tabs-label-inactive: var(--color-foreground-muted)\n --sd-color-tabs-underline-active: var(--color-brand-content)\n --sd-color-tabs-underline-hover: var(--color-foreground-border)\n --sd-color-tabs-underline-inactive: var(--color-background-border)\n --sd-color-tabs-overline: var(--color-background-border)\n --sd-color-tabs-underline: var(--color-background-border)\n\n// Tabs\n.sd-tab-content\n box-shadow: 0 -2px var(--sd-color-tabs-overline), 0 1px var(--sd-color-tabs-underline)\n\n// Shadows\n.sd-card // Have a shadow by default\n box-shadow: 0 0.1rem 0.25rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1)\n\n.sd-shadow-sm\n box-shadow: 0 0.1rem 0.25rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n.sd-shadow-md\n box-shadow: 0 0.3rem 0.75rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n.sd-shadow-lg\n box-shadow: 0 0.6rem 1.5rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n// Cards\n.sd-card-hover:hover // Don't change scale on hover\n transform: none\n\n.sd-cards-carousel // Have a bit of gap in the carousel by default\n gap: 0.25rem\n padding: 0.25rem\n","// This file contains styles to tweak sphinx-inline-tabs to work well with Furo.\n\nbody\n --tabs--label-text: var(--color-foreground-muted)\n --tabs--label-text--hover: var(--color-foreground-muted)\n --tabs--label-text--active: var(--color-brand-content)\n --tabs--label-text--active--hover: var(--color-brand-content)\n --tabs--label-background: transparent\n --tabs--label-background--hover: transparent\n --tabs--label-background--active: transparent\n --tabs--label-background--active--hover: transparent\n --tabs--padding-x: 0.25em\n --tabs--margin-x: 1em\n --tabs--border: 
var(--color-background-border)\n --tabs--label-border: transparent\n --tabs--label-border--hover: var(--color-foreground-muted)\n --tabs--label-border--active: var(--color-brand-content)\n --tabs--label-border--active--hover: var(--color-brand-content)\n","// This file contains styles to tweak sphinx-panels to work well with Furo.\n\n// sphinx-panels includes Bootstrap 4, which uses .container which can conflict\n// with docutils' `.. container::` directive.\n[role=\"main\"] .container\n max-width: initial\n padding-left: initial\n padding-right: initial\n\n// Make the panels look nicer!\n.shadow.docutils\n border: none\n box-shadow: 0 0.2rem 0.5rem rgba(0, 0, 0, 0.05), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n// Make panel colors respond to dark mode\n.sphinx-bs .card\n background-color: var(--color-background-secondary)\n color: var(--color-foreground)\n"],"names":[],"sourceRoot":""} \ No newline at end of file diff --git a/_static/styles/furo.css b/_static/styles/furo.css new file mode 100644 index 00000000..3d29a218 --- /dev/null +++ b/_static/styles/furo.css @@ -0,0 +1,2 @@ +/*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */html{-webkit-text-size-adjust:100%;line-height:1.15}body{margin:0}main{display:block}h1{font-size:2em;margin:.67em 0}hr{box-sizing:content-box;height:0;overflow:visible}pre{font-family:monospace,monospace;font-size:1em}a{background-color:transparent}abbr[title]{border-bottom:none;text-decoration:underline;text-decoration:underline dotted}b,strong{font-weight:bolder}code,kbd,samp{font-family:monospace,monospace;font-size:1em}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}img{border-style:none}button,input,optgroup,select,textarea{font-family:inherit;font-size:100%;line-height:1.15;margin:0}button,input{overflow:visible}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{border-style:none;padding:0}[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring,button:-moz-focusring{outline:1px dotted ButtonText}fieldset{padding:.35em .75em .625em}legend{box-sizing:border-box;color:inherit;display:table;max-width:100%;padding:0;white-space:normal}progress{vertical-align:baseline}textarea{overflow:auto}[type=checkbox],[type=radio]{box-sizing:border-box;padding:0}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}details{display:block}summary{display:list-item}[hidden],template{display:none}@media print{.content-icon-container,.headerlink,.mobile-header,.related-pages{display:none!important}.highlight{border:.1pt solid var(--color-foreground-border)}a,blockquote,dl,ol,pre,table,ul{page-break-inside:avoid}caption,figure,h1,h2,h3,h4,h5,h6,img{page-break-after:avoid;page-break-inside:avoid}dl,ol,ul{page-break-before:avoid}}.visually-hidden{clip:rect(0,0,0,0)!important;border:0!important;height:1px!important;margin:-1px!important;overflow:hidden!important;padding:0!important;position:absolute!important;white-space:nowrap!important;width:1px!important}:-moz-focusring{outline:auto}body{--font-stack:-apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji;--font-stack--monospace:"SFMono-Regular",Menlo,Consolas,Monaco,Liberation Mono,Lucida Console,monospace;--font-size--normal:100%;--font-size--small:87.5%;--font-size--small--2:81.25%;--font-size--small--3:75%;--font-size--small--4:62.5%;--sidebar-caption-font-size:var(--font-size--small--2);--sidebar-item-font-size:var(--font-size--small);--sidebar-search-input-font-size:var(--font-size--small);--toc-font-size:var(--font-size--small--3);--toc-font-size--mobile:var(--font-size--normal);--toc-title-font-size:var(--font-size--small--4);--admonition-font-size:0.8125rem;--admonition-title-font-size:0.8125rem;--code-font-size:var(--font-size--small--2);--api-font-size:var(--font-size--small);--header-height:calc(var(--sidebar-item-line-height) + var(--sidebar-item-spacing-vertical)*4);--header-padding:0.5rem;--sidebar-tree-space-above:1.5rem;--sidebar-caption-space-above:1rem;--sidebar-item-line-height:1rem;--sidebar-item-spacing-vertical:0.5rem;--sidebar-item-spacing-horizontal:1rem;--sidebar-item-height:calc(var(--sidebar-item-line-height) + var(--sidebar-item-spacing-vertical)*2);--sidebar-expander-width:var(--sidebar-item-height);--sidebar-search-space-above:0.5rem;--sidebar-search-input-spacing-vertical:0.5rem;--sidebar-search-input-spacing-horizontal:0.5rem;--sidebar-search-input-height:1rem;--sidebar-search-icon-size:var(--sidebar-search-input-height);--toc-title-padding:0.25rem 0;--toc-spacing-vertical:1.5rem;--toc-spacing-horizontal:1.5rem;--toc-item-spacing-vertical:0.4rem;--toc-item-spacing-horizontal:1rem;--icon-search:url('data:image/svg+xml;charset=utf-8,');--icon-pencil:url('data:image/svg+xml;charset=utf-8,');--icon-abstract:url('data:image/svg+xml;charset=utf-8,');--icon-info:url('data:image/svg+xml;charset=utf-8,');--icon-flame:url('data:image/svg+xml;charset=utf-8,');--icon-question:url('data:image/svg+xml;charset=utf-8,');--icon-warning:url('data:image/svg+xml;charset=utf-8,');--icon-failure:url('data:image/svg+xml;charset=utf-8,');--icon-spark:url('data:image/svg+xml;charset=utf-8,');--color-admonition-title--caution:#ff9100;--color-admonition-title-background--caution:rgba(255,145,0,.2);--color-admonition-title--warning:#ff9100;--color-admonition-title-background--warning:rgba(255,145,0,.2);--color-admonition-title--danger:#ff5252;--color-admonition-title-background--danger:rgba(255,82,82,.2);--color-admonition-title--attention:#ff5252;--color-admonition-title-background--attention:rgba(255,82,82,.2);--color-admonition-title--error:#ff5252;--color-admonition-title-background--error:rgba(255,82,82,.2);--color-admonition-title--hint:#00c852;--color-admonition-title-background--hint:rgba(0,200,82,.2);--color-admonition-title--tip:#00c852;--color-admonition-title-background--tip:rgba(0,200,82,.2);--color-admonition-title--important:#00bfa5;--color-admonition-title-background--important:rgba(0,191,165,.2);--color-admonition-title--note:#00b0ff;--color-admonition-title-background--note:rgba(0,176,255,.2);--color-admonition-title--seealso:#448aff;--color-admonition-title-background--seealso:rgba(68,138,255,.2);--color-admonition-title--admonition-todo:grey;--color-admonition-title-background--admonition-todo:hsla(0,0%,50%,.2);--color-admonition-title:#651fff;--color-admonition-title-background:rgba(101,31,255,.2);--icon-admonition-default:var(--icon-abstract);--color-topic-title:#14b8a6;--color-topic-title-background:rgba(20,184,166,.2);--icon-topic-default:var(--icon-pencil);--color-problematic:#b30000;--color-foregrou
nd-primary:#000;--color-foreground-secondary:#5a5c63;--color-foreground-muted:#646776;--color-foreground-border:#878787;--color-background-primary:#fff;--color-background-secondary:#f8f9fb;--color-background-hover:#efeff4;--color-background-hover--transparent:#efeff400;--color-background-border:#eeebee;--color-background-item:#ccc;--color-announcement-background:#000000dd;--color-announcement-text:#eeebee;--color-brand-primary:#2962ff;--color-brand-content:#2a5adf;--color-api-background:var(--color-background-hover--transparent);--color-api-background-hover:var(--color-background-hover);--color-api-overall:var(--color-foreground-secondary);--color-api-name:var(--color-problematic);--color-api-pre-name:var(--color-problematic);--color-api-paren:var(--color-foreground-secondary);--color-api-keyword:var(--color-foreground-primary);--color-highlight-on-target:#ffc;--color-inline-code-background:var(--color-background-secondary);--color-highlighted-background:#def;--color-highlighted-text:var(--color-foreground-primary);--color-guilabel-background:#ddeeff80;--color-guilabel-border:#bedaf580;--color-guilabel-text:var(--color-foreground-primary);--color-admonition-background:transparent;--color-table-header-background:var(--color-background-secondary);--color-table-border:var(--color-background-border);--color-card-border:var(--color-background-secondary);--color-card-background:transparent;--color-card-marginals-background:var(--color-background-secondary);--color-header-background:var(--color-background-primary);--color-header-border:var(--color-background-border);--color-header-text:var(--color-foreground-primary);--color-sidebar-background:var(--color-background-secondary);--color-sidebar-background-border:var(--color-background-border);--color-sidebar-brand-text:var(--color-foreground-primary);--color-sidebar-caption-text:var(--color-foreground-muted);--color-sidebar-link-text:var(--color-foreground-secondary);--color-sidebar-link-text--top-level:var(--color-brand-primary);--color-sidebar-item-background:var(--color-sidebar-background);--color-sidebar-item-background--current:var( --color-sidebar-item-background );--color-sidebar-item-background--hover:linear-gradient(90deg,var(--color-background-hover--transparent) 0%,var(--color-background-hover) var(--sidebar-item-spacing-horizontal),var(--color-background-hover) 100%);--color-sidebar-item-expander-background:transparent;--color-sidebar-item-expander-background--hover:var( --color-background-hover );--color-sidebar-search-text:var(--color-foreground-primary);--color-sidebar-search-background:var(--color-background-secondary);--color-sidebar-search-background--focus:var(--color-background-primary);--color-sidebar-search-border:var(--color-background-border);--color-sidebar-search-icon:var(--color-foreground-muted);--color-toc-background:var(--color-background-primary);--color-toc-title-text:var(--color-foreground-muted);--color-toc-item-text:var(--color-foreground-secondary);--color-toc-item-text--hover:var(--color-foreground-primary);--color-toc-item-text--active:var(--color-brand-primary);--color-content-foreground:var(--color-foreground-primary);--color-content-background:transparent;--color-link:var(--color-brand-content);--color-link--hover:var(--color-brand-content);--color-link-underline:var(--color-background-border);--color-link-underline--hover:var(--color-foreground-border)}.only-light{display:block!important}html body .only-dark{display:none!important}@media not 
print{body[data-theme=dark]{--color-problematic:#ee5151;--color-foreground-primary:#ffffffcc;--color-foreground-secondary:#9ca0a5;--color-foreground-muted:#81868d;--color-foreground-border:#666;--color-background-primary:#131416;--color-background-secondary:#1a1c1e;--color-background-hover:#1e2124;--color-background-hover--transparent:#1e212400;--color-background-border:#303335;--color-background-item:#444;--color-announcement-background:#000000dd;--color-announcement-text:#eeebee;--color-brand-primary:#2b8cee;--color-brand-content:#368ce2;--color-highlighted-background:#083563;--color-guilabel-background:#08356380;--color-guilabel-border:#13395f80;--color-api-keyword:var(--color-foreground-secondary);--color-highlight-on-target:#330;--color-admonition-background:#18181a;--color-card-border:var(--color-background-secondary);--color-card-background:#18181a;--color-card-marginals-background:var(--color-background-hover)}html body[data-theme=dark] .only-light{display:none!important}body[data-theme=dark] .only-dark{display:block!important}@media(prefers-color-scheme:dark){body:not([data-theme=light]){--color-problematic:#ee5151;--color-foreground-primary:#ffffffcc;--color-foreground-secondary:#9ca0a5;--color-foreground-muted:#81868d;--color-foreground-border:#666;--color-background-primary:#131416;--color-background-secondary:#1a1c1e;--color-background-hover:#1e2124;--color-background-hover--transparent:#1e212400;--color-background-border:#303335;--color-background-item:#444;--color-announcement-background:#000000dd;--color-announcement-text:#eeebee;--color-brand-primary:#2b8cee;--color-brand-content:#368ce2;--color-highlighted-background:#083563;--color-guilabel-background:#08356380;--color-guilabel-border:#13395f80;--color-api-keyword:var(--color-foreground-secondary);--color-highlight-on-target:#330;--color-admonition-background:#18181a;--color-card-border:var(--color-background-secondary);--color-card-background:#18181a;--color-card-marginals-background:var(--color-background-hover)}html body:not([data-theme=light]) .only-light{display:none!important}body:not([data-theme=light]) .only-dark{display:block!important}}}body[data-theme=auto] .theme-toggle svg.theme-icon-when-auto,body[data-theme=dark] .theme-toggle svg.theme-icon-when-dark,body[data-theme=light] .theme-toggle svg.theme-icon-when-light{display:block}body{font-family:var(--font-stack)}code,kbd,pre,samp{font-family:var(--font-stack--monospace)}body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}article{line-height:1.5}h1,h2,h3,h4,h5,h6{border-radius:.5rem;font-weight:700;line-height:1.25;margin:.5rem -.5rem;padding-left:.5rem;padding-right:.5rem}h1+p,h2+p,h3+p,h4+p,h5+p,h6+p{margin-top:0}h1{font-size:2.5em;margin-bottom:1rem}h1,h2{margin-top:1.75rem}h2{font-size:2em}h3{font-size:1.5em}h4{font-size:1.25em}h5{font-size:1.125em}h6{font-size:1em}small{font-size:80%;opacity:75%}p{margin-bottom:.75rem;margin-top:.5rem}hr.docutils{background-color:var(--color-background-border);border:0;height:1px;margin:2rem 0;padding:0}.centered{text-align:center}a{color:var(--color-link);text-decoration:underline;text-decoration-color:var(--color-link-underline)}a:hover{color:var(--color-link--hover);text-decoration-color:var(--color-link-underline--hover)}a.muted-link{color:inherit}a.muted-link:hover{color:var(--color-link);text-decoration-color:var(--color-link-underline--hover)}html{overflow-x:hidden;overflow-y:scroll;scroll-behavior:smooth}.sidebar-scroll,.toc-scroll,article[role=main] 
*{scrollbar-color:var(--color-foreground-border) transparent;scrollbar-width:thin}.sidebar-scroll::-webkit-scrollbar,.toc-scroll::-webkit-scrollbar,article[role=main] ::-webkit-scrollbar{height:.25rem;width:.25rem}.sidebar-scroll::-webkit-scrollbar-thumb,.toc-scroll::-webkit-scrollbar-thumb,article[role=main] ::-webkit-scrollbar-thumb{background-color:var(--color-foreground-border);border-radius:.125rem}body,html{background:var(--color-background-primary);color:var(--color-foreground-primary);height:100%}article{background:var(--color-content-background);color:var(--color-content-foreground);overflow-wrap:break-word}.page{display:flex;min-height:100%}.mobile-header{background-color:var(--color-header-background);border-bottom:1px solid var(--color-header-border);color:var(--color-header-text);display:none;height:var(--header-height);width:100%;z-index:10}.mobile-header.scrolled{border-bottom:none;box-shadow:0 0 .2rem rgba(0,0,0,.1),0 .2rem .4rem rgba(0,0,0,.2)}.mobile-header .header-center a{color:var(--color-header-text);text-decoration:none}.main{display:flex;flex:1}.sidebar-drawer{background:var(--color-sidebar-background);border-right:1px solid var(--color-sidebar-background-border);box-sizing:border-box;display:flex;justify-content:flex-end;min-width:15em;width:calc(50% - 26em)}.sidebar-container,.toc-drawer{box-sizing:border-box;width:15em}.toc-drawer{background:var(--color-toc-background);padding-right:1rem}.sidebar-sticky,.toc-sticky{display:flex;flex-direction:column;height:min(100%,100vh);height:100vh;position:sticky;top:0}.sidebar-scroll,.toc-scroll{flex-grow:1;flex-shrink:1;overflow:auto;scroll-behavior:smooth}.content{display:flex;flex-direction:column;justify-content:space-between;padding:0 3em;width:46em}.icon{display:inline-block;height:1rem;width:1rem}.icon svg{height:100%;width:100%}.announcement{align-items:center;background-color:var(--color-announcement-background);color:var(--color-announcement-text);display:flex;height:var(--header-height);overflow-x:auto}.announcement+.page{min-height:calc(100% - var(--header-height))}.announcement-content{box-sizing:border-box;min-width:100%;padding:.5rem;text-align:center;white-space:nowrap}.announcement-content a{color:var(--color-announcement-text);text-decoration-color:var(--color-announcement-text)}.announcement-content a:hover{color:var(--color-announcement-text);text-decoration-color:var(--color-link--hover)}.no-js .theme-toggle-container{display:none}.theme-toggle-container{vertical-align:middle}.theme-toggle{background:transparent;border:none;cursor:pointer;padding:0}.theme-toggle svg{color:var(--color-foreground-primary);display:none;height:1rem;vertical-align:middle;width:1rem}.theme-toggle-header{float:left;padding:1rem .5rem}.nav-overlay-icon,.toc-overlay-icon{cursor:pointer;display:none}.nav-overlay-icon .icon,.toc-overlay-icon .icon{color:var(--color-foreground-secondary);height:1rem;width:1rem}.nav-overlay-icon,.toc-header-icon{align-items:center;justify-content:center}.toc-content-icon{height:1.5rem;width:1.5rem}.content-icon-container{display:flex;float:right;gap:.5rem;margin-bottom:1rem;margin-left:1rem;margin-top:1.5rem}.content-icon-container .edit-this-page svg{color:inherit;height:1rem;width:1rem}.sidebar-toggle{display:none;position:absolute}.sidebar-toggle[name=__toc]{left:20px}.sidebar-toggle:checked{left:40px}.overlay{background-color:rgba(0,0,0,.54);height:0;opacity:0;position:fixed;top:0;transition:width 0ms,height 0ms,opacity .25s 
ease-out;width:0}.sidebar-overlay{z-index:20}.toc-overlay{z-index:40}.sidebar-drawer{transition:left .25s ease-in-out;z-index:30}.toc-drawer{transition:right .25s ease-in-out;z-index:50}#__navigation:checked~.sidebar-overlay{height:100%;opacity:1;width:100%}#__navigation:checked~.page .sidebar-drawer{left:0;top:0}#__toc:checked~.toc-overlay{height:100%;opacity:1;width:100%}#__toc:checked~.page .toc-drawer{right:0;top:0}.back-to-top{background:var(--color-background-primary);border-radius:1rem;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 1px 0 hsla(220,9%,46%,.502);display:none;font-size:.8125rem;left:0;margin-left:50%;padding:.5rem .75rem .5rem .5rem;position:fixed;text-decoration:none;top:1rem;transform:translateX(-50%);z-index:10}.back-to-top svg{fill:currentColor;display:inline-block;height:1rem;width:1rem}.back-to-top span{margin-left:.25rem}.show-back-to-top .back-to-top{align-items:center;display:flex}@media(min-width:97em){html{font-size:110%}}@media(max-width:82em){.toc-content-icon{display:flex}.toc-drawer{border-left:1px solid var(--color-background-muted);height:100vh;position:fixed;right:-15em;top:0}.toc-tree{border-left:none;font-size:var(--toc-font-size--mobile)}.sidebar-drawer{width:calc(50% - 18.5em)}}@media(max-width:67em){.nav-overlay-icon{display:flex}.sidebar-drawer{height:100vh;left:-15em;position:fixed;top:0;width:15em}.toc-header-icon{display:flex}.theme-toggle-content,.toc-content-icon{display:none}.theme-toggle-header{display:block}.mobile-header{align-items:center;display:flex;justify-content:space-between;position:sticky;top:0}.mobile-header .header-left,.mobile-header .header-right{display:flex;height:var(--header-height);padding:0 var(--header-padding)}.mobile-header .header-left label,.mobile-header .header-right label{height:100%;-webkit-user-select:none;-moz-user-select:none;user-select:none;width:100%}.nav-overlay-icon .icon,.theme-toggle svg{height:1.25rem;width:1.25rem}:target{scroll-margin-top:var(--header-height)}.back-to-top{top:calc(var(--header-height) + .5rem)}.page{flex-direction:column;justify-content:center}.content{margin-left:auto;margin-right:auto}}@media(max-width:52em){.content{overflow-x:auto;width:100%}}@media(max-width:46em){.content{padding:0 1em}article aside.sidebar{float:none;margin:1rem 0;width:100%}}.admonition,.topic{background:var(--color-admonition-background);border-radius:.2rem;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1);font-size:var(--admonition-font-size);margin:1rem auto;overflow:hidden;padding:0 .5rem .5rem;page-break-inside:avoid}.admonition>:nth-child(2),.topic>:nth-child(2){margin-top:0}.admonition>:last-child,.topic>:last-child{margin-bottom:0}.admonition p.admonition-title,p.topic-title{font-size:var(--admonition-title-font-size);font-weight:500;line-height:1.3;margin:0 -.5rem .5rem;padding:.4rem .5rem .4rem 2rem;position:relative}.admonition 
p.admonition-title:before,p.topic-title:before{content:"";height:1rem;left:.5rem;position:absolute;width:1rem}p.admonition-title{background-color:var(--color-admonition-title-background)}p.admonition-title:before{background-color:var(--color-admonition-title);-webkit-mask-image:var(--icon-admonition-default);mask-image:var(--icon-admonition-default);-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat}p.topic-title{background-color:var(--color-topic-title-background)}p.topic-title:before{background-color:var(--color-topic-title);-webkit-mask-image:var(--icon-topic-default);mask-image:var(--icon-topic-default);-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat}.admonition{border-left:.2rem solid var(--color-admonition-title)}.admonition.caution{border-left-color:var(--color-admonition-title--caution)}.admonition.caution>.admonition-title{background-color:var(--color-admonition-title-background--caution)}.admonition.caution>.admonition-title:before{background-color:var(--color-admonition-title--caution);-webkit-mask-image:var(--icon-spark);mask-image:var(--icon-spark)}.admonition.warning{border-left-color:var(--color-admonition-title--warning)}.admonition.warning>.admonition-title{background-color:var(--color-admonition-title-background--warning)}.admonition.warning>.admonition-title:before{background-color:var(--color-admonition-title--warning);-webkit-mask-image:var(--icon-warning);mask-image:var(--icon-warning)}.admonition.danger{border-left-color:var(--color-admonition-title--danger)}.admonition.danger>.admonition-title{background-color:var(--color-admonition-title-background--danger)}.admonition.danger>.admonition-title:before{background-color:var(--color-admonition-title--danger);-webkit-mask-image:var(--icon-spark);mask-image:var(--icon-spark)}.admonition.attention{border-left-color:var(--color-admonition-title--attention)}.admonition.attention>.admonition-title{background-color:var(--color-admonition-title-background--attention)}.admonition.attention>.admonition-title:before{background-color:var(--color-admonition-title--attention);-webkit-mask-image:var(--icon-warning);mask-image:var(--icon-warning)}.admonition.error{border-left-color:var(--color-admonition-title--error)}.admonition.error>.admonition-title{background-color:var(--color-admonition-title-background--error)}.admonition.error>.admonition-title:before{background-color:var(--color-admonition-title--error);-webkit-mask-image:var(--icon-failure);mask-image:var(--icon-failure)}.admonition.hint{border-left-color:var(--color-admonition-title--hint)}.admonition.hint>.admonition-title{background-color:var(--color-admonition-title-background--hint)}.admonition.hint>.admonition-title:before{background-color:var(--color-admonition-title--hint);-webkit-mask-image:var(--icon-question);mask-image:var(--icon-question)}.admonition.tip{border-left-color:var(--color-admonition-title--tip)}.admonition.tip>.admonition-title{background-color:var(--color-admonition-title-background--tip)}.admonition.tip>.admonition-title:before{background-color:var(--color-admonition-title--tip);-webkit-mask-image:var(--icon-info);mask-image:var(--icon-info)}.admonition.important{border-left-color:var(--color-admonition-title--important)}.admonition.important>.admonition-title{background-color:var(--color-admonition-title-background--important)}.admonition.important>.admonition-title:before{background-color:var(--color-admonition-title--important);-webkit-mask-image:var(--icon-flame);mask-image:var(--icon-flame)}.admonition.note{border-left-color:var(--color-ad
monition-title--note)}.admonition.note>.admonition-title{background-color:var(--color-admonition-title-background--note)}.admonition.note>.admonition-title:before{background-color:var(--color-admonition-title--note);-webkit-mask-image:var(--icon-pencil);mask-image:var(--icon-pencil)}.admonition.seealso{border-left-color:var(--color-admonition-title--seealso)}.admonition.seealso>.admonition-title{background-color:var(--color-admonition-title-background--seealso)}.admonition.seealso>.admonition-title:before{background-color:var(--color-admonition-title--seealso);-webkit-mask-image:var(--icon-info);mask-image:var(--icon-info)}.admonition.admonition-todo{border-left-color:var(--color-admonition-title--admonition-todo)}.admonition.admonition-todo>.admonition-title{background-color:var(--color-admonition-title-background--admonition-todo)}.admonition.admonition-todo>.admonition-title:before{background-color:var(--color-admonition-title--admonition-todo);-webkit-mask-image:var(--icon-pencil);mask-image:var(--icon-pencil)}.admonition-todo>.admonition-title{text-transform:uppercase}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd{margin-left:2rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd>:first-child{margin-top:.125rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list,dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd>:last-child{margin-bottom:.75rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list>dt{font-size:var(--font-size--small);text-transform:uppercase}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list dd:empty{margin-bottom:.5rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list dd>ul{margin-left:-1.2rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list dd>ul>li>p:nth-child(2){margin-top:0}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list dd>ul>li>p+p:last-child:empty{margin-bottom:0;margin-top:0}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)>dt{color:var(--color-api-overall)}.sig:not(.sig-inline){background:var(--color-api-background);border-radius:.25rem;font-family:var(--font-stack--monospace);font-size:var(--api-font-size);font-weight:700;margin-left:-.25rem;margin-right:-.25rem;padding:.25rem .5rem .25rem 3em;text-indent:-2.5em;transition:background .1s ease-out}.sig:not(.sig-inline):hover{background:var(--color-api-background-hover)}.sig:not(.sig-inline) a.reference .viewcode-link{font-weight:400;width:3.5rem}em.property{font-style:normal}em.property:first-child{color:var(--color-api-keyword)}.sig-name{color:var(--color-api-name)}.sig-prename{color:var(--color-api-pre-name);font-weight:400}.sig-paren{color:var(--color-api-paren)}.sig-param{font-style:normal}.versionmodified{font-style:italic}div.deprecated p,div.versionadded p,div.versionchanged p{margin-bottom:.125rem;margin-top:.125rem}.viewcode-back,.viewcode-link{float:right;text-align:right}.line-block{margin-bottom:.75rem;margin-top:.5rem}.line-block .line-block{margin-bottom:0;margin-top:0;padding-left:1rem}.code-block-caption,article p.caption,table>caption{font-size:var(--font-size--small);text-align:center}.toctree-wrapper.compound 
.caption,.toctree-wrapper.compound :not(.caption)>.caption-text{font-size:var(--font-size--small);margin-bottom:0;text-align:initial;text-transform:uppercase}.toctree-wrapper.compound>ul{margin-bottom:0;margin-top:0}.sig-inline,code.literal{background:var(--color-inline-code-background);border-radius:.2em;font-size:var(--font-size--small--2);padding:.1em .2em}pre.literal-block .sig-inline,pre.literal-block code.literal{font-size:inherit;padding:0}p .sig-inline,p code.literal{border:1px solid var(--color-background-border)}.sig-inline{font-family:var(--font-stack--monospace)}div[class*=" highlight-"],div[class^=highlight-]{display:flex;margin:1em 0}div[class*=" highlight-"] .table-wrapper,div[class^=highlight-] .table-wrapper,pre{margin:0;padding:0}pre{overflow:auto}article[role=main] .highlight pre{line-height:1.5}.highlight pre,pre.literal-block{font-size:var(--code-font-size);padding:.625rem .875rem}pre.literal-block{background-color:var(--color-code-background);border-radius:.2rem;color:var(--color-code-foreground);margin-bottom:1rem;margin-top:1rem}.highlight{border-radius:.2rem;width:100%}.highlight .gp,.highlight span.linenos{pointer-events:none;-webkit-user-select:none;-moz-user-select:none;user-select:none}.highlight .hll{display:block;margin-left:-.875rem;margin-right:-.875rem;padding-left:.875rem;padding-right:.875rem}.code-block-caption{background-color:var(--color-code-background);border-bottom:1px solid;border-radius:.25rem;border-bottom-left-radius:0;border-bottom-right-radius:0;border-color:var(--color-background-border);color:var(--color-code-foreground);display:flex;font-weight:300;padding:.625rem .875rem}.code-block-caption+div[class]{margin-top:0}.code-block-caption+div[class] pre{border-top-left-radius:0;border-top-right-radius:0}.highlighttable{display:block;width:100%}.highlighttable tbody{display:block}.highlighttable tr{display:flex}.highlighttable td.linenos{background-color:var(--color-code-background);border-bottom-left-radius:.2rem;border-top-left-radius:.2rem;color:var(--color-code-foreground);padding:.625rem 0 .625rem .875rem}.highlighttable .linenodiv{box-shadow:-.0625rem 0 var(--color-foreground-border) inset;font-size:var(--code-font-size);padding-right:.875rem}.highlighttable td.code{display:block;flex:1;overflow:hidden;padding:0}.highlighttable td.code .highlight{border-bottom-left-radius:0;border-top-left-radius:0}.highlight span.linenos{box-shadow:-.0625rem 0 var(--color-foreground-border) inset;display:inline-block;margin-right:.875rem;padding-left:0;padding-right:.875rem}.footnote-reference{font-size:var(--font-size--small--4);vertical-align:super}dl.footnote.brackets{color:var(--color-foreground-secondary);display:grid;font-size:var(--font-size--small);grid-template-columns:max-content auto}dl.footnote.brackets dt{margin:0}dl.footnote.brackets dt>.fn-backref{margin-left:.25rem}dl.footnote.brackets dt:after{content:":"}dl.footnote.brackets dt .brackets:before{content:"["}dl.footnote.brackets dt .brackets:after{content:"]"}dl.footnote.brackets dd{margin:0;padding:0 1rem}aside.footnote{color:var(--color-foreground-secondary);font-size:var(--font-size--small)}aside.footnote>span,div.citation>span{float:left;font-weight:500;padding-right:.25rem}aside.footnote>p,div.citation>p{margin-left:2rem}img{box-sizing:border-box;height:auto;max-width:100%}article .figure,article figure{border-radius:.2rem;margin:0}article .figure :last-child,article figure :last-child{margin-bottom:0}article .align-left{clear:left;float:left;margin:0 1rem 1rem}article 
.align-right{clear:right;float:right;margin:0 1rem 1rem}article .align-center,article .align-default{display:block;margin-left:auto;margin-right:auto;text-align:center}article table.align-default{display:table;text-align:initial}.domainindex-jumpbox,.genindex-jumpbox{border-bottom:1px solid var(--color-background-border);border-top:1px solid var(--color-background-border);padding:.25rem}.domainindex-section h2,.genindex-section h2{margin-bottom:.5rem;margin-top:.75rem}.domainindex-section ul,.genindex-section ul{margin-bottom:0;margin-top:0}ol,ul{margin-bottom:1rem;margin-top:1rem;padding-left:1.2rem}ol li>p:first-child,ul li>p:first-child{margin-bottom:.25rem;margin-top:.25rem}ol li>p:last-child,ul li>p:last-child{margin-top:.25rem}ol li>ol,ol li>ul,ul li>ol,ul li>ul{margin-bottom:.5rem;margin-top:.5rem}ol.arabic{list-style:decimal}ol.loweralpha{list-style:lower-alpha}ol.upperalpha{list-style:upper-alpha}ol.lowerroman{list-style:lower-roman}ol.upperroman{list-style:upper-roman}.simple li>ol,.simple li>ul,.toctree-wrapper li>ol,.toctree-wrapper li>ul{margin-bottom:0;margin-top:0}.field-list dt,.option-list dt,dl.footnote dt,dl.glossary dt,dl.simple dt,dl:not([class]) dt{font-weight:500;margin-top:.25rem}.field-list dt+dt,.option-list dt+dt,dl.footnote dt+dt,dl.glossary dt+dt,dl.simple dt+dt,dl:not([class]) dt+dt{margin-top:0}.field-list dt .classifier:before,.option-list dt .classifier:before,dl.footnote dt .classifier:before,dl.glossary dt .classifier:before,dl.simple dt .classifier:before,dl:not([class]) dt .classifier:before{content:":";margin-left:.2rem;margin-right:.2rem}.field-list dd ul,.field-list dd>p:first-child,.option-list dd ul,.option-list dd>p:first-child,dl.footnote dd ul,dl.footnote dd>p:first-child,dl.glossary dd ul,dl.glossary dd>p:first-child,dl.simple dd ul,dl.simple dd>p:first-child,dl:not([class]) dd ul,dl:not([class]) dd>p:first-child{margin-top:.125rem}.field-list dd ul,.option-list dd ul,dl.footnote dd ul,dl.glossary dd ul,dl.simple dd ul,dl:not([class]) dd ul{margin-bottom:.125rem}.math-wrapper{overflow-x:auto;width:100%}div.math{position:relative;text-align:center}div.math .headerlink,div.math:focus .headerlink{display:none}div.math:hover .headerlink{display:inline-block}div.math span.eqno{position:absolute;right:.5rem;top:50%;transform:translateY(-50%);z-index:1}abbr[title]{cursor:help}.problematic{color:var(--color-problematic)}kbd:not(.compound){background-color:var(--color-background-secondary);border:1px solid var(--color-foreground-border);border-radius:.2rem;box-shadow:0 .0625rem 0 rgba(0,0,0,.2),inset 0 0 0 .125rem var(--color-background-primary);color:var(--color-foreground-primary);display:inline-block;font-size:var(--font-size--small--3);margin:0 .2rem;padding:0 .2rem;vertical-align:text-bottom}blockquote{background:var(--color-background-secondary);border-left:4px solid var(--color-background-border);margin-left:0;margin-right:0;padding:.5rem 1rem}blockquote .attribution{font-weight:600;text-align:right}blockquote.highlights,blockquote.pull-quote{font-size:1.25em}blockquote.epigraph,blockquote.pull-quote{border-left-width:0;border-radius:.5rem}blockquote.highlights{background:transparent;border-left-width:0}p .reference img{vertical-align:middle}p.rubric{font-size:1.125em;font-weight:700;line-height:1.25}dd p.rubric{font-size:var(--font-size--small);font-weight:inherit;line-height:inherit;text-transform:uppercase}article .sidebar{background-color:var(--color-background-secondary);border:1px solid 
var(--color-background-border);border-radius:.2rem;clear:right;float:right;margin-left:1rem;margin-right:0;width:30%}article .sidebar>*{padding-left:1rem;padding-right:1rem}article .sidebar>ol,article .sidebar>ul{padding-left:2.2rem}article .sidebar .sidebar-title{border-bottom:1px solid var(--color-background-border);font-weight:500;margin:0;padding:.5rem 1rem}.table-wrapper{margin-bottom:.5rem;margin-top:1rem;overflow-x:auto;padding:.2rem .2rem .75rem;width:100%}table.docutils{border-collapse:collapse;border-radius:.2rem;border-spacing:0;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1)}table.docutils th{background:var(--color-table-header-background)}table.docutils td,table.docutils th{border-bottom:1px solid var(--color-table-border);border-left:1px solid var(--color-table-border);border-right:1px solid var(--color-table-border);padding:0 .25rem}table.docutils td p,table.docutils th p{margin:.25rem}table.docutils td:first-child,table.docutils th:first-child{border-left:none}table.docutils td:last-child,table.docutils th:last-child{border-right:none}table.docutils td.text-left,table.docutils th.text-left{text-align:left}table.docutils td.text-right,table.docutils th.text-right{text-align:right}table.docutils td.text-center,table.docutils th.text-center{text-align:center}:target{scroll-margin-top:.5rem}@media(max-width:67em){:target{scroll-margin-top:calc(.5rem + var(--header-height))}section>span:target{scroll-margin-top:calc(.8rem + var(--header-height))}}.headerlink{font-weight:100;-webkit-user-select:none;-moz-user-select:none;user-select:none}.code-block-caption>.headerlink,dl dt>.headerlink,figcaption p>.headerlink,h1>.headerlink,h2>.headerlink,h3>.headerlink,h4>.headerlink,h5>.headerlink,h6>.headerlink,p.caption>.headerlink,table>caption>.headerlink{margin-left:.5rem;visibility:hidden}.code-block-caption:hover>.headerlink,dl dt:hover>.headerlink,figcaption p:hover>.headerlink,h1:hover>.headerlink,h2:hover>.headerlink,h3:hover>.headerlink,h4:hover>.headerlink,h5:hover>.headerlink,h6:hover>.headerlink,p.caption:hover>.headerlink,table>caption:hover>.headerlink{visibility:visible}.code-block-caption>.toc-backref,dl dt>.toc-backref,figcaption p>.toc-backref,h1>.toc-backref,h2>.toc-backref,h3>.toc-backref,h4>.toc-backref,h5>.toc-backref,h6>.toc-backref,p.caption>.toc-backref,table>caption>.toc-backref{color:inherit;text-decoration-line:none}figure:hover>figcaption>p>.headerlink,table:hover>caption>.headerlink{visibility:visible}:target>h1:first-of-type,:target>h2:first-of-type,:target>h3:first-of-type,:target>h4:first-of-type,:target>h5:first-of-type,:target>h6:first-of-type,span:target~h1:first-of-type,span:target~h2:first-of-type,span:target~h3:first-of-type,span:target~h4:first-of-type,span:target~h5:first-of-type,span:target~h6:first-of-type{background-color:var(--color-highlight-on-target)}:target>h1:first-of-type code.literal,:target>h2:first-of-type code.literal,:target>h3:first-of-type code.literal,:target>h4:first-of-type code.literal,:target>h5:first-of-type code.literal,:target>h6:first-of-type code.literal,span:target~h1:first-of-type code.literal,span:target~h2:first-of-type code.literal,span:target~h3:first-of-type code.literal,span:target~h4:first-of-type code.literal,span:target~h5:first-of-type code.literal,span:target~h6:first-of-type code.literal{background-color:transparent}.literal-block-wrapper:target .code-block-caption,.this-will-duplicate-information-and-it-is-still-useful-here li 
:target,figure:target,table:target>caption{background-color:var(--color-highlight-on-target)}dt:target{background-color:var(--color-highlight-on-target)!important}.footnote-reference:target,.footnote>dt:target+dd{background-color:var(--color-highlight-on-target)}.guilabel{background-color:var(--color-guilabel-background);border:1px solid var(--color-guilabel-border);border-radius:.5em;color:var(--color-guilabel-text);font-size:.9em;padding:0 .3em}footer{display:flex;flex-direction:column;font-size:var(--font-size--small);margin-top:2rem}.bottom-of-page{align-items:center;border-top:1px solid var(--color-background-border);color:var(--color-foreground-secondary);display:flex;justify-content:space-between;line-height:1.5;margin-top:1rem;padding-bottom:1rem;padding-top:1rem}@media(max-width:46em){.bottom-of-page{flex-direction:column-reverse;gap:.25rem;text-align:center}}.bottom-of-page .left-details{font-size:var(--font-size--small)}.bottom-of-page .right-details{display:flex;flex-direction:column;gap:.25rem;text-align:right}.bottom-of-page .icons{display:flex;font-size:1rem;gap:.25rem;justify-content:flex-end}.bottom-of-page .icons a{text-decoration:none}.bottom-of-page .icons img,.bottom-of-page .icons svg{font-size:1.125rem;height:1em;width:1em}.related-pages a{align-items:center;display:flex;text-decoration:none}.related-pages a:hover .page-info .title{color:var(--color-link);text-decoration:underline;text-decoration-color:var(--color-link-underline)}.related-pages a svg.furo-related-icon,.related-pages a svg.furo-related-icon>use{color:var(--color-foreground-border);flex-shrink:0;height:.75rem;margin:0 .5rem;width:.75rem}.related-pages a.next-page{clear:right;float:right;max-width:50%;text-align:right}.related-pages a.prev-page{clear:left;float:left;max-width:50%}.related-pages a.prev-page svg{transform:rotate(180deg)}.page-info{display:flex;flex-direction:column;overflow-wrap:anywhere}.next-page .page-info{align-items:flex-end}.page-info .context{align-items:center;color:var(--color-foreground-muted);display:flex;font-size:var(--font-size--small);padding-bottom:.1rem;text-decoration:none}ul.search{list-style:none;padding-left:0}ul.search li{border-bottom:1px solid var(--color-background-border);padding:1rem 0}[role=main] .highlighted{background-color:var(--color-highlighted-background);color:var(--color-highlighted-text)}.sidebar-brand{display:flex;flex-direction:column;flex-shrink:0;padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal);text-decoration:none}.sidebar-brand-text{color:var(--color-sidebar-brand-text);font-size:1.5rem;overflow-wrap:break-word}.sidebar-brand-text,.sidebar-logo-container{margin:var(--sidebar-item-spacing-vertical) 0}.sidebar-logo{display:block;margin:0 auto;max-width:100%}.sidebar-search-container{align-items:center;background:var(--color-sidebar-search-background);display:flex;margin-top:var(--sidebar-search-space-above);position:relative}.sidebar-search-container:focus-within,.sidebar-search-container:hover{background:var(--color-sidebar-search-background--focus)}.sidebar-search-container:before{background-color:var(--color-sidebar-search-icon);content:"";height:var(--sidebar-search-icon-size);left:var(--sidebar-item-spacing-horizontal);-webkit-mask-image:var(--icon-search);mask-image:var(--icon-search);position:absolute;width:var(--sidebar-search-icon-size)}.sidebar-search{background:transparent;border:none;border-bottom:1px solid var(--color-sidebar-search-border);border-top:1px solid 
var(--color-sidebar-search-border);box-sizing:border-box;color:var(--color-sidebar-search-foreground);padding:var(--sidebar-search-input-spacing-vertical) var(--sidebar-search-input-spacing-horizontal) var(--sidebar-search-input-spacing-vertical) calc(var(--sidebar-item-spacing-horizontal) + var(--sidebar-search-input-spacing-horizontal) + var(--sidebar-search-icon-size));width:100%;z-index:10}.sidebar-search:focus{outline:none}.sidebar-search::-moz-placeholder{font-size:var(--sidebar-search-input-font-size)}.sidebar-search::placeholder{font-size:var(--sidebar-search-input-font-size)}#searchbox .highlight-link{margin:0;padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal) 0;text-align:center}#searchbox .highlight-link a{color:var(--color-sidebar-search-icon);font-size:var(--font-size--small--2)}.sidebar-tree{font-size:var(--sidebar-item-font-size);margin-bottom:var(--sidebar-item-spacing-vertical);margin-top:var(--sidebar-tree-space-above)}.sidebar-tree ul{display:flex;flex-direction:column;list-style:none;margin-bottom:0;margin-top:0;padding:0}.sidebar-tree li{margin:0;position:relative}.sidebar-tree li>ul{margin-left:var(--sidebar-item-spacing-horizontal)}.sidebar-tree .icon,.sidebar-tree .reference{color:var(--color-sidebar-link-text)}.sidebar-tree .reference{box-sizing:border-box;display:inline-block;height:100%;line-height:var(--sidebar-item-line-height);overflow-wrap:anywhere;padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal);text-decoration:none;width:100%}.sidebar-tree .reference:hover{background:var(--color-sidebar-item-background--hover)}.sidebar-tree .reference.external:after{color:var(--color-sidebar-link-text);content:url("data:image/svg+xml;charset=utf-8,%3Csvg width='12' height='12' xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' stroke-width='1.5' stroke='%23607D8B' fill='none' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpath d='M0 0h24v24H0z' stroke='none'/%3E%3Cpath d='M11 7H6a2 2 0 0 0-2 2v9a2 2 0 0 0 2 2h9a2 2 0 0 0 2-2v-5M10 14 20 4M15 4h5v5'/%3E%3C/svg%3E");margin:0 .25rem;vertical-align:middle}.sidebar-tree .current-page>.reference{font-weight:700}.sidebar-tree label{align-items:center;cursor:pointer;display:flex;height:var(--sidebar-item-height);justify-content:center;position:absolute;right:0;top:0;-webkit-user-select:none;-moz-user-select:none;user-select:none;width:var(--sidebar-expander-width)}.sidebar-tree .caption,.sidebar-tree :not(.caption)>.caption-text{color:var(--color-sidebar-caption-text);font-size:var(--sidebar-caption-font-size);font-weight:700;margin:var(--sidebar-caption-space-above) 0 0 0;padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal);text-transform:uppercase}.sidebar-tree li.has-children>.reference{padding-right:var(--sidebar-expander-width)}.sidebar-tree .toctree-l1>.reference,.sidebar-tree .toctree-l1>label .icon{color:var(--color-sidebar-link-text--top-level)}.sidebar-tree label{background:var(--color-sidebar-item-expander-background)}.sidebar-tree label:hover{background:var(--color-sidebar-item-expander-background--hover)}.sidebar-tree .current>.reference{background:var(--color-sidebar-item-background--current)}.sidebar-tree .current>.reference:hover{background:var(--color-sidebar-item-background--hover)}.toctree-checkbox{display:none;position:absolute}.toctree-checkbox~ul{display:none}.toctree-checkbox~label .icon svg{transform:rotate(90deg)}.toctree-checkbox:checked~ul{display:block}.toctree-checkbox:checked~label .icon 
svg{transform:rotate(-90deg)}.toc-title-container{padding:var(--toc-title-padding);padding-top:var(--toc-spacing-vertical)}.toc-title{color:var(--color-toc-title-text);font-size:var(--toc-title-font-size);padding-left:var(--toc-spacing-horizontal);text-transform:uppercase}.no-toc{display:none}.toc-tree-container{padding-bottom:var(--toc-spacing-vertical)}.toc-tree{border-left:1px solid var(--color-background-border);font-size:var(--toc-font-size);line-height:1.3;padding-left:calc(var(--toc-spacing-horizontal) - var(--toc-item-spacing-horizontal))}.toc-tree>ul>li:first-child{padding-top:0}.toc-tree>ul>li:first-child>ul{padding-left:0}.toc-tree>ul>li:first-child>a{display:none}.toc-tree ul{list-style-type:none;margin-bottom:0;margin-top:0;padding-left:var(--toc-item-spacing-horizontal)}.toc-tree li{padding-top:var(--toc-item-spacing-vertical)}.toc-tree li.scroll-current>.reference{color:var(--color-toc-item-text--active);font-weight:700}.toc-tree .reference{color:var(--color-toc-item-text);overflow-wrap:anywhere;text-decoration:none}.toc-scroll{max-height:100vh;overflow-y:scroll}.contents:not(.this-will-duplicate-information-and-it-is-still-useful-here){background:rgba(255,0,0,.25);color:var(--color-problematic)}.contents:not(.this-will-duplicate-information-and-it-is-still-useful-here):before{content:"ERROR: Adding a table of contents in Furo-based documentation is unnecessary, and does not work well with existing styling.Add a 'this-will-duplicate-information-and-it-is-still-useful-here' class, if you want an escape hatch."}.text-align\:left>p{text-align:left}.text-align\:center>p{text-align:center}.text-align\:right>p{text-align:right} +/*# sourceMappingURL=furo.css.map*/ \ No newline at end of file diff --git a/_static/styles/furo.css.map b/_static/styles/furo.css.map new file mode 100644 index 00000000..d1dfb109 --- /dev/null +++ b/_static/styles/furo.css.map @@ -0,0 +1 @@ 
+{"version":3,"file":"styles/furo.css","mappings":"AAAA,2EAA2E,CAU3E,KAEE,6BAA8B,CAD9B,gBAEF,CASA,KACE,QACF,CAMA,KACE,aACF,CAOA,GACE,aAAc,CACd,cACF,CAUA,GACE,sBAAuB,CACvB,QAAS,CACT,gBACF,CAOA,IACE,+BAAiC,CACjC,aACF,CASA,EACE,4BACF,CAOA,YACE,kBAAmB,CACnB,yBAA0B,CAC1B,gCACF,CAMA,SAEE,kBACF,CAOA,cAGE,+BAAiC,CACjC,aACF,CAeA,QAEE,aAAc,CACd,aAAc,CACd,iBAAkB,CAClB,uBACF,CAEA,IACE,aACF,CAEA,IACE,SACF,CASA,IACE,iBACF,CAUA,sCAKE,mBAAoB,CACpB,cAAe,CACf,gBAAiB,CACjB,QACF,CAOA,aAEE,gBACF,CAOA,cAEE,mBACF,CAMA,gDAIE,yBACF,CAMA,wHAIE,iBAAkB,CAClB,SACF,CAMA,4GAIE,6BACF,CAMA,SACE,0BACF,CASA,OACE,qBAAsB,CACtB,aAAc,CACd,aAAc,CACd,cAAe,CACf,SAAU,CACV,kBACF,CAMA,SACE,uBACF,CAMA,SACE,aACF,CAOA,6BAEE,qBAAsB,CACtB,SACF,CAMA,kFAEE,WACF,CAOA,cACE,4BAA6B,CAC7B,mBACF,CAMA,yCACE,uBACF,CAOA,6BACE,yBAA0B,CAC1B,YACF,CASA,QACE,aACF,CAMA,QACE,iBACF,CAiBA,kBACE,YACF,CCvVA,aAcE,kEACE,uBAOF,WACE,iDAMF,gCACE,wBAEF,qCAEE,uBADA,uBACA,CAEF,SACE,wBAtBA,CCpBJ,iBAOE,6BAEA,mBANA,qBAEA,sBACA,0BAFA,oBAHA,4BAOA,6BANA,mBAOA,CAEF,gBACE,aCPF,KCGE,mHAEA,wGAGA,wBAAyB,CACzB,wBAAyB,CACzB,4BAA6B,CAC7B,yBAA0B,CAC1B,2BAA4B,CAG5B,sDAAuD,CACvD,gDAAiD,CACjD,wDAAyD,CAGzD,0CAA2C,CAC3C,gDAAiD,CACjD,gDAAiD,CAKjD,gCAAiC,CACjC,sCAAuC,CAGvC,2CAA4C,CAG5C,uCAAwC,CChCxC,+FAGA,uBAAwB,CAGxB,iCAAkC,CAClC,kCAAmC,CAEnC,+BAAgC,CAChC,sCAAuC,CACvC,sCAAuC,CACvC,qGAIA,mDAAoD,CAEpD,mCAAoC,CACpC,8CAA+C,CAC/C,gDAAiD,CACjD,kCAAmC,CACnC,6DAA8D,CAG9D,6BAA8B,CAC9B,6BAA8B,CAC9B,+BAAgC,CAChC,kCAAmC,CACnC,kCAAmC,CCPjC,ukBCYA,srCAZF,kaCVA,mLAOA,oTAWA,2UAaA,0CACA,gEACA,0CAGA,gEAUA,yCACA,+DAGA,4CACA,CACA,iEAGA,sGACA,uCACA,4DAGA,sCACA,2DAEA,4CACA,kEACA,oGACA,CAEA,0GACA,+CAGA,+MAOA,+EACA,wCAIA,4DACA,sEACA,kEACA,sEACA,gDAGA,+DACA,0CACA,gEACA,gGACA,CAGA,2DACA,qDAGA,0CACA,8CACA,oDACA,oDL7GF,iCAEA,iEAME,oCKyGA,yDAIA,sCACA,kCACA,sDAGA,0CACA,kEACA,oDAEA,sDAGA,oCACA,oEAIA,CAGA,yDAGA,qDACA,oDAGA,6DAIA,iEAGA,2DAEA,2DL9IE,4DAEA,gEAIF,gEKgGA,gFAIA,oNAOA,qDAEA,gFAIA,4DAIA,oEAMA,yEAIA,6DACA,0DAGA,uDAGA,qDAEA,wDLpII,6DAEA,yDACE,2DAMN,uCAIA,yCACE,8CAGF,sDMjDA,6DAKA,oCAIA,4CACA,kBAGF,sBAMA,2BAME,qCAGA,qCAEA,iCAEA,+BAEA,mCAEA,qCAIA,CACA,gCACA,gDAKA,kCAIA,6BAEA,0CAQA,kCAIF,8BAGE,8BACA,uCAGF,sCAKE,kCAEA,sDAGA,iCACE,CACA,2FAGA,gCACE,CACA,+DCzEJ,wCAEA,sBAEF,yDAEE,mCACA,wDAGA,2GAGA,wIACE,gDAMJ,kCAGE,6BACA,0CAGA,gEACA,8BACA,uCAKA,sCAIA,kCACA,sDACA,iCACA,sCAOA,sDAKE,gGAIE,+CAGN,sBAEE,yCAMA,0BAMA,yLAMA,aACA,MAEF,6BACE,2DAIF,wCAIE,kCAGA,SACA,kCAKA,mBAGA,CAJA,eACA,CAHF,gBAEE,CAWA,mBACA,mBACA,mDAGA,YACA,CACA,kBACA,CAEE,kBAKJ,OAPE,kBAQA,CADF,GACE,iCACA,wCAEA,wBACA,aACA,CAFA,WAEA,GACA,oBACA,CAFA,gBAEA,aACE,+CAIF,UAJE,kCAIF,WACA,iBACA,GAGA,uBACE,CAJF,yBAGA,CACE,iDACA,uCAEA,yDACE,cACA,wDAKN,yDAIE,uBAEF,kBACE,uBAEA,kDAIA,0DAGA,CAHA,oBAGA,0GAYA,aAEA,CAHA,YAGA,4HAKF,+CAGE,sBAEF,WAKE,0CAEA,CALA,qCAGA,CAJA,WAOA,SAIA,2CAJA,qCAIA,CACE,wBACA,OACA,YAEJ,gBACE,gBAIA,+CAKF,CAGE,kDAGA,CANF,8BAGE,CAGA,YAEA,CAdF,2BACE,CAHA,UAEF,CAYE,UAEA,CACA,0CACF,iEAOE,iCACA,8BAGA,wCAIA,wBAKE,0CAKF,CARE,6DAGA,CALF,qBAEE,CASA,YACA,yBAGA,CAEE,cAKN,CAPI,sBAOJ,gCAGE,qBAEA,WACA,aACA,sCAEA,mBACA,6BAGA,uEADA,qBACA,6BAIA,yBACA,qCAEE,UAEA,YACA,sBAEF,8BAGA,CAPE,aACA,WAMF,4BACE,sBACA,WAMJ,uBACE,cAYE,mBAXA,qDAKA,qCAGA,CAEA,YACA,CAHA,2BAEA,CACA,oCAEA,4CACA,uBAIA,oCAEJ,CAFI,cAIF,iBACE,CAHJ,kBAGI,yBAEA,oCAIA,qDAMF,mEAEA,CACE,8CAKA,gCAEA,qCAGA,oCAGE,sBACA,CAJF,WAEE,CAFF,eAEE,SAEA,mBACA,qCACE,aACA,CAFF,YADA,qBACA,WAEE,sBACA,kEAEN,2BAEE,iDAKA,uCAGF,CACE,0DAKA,kBACF,CAFE,sBAGA,mBACA,0BAEJ,yBAII,aADA,WACA,CAMF,UAFE,kBAEF,CAJF,gBACE,CAHE,iBAMF,6CC9ZF,yBACE,WACA,iBAEA,aAFA,iBAEA,6BAEA,kCACA,mBAKA,gCAGA,CARA,QAEA,CAGA,UALA,qBAEA,qDAGA,CALA,OAQA,4BACE,cAGF,2BACE,gCAEJ,C
AHE,UAGF,8CAGE,CAHF,UAGE,wCAGA,qBACA,CAFA,UAEA,6CAGA,yCAIA,sBAHA,UAGA,kCACE,OACA,CAFF,KAEE,cAQF,0CACE,CAFF,kBACA,CACE,wEACA,CARA,YACA,CAKF,mBAFF,OAII,eACA,CAJF,iCAJE,cAGJ,CANI,oBAEA,CAKF,SAIE,2BADA,UACA,kBAGF,sCACA,CAFF,WACE,WACA,qCACE,gCACA,2EACA,sDAKJ,aACE,mDAII,CAJJ,6CAII,kEACA,iBACE,iDACA,+CACE,aACA,WADA,+BACA,uEANN,YACE,mDAEE,mBADF,0CACE,CADF,qBACE,0DACA,YACE,4DACA,sEANN,YACE,8CACA,kBADA,UACA,2CACE,2EACA,cACE,kEACA,mEANN,yBACE,4DACA,sBACE,+EAEE,iEACA,qEANN,sCACE,CAGE,iBAHF,gBAGE,qBACE,CAJJ,uBACA,gDACE,wDACA,6DAHF,2CACA,CADA,gBACA,eACE,CAGE,sBANN,8BACE,CAII,iBAFF,4DACA,WACE,YADF,uCACE,6EACA,2BANN,8CACE,kDACA,0CACE,8BACA,yFACE,sBACA,sFALJ,mEACA,sBACE,kEACA,6EACE,uCACA,kEALJ,qGAEE,kEACA,6EACE,uCACA,kEALJ,8CACA,uDACE,sEACA,2EACE,sCACA,iEALJ,mGACA,qCACE,oDACA,0DACE,6GACA,gDAGR,yDCrEA,sEACE,CACA,6GACE,gEACF,iGAIF,wFACE,qDAGA,mGAEE,2CAEF,4FACE,gCACF,wGACE,8DAEE,6FAIA,iJAKN,6GACE,gDAKF,yDACA,qCAGA,6BACA,kBACA,qDAKA,oCAEA,+DAGA,2CAGE,oDAIA,oEAEE,qBAGJ,wDAEE,uCAEF,kEAGA,8CAEA,uDAKA,oCAEA,yDAEE,gEAKF,+CC5FA,0EAGE,CACA,qDCLJ,+DAIE,sCAIA,kEACE,yBACA,2FAMA,gBACA,yGCbF,mBAOA,2MAIA,4HAYA,0DACE,8GAYF,8HAQE,mBAEA,6HAOF,YAGA,mIAME,eACA,CAFF,YAEE,4FAMJ,8BAEE,uBAYA,sCAEE,CAJF,oBAEA,CARA,wCAEA,CAHA,8BACA,CAFA,eACA,CAGA,wCAEA,CAEA,mDAIE,kCACE,6BACA,4CAKJ,kDAIA,eACE,aAGF,8BACE,uDACA,sCACA,cAEA,+BACA,CAFA,eAEA,wCAEF,YACE,iBACA,mCACA,0DAGF,qBAEE,CAFF,kBAEE,+BAIA,yCAEE,qBADA,gBACA,yBAKF,eACA,CAFF,YACE,CACA,iBACA,qDAEA,mDCvIJ,2FAOE,iCACA,CAEA,eACA,CAHA,kBAEA,CAFA,wBAGA,8BACA,eACE,CAFF,YAEE,0BACA,8CAGA,oBACE,oCAGA,kBACE,8DAEA,iBAEN,UACE,8BAIJ,+CAEE,qDAEF,kDAIE,YAEF,CAFE,YAEF,CCjCE,mFAJA,QACA,UAIE,CADF,iBACE,mCAGA,iDACE,+BAGF,wBAEA,mBAKA,6CAEF,CAHE,mBACA,CAEF,kCAIE,CARA,kBACA,CAFF,eASE,YACA,mBAGF,CAJE,UAIF,wCCjCA,oBDmCE,wBCpCJ,uCACE,8BACA,4CACA,oBAGA,2CCAA,6CAGE,CAPF,uBAIA,CDGA,gDACE,6BCVJ,CAWM,2CAEF,CAJA,kCAEE,CDJF,aCLF,gBDKE,uBCMA,gCAGA,gDAGE,wBAGJ,0BAEA,iBACE,aACF,CADE,UACF,uBACE,aACF,oBACE,YACF,4BACE,6CAMA,CAYF,6DAZE,mCAGE,iCASJ,4BAGE,4DADA,+BACA,CAFA,qBAEA,yBACE,aAEF,wBAHA,SAGA,iHACE,2DAKF,CANA,yCACE,CADF,oCAMA,uSAIA,sGACE,oDChEJ,WAEF,yBACE,QACA,eAEA,gBAEE,uCAGA,CALF,iCAKE,uCAGA,0BACA,CACA,oBACA,iCClBJ,gBACE,KAGF,qBACE,YAGF,CAHE,cAGF,gCAEE,mBACA,iEAEA,oCACA,wCAEA,sBACA,WAEA,CAFA,YAEA,8EAEA,mCAFA,iBAEA,6BAIA,wEAKA,sDAIE,CARF,mDAIA,CAIE,cAEF,8CAIA,oBAFE,iBAEF,8CAGE,eAEF,CAFE,YAEF,OAEE,kBAGJ,CAJI,eACA,CAFF,mBAKF,yCCjDE,oBACA,CAFA,iBAEA,uCAKE,iBACA,qCAGA,mBCZJ,CDWI,gBCXJ,6BAEE,eACA,sBAGA,eAEA,sBACA,oDACA,iGAMA,gBAFE,YAEF,8FAME,iJClBF,YACA,gNAUE,6BAEF,oTAcI,kBACF,gHAIA,qBACE,eACF,qDACE,kBACF,6DACE,4BCxCJ,oBAEF,qCAEI,+CAGF,uBACE,uDAGJ,oBAkBE,mDAhBA,+CAaA,CAbA,oBAaA,0FAEE,CAFF,gGAbA,+BAaA,0BAGA,mQAIA,oNAEE,iBAGJ,CAHI,gBADA,gBAIJ,8CAYI,CAZJ,wCAYI,sVACE,iCAGA,uEAHA,QAGA,qXAKJ,iDAGF,CARM,+CACE,iDAIN,CALI,gBAQN,mHACE,gBAGF,2DACE,0EAOA,0EAKA,6EC/EA,iDACA,gCACA,oDAGA,qBACA,oDCFA,cACA,eAEA,yBAGF,sBAEE,iBACA,sNAWA,iBACE,kBACA,wRAgBA,kBAEA,iOAgBA,uCACE,uEAEA,kBAEF,qUAuBE,iDAIJ,CACA,geCxFF,4BAEE,CAQA,6JACA,iDAIA,sEAGA,mDAOF,iDAGE,4DAIA,8CACA,qDAEE,eAFF,cAEE,oBAEF,uBAFE,kCAGA,eACA,iBACA,mBAIA,mDACA,CAHA,uCAEA,CAJA,0CACA,CAIA,gBAJA,gBACA,oBADA,gBAIA,wBAEJ,gBAGE,6BACA,YAHA,iBAGA,gCACA,iEAEA,6CACA,sDACA,0BADA,wBACA,0BACA,oIAIA,mBAFA,YAEA,qBACA,0CAIE,uBAEF,CAHA,yBACE,CAEF,iDACE,mFAKJ,oCACE,CANE,aAKJ,CACE,qEAIA,YAFA,WAEA,CAHA,aACA,CAEA,gBACE,4BACA,sBADA,aACA,gCAMF,oCACA,yDACA,2CAEA,qBAGE,kBAEA,CACA,mCAIF,CARE,YACA,CAOF,iCAEE,CAPA,oBACA,CAQA,oBACE,uDAEJ,sDAGA,CAHA,cAGA,0BACE,oDAIA,oCACA,4BACA,sBAGA,cAEA,oFAGA,sBAEA,yDACE,CAIA,iBAJA,wBAIA,6CAJA,6CAOA,4BAGJ,CAHI,cAGJ,yCAGA,kBACE,CAIA,iDAEA,CATA,YAEF,CACE,4CAGA,kBAIA,wEAEA,wDAIF,kCAOE,i
DACA,CARF,WAIE,sCAGA,CANA,2CACA,CAMA,oEARF,iBACE,CACA,qCAMA,iBAuBE,uBAlBF,YAKA,2DALA,uDAKA,CALA,sBAiBA,4CACE,CALA,gRAIF,YACE,UAEN,uBACE,YACA,mCAOE,+CAGA,8BAGF,+CAGA,4BCjNA,SDiNA,qFCjNA,gDAGA,sCACA,qCACA,sDAIF,CAIE,kDAGA,CAPF,0CAOE,kBAEA,kDAEA,CAHA,eACA,CAFA,YACA,CADA,SAIA,mHAIE,CAGA,6CAFA,oCAeE,CAbF,yBACE,qBAEJ,CAGE,oBACA,CAEA,YAFA,2CACF,CACE,uBAEA,mFAEE,CALJ,oBACE,CAEA,UAEE,gCAGF,sDAEA,yCC7CJ,oCAGA,CD6CE,yXAQE,sCCrDJ,wCAGA,oCACE","sources":["webpack:///./node_modules/normalize.css/normalize.css","webpack:///./src/furo/assets/styles/base/_print.sass","webpack:///./src/furo/assets/styles/base/_screen-readers.sass","webpack:///./src/furo/assets/styles/base/_theme.sass","webpack:///./src/furo/assets/styles/variables/_fonts.scss","webpack:///./src/furo/assets/styles/variables/_spacing.scss","webpack:///./src/furo/assets/styles/variables/_icons.scss","webpack:///./src/furo/assets/styles/variables/_admonitions.scss","webpack:///./src/furo/assets/styles/variables/_colors.scss","webpack:///./src/furo/assets/styles/base/_typography.sass","webpack:///./src/furo/assets/styles/_scaffold.sass","webpack:///./src/furo/assets/styles/content/_admonitions.sass","webpack:///./src/furo/assets/styles/content/_api.sass","webpack:///./src/furo/assets/styles/content/_blocks.sass","webpack:///./src/furo/assets/styles/content/_captions.sass","webpack:///./src/furo/assets/styles/content/_code.sass","webpack:///./src/furo/assets/styles/content/_footnotes.sass","webpack:///./src/furo/assets/styles/content/_images.sass","webpack:///./src/furo/assets/styles/content/_indexes.sass","webpack:///./src/furo/assets/styles/content/_lists.sass","webpack:///./src/furo/assets/styles/content/_math.sass","webpack:///./src/furo/assets/styles/content/_misc.sass","webpack:///./src/furo/assets/styles/content/_rubrics.sass","webpack:///./src/furo/assets/styles/content/_sidebar.sass","webpack:///./src/furo/assets/styles/content/_tables.sass","webpack:///./src/furo/assets/styles/content/_target.sass","webpack:///./src/furo/assets/styles/content/_gui-labels.sass","webpack:///./src/furo/assets/styles/components/_footer.sass","webpack:///./src/furo/assets/styles/components/_sidebar.sass","webpack:///./src/furo/assets/styles/components/_table_of_contents.sass","webpack:///./src/furo/assets/styles/_shame.sass"],"sourcesContent":["/*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */\n\n/* Document\n ========================================================================== */\n\n/**\n * 1. Correct the line height in all browsers.\n * 2. Prevent adjustments of font size after orientation changes in iOS.\n */\n\nhtml {\n line-height: 1.15; /* 1 */\n -webkit-text-size-adjust: 100%; /* 2 */\n}\n\n/* Sections\n ========================================================================== */\n\n/**\n * Remove the margin in all browsers.\n */\n\nbody {\n margin: 0;\n}\n\n/**\n * Render the `main` element consistently in IE.\n */\n\nmain {\n display: block;\n}\n\n/**\n * Correct the font size and margin on `h1` elements within `section` and\n * `article` contexts in Chrome, Firefox, and Safari.\n */\n\nh1 {\n font-size: 2em;\n margin: 0.67em 0;\n}\n\n/* Grouping content\n ========================================================================== */\n\n/**\n * 1. Add the correct box sizing in Firefox.\n * 2. Show the overflow in Edge and IE.\n */\n\nhr {\n box-sizing: content-box; /* 1 */\n height: 0; /* 1 */\n overflow: visible; /* 2 */\n}\n\n/**\n * 1. Correct the inheritance and scaling of font size in all browsers.\n * 2. 
Correct the odd `em` font sizing in all browsers.\n */\n\npre {\n font-family: monospace, monospace; /* 1 */\n font-size: 1em; /* 2 */\n}\n\n/* Text-level semantics\n ========================================================================== */\n\n/**\n * Remove the gray background on active links in IE 10.\n */\n\na {\n background-color: transparent;\n}\n\n/**\n * 1. Remove the bottom border in Chrome 57-\n * 2. Add the correct text decoration in Chrome, Edge, IE, Opera, and Safari.\n */\n\nabbr[title] {\n border-bottom: none; /* 1 */\n text-decoration: underline; /* 2 */\n text-decoration: underline dotted; /* 2 */\n}\n\n/**\n * Add the correct font weight in Chrome, Edge, and Safari.\n */\n\nb,\nstrong {\n font-weight: bolder;\n}\n\n/**\n * 1. Correct the inheritance and scaling of font size in all browsers.\n * 2. Correct the odd `em` font sizing in all browsers.\n */\n\ncode,\nkbd,\nsamp {\n font-family: monospace, monospace; /* 1 */\n font-size: 1em; /* 2 */\n}\n\n/**\n * Add the correct font size in all browsers.\n */\n\nsmall {\n font-size: 80%;\n}\n\n/**\n * Prevent `sub` and `sup` elements from affecting the line height in\n * all browsers.\n */\n\nsub,\nsup {\n font-size: 75%;\n line-height: 0;\n position: relative;\n vertical-align: baseline;\n}\n\nsub {\n bottom: -0.25em;\n}\n\nsup {\n top: -0.5em;\n}\n\n/* Embedded content\n ========================================================================== */\n\n/**\n * Remove the border on images inside links in IE 10.\n */\n\nimg {\n border-style: none;\n}\n\n/* Forms\n ========================================================================== */\n\n/**\n * 1. Change the font styles in all browsers.\n * 2. Remove the margin in Firefox and Safari.\n */\n\nbutton,\ninput,\noptgroup,\nselect,\ntextarea {\n font-family: inherit; /* 1 */\n font-size: 100%; /* 1 */\n line-height: 1.15; /* 1 */\n margin: 0; /* 2 */\n}\n\n/**\n * Show the overflow in IE.\n * 1. Show the overflow in Edge.\n */\n\nbutton,\ninput { /* 1 */\n overflow: visible;\n}\n\n/**\n * Remove the inheritance of text transform in Edge, Firefox, and IE.\n * 1. Remove the inheritance of text transform in Firefox.\n */\n\nbutton,\nselect { /* 1 */\n text-transform: none;\n}\n\n/**\n * Correct the inability to style clickable types in iOS and Safari.\n */\n\nbutton,\n[type=\"button\"],\n[type=\"reset\"],\n[type=\"submit\"] {\n -webkit-appearance: button;\n}\n\n/**\n * Remove the inner border and padding in Firefox.\n */\n\nbutton::-moz-focus-inner,\n[type=\"button\"]::-moz-focus-inner,\n[type=\"reset\"]::-moz-focus-inner,\n[type=\"submit\"]::-moz-focus-inner {\n border-style: none;\n padding: 0;\n}\n\n/**\n * Restore the focus styles unset by the previous rule.\n */\n\nbutton:-moz-focusring,\n[type=\"button\"]:-moz-focusring,\n[type=\"reset\"]:-moz-focusring,\n[type=\"submit\"]:-moz-focusring {\n outline: 1px dotted ButtonText;\n}\n\n/**\n * Correct the padding in Firefox.\n */\n\nfieldset {\n padding: 0.35em 0.75em 0.625em;\n}\n\n/**\n * 1. Correct the text wrapping in Edge and IE.\n * 2. Correct the color inheritance from `fieldset` elements in IE.\n * 3. 
Remove the padding so developers are not caught out when they zero out\n * `fieldset` elements in all browsers.\n */\n\nlegend {\n box-sizing: border-box; /* 1 */\n color: inherit; /* 2 */\n display: table; /* 1 */\n max-width: 100%; /* 1 */\n padding: 0; /* 3 */\n white-space: normal; /* 1 */\n}\n\n/**\n * Add the correct vertical alignment in Chrome, Firefox, and Opera.\n */\n\nprogress {\n vertical-align: baseline;\n}\n\n/**\n * Remove the default vertical scrollbar in IE 10+.\n */\n\ntextarea {\n overflow: auto;\n}\n\n/**\n * 1. Add the correct box sizing in IE 10.\n * 2. Remove the padding in IE 10.\n */\n\n[type=\"checkbox\"],\n[type=\"radio\"] {\n box-sizing: border-box; /* 1 */\n padding: 0; /* 2 */\n}\n\n/**\n * Correct the cursor style of increment and decrement buttons in Chrome.\n */\n\n[type=\"number\"]::-webkit-inner-spin-button,\n[type=\"number\"]::-webkit-outer-spin-button {\n height: auto;\n}\n\n/**\n * 1. Correct the odd appearance in Chrome and Safari.\n * 2. Correct the outline style in Safari.\n */\n\n[type=\"search\"] {\n -webkit-appearance: textfield; /* 1 */\n outline-offset: -2px; /* 2 */\n}\n\n/**\n * Remove the inner padding in Chrome and Safari on macOS.\n */\n\n[type=\"search\"]::-webkit-search-decoration {\n -webkit-appearance: none;\n}\n\n/**\n * 1. Correct the inability to style clickable types in iOS and Safari.\n * 2. Change font properties to `inherit` in Safari.\n */\n\n::-webkit-file-upload-button {\n -webkit-appearance: button; /* 1 */\n font: inherit; /* 2 */\n}\n\n/* Interactive\n ========================================================================== */\n\n/*\n * Add the correct display in Edge, IE 10+, and Firefox.\n */\n\ndetails {\n display: block;\n}\n\n/*\n * Add the correct display in all browsers.\n */\n\nsummary {\n display: list-item;\n}\n\n/* Misc\n ========================================================================== */\n\n/**\n * Add the correct display in IE 10+.\n */\n\ntemplate {\n display: none;\n}\n\n/**\n * Add the correct display in IE 10.\n */\n\n[hidden] {\n display: none;\n}\n","// This file contains styles for managing print media.\n\n////////////////////////////////////////////////////////////////////////////////\n// Hide elements not relevant to print media.\n////////////////////////////////////////////////////////////////////////////////\n@media print\n // Hide icon container.\n .content-icon-container\n display: none !important\n\n // Hide showing header links if hovering over when printing.\n .headerlink\n display: none !important\n\n // Hide mobile header.\n .mobile-header\n display: none !important\n\n // Hide navigation links.\n .related-pages\n display: none !important\n\n////////////////////////////////////////////////////////////////////////////////\n// Tweaks related to decolorization.\n////////////////////////////////////////////////////////////////////////////////\n@media print\n // Apply a border around code which no longer have a color background.\n .highlight\n border: 0.1pt solid var(--color-foreground-border)\n\n////////////////////////////////////////////////////////////////////////////////\n// Avoid page break in some relevant cases.\n////////////////////////////////////////////////////////////////////////////////\n@media print\n ul, ol, dl, a, table, pre, blockquote\n page-break-inside: avoid\n\n h1, h2, h3, h4, h5, h6, img, figure, caption\n page-break-inside: avoid\n page-break-after: avoid\n\n ul, ol, dl\n page-break-before: avoid\n",".visually-hidden\n position: absolute !important\n width: 
1px !important\n height: 1px !important\n padding: 0 !important\n margin: -1px !important\n overflow: hidden !important\n clip: rect(0,0,0,0) !important\n white-space: nowrap !important\n border: 0 !important\n\n:-moz-focusring\n outline: auto\n","// This file serves as the \"skeleton\" of the theming logic.\n//\n// This contains the bulk of the logic for handling dark mode, color scheme\n// toggling and the handling of color-scheme-specific hiding of elements.\n\nbody\n @include fonts\n @include spacing\n @include icons\n @include admonitions\n @include default-admonition(#651fff, \"abstract\")\n @include default-topic(#14B8A6, \"pencil\")\n\n @include colors\n\n.only-light\n display: block !important\nhtml body .only-dark\n display: none !important\n\n// Ignore dark-mode hints if print media.\n@media not print\n // Enable dark-mode, if requested.\n body[data-theme=\"dark\"]\n @include colors-dark\n\n html & .only-light\n display: none !important\n .only-dark\n display: block !important\n\n // Enable dark mode, unless explicitly told to avoid.\n @media (prefers-color-scheme: dark)\n body:not([data-theme=\"light\"])\n @include colors-dark\n\n html & .only-light\n display: none !important\n .only-dark\n display: block !important\n\n//\n// Theme toggle presentation\n//\nbody[data-theme=\"auto\"]\n .theme-toggle svg.theme-icon-when-auto\n display: block\n\nbody[data-theme=\"dark\"]\n .theme-toggle svg.theme-icon-when-dark\n display: block\n\nbody[data-theme=\"light\"]\n .theme-toggle svg.theme-icon-when-light\n display: block\n","// Fonts used by this theme.\n//\n// There are basically two things here -- using the system font stack and\n// defining sizes for various elements in %ages. We could have also used `em`\n// but %age is easier to reason about for me.\n\n@mixin fonts {\n // These are adapted from https://systemfontstack.com/\n --font-stack: -apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial,\n sans-serif, Apple Color Emoji, Segoe UI Emoji;\n --font-stack--monospace: \"SFMono-Regular\", Menlo, Consolas, Monaco,\n Liberation Mono, Lucida Console, monospace;\n\n --font-size--normal: 100%;\n --font-size--small: 87.5%;\n --font-size--small--2: 81.25%;\n --font-size--small--3: 75%;\n --font-size--small--4: 62.5%;\n\n // Sidebar\n --sidebar-caption-font-size: var(--font-size--small--2);\n --sidebar-item-font-size: var(--font-size--small);\n --sidebar-search-input-font-size: var(--font-size--small);\n\n // Table of Contents\n --toc-font-size: var(--font-size--small--3);\n --toc-font-size--mobile: var(--font-size--normal);\n --toc-title-font-size: var(--font-size--small--4);\n\n // Admonitions\n //\n // These aren't defined in terms of %ages, since nesting these is permitted.\n --admonition-font-size: 0.8125rem;\n --admonition-title-font-size: 0.8125rem;\n\n // Code\n --code-font-size: var(--font-size--small--2);\n\n // API\n --api-font-size: var(--font-size--small);\n}\n","// Spacing for various elements on the page\n//\n// If the user wants to tweak things in a certain way, they are permitted to.\n// They also have to deal with the consequences though!\n\n@mixin spacing {\n // Header!\n --header-height: calc(\n var(--sidebar-item-line-height) + 4 * #{var(--sidebar-item-spacing-vertical)}\n );\n --header-padding: 0.5rem;\n\n // Sidebar\n --sidebar-tree-space-above: 1.5rem;\n --sidebar-caption-space-above: 1rem;\n\n --sidebar-item-line-height: 1rem;\n --sidebar-item-spacing-vertical: 0.5rem;\n --sidebar-item-spacing-horizontal: 1rem;\n --sidebar-item-height: calc(\n 
var(--sidebar-item-line-height) + 2 *#{var(--sidebar-item-spacing-vertical)}\n );\n\n --sidebar-expander-width: var(--sidebar-item-height); // be square\n\n --sidebar-search-space-above: 0.5rem;\n --sidebar-search-input-spacing-vertical: 0.5rem;\n --sidebar-search-input-spacing-horizontal: 0.5rem;\n --sidebar-search-input-height: 1rem;\n --sidebar-search-icon-size: var(--sidebar-search-input-height);\n\n // Table of Contents\n --toc-title-padding: 0.25rem 0;\n --toc-spacing-vertical: 1.5rem;\n --toc-spacing-horizontal: 1.5rem;\n --toc-item-spacing-vertical: 0.4rem;\n --toc-item-spacing-horizontal: 1rem;\n}\n","// Expose theme icons as CSS variables.\n\n$icons: (\n // Adapted from tabler-icons\n // url: https://tablericons.com/\n \"search\":\n url('data:image/svg+xml;charset=utf-8,'),\n // Factored out from mkdocs-material on 24-Aug-2020.\n // url: https://squidfunk.github.io/mkdocs-material/reference/admonitions/\n \"pencil\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"abstract\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"info\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"flame\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"question\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"warning\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"failure\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"spark\":\n url('data:image/svg+xml;charset=utf-8,')\n);\n\n@mixin icons {\n @each $name, $glyph in $icons {\n --icon-#{$name}: #{$glyph};\n }\n}\n","// Admonitions\n\n// Structure of these is:\n// admonition-class: color \"icon-name\";\n//\n// The colors are translated into CSS variables below. The icons are\n// used directly in the main declarations to set the `mask-image` in\n// the title.\n\n// prettier-ignore\n$admonitions: (\n // Each of these has an reST directives for it.\n \"caution\": #ff9100 \"spark\",\n \"warning\": #ff9100 \"warning\",\n \"danger\": #ff5252 \"spark\",\n \"attention\": #ff5252 \"warning\",\n \"error\": #ff5252 \"failure\",\n \"hint\": #00c852 \"question\",\n \"tip\": #00c852 \"info\",\n \"important\": #00bfa5 \"flame\",\n \"note\": #00b0ff \"pencil\",\n \"seealso\": #448aff \"info\",\n \"admonition-todo\": #808080 \"pencil\"\n);\n\n@mixin default-admonition($color, $icon-name) {\n --color-admonition-title: #{$color};\n --color-admonition-title-background: #{rgba($color, 0.2)};\n\n --icon-admonition-default: var(--icon-#{$icon-name});\n}\n\n@mixin default-topic($color, $icon-name) {\n --color-topic-title: #{$color};\n --color-topic-title-background: #{rgba($color, 0.2)};\n\n --icon-topic-default: var(--icon-#{$icon-name});\n}\n\n@mixin admonitions {\n @each $name, $values in $admonitions {\n --color-admonition-title--#{$name}: #{nth($values, 1)};\n --color-admonition-title-background--#{$name}: #{rgba(\n nth($values, 1),\n 0.2\n )};\n }\n}\n","// Colors used throughout this theme.\n//\n// The aim is to give the user more control. 
Thus, instead of hard-coding colors\n// in various parts of the stylesheet, the approach taken is to define all\n// colors as CSS variables and reusing them in all the places.\n//\n// `colors-dark` depends on `colors` being included at a lower specificity.\n\n@mixin colors {\n --color-problematic: #b30000;\n\n // Base Colors\n --color-foreground-primary: black; // for main text and headings\n --color-foreground-secondary: #5a5c63; // for secondary text\n --color-foreground-muted: #646776; // for muted text\n --color-foreground-border: #878787; // for content borders\n\n --color-background-primary: white; // for content\n --color-background-secondary: #f8f9fb; // for navigation + ToC\n --color-background-hover: #efeff4ff; // for navigation-item hover\n --color-background-hover--transparent: #efeff400;\n --color-background-border: #eeebee; // for UI borders\n --color-background-item: #ccc; // for \"background\" items (eg: copybutton)\n\n // Announcements\n --color-announcement-background: #000000dd;\n --color-announcement-text: #eeebee;\n\n // Brand colors\n --color-brand-primary: #2962ff;\n --color-brand-content: #2a5adf;\n\n // API documentation\n --color-api-background: var(--color-background-hover--transparent);\n --color-api-background-hover: var(--color-background-hover);\n --color-api-overall: var(--color-foreground-secondary);\n --color-api-name: var(--color-problematic);\n --color-api-pre-name: var(--color-problematic);\n --color-api-paren: var(--color-foreground-secondary);\n --color-api-keyword: var(--color-foreground-primary);\n --color-highlight-on-target: #ffffcc;\n\n // Inline code background\n --color-inline-code-background: var(--color-background-secondary);\n\n // Highlighted text (search)\n --color-highlighted-background: #ddeeff;\n --color-highlighted-text: var(--color-foreground-primary);\n\n // GUI Labels\n --color-guilabel-background: #ddeeff80;\n --color-guilabel-border: #bedaf580;\n --color-guilabel-text: var(--color-foreground-primary);\n\n // Admonitions!\n --color-admonition-background: transparent;\n\n //////////////////////////////////////////////////////////////////////////////\n // Everything below this should be one of:\n // - var(...)\n // - *-gradient(...)\n // - special literal values (eg: transparent, none)\n //////////////////////////////////////////////////////////////////////////////\n\n // Tables\n --color-table-header-background: var(--color-background-secondary);\n --color-table-border: var(--color-background-border);\n\n // Cards\n --color-card-border: var(--color-background-secondary);\n --color-card-background: transparent;\n --color-card-marginals-background: var(--color-background-secondary);\n\n // Header\n --color-header-background: var(--color-background-primary);\n --color-header-border: var(--color-background-border);\n --color-header-text: var(--color-foreground-primary);\n\n // Sidebar (left)\n --color-sidebar-background: var(--color-background-secondary);\n --color-sidebar-background-border: var(--color-background-border);\n\n --color-sidebar-brand-text: var(--color-foreground-primary);\n --color-sidebar-caption-text: var(--color-foreground-muted);\n --color-sidebar-link-text: var(--color-foreground-secondary);\n --color-sidebar-link-text--top-level: var(--color-brand-primary);\n\n --color-sidebar-item-background: var(--color-sidebar-background);\n --color-sidebar-item-background--current: var(\n --color-sidebar-item-background\n );\n --color-sidebar-item-background--hover: linear-gradient(\n 90deg,\n 
var(--color-background-hover--transparent) 0%,\n var(--color-background-hover) var(--sidebar-item-spacing-horizontal),\n var(--color-background-hover) 100%\n );\n\n --color-sidebar-item-expander-background: transparent;\n --color-sidebar-item-expander-background--hover: var(\n --color-background-hover\n );\n\n --color-sidebar-search-text: var(--color-foreground-primary);\n --color-sidebar-search-background: var(--color-background-secondary);\n --color-sidebar-search-background--focus: var(--color-background-primary);\n --color-sidebar-search-border: var(--color-background-border);\n --color-sidebar-search-icon: var(--color-foreground-muted);\n\n // Table of Contents (right)\n --color-toc-background: var(--color-background-primary);\n --color-toc-title-text: var(--color-foreground-muted);\n --color-toc-item-text: var(--color-foreground-secondary);\n --color-toc-item-text--hover: var(--color-foreground-primary);\n --color-toc-item-text--active: var(--color-brand-primary);\n\n // Actual page contents\n --color-content-foreground: var(--color-foreground-primary);\n --color-content-background: transparent;\n\n // Links\n --color-link: var(--color-brand-content);\n --color-link--hover: var(--color-brand-content);\n --color-link-underline: var(--color-background-border);\n --color-link-underline--hover: var(--color-foreground-border);\n}\n\n@mixin colors-dark {\n --color-problematic: #ee5151;\n\n // Base Colors\n --color-foreground-primary: #ffffffcc; // for main text and headings\n --color-foreground-secondary: #9ca0a5; // for secondary text\n --color-foreground-muted: #81868d; // for muted text\n --color-foreground-border: #666666; // for content borders\n\n --color-background-primary: #131416; // for content\n --color-background-secondary: #1a1c1e; // for navigation + ToC\n --color-background-hover: #1e2124ff; // for navigation-item hover\n --color-background-hover--transparent: #1e212400;\n --color-background-border: #303335; // for UI borders\n --color-background-item: #444; // for \"background\" items (eg: copybutton)\n\n // Announcements\n --color-announcement-background: #000000dd;\n --color-announcement-text: #eeebee;\n\n // Brand colors\n --color-brand-primary: #2b8cee;\n --color-brand-content: #368ce2;\n\n // Highlighted text (search)\n --color-highlighted-background: #083563;\n\n // GUI Labels\n --color-guilabel-background: #08356380;\n --color-guilabel-border: #13395f80;\n\n // API documentation\n --color-api-keyword: var(--color-foreground-secondary);\n --color-highlight-on-target: #333300;\n\n // Admonitions\n --color-admonition-background: #18181a;\n\n // Cards\n --color-card-border: var(--color-background-secondary);\n --color-card-background: #18181a;\n --color-card-marginals-background: var(--color-background-hover);\n}\n","// This file contains the styling for the content throughout the page,\n// including fonts, paragraphs, headings and spacing among these elements.\n\nbody\n font-family: var(--font-stack)\npre,\ncode,\nkbd,\nsamp\n font-family: var(--font-stack--monospace)\n\n// Make fonts look slightly nicer.\nbody\n -webkit-font-smoothing: antialiased\n -moz-osx-font-smoothing: grayscale\n\n// Line height from Bootstrap 4.1\narticle\n line-height: 1.5\n\n//\n// Headings\n//\nh1,\nh2,\nh3,\nh4,\nh5,\nh6\n line-height: 1.25\n font-weight: bold\n\n border-radius: 0.5rem\n margin-top: 0.5rem\n margin-bottom: 0.5rem\n margin-left: -0.5rem\n margin-right: -0.5rem\n padding-left: 0.5rem\n padding-right: 0.5rem\n\n + p\n margin-top: 0\n\nh1\n font-size: 2.5em\n margin-top: 
1.75rem\n margin-bottom: 1rem\nh2\n font-size: 2em\n margin-top: 1.75rem\nh3\n font-size: 1.5em\nh4\n font-size: 1.25em\nh5\n font-size: 1.125em\nh6\n font-size: 1em\n\nsmall\n opacity: 75%\n font-size: 80%\n\n// Paragraph\np\n margin-top: 0.5rem\n margin-bottom: 0.75rem\n\n// Horizontal rules\nhr.docutils\n height: 1px\n padding: 0\n margin: 2rem 0\n background-color: var(--color-background-border)\n border: 0\n\n.centered\n text-align: center\n\n// Links\na\n text-decoration: underline\n\n color: var(--color-link)\n text-decoration-color: var(--color-link-underline)\n\n &:hover\n color: var(--color-link--hover)\n text-decoration-color: var(--color-link-underline--hover)\n &.muted-link\n color: inherit\n &:hover\n color: var(--color-link)\n text-decoration-color: var(--color-link-underline--hover)\n","// This file contains the styles for the overall layout of the documentation\n// skeleton, including the responsive changes as well as sidebar toggles.\n//\n// This is implemented as a mobile-last design, which isn't ideal, but it is\n// reasonably good enough, and by the time I'd finished I was too tired to\n// move the rules around and fix this. It shouldn't take more than 3-4 hours,\n// if you know what you're doing, though.\n\n// HACK: Not all browsers account for the scrollbar width in media queries.\n// This results in horizontal scrollbars in the breakpoint where we go\n// from displaying everything to hiding the ToC. We accommodate this by\n// adding a bit of padding to the TOC drawer, disabling the horizontal\n// scrollbar and allowing the scrollbars to cover the padding.\n// https://www.456bereastreet.com/archive/201301/media_query_width_and_vertical_scrollbars/\n\n// HACK: Always having the scrollbar visible prevents certain browsers from\n// causing the content to stutter horizontally between taller-than-viewport and\n// not-taller-than-viewport pages.\n\nhtml\n overflow-x: hidden\n overflow-y: scroll\n scroll-behavior: smooth\n\n.sidebar-scroll, .toc-scroll, article[role=main] *\n // Override Firefox scrollbar style\n scrollbar-width: thin\n scrollbar-color: var(--color-foreground-border) transparent\n\n // Override Chrome scrollbar styles\n &::-webkit-scrollbar\n width: 0.25rem\n height: 0.25rem\n &::-webkit-scrollbar-thumb\n background-color: var(--color-foreground-border)\n border-radius: 0.125rem\n\n//\n// Overalls\n//\nhtml,\nbody\n height: 100%\n color: var(--color-foreground-primary)\n background: var(--color-background-primary)\n\narticle\n color: var(--color-content-foreground)\n background: var(--color-content-background)\n overflow-wrap: break-word\n\n.page\n display: flex\n // fill the viewport for pages with little content.\n min-height: 100%\n\n.mobile-header\n width: 100%\n height: var(--header-height)\n background-color: var(--color-header-background)\n color: var(--color-header-text)\n border-bottom: 1px solid var(--color-header-border)\n\n // Looks like sub-script/super-script have this, and we need this to\n // be \"on top\" of those.\n z-index: 10\n\n // We don't show the header on large screens.\n display: none\n\n // Add shadow when scrolled\n &.scrolled\n border-bottom: none\n box-shadow: 0 0 0.2rem rgba(0, 0, 0, 0.1), 0 0.2rem 0.4rem rgba(0, 0, 0, 0.2)\n\n .header-center\n a\n color: var(--color-header-text)\n text-decoration: none\n\n.main\n display: flex\n flex: 1\n\n// Sidebar (left) also covers the entire left portion of the screen.\n.sidebar-drawer\n box-sizing: border-box\n\n border-right: 1px solid var(--color-sidebar-background-border)\n 
background: var(--color-sidebar-background)\n\n display: flex\n justify-content: flex-end\n // These next two lines took me two days to figure out.\n width: calc((100% - #{$full-width}) / 2 + #{$sidebar-width})\n min-width: $sidebar-width\n\n// Scroll-along sidebars\n.sidebar-container,\n.toc-drawer\n box-sizing: border-box\n width: $sidebar-width\n\n.toc-drawer\n background: var(--color-toc-background)\n // See HACK described on top of this document\n padding-right: 1rem\n\n.sidebar-sticky,\n.toc-sticky\n position: sticky\n top: 0\n height: min(100%, 100vh)\n height: 100vh\n\n display: flex\n flex-direction: column\n\n.sidebar-scroll,\n.toc-scroll\n flex-grow: 1\n flex-shrink: 1\n\n overflow: auto\n scroll-behavior: smooth\n\n// Central items.\n.content\n padding: 0 $content-padding\n width: $content-width\n\n display: flex\n flex-direction: column\n justify-content: space-between\n\n.icon\n display: inline-block\n height: 1rem\n width: 1rem\n svg\n width: 100%\n height: 100%\n\n//\n// Accommodate announcement banner\n//\n.announcement\n background-color: var(--color-announcement-background)\n color: var(--color-announcement-text)\n\n height: var(--header-height)\n display: flex\n align-items: center\n overflow-x: auto\n & + .page\n min-height: calc(100% - var(--header-height))\n\n.announcement-content\n box-sizing: border-box\n padding: 0.5rem\n min-width: 100%\n white-space: nowrap\n text-align: center\n\n a\n color: var(--color-announcement-text)\n text-decoration-color: var(--color-announcement-text)\n\n &:hover\n color: var(--color-announcement-text)\n text-decoration-color: var(--color-link--hover)\n\n////////////////////////////////////////////////////////////////////////////////\n// Toggles for theme\n////////////////////////////////////////////////////////////////////////////////\n.no-js .theme-toggle-container // don't show theme toggle if there's no JS\n display: none\n\n.theme-toggle-container\n vertical-align: middle\n\n.theme-toggle\n cursor: pointer\n border: none\n padding: 0\n background: transparent\n\n.theme-toggle svg\n vertical-align: middle\n height: 1rem\n width: 1rem\n color: var(--color-foreground-primary)\n display: none\n\n.theme-toggle-header\n float: left\n padding: 1rem 0.5rem\n\n////////////////////////////////////////////////////////////////////////////////\n// Toggles for elements\n////////////////////////////////////////////////////////////////////////////////\n.toc-overlay-icon, .nav-overlay-icon\n display: none\n cursor: pointer\n\n .icon\n color: var(--color-foreground-secondary)\n height: 1rem\n width: 1rem\n\n.toc-header-icon, .nav-overlay-icon\n // for when we set display: flex\n justify-content: center\n align-items: center\n\n.toc-content-icon\n height: 1.5rem\n width: 1.5rem\n\n.content-icon-container\n float: right\n display: flex\n margin-top: 1.5rem\n margin-left: 1rem\n margin-bottom: 1rem\n gap: 0.5rem\n\n .edit-this-page svg\n color: inherit\n height: 1rem\n width: 1rem\n\n.sidebar-toggle\n position: absolute\n display: none\n// \n.sidebar-toggle[name=\"__toc\"]\n left: 20px\n.sidebar-toggle:checked\n left: 40px\n// \n\n.overlay\n position: fixed\n top: 0\n width: 0\n height: 0\n\n transition: width 0ms, height 0ms, opacity 250ms ease-out\n\n opacity: 0\n background-color: rgba(0, 0, 0, 0.54)\n.sidebar-overlay\n z-index: 20\n.toc-overlay\n z-index: 40\n\n// Keep things on top and smooth.\n.sidebar-drawer\n z-index: 30\n transition: left 250ms ease-in-out\n.toc-drawer\n z-index: 50\n transition: right 250ms ease-in-out\n\n// Show the 
sidebar\n#__navigation:checked\n & ~ .sidebar-overlay\n width: 100%\n height: 100%\n opacity: 1\n & ~ .page\n .sidebar-drawer\n top: 0\n left: 0\n // Show the toc sidebar\n#__toc:checked\n & ~ .toc-overlay\n width: 100%\n height: 100%\n opacity: 1\n & ~ .page\n .toc-drawer\n top: 0\n right: 0\n\n////////////////////////////////////////////////////////////////////////////////\n// Back to top\n////////////////////////////////////////////////////////////////////////////////\n.back-to-top\n text-decoration: none\n\n display: none\n position: fixed\n left: 0\n top: 1rem\n padding: 0.5rem\n padding-right: 0.75rem\n border-radius: 1rem\n font-size: 0.8125rem\n\n background: var(--color-background-primary)\n box-shadow: 0 0.2rem 0.5rem rgba(0, 0, 0, 0.05), #6b728080 0px 0px 1px 0px\n\n z-index: 10\n\n margin-left: 50%\n transform: translateX(-50%)\n svg\n height: 1rem\n width: 1rem\n fill: currentColor\n display: inline-block\n\n span\n margin-left: 0.25rem\n\n .show-back-to-top &\n display: flex\n align-items: center\n\n////////////////////////////////////////////////////////////////////////////////\n// Responsive layout\n////////////////////////////////////////////////////////////////////////////////\n// Make things a bit bigger on bigger screens.\n@media (min-width: $full-width + $sidebar-width)\n html\n font-size: 110%\n\n@media (max-width: $full-width)\n // Collapse \"toc\" into the icon.\n .toc-content-icon\n display: flex\n .toc-drawer\n position: fixed\n height: 100vh\n top: 0\n right: -$sidebar-width\n border-left: 1px solid var(--color-background-muted)\n .toc-tree\n border-left: none\n font-size: var(--toc-font-size--mobile)\n\n // Accommodate the changed content width.\n .sidebar-drawer\n width: calc((100% - #{$full-width - $sidebar-width}) / 2 + #{$sidebar-width})\n\n@media (max-width: $full-width - $sidebar-width)\n // Collapse \"navigation\".\n .nav-overlay-icon\n display: flex\n .sidebar-drawer\n position: fixed\n height: 100vh\n width: $sidebar-width\n\n top: 0\n left: -$sidebar-width\n\n // Swap which icon is visible.\n .toc-header-icon\n display: flex\n .toc-content-icon, .theme-toggle-content\n display: none\n .theme-toggle-header\n display: block\n\n // Show the header.\n .mobile-header\n position: sticky\n top: 0\n display: flex\n justify-content: space-between\n align-items: center\n\n .header-left,\n .header-right\n display: flex\n height: var(--header-height)\n padding: 0 var(--header-padding)\n label\n height: 100%\n width: 100%\n user-select: none\n\n .nav-overlay-icon .icon,\n .theme-toggle svg\n height: 1.25rem\n width: 1.25rem\n\n // Add a scroll margin for the content\n :target\n scroll-margin-top: var(--header-height)\n\n // Show back-to-top below the header\n .back-to-top\n top: calc(var(--header-height) + 0.5rem)\n\n // Center the page, and accommodate the header.\n .page\n flex-direction: column\n justify-content: center\n .content\n margin-left: auto\n margin-right: auto\n\n@media (max-width: $content-width + 2* $content-padding)\n // Content should respect window limits.\n .content\n width: 100%\n overflow-x: auto\n\n@media (max-width: $content-width)\n .content\n padding: 0 $content-padding--small\n // Don't float sidebars to the right.\n article aside.sidebar\n float: none\n width: 100%\n margin: 1rem 0\n","//\n// The design here is strongly inspired by mkdocs-material.\n.admonition, .topic\n margin: 1rem auto\n padding: 0 0.5rem 0.5rem 0.5rem\n\n background: var(--color-admonition-background)\n\n border-radius: 0.2rem\n box-shadow: 0 0.2rem 0.5rem 
rgba(0, 0, 0, 0.05), 0 0 0.0625rem rgba(0, 0, 0, 0.1)\n\n font-size: var(--admonition-font-size)\n\n overflow: hidden\n page-break-inside: avoid\n\n // First element should have no margin, since the title has it.\n > :nth-child(2)\n margin-top: 0\n\n // Last item should have no margin, since we'll control that w/ padding\n > :last-child\n margin-bottom: 0\n\n.admonition p.admonition-title,\np.topic-title\n position: relative\n margin: 0 -0.5rem 0.5rem\n padding-left: 2rem\n padding-right: .5rem\n padding-top: .4rem\n padding-bottom: .4rem\n\n font-weight: 500\n font-size: var(--admonition-title-font-size)\n line-height: 1.3\n\n // Our fancy icon\n &::before\n content: \"\"\n position: absolute\n left: 0.5rem\n width: 1rem\n height: 1rem\n\n// Default styles\np.admonition-title\n background-color: var(--color-admonition-title-background)\n &::before\n background-color: var(--color-admonition-title)\n mask-image: var(--icon-admonition-default)\n mask-repeat: no-repeat\n\np.topic-title\n background-color: var(--color-topic-title-background)\n &::before\n background-color: var(--color-topic-title)\n mask-image: var(--icon-topic-default)\n mask-repeat: no-repeat\n\n//\n// Variants\n//\n.admonition\n border-left: 0.2rem solid var(--color-admonition-title)\n\n @each $type, $value in $admonitions\n &.#{$type}\n border-left-color: var(--color-admonition-title--#{$type})\n > .admonition-title\n background-color: var(--color-admonition-title-background--#{$type})\n &::before\n background-color: var(--color-admonition-title--#{$type})\n mask-image: var(--icon-#{nth($value, 2)})\n\n.admonition-todo > .admonition-title\n text-transform: uppercase\n","// This file stylizes the API documentation (stuff generated by autodoc). It's\n// deeply nested due to how autodoc structures the HTML without enough classes\n// to select the relevant items.\n\n// API docs!\ndl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)\n // Tweak the spacing of all the things!\n dd\n margin-left: 2rem\n > :first-child\n margin-top: 0.125rem\n > :last-child\n margin-bottom: 0.75rem\n\n // This is used for the arguments\n .field-list\n margin-bottom: 0.75rem\n\n // \"Headings\" (like \"Parameters\" and \"Return\")\n > dt\n text-transform: uppercase\n font-size: var(--font-size--small)\n\n dd:empty\n margin-bottom: 0.5rem\n dd > ul\n margin-left: -1.2rem\n > li\n > p:nth-child(2)\n margin-top: 0\n // When the last-empty-paragraph follows a paragraph, it doesn't need\n // to augment the existing spacing.\n > p + p:last-child:empty\n margin-top: 0\n margin-bottom: 0\n\n // Colorize the elements\n > dt\n color: var(--color-api-overall)\n\n.sig:not(.sig-inline)\n font-weight: bold\n\n font-size: var(--api-font-size)\n font-family: var(--font-stack--monospace)\n\n margin-left: -0.25rem\n margin-right: -0.25rem\n padding-top: 0.25rem\n padding-bottom: 0.25rem\n padding-right: 0.5rem\n\n // These are intentionally em, to properly match the font size.\n padding-left: 3em\n text-indent: -2.5em\n\n border-radius: 0.25rem\n\n background: var(--color-api-background)\n transition: background 100ms ease-out\n\n &:hover\n background: var(--color-api-background-hover)\n\n // adjust the size of the [source] link on the right.\n a.reference\n .viewcode-link\n font-weight: normal\n width: 3.5rem\n\nem.property\n font-style: normal\n &:first-child\n color: var(--color-api-keyword)\n.sig-name\n color: var(--color-api-name)\n.sig-prename\n font-weight: normal\n color: var(--color-api-pre-name)\n.sig-paren\n color: 
var(--color-api-paren)\n.sig-param\n font-style: normal\n\n.versionmodified\n font-style: italic\ndiv.versionadded, div.versionchanged, div.deprecated\n p\n margin-top: 0.125rem\n margin-bottom: 0.125rem\n\n// Align the [docs] and [source] to the right.\n.viewcode-link, .viewcode-back\n float: right\n text-align: right\n",".line-block\n margin-top: 0.5rem\n margin-bottom: 0.75rem\n .line-block\n margin-top: 0rem\n margin-bottom: 0rem\n padding-left: 1rem\n","// Captions\narticle p.caption,\ntable > caption,\n.code-block-caption\n font-size: var(--font-size--small)\n text-align: center\n\n// Caption above a TOCTree\n.toctree-wrapper.compound\n .caption, :not(.caption) > .caption-text\n font-size: var(--font-size--small)\n text-transform: uppercase\n\n text-align: initial\n margin-bottom: 0\n\n > ul\n margin-top: 0\n margin-bottom: 0\n","// Inline code\ncode.literal, .sig-inline\n background: var(--color-inline-code-background)\n border-radius: 0.2em\n // Make the font smaller, and use padding to recover.\n font-size: var(--font-size--small--2)\n padding: 0.1em 0.2em\n\n pre.literal-block &\n font-size: inherit\n padding: 0\n\n p &\n border: 1px solid var(--color-background-border)\n\n.sig-inline\n font-family: var(--font-stack--monospace)\n\n// Code and Literal Blocks\n$code-spacing-vertical: 0.625rem\n$code-spacing-horizontal: 0.875rem\n\n// Wraps every literal block + line numbers.\ndiv[class*=\" highlight-\"],\ndiv[class^=\"highlight-\"]\n margin: 1em 0\n display: flex\n\n .table-wrapper\n margin: 0\n padding: 0\n\npre\n margin: 0\n padding: 0\n overflow: auto\n\n // Needed to have more specificity than pygments' \"pre\" selector. :(\n article[role=\"main\"] .highlight &\n line-height: 1.5\n\n &.literal-block,\n .highlight &\n font-size: var(--code-font-size)\n padding: $code-spacing-vertical $code-spacing-horizontal\n\n // Make it look like all the other blocks.\n &.literal-block\n margin-top: 1rem\n margin-bottom: 1rem\n\n border-radius: 0.2rem\n background-color: var(--color-code-background)\n color: var(--color-code-foreground)\n\n// All code is always contained in this.\n.highlight\n width: 100%\n border-radius: 0.2rem\n\n // Make line numbers and prompts un-selectable.\n .gp, span.linenos\n user-select: none\n pointer-events: none\n\n // Expand the line-highlighting.\n .hll\n display: block\n margin-left: -$code-spacing-horizontal\n margin-right: -$code-spacing-horizontal\n padding-left: $code-spacing-horizontal\n padding-right: $code-spacing-horizontal\n\n/* Make code block captions be nicely integrated */\n.code-block-caption\n display: flex\n padding: $code-spacing-vertical $code-spacing-horizontal\n\n border-radius: 0.25rem\n border-bottom-left-radius: 0\n border-bottom-right-radius: 0\n font-weight: 300\n border-bottom: 1px solid\n\n background-color: var(--color-code-background)\n color: var(--color-code-foreground)\n border-color: var(--color-background-border)\n\n + div[class]\n margin-top: 0\n pre\n border-top-left-radius: 0\n border-top-right-radius: 0\n\n// When `html_codeblock_linenos_style` is table.\n.highlighttable\n width: 100%\n display: block\n tbody\n display: block\n\n tr\n display: flex\n\n // Line numbers\n td.linenos\n background-color: var(--color-code-background)\n color: var(--color-code-foreground)\n padding: $code-spacing-vertical $code-spacing-horizontal\n padding-right: 0\n border-top-left-radius: 0.2rem\n border-bottom-left-radius: 0.2rem\n\n .linenodiv\n padding-right: $code-spacing-horizontal\n font-size: var(--code-font-size)\n box-shadow: 
-0.0625rem 0 var(--color-foreground-border) inset\n\n // Actual code\n td.code\n padding: 0\n display: block\n flex: 1\n overflow: hidden\n\n .highlight\n border-top-left-radius: 0\n border-bottom-left-radius: 0\n\n// When `html_codeblock_linenos_style` is inline.\n.highlight\n span.linenos\n display: inline-block\n padding-left: 0\n padding-right: $code-spacing-horizontal\n margin-right: $code-spacing-horizontal\n box-shadow: -0.0625rem 0 var(--color-foreground-border) inset\n","// Inline Footnote Reference\n.footnote-reference\n font-size: var(--font-size--small--4)\n vertical-align: super\n\n// Definition list, listing the content of each note.\n// docutils <= 0.17\ndl.footnote.brackets\n font-size: var(--font-size--small)\n color: var(--color-foreground-secondary)\n\n display: grid\n grid-template-columns: max-content auto\n dt\n margin: 0\n > .fn-backref\n margin-left: 0.25rem\n\n &:after\n content: \":\"\n\n .brackets\n &:before\n content: \"[\"\n &:after\n content: \"]\"\n\n dd\n margin: 0\n padding: 0 1rem\n\n// docutils >= 0.18\naside.footnote\n font-size: var(--font-size--small)\n color: var(--color-foreground-secondary)\n\naside.footnote > span,\ndiv.citation > span\n float: left\n font-weight: 500\n padding-right: 0.25rem\n\naside.footnote > p,\ndiv.citation > p\n margin-left: 2rem\n","//\n// Figures\n//\nimg\n box-sizing: border-box\n max-width: 100%\n height: auto\n\narticle\n figure, .figure\n border-radius: 0.2rem\n\n margin: 0\n :last-child\n margin-bottom: 0\n\n .align-left\n float: left\n clear: left\n margin: 0 1rem 1rem\n\n .align-right\n float: right\n clear: right\n margin: 0 1rem 1rem\n\n .align-default,\n .align-center\n display: block\n text-align: center\n margin-left: auto\n margin-right: auto\n\n // WELL, table needs to be stylised like a table.\n table.align-default\n display: table\n text-align: initial\n",".genindex-jumpbox, .domainindex-jumpbox\n border-top: 1px solid var(--color-background-border)\n border-bottom: 1px solid var(--color-background-border)\n padding: 0.25rem\n\n.genindex-section, .domainindex-section\n h2\n margin-top: 0.75rem\n margin-bottom: 0.5rem\n ul\n margin-top: 0\n margin-bottom: 0\n","ul,\nol\n padding-left: 1.2rem\n\n // Space lists out like paragraphs\n margin-top: 1rem\n margin-bottom: 1rem\n // reduce margins within li.\n li\n > p:first-child\n margin-top: 0.25rem\n margin-bottom: 0.25rem\n\n > p:last-child\n margin-top: 0.25rem\n\n > ul,\n > ol\n margin-top: 0.5rem\n margin-bottom: 0.5rem\n\nol\n &.arabic\n list-style: decimal\n &.loweralpha\n list-style: lower-alpha\n &.upperalpha\n list-style: upper-alpha\n &.lowerroman\n list-style: lower-roman\n &.upperroman\n list-style: upper-roman\n\n// Don't space lists out when they're \"simple\" or in a `.. 
toctree::`\n.simple,\n.toctree-wrapper\n li\n > ul,\n > ol\n margin-top: 0\n margin-bottom: 0\n\n// Definition Lists\n.field-list,\n.option-list,\ndl:not([class]),\ndl.simple,\ndl.footnote,\ndl.glossary\n dt\n font-weight: 500\n margin-top: 0.25rem\n + dt\n margin-top: 0\n\n .classifier::before\n content: \":\"\n margin-left: 0.2rem\n margin-right: 0.2rem\n\n dd\n > p:first-child,\n ul\n margin-top: 0.125rem\n\n ul\n margin-bottom: 0.125rem\n",".math-wrapper\n width: 100%\n overflow-x: auto\n\ndiv.math\n position: relative\n text-align: center\n\n .headerlink,\n &:focus .headerlink\n display: none\n\n &:hover .headerlink\n display: inline-block\n\n span.eqno\n position: absolute\n right: 0.5rem\n top: 50%\n transform: translate(0, -50%)\n z-index: 1\n","// Abbreviations\nabbr[title]\n cursor: help\n\n// \"Problematic\" content, as identified by Sphinx\n.problematic\n color: var(--color-problematic)\n\n// Keyboard / Mouse \"instructions\"\nkbd:not(.compound)\n margin: 0 0.2rem\n padding: 0 0.2rem\n border-radius: 0.2rem\n border: 1px solid var(--color-foreground-border)\n color: var(--color-foreground-primary)\n vertical-align: text-bottom\n\n font-size: var(--font-size--small--3)\n display: inline-block\n\n box-shadow: 0 0.0625rem 0 rgba(0, 0, 0, 0.2), inset 0 0 0 0.125rem var(--color-background-primary)\n\n background-color: var(--color-background-secondary)\n\n// Blockquote\nblockquote\n border-left: 4px solid var(--color-background-border)\n background: var(--color-background-secondary)\n\n margin-left: 0\n margin-right: 0\n padding: 0.5rem 1rem\n\n .attribution\n font-weight: 600\n text-align: right\n\n &.pull-quote,\n &.highlights\n font-size: 1.25em\n\n &.epigraph,\n &.pull-quote\n border-left-width: 0\n border-radius: 0.5rem\n\n &.highlights\n border-left-width: 0\n background: transparent\n\n// Center align embedded-in-text images\np .reference img\n vertical-align: middle\n","p.rubric\n line-height: 1.25\n font-weight: bold\n font-size: 1.125em\n\n // For Numpy-style documentation that's got rubrics within it.\n // https://github.com/pradyunsg/furo/discussions/505\n dd &\n line-height: inherit\n font-weight: inherit\n\n font-size: var(--font-size--small)\n text-transform: uppercase\n","article .sidebar\n float: right\n clear: right\n width: 30%\n\n margin-left: 1rem\n margin-right: 0\n\n border-radius: 0.2rem\n background-color: var(--color-background-secondary)\n border: var(--color-background-border) 1px solid\n\n > *\n padding-left: 1rem\n padding-right: 1rem\n\n > ul, > ol // lists need additional padding, because bullets.\n padding-left: 2.2rem\n\n .sidebar-title\n margin: 0\n padding: 0.5rem 1rem\n border-bottom: var(--color-background-border) 1px solid\n\n font-weight: 500\n\n// TODO: subtitle\n// TODO: dedicated variables?\n",".table-wrapper\n width: 100%\n overflow-x: auto\n margin-top: 1rem\n margin-bottom: 0.5rem\n padding: 0.2rem 0.2rem 0.75rem\n\ntable.docutils\n border-radius: 0.2rem\n border-spacing: 0\n border-collapse: collapse\n\n box-shadow: 0 0.2rem 0.5rem rgba(0, 0, 0, 0.05), 0 0 0.0625rem rgba(0, 0, 0, 0.1)\n\n th\n background: var(--color-table-header-background)\n\n td,\n th\n // Space things out properly\n padding: 0 0.25rem\n\n // Get the borders looking just-right.\n border-left: 1px solid var(--color-table-border)\n border-right: 1px solid var(--color-table-border)\n border-bottom: 1px solid var(--color-table-border)\n\n p\n margin: 0.25rem\n\n &:first-child\n border-left: none\n &:last-child\n border-right: none\n\n // MyST-parser tables set these 
classes for control of column alignment\n &.text-left\n text-align: left\n &.text-right\n text-align: right\n &.text-center\n text-align: center\n",":target\n scroll-margin-top: 0.5rem\n\n@media (max-width: $full-width - $sidebar-width)\n :target\n scroll-margin-top: calc(0.5rem + var(--header-height))\n\n // When a heading is selected\n section > span:target\n scroll-margin-top: calc(0.8rem + var(--header-height))\n\n// Permalinks\n.headerlink\n font-weight: 100\n user-select: none\n\nh1,\nh2,\nh3,\nh4,\nh5,\nh6,\ndl dt,\np.caption,\nfigcaption p,\ntable > caption,\n.code-block-caption\n > .headerlink\n margin-left: 0.5rem\n visibility: hidden\n &:hover > .headerlink\n visibility: visible\n\n // Don't change to link-like, if someone adds the contents directive.\n > .toc-backref\n color: inherit\n text-decoration-line: none\n\n// Figure and table captions are special.\nfigure:hover > figcaption > p > .headerlink,\ntable:hover > caption > .headerlink\n visibility: visible\n\n:target >, // Regular section[id] style anchors\nspan:target ~ // Non-regular span[id] style \"extra\" anchors\n h1,\n h2,\n h3,\n h4,\n h5,\n h6\n &:nth-of-type(1)\n background-color: var(--color-highlight-on-target)\n // .headerlink\n // visibility: visible\n code.literal\n background-color: transparent\n\ntable:target > caption,\nfigure:target\n background-color: var(--color-highlight-on-target)\n\n// Inline page contents\n.this-will-duplicate-information-and-it-is-still-useful-here li :target\n background-color: var(--color-highlight-on-target)\n\n// Code block permalinks\n.literal-block-wrapper:target .code-block-caption\n background-color: var(--color-highlight-on-target)\n\n// When a definition list item is selected\n//\n// There isn't really an alternative to !important here, due to the\n// high-specificity of API documentation's selector.\ndt:target\n background-color: var(--color-highlight-on-target) !important\n\n// When a footnote reference is selected\n.footnote > dt:target + dd,\n.footnote-reference:target\n background-color: var(--color-highlight-on-target)\n",".guilabel\n background-color: var(--color-guilabel-background)\n border: 1px solid var(--color-guilabel-border)\n color: var(--color-guilabel-text)\n\n padding: 0 0.3em\n border-radius: 0.5em\n font-size: 0.9em\n","// This file contains the styles used for stylizing the footer that's shown\n// below the content.\n\nfooter\n font-size: var(--font-size--small)\n display: flex\n flex-direction: column\n\n margin-top: 2rem\n\n// Bottom of page information\n.bottom-of-page\n display: flex\n align-items: center\n justify-content: space-between\n\n margin-top: 1rem\n padding-top: 1rem\n padding-bottom: 1rem\n\n color: var(--color-foreground-secondary)\n border-top: 1px solid var(--color-background-border)\n\n line-height: 1.5\n\n @media (max-width: $content-width)\n text-align: center\n flex-direction: column-reverse\n gap: 0.25rem\n\n .left-details\n font-size: var(--font-size--small)\n\n .right-details\n display: flex\n flex-direction: column\n gap: 0.25rem\n text-align: right\n\n .icons\n display: flex\n justify-content: flex-end\n gap: 0.25rem\n font-size: 1rem\n\n a\n text-decoration: none\n\n svg,\n img\n font-size: 1.125rem\n height: 1em\n width: 1em\n\n// Next/Prev page information\n.related-pages\n a\n display: flex\n align-items: center\n\n text-decoration: none\n &:hover .page-info .title\n text-decoration: underline\n color: var(--color-link)\n text-decoration-color: var(--color-link-underline)\n\n svg.furo-related-icon,\n 
svg.furo-related-icon > use\n flex-shrink: 0\n\n color: var(--color-foreground-border)\n\n width: 0.75rem\n height: 0.75rem\n margin: 0 0.5rem\n\n &.next-page\n max-width: 50%\n\n float: right\n clear: right\n text-align: right\n\n &.prev-page\n max-width: 50%\n\n float: left\n clear: left\n\n svg\n transform: rotate(180deg)\n\n.page-info\n display: flex\n flex-direction: column\n overflow-wrap: anywhere\n\n .next-page &\n align-items: flex-end\n\n .context\n display: flex\n align-items: center\n\n padding-bottom: 0.1rem\n\n color: var(--color-foreground-muted)\n font-size: var(--font-size--small)\n text-decoration: none\n","// This file contains the styles for the contents of the left sidebar, which\n// contains the navigation tree, logo, search etc.\n\n////////////////////////////////////////////////////////////////////////////////\n// Brand on top of the scrollable tree.\n////////////////////////////////////////////////////////////////////////////////\n.sidebar-brand\n display: flex\n flex-direction: column\n flex-shrink: 0\n\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n text-decoration: none\n\n.sidebar-brand-text\n color: var(--color-sidebar-brand-text)\n overflow-wrap: break-word\n margin: var(--sidebar-item-spacing-vertical) 0\n font-size: 1.5rem\n\n.sidebar-logo-container\n margin: var(--sidebar-item-spacing-vertical) 0\n\n.sidebar-logo\n margin: 0 auto\n display: block\n max-width: 100%\n\n////////////////////////////////////////////////////////////////////////////////\n// Search\n////////////////////////////////////////////////////////////////////////////////\n.sidebar-search-container\n display: flex\n align-items: center\n margin-top: var(--sidebar-search-space-above)\n\n position: relative\n\n background: var(--color-sidebar-search-background)\n &:hover,\n &:focus-within\n background: var(--color-sidebar-search-background--focus)\n\n &::before\n content: \"\"\n position: absolute\n left: var(--sidebar-item-spacing-horizontal)\n width: var(--sidebar-search-icon-size)\n height: var(--sidebar-search-icon-size)\n\n background-color: var(--color-sidebar-search-icon)\n mask-image: var(--icon-search)\n\n.sidebar-search\n box-sizing: border-box\n\n border: none\n border-top: 1px solid var(--color-sidebar-search-border)\n border-bottom: 1px solid var(--color-sidebar-search-border)\n\n padding-top: var(--sidebar-search-input-spacing-vertical)\n padding-bottom: var(--sidebar-search-input-spacing-vertical)\n padding-right: var(--sidebar-search-input-spacing-horizontal)\n padding-left: calc(var(--sidebar-item-spacing-horizontal) + var(--sidebar-search-input-spacing-horizontal) + var(--sidebar-search-icon-size))\n\n width: 100%\n\n color: var(--color-sidebar-search-foreground)\n background: transparent\n z-index: 10\n\n &:focus\n outline: none\n\n &::placeholder\n font-size: var(--sidebar-search-input-font-size)\n\n//\n// Hide Search Matches link\n//\n#searchbox .highlight-link\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal) 0\n margin: 0\n text-align: center\n\n a\n color: var(--color-sidebar-search-icon)\n font-size: var(--font-size--small--2)\n\n////////////////////////////////////////////////////////////////////////////////\n// Structure/Skeleton of the navigation tree (left)\n////////////////////////////////////////////////////////////////////////////////\n.sidebar-tree\n font-size: var(--sidebar-item-font-size)\n margin-top: var(--sidebar-tree-space-above)\n margin-bottom: 
var(--sidebar-item-spacing-vertical)\n\n ul\n padding: 0\n margin-top: 0\n margin-bottom: 0\n\n display: flex\n flex-direction: column\n\n list-style: none\n\n li\n position: relative\n margin: 0\n\n > ul\n margin-left: var(--sidebar-item-spacing-horizontal)\n\n .icon\n color: var(--color-sidebar-link-text)\n\n .reference\n box-sizing: border-box\n color: var(--color-sidebar-link-text)\n\n // Fill the parent.\n display: inline-block\n line-height: var(--sidebar-item-line-height)\n text-decoration: none\n\n // Don't allow long words to cause wrapping.\n overflow-wrap: anywhere\n\n height: 100%\n width: 100%\n\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n\n &:hover\n background: var(--color-sidebar-item-background--hover)\n\n // Add a nice little \"external-link\" arrow here.\n &.external::after\n content: url('data:image/svg+xml,')\n margin: 0 0.25rem\n vertical-align: middle\n color: var(--color-sidebar-link-text)\n\n // Make the current page reference bold.\n .current-page > .reference\n font-weight: bold\n\n label\n position: absolute\n top: 0\n right: 0\n height: var(--sidebar-item-height)\n width: var(--sidebar-expander-width)\n\n cursor: pointer\n user-select: none\n\n display: flex\n justify-content: center\n align-items: center\n\n .caption, :not(.caption) > .caption-text\n font-size: var(--sidebar-caption-font-size)\n color: var(--color-sidebar-caption-text)\n\n font-weight: bold\n text-transform: uppercase\n\n margin: var(--sidebar-caption-space-above) 0 0 0\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n\n // If it has children, add a bit more padding to wrap the content to avoid\n // overlapping with the