From 6b828b366f39c8f92f20e5b066764412432a9616 Mon Sep 17 00:00:00 2001 From: yurakuratov <9271630+yurakuratov@users.noreply.github.com> Date: Thu, 30 Dec 2021 09:32:59 +0300 Subject: [PATCH 01/11] fix: upload DeepPavlov BERT models with MLM & NSP heads parameters (#1502) * fix: update urls to fixed BERT models * fix: table in pretrained_vectors docpage * fix: change bert_config_path to .../config.json * fix: add tokenizer_config.json, config.json to BERT-sentence models --- .../sentiment_twitter_bert_emb.json | 6 +-- .../embedder/bert_sentence_embedder.json | 6 +-- docs/features/models/bert.rst | 12 +++--- docs/features/pretrained_vectors.rst | 42 +++++++++---------- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json b/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json index 6a4fb9756a..e7b835e9a1 100644 --- a/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json +++ b/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json @@ -39,7 +39,7 @@ { "class_name": "transformers_bert_embedder", "id": "my_embedder", - "bert_config_path": "{BERT_PATH}/bert_config.json", + "bert_config_path": "{BERT_PATH}/config.json", "truncate": false, "load_path": "{BERT_PATH}", "in": ["subword_tok_ids", "startofword_markers", "attention_mask"], @@ -128,7 +128,7 @@ "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models", "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_bert_emb", - "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_pt" + "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_pt_v1" }, "download": [ { @@ -136,7 +136,7 @@ "subdir": "{DOWNLOADS_PATH}" }, { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_pt.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_pt_v1.tar.gz", "subdir": "{DOWNLOADS_PATH}/bert_models" } ] diff 
--git a/deeppavlov/configs/embedder/bert_sentence_embedder.json b/deeppavlov/configs/embedder/bert_sentence_embedder.json index 348616ae27..b8c29a67f2 100644 --- a/deeppavlov/configs/embedder/bert_sentence_embedder.json +++ b/deeppavlov/configs/embedder/bert_sentence_embedder.json @@ -12,7 +12,7 @@ }, { "class_name": "transformers_bert_embedder", - "bert_config_path": "{BERT_PATH}/bert_config.json", + "bert_config_path": "{BERT_PATH}/config.json", "load_path": "{BERT_PATH}", "truncate": false, "in": ["subword_tok_ids", "startofword_markers", "attention_mask"], @@ -26,12 +26,12 @@ "variables": { "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/sentence_multi_cased_L-12_H-768_A-12_pt" + "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/sentence_multi_cased_L-12_H-768_A-12_pt_v1" }, "labels": {}, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/sentence_multi_cased_L-12_H-768_A-12_pt.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/sentence_multi_cased_L-12_H-768_A-12_pt_v1.tar.gz", "subdir": "{DOWNLOADS_PATH}/bert_models" } ] diff --git a/docs/features/models/bert.rst b/docs/features/models/bert.rst index 9e68437742..dbb86ed5e7 100644 --- a/docs/features/models/bert.rst +++ b/docs/features/models/bert.rst @@ -22,19 +22,19 @@ There are several pre-trained BERT models released by Google Research, more deta We have trained BERT-base model for other languages and domains: - RuBERT, Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, - `[deeppavlov_pytorch] `__ + `[deeppavlov_pytorch] `__ - SlavicBERT, Slavic (bg, cs, pl, ru), cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, - `[deeppavlov_pytorch] `__ + `[deeppavlov_pytorch] `__ - Conversational BERT, English, cased, 12-layer, 768-hidden, 12-heads, 110M parameters: `[deeppavlov] `__, - `[deeppavlov_pytorch] `__ + `[deeppavlov_pytorch] `__ - Conversational 
RuBERT, Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, - `[deeppavlov_pytorch] `__ + `[deeppavlov_pytorch] `__ - Conversational DistilRuBERT, Russian, cased, 6-layer, 768-hidden, 12-heads, 135.4M parameters: `[deeppavlov_pytorch] `__ - Conversational DistilRuBERT-tiny, Russian, cased, 2-layer, 768-hidden, 12-heads, 107M parameters: `[deeppavlov_pytorch] `__ - Sentence Multilingual BERT, 101 languages, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, - `[deeppavlov_pytorch] `__ + `[deeppavlov_pytorch] `__ - Sentence RuBERT, Russian, cased, 12-layer, 768-hidden, 12-heads, 180M parameters: `[deeppavlov] `__, - `[deeppavlov_pytorch] `__ + `[deeppavlov_pytorch] `__ The ``deeppavlov_pytorch`` models are designed to be run with the `HuggingFace's Transformers `__ library. diff --git a/docs/features/pretrained_vectors.rst b/docs/features/pretrained_vectors.rst index ee8d6d01e0..30d06211d7 100644 --- a/docs/features/pretrained_vectors.rst +++ b/docs/features/pretrained_vectors.rst @@ -28,27 +28,27 @@ The ``TensorFlow`` models can be run with the original `BERT repo `__ library. 
The download links are: -+----------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Description | Model parameters | Download links | -+============================+=======================================+====================================================================================================================+ -| RuBERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | -| | size = 632MB | `[pytorch] `__ | -+----------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Slavic BERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | -| | size = 632MB | `[pytorch] `__ | -+----------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Conversational BERT | vocab size = 30K, parameters = 110M, | `[tensorflow] `__, | -| | size = 385MB | `[pytorch] `__ | -+----------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Conversational RuBERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | -| | size = 630MB | `[pytorch] `__ | -+----------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Sentence Multilingual BERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | -| | size = 630MB | `[pytorch] `__ | 
-+----------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Sentence RuBERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | -| | size = 630MB | `[pytorch] `__ | -+----------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------+ ++----------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------+ +| Description | Model parameters | Download links | ++============================+=======================================+======================================================================================================================+ +| RuBERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | +| | size = 632MB | `[pytorch] `__ | ++----------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------+ +| Slavic BERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | +| | size = 632MB | `[pytorch] `__ | ++----------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------+ +| Conversational BERT | vocab size = 30K, parameters = 110M, | `[tensorflow] `__, | +| | size = 385MB | `[pytorch] `__ | ++----------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------+ +| Conversational RuBERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | +| | size = 630MB | `[pytorch] `__ | 
++----------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------+ +| Sentence Multilingual BERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | +| | size = 630MB | `[pytorch] `__ | ++----------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------+ +| Sentence RuBERT | vocab size = 120K, parameters = 180M, | `[tensorflow] `__, | +| | size = 630MB | `[pytorch] `__ | ++----------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------+ ELMo From 046377f7eaa21e9fe7b163c8c80a4aec9ab90667 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Wed, 2 Feb 2022 09:26:42 +0300 Subject: [PATCH 02/11] fix: en_core_web_sm load error during tests (#1524) * fix: en_core_web_sm loading error * docs: fix comments grammar --- deeppavlov/models/kbqa/sentence_answer.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/deeppavlov/models/kbqa/sentence_answer.py b/deeppavlov/models/kbqa/sentence_answer.py index ea7042c819..d3dabf1614 100644 --- a/deeppavlov/models/kbqa/sentence_answer.py +++ b/deeppavlov/models/kbqa/sentence_answer.py @@ -12,13 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import importlib import re from logging import getLogger +import pkg_resources import spacy log = getLogger(__name__) +# en_core_web_sm is installed and used by test_inferring_pretrained_model in the same interpreter session during tests. 
+# Spacy checks en_core_web_sm package presence with pkg_resources, but pkg_resources is initialized with interpreter, +# sot it doesn't see en_core_web_sm installed after interpreter initialization, so we use importlib.reload below. + +if 'en-core-web-sm' not in pkg_resources.working_set.by_key.keys(): + importlib.reload(pkg_resources) + +# TODO: move nlp to sentence_answer, sentence_answer to rel_ranking_bert_infer and revise en_core_web_sm requirement nlp = spacy.load('en_core_web_sm') pronouns = ["who", "what", "when", "where", "how"] From ee7bfd01390a4d49e5053ee140f15fa1fbd95f98 Mon Sep 17 00:00:00 2001 From: mak Date: Fri, 4 Feb 2022 11:05:48 +0100 Subject: [PATCH 03/11] Minor change: fix typo (#1517) --- docs/intro/quick_start.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/intro/quick_start.rst b/docs/intro/quick_start.rst index 9f31ee475f..45c3859ab4 100644 --- a/docs/intro/quick_start.rst +++ b/docs/intro/quick_start.rst @@ -111,7 +111,7 @@ You can train it in the same simple way: model = train_model(, download=True) * ``download=True`` downloads pretrained model, therefore the pretrained - model will be, first, loaded and then train (optional). + model will be, first, loaded and then trained (optional). Dataset will be downloaded regardless of whether there was ``-d`` flag or not. 
From 1c88f03d34592f840828de113012908ff742eaab Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Tue, 15 Feb 2022 16:44:20 +0300 Subject: [PATCH 04/11] remove: Unnecessary models and components (#1523) * remove: unpopular configs and components * refactor: squad_ru_torch_bert -> squad_ru_bert + squad_ru_bert_infer moved to torch * refactor: squad_torch_bert[_infer] -> squad_bert[_infer] * refactor: ner_rus_bert_torch -> ner_rus_bert * refactor: ner_ontonotes_bert[_mult]_torch -> ner_ontonotes_bert[_mult] * refactor: ner_conll2003_torch_bert -> ner_conll2003_bert * insults_kaggle_bert_torch -> insults_kaggle_bert * refactor: ranking_ubuntu_v2_torch_bert_uncased -> ranking_ubuntu_v2_bert_uncased * Revert "refactor: squad_torch_bert[_infer] -> squad_bert[_infer]" This reverts commit fb5bfed8a12519c83e718015ea564ad7176956d0. * Revert "refactor: ner_conll2003_torch_bert -> ner_conll2003_bert" This reverts commit 7fee5e57feb5970f4019fec73d98f026a05d3f6a. * Revert "refactor: ranking_ubuntu_v2_torch_bert_uncased -> ranking_ubuntu_v2_bert_uncased" This reverts commit e656079b70ae6260af065ea47ce37d9ff46c73dc. * Revert "refactor: ner_ontonotes_bert[_mult]_torch -> ner_ontonotes_bert[_mult]" This reverts commit bb41a588170ac79700779442b793b4a46de4318d. 
* remove: jupyter example notebooks --- README.md | 6 +- .../configs/classifiers/insults_kaggle.json | 155 - .../classifiers/insults_kaggle_bert.json | 64 +- .../insults_kaggle_bert_torch.json | 149 - .../classifiers/insults_kaggle_conv_bert.json | 153 - .../configs/classifiers/intents_dstc2.json | 156 - .../classifiers/intents_dstc2_bert.json | 121 - .../classifiers/intents_dstc2_big.json | 155 - .../classifiers/intents_sample_csv.json | 160 - .../classifiers/intents_sample_json.json | 155 - .../configs/classifiers/intents_snips.json | 141 - .../classifiers/intents_snips_big.json | 141 - .../classifiers/intents_snips_sklearn.json | 164 - .../intents_snips_tfidf_weighted.json | 182 - .../classifiers/relation_prediction_rus.json | 132 - .../classifiers/ru_obscenity_classifier.json | 30 - .../rusentiment_bigru_superconv.json | 165 - .../configs/classifiers/rusentiment_cnn.json | 167 - .../classifiers/sentiment_imdb_bert.json | 142 - .../classifiers/sentiment_imdb_conv_bert.json | 142 - .../classifiers/sentiment_sst_multi_bert.json | 135 - .../sentiment_twitter_bert_emb.json | 144 - .../sentiment_twitter_preproc.json | 159 - .../classifiers/sentiment_yelp_conv_bert.json | 149 - .../sentiment_yelp_multi_bert.json | 149 - .../configs/classifiers/sst_torch_swcnn.json | 148 - .../configs/classifiers/topic_ag_news.json | 154 - .../classifiers/yahoo_convers_vs_info.json | 167 - .../yahoo_convers_vs_info_bert.json | 160 - .../en_ranker_tfidf_enwiki20161221.json | 80 - deeppavlov/configs/go_bot/database_dstc2.json | 44 - deeppavlov/configs/go_bot/gobot_dstc2.json | 125 - .../configs/go_bot/gobot_dstc2_best.json | 133 - .../go_bot/gobot_dstc2_best_json_nlg.json | 133 - .../configs/go_bot/gobot_simple_dstc2.json | 125 - .../morpho_tagger/UD2.0/morpho_ar.json | 173 - .../morpho_tagger/UD2.0/morpho_cs.json | 173 - .../morpho_tagger/UD2.0/morpho_de.json | 173 - .../morpho_tagger/UD2.0/morpho_en.json | 173 - .../morpho_tagger/UD2.0/morpho_es_ancora.json | 173 - 
.../morpho_tagger/UD2.0/morpho_fr.json | 173 - .../morpho_tagger/UD2.0/morpho_hi.json | 173 - .../morpho_tagger/UD2.0/morpho_hu.json | 173 - .../morpho_tagger/UD2.0/morpho_it.json | 173 - .../UD2.0/morpho_ru_syntagrus.json | 173 - .../UD2.0/morpho_ru_syntagrus_pymorphy.json | 193 - ...orpho_ru_syntagrus_pymorphy_lemmatize.json | 201 - .../morpho_tagger/UD2.0/morpho_tr.json | 174 - deeppavlov/configs/ner/ner_conll2003_pos.json | 189 - deeppavlov/configs/ner/ner_dstc2.json | 126 - deeppavlov/configs/ner/ner_few_shot_ru.json | 104 - .../configs/ner/ner_few_shot_ru_simulate.json | 140 - deeppavlov/configs/ner/ner_kb_rus.json | 164 - .../configs/ner/ner_lcquad_bert_probas.json | 119 - deeppavlov/configs/ner/ner_ontonotes_m1.json | 131 - deeppavlov/configs/ner/ner_rus_bert.json | 135 +- .../configs/ner/ner_rus_bert_torch.json | 155 - deeppavlov/configs/ner/slotfill_dstc2.json | 64 - .../configs/ner/slotfill_dstc2_raw.json | 54 - .../ner/slotfill_simple_dstc2_raw.json | 54 - .../configs/ner/slotfill_simple_rasa_raw.json | 43 - deeppavlov/configs/ner/vlsp2016_full.json | 170 - .../odqa/en_odqa_infer_enwiki20161221.json | 69 - .../ranking/paraphrase_ident_paraphraser.json | 108 - ...paraphrase_ident_paraphraser_interact.json | 121 - .../configs/ranking/ranking_ubuntu_v2_mt.json | 107 - .../ranking_ubuntu_v2_mt_interact.json | 121 - .../brillmoore_kartaslov_ru.json | 82 - .../brillmoore_kartaslov_ru_custom_vocab.json | 84 - .../brillmoore_kartaslov_ru_nolm.json | 77 - .../configs/squad/squad_bert_uncased.json | 103 - deeppavlov/configs/squad/squad_ru_bert.json | 163 +- .../configs/squad/squad_ru_bert_infer.json | 65 +- .../configs/squad/squad_ru_torch_bert.json | 175 - deeppavlov/core/common/registry.json | 9 - .../core/common/requirements_registry.json | 29 +- .../ner_few_shot_iterator.py | 144 - .../snips_intents_iterator.py | 30 - .../dataset_iterators/snips_ner_iterator.py | 42 - deeppavlov/dataset_readers/snips_reader.py | 93 - .../classifiers/ru_obscenity_classifier.py 
| 144 - .../models/morpho_tagger/common_tagger.py | 6 - .../models/morpho_tagger/morpho_tagger.py | 352 -- .../ranking/bilstm_gru_siamese_network.py | 110 - .../models/ranking/mpm_siamese_network.py | 180 - .../models/ranking/siamese_predictor.py | 146 - docs/apiref/models/morpho_tagger.rst | 5 - docs/apiref/models/ranking.rst | 8 - docs/features/models/bert.rst | 4 +- docs/features/models/classifiers.rst | 150 +- docs/features/models/morphotagger.rst | 583 +-- docs/features/models/multitask_bert.rst | 1 - docs/features/models/ner.rst | 83 +- docs/features/models/neural_ranking.rst | 19 +- docs/features/models/slot_filling.rst | 264 -- docs/features/models/spelling_correction.rst | 7 +- docs/features/models/squad.rst | 2 +- docs/features/models/tfidf_ranking.rst | 4 +- docs/features/overview.rst | 169 +- docs/features/skills/go_bot.rst | 381 +- docs/features/skills/odqa.rst | 40 +- docs/index.rst | 1 - docs/integrations/rest_api.rst | 2 +- docs/integrations/socket_api.rst | 2 +- docs/intro/choose_framework.rst | 4 +- docs/intro/configuration.rst | 7 +- docs/intro/quick_start.rst | 6 +- .../Pseudo-labeling for classification.ipynb | 210 - examples/README.md | 19 - examples/classification_tutorial.ipynb | 2961 -------------- examples/gobot_extended_tutorial.ipynb | 1387 ------- examples/gobot_formfilling_tutorial.ipynb | 1412 ------- examples/gobot_md_yaml_configs_tutorial.ipynb | 3490 ----------------- examples/gobot_tutorial.ipynb | 799 ---- examples/img/gobot_database.png | Bin 8149 -> 0 bytes examples/img/gobot_example.png | Bin 586959 -> 0 bytes examples/img/gobot_pipeline.png | Bin 84393 -> 0 bytes examples/img/gobot_policy.png | Bin 23464 -> 0 bytes examples/img/gobot_simple_example.png | Bin 112550 -> 0 bytes examples/img/gobot_simple_pipeline.png | Bin 63816 -> 0 bytes examples/img/gobot_simple_policy.png | Bin 22437 -> 0 bytes examples/img/gobot_simple_templates.png | Bin 5270 -> 0 bytes examples/img/gobot_slotfiller.png | Bin 4756 -> 0 bytes 
examples/img/gobot_templates.png | Bin 6118 -> 0 bytes examples/img/sc_loss_comparison.png | Bin 88085 -> 0 bytes examples/img/sc_ner_lr_cosine.png | Bin 16075 -> 0 bytes examples/img/sc_ner_lr_exponential.png | Bin 15694 -> 0 bytes examples/img/sc_ner_lr_linear.png | Bin 16102 -> 0 bytes examples/img/sc_ner_lr_linear2.png | Bin 16220 -> 0 bytes examples/img/sc_ner_lr_no.png | Bin 12288 -> 0 bytes examples/img/sc_ner_lr_onecycle.png | Bin 23861 -> 0 bytes examples/img/sc_ner_lr_polynomial.png | Bin 16309 -> 0 bytes examples/img/sc_ner_lr_polynomial1.png | Bin 15769 -> 0 bytes examples/img/sc_ner_lr_polynomial2.png | Bin 15410 -> 0 bytes examples/img/sc_ner_lr_sc.png | Bin 36522 -> 0 bytes examples/img/sc_ner_lr_sc1.png | Bin 33542 -> 0 bytes examples/img/sc_ner_lr_trapezoid.png | Bin 22307 -> 0 bytes examples/morphotagger_example.ipynb | 315 -- examples/super_convergence_tutorial.ipynb | 629 --- .../classifiers/intents_snips_bigru.json | 138 - .../classifiers/intents_snips_bilstm.json | 138 - .../intents_snips_bilstm_bilstm.json | 139 - .../classifiers/intents_snips_bilstm_cnn.json | 145 - .../intents_snips_bilstm_proj_layer.json | 140 - ...tents_snips_bilstm_self_add_attention.json | 141 - ...ents_snips_bilstm_self_mult_attention.json | 141 - .../classifiers/intents_snips_cnn_bilstm.json | 145 - tests/test_quick_start.py | 75 +- 148 files changed, 417 insertions(+), 24940 deletions(-) delete mode 100644 deeppavlov/configs/classifiers/insults_kaggle.json delete mode 100644 deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json delete mode 100644 deeppavlov/configs/classifiers/insults_kaggle_conv_bert.json delete mode 100644 deeppavlov/configs/classifiers/intents_dstc2.json delete mode 100644 deeppavlov/configs/classifiers/intents_dstc2_bert.json delete mode 100644 deeppavlov/configs/classifiers/intents_dstc2_big.json delete mode 100644 deeppavlov/configs/classifiers/intents_sample_csv.json delete mode 100644 
deeppavlov/configs/classifiers/intents_sample_json.json delete mode 100644 deeppavlov/configs/classifiers/intents_snips.json delete mode 100644 deeppavlov/configs/classifiers/intents_snips_big.json delete mode 100644 deeppavlov/configs/classifiers/intents_snips_sklearn.json delete mode 100644 deeppavlov/configs/classifiers/intents_snips_tfidf_weighted.json delete mode 100644 deeppavlov/configs/classifiers/relation_prediction_rus.json delete mode 100644 deeppavlov/configs/classifiers/ru_obscenity_classifier.json delete mode 100644 deeppavlov/configs/classifiers/rusentiment_bigru_superconv.json delete mode 100644 deeppavlov/configs/classifiers/rusentiment_cnn.json delete mode 100644 deeppavlov/configs/classifiers/sentiment_imdb_bert.json delete mode 100644 deeppavlov/configs/classifiers/sentiment_imdb_conv_bert.json delete mode 100644 deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json delete mode 100644 deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json delete mode 100644 deeppavlov/configs/classifiers/sentiment_twitter_preproc.json delete mode 100644 deeppavlov/configs/classifiers/sentiment_yelp_conv_bert.json delete mode 100644 deeppavlov/configs/classifiers/sentiment_yelp_multi_bert.json delete mode 100644 deeppavlov/configs/classifiers/sst_torch_swcnn.json delete mode 100644 deeppavlov/configs/classifiers/topic_ag_news.json delete mode 100644 deeppavlov/configs/classifiers/yahoo_convers_vs_info.json delete mode 100644 deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json delete mode 100644 deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json delete mode 100644 deeppavlov/configs/go_bot/database_dstc2.json delete mode 100644 deeppavlov/configs/go_bot/gobot_dstc2.json delete mode 100644 deeppavlov/configs/go_bot/gobot_dstc2_best.json delete mode 100644 deeppavlov/configs/go_bot/gobot_dstc2_best_json_nlg.json delete mode 100644 deeppavlov/configs/go_bot/gobot_simple_dstc2.json delete mode 100644 
deeppavlov/configs/morpho_tagger/UD2.0/morpho_ar.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_cs.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_de.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_es_ancora.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_fr.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_hi.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_hu.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_it.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy_lemmatize.json delete mode 100644 deeppavlov/configs/morpho_tagger/UD2.0/morpho_tr.json delete mode 100644 deeppavlov/configs/ner/ner_conll2003_pos.json delete mode 100644 deeppavlov/configs/ner/ner_dstc2.json delete mode 100644 deeppavlov/configs/ner/ner_few_shot_ru.json delete mode 100644 deeppavlov/configs/ner/ner_few_shot_ru_simulate.json delete mode 100644 deeppavlov/configs/ner/ner_kb_rus.json delete mode 100644 deeppavlov/configs/ner/ner_lcquad_bert_probas.json delete mode 100644 deeppavlov/configs/ner/ner_ontonotes_m1.json delete mode 100644 deeppavlov/configs/ner/ner_rus_bert_torch.json delete mode 100644 deeppavlov/configs/ner/slotfill_dstc2.json delete mode 100644 deeppavlov/configs/ner/slotfill_dstc2_raw.json delete mode 100644 deeppavlov/configs/ner/slotfill_simple_dstc2_raw.json delete mode 100644 deeppavlov/configs/ner/slotfill_simple_rasa_raw.json delete mode 100644 deeppavlov/configs/ner/vlsp2016_full.json delete mode 100644 deeppavlov/configs/odqa/en_odqa_infer_enwiki20161221.json delete mode 100644 
deeppavlov/configs/ranking/paraphrase_ident_paraphraser.json delete mode 100644 deeppavlov/configs/ranking/paraphrase_ident_paraphraser_interact.json delete mode 100644 deeppavlov/configs/ranking/ranking_ubuntu_v2_mt.json delete mode 100644 deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_interact.json delete mode 100644 deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json delete mode 100644 deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json delete mode 100644 deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json delete mode 100644 deeppavlov/configs/squad/squad_bert_uncased.json delete mode 100644 deeppavlov/configs/squad/squad_ru_torch_bert.json delete mode 100644 deeppavlov/dataset_iterators/ner_few_shot_iterator.py delete mode 100644 deeppavlov/dataset_iterators/snips_intents_iterator.py delete mode 100644 deeppavlov/dataset_iterators/snips_ner_iterator.py delete mode 100644 deeppavlov/dataset_readers/snips_reader.py delete mode 100644 deeppavlov/models/classifiers/ru_obscenity_classifier.py delete mode 100644 deeppavlov/models/morpho_tagger/morpho_tagger.py delete mode 100644 deeppavlov/models/ranking/bilstm_gru_siamese_network.py delete mode 100644 deeppavlov/models/ranking/mpm_siamese_network.py delete mode 100644 deeppavlov/models/ranking/siamese_predictor.py delete mode 100644 docs/features/models/slot_filling.rst delete mode 100644 examples/Pseudo-labeling for classification.ipynb delete mode 100644 examples/README.md delete mode 100644 examples/classification_tutorial.ipynb delete mode 100644 examples/gobot_extended_tutorial.ipynb delete mode 100644 examples/gobot_formfilling_tutorial.ipynb delete mode 100644 examples/gobot_md_yaml_configs_tutorial.ipynb delete mode 100644 examples/gobot_tutorial.ipynb delete mode 100644 examples/img/gobot_database.png delete mode 100644 examples/img/gobot_example.png delete mode 100644 examples/img/gobot_pipeline.png delete mode 100644 
examples/img/gobot_policy.png delete mode 100644 examples/img/gobot_simple_example.png delete mode 100644 examples/img/gobot_simple_pipeline.png delete mode 100644 examples/img/gobot_simple_policy.png delete mode 100644 examples/img/gobot_simple_templates.png delete mode 100644 examples/img/gobot_slotfiller.png delete mode 100644 examples/img/gobot_templates.png delete mode 100644 examples/img/sc_loss_comparison.png delete mode 100644 examples/img/sc_ner_lr_cosine.png delete mode 100644 examples/img/sc_ner_lr_exponential.png delete mode 100644 examples/img/sc_ner_lr_linear.png delete mode 100644 examples/img/sc_ner_lr_linear2.png delete mode 100644 examples/img/sc_ner_lr_no.png delete mode 100644 examples/img/sc_ner_lr_onecycle.png delete mode 100644 examples/img/sc_ner_lr_polynomial.png delete mode 100644 examples/img/sc_ner_lr_polynomial1.png delete mode 100644 examples/img/sc_ner_lr_polynomial2.png delete mode 100644 examples/img/sc_ner_lr_sc.png delete mode 100644 examples/img/sc_ner_lr_sc1.png delete mode 100644 examples/img/sc_ner_lr_trapezoid.png delete mode 100644 examples/morphotagger_example.ipynb delete mode 100644 examples/super_convergence_tutorial.ipynb delete mode 100644 tests/test_configs/classifiers/intents_snips_bigru.json delete mode 100644 tests/test_configs/classifiers/intents_snips_bilstm.json delete mode 100644 tests/test_configs/classifiers/intents_snips_bilstm_bilstm.json delete mode 100644 tests/test_configs/classifiers/intents_snips_bilstm_cnn.json delete mode 100644 tests/test_configs/classifiers/intents_snips_bilstm_proj_layer.json delete mode 100644 tests/test_configs/classifiers/intents_snips_bilstm_self_add_attention.json delete mode 100644 tests/test_configs/classifiers/intents_snips_bilstm_self_mult_attention.json delete mode 100644 tests/test_configs/classifiers/intents_snips_cnn_bilstm.json diff --git a/README.md b/README.md index bdb0a4022d..5b967b6a94 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Please leave us 
[your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c **Models** -[Named Entity Recognition](http://docs.deeppavlov.ai/en/master/features/models/ner.html) | [Slot filling](http://docs.deeppavlov.ai/en/master/features/models/slot_filling.html) +[Named Entity Recognition](http://docs.deeppavlov.ai/en/master/features/models/ner.html) [Intent/Sentence Classification](http://docs.deeppavlov.ai/en/master/features/models/classifiers.html) | [Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/master/features/models/squad.html) @@ -132,8 +132,8 @@ python -m deeppavlov install ``` * where `` is path to the chosen model's config file (e.g. - `deeppavlov/configs/ner/slotfill_dstc2.json`) or just name without - *.json* extension (e.g. `slotfill_dstc2`) + `deeppavlov/configs/classifiers/insults_kaggle_bert.json`) or just name without + *.json* extension (e.g. `insults_kaggle_bert`) ### Command line interface (CLI) diff --git a/deeppavlov/configs/classifiers/insults_kaggle.json b/deeppavlov/configs/classifiers/insults_kaggle.json deleted file mode 100644 index 8627589eca..0000000000 --- a/deeppavlov/configs/classifiers/insults_kaggle.json +++ /dev/null @@ -1,155 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "Comment", - "y": "Class", - "data_path": "{DOWNLOADS_PATH}/insults_data" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_prep" - ], - "class_name": "dirty_comments_preprocessor" - }, - { - "in": "x_prep", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - 
"out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wordpunct_tok_reddit_comments_2017_11_300.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.01, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "last_layer_activation": "softmax", - "coef_reg_cnn": 1e-3, - "coef_reg_den": 1e-2, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 1000, - "batch_size": 64, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 5, - "log_every_n_epochs": 5, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_v2" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": 
"http://files.deeppavlov.ai/embeddings/reddit_fastText/wordpunct_tok_reddit_comments_2017_11_300.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_v2.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/insults_kaggle_bert.json b/deeppavlov/configs/classifiers/insults_kaggle_bert.json index d64f2363b4..a9ff62015e 100644 --- a/deeppavlov/configs/classifiers/insults_kaggle_bert.json +++ b/deeppavlov/configs/classifiers/insults_kaggle_bert.json @@ -18,9 +18,9 @@ ], "pipe": [ { - "class_name": "bert_preprocessor", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": false, + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": true, "max_seq_length": 64, "in": [ "x" @@ -37,48 +37,64 @@ ], "save_path": "{MODEL_PATH}/classes.dict", "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" + "in": [ + "y" + ], + "out": [ + "y_ids" + ] }, { - "in": "y_ids", - "out": "y_onehot", + "in": [ + "y_ids" + ], + "out": [ + "y_onehot" + ], "class_name": "one_hotter", "depth": "#classes_vocab.len", "single_vector": true }, { - "class_name": "bert_classifier", + "class_name": "torch_transformers_classifier", "n_classes": "#classes_vocab.len", "return_probas": true, - "one_hot_labels": true, - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/bert_config.json", - "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/bert_model.ckpt", + "pretrained_bert": "{TRANSFORMER}", "save_path": "{MODEL_PATH}/model", "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 1e-05, + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 1e-05 + }, "learning_rate_drop_patience": 5, "learning_rate_drop_div": 2.0, "in": [ "bert_features" ], "in_y": [ - "y_onehot" + "y_ids" ], "out": [ 
"y_pred_probas" ] }, { - "in": "y_pred_probas", - "out": "y_pred_ids", + "in": [ + "y_pred_probas" + ], + "out": [ + "y_pred_ids" + ], "class_name": "proba2labels", "max_proba": true }, { - "in": "y_pred_ids", - "out": "y_pred_labels", + "in": [ + "y_pred_ids" + ], + "out": [ + "y_pred_labels" + ], "ref": "classes_vocab" } ], @@ -109,15 +125,15 @@ "valid", "test" ], - "class_name": "nn_trainer", - "tensorboard_log_dir": "{MODEL_PATH}/" + "class_name": "torch_trainer" }, "metadata": { "variables": { + "TRANSFORMER": "bert-base-uncased", "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_v3" + "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_torch_bert" }, "download": [ { @@ -125,11 +141,7 @@ "subdir": "{DOWNLOADS_PATH}" }, { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip", - "subdir": "{DOWNLOADS_PATH}/bert_models" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_v3.tar.gz", + "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v0.tar.gz", "subdir": "{MODELS_PATH}/classifiers" } ] diff --git a/deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json b/deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json deleted file mode 100644 index a9ff62015e..0000000000 --- a/deeppavlov/configs/classifiers/insults_kaggle_bert_torch.json +++ /dev/null @@ -1,149 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "Comment", - "y": "Class", - "data_path": "{DOWNLOADS_PATH}/insults_data" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "torch_transformers_preprocessor", - "vocab_file": "{TRANSFORMER}", - "do_lower_case": true, - "max_seq_length": 64, - "in": [ - "x" 
- ], - "out": [ - "bert_features" - ] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": [ - "y" - ], - "out": [ - "y_ids" - ] - }, - { - "in": [ - "y_ids" - ], - "out": [ - "y_onehot" - ], - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "torch_transformers_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "pretrained_bert": "{TRANSFORMER}", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "optimizer": "AdamW", - "optimizer_parameters": { - "lr": 1e-05 - }, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 2.0, - "in": [ - "bert_features" - ], - "in_y": [ - "y_ids" - ], - "out": [ - "y_pred_probas" - ] - }, - { - "in": [ - "y_pred_probas" - ], - "out": [ - "y_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_pred_ids" - ], - "out": [ - "y_pred_labels" - ], - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - }, - "accuracy", - "f1_macro" - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "torch_trainer" - }, - "metadata": { - "variables": { - "TRANSFORMER": "bert-base-uncased", - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_torch_bert" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": 
"http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v0.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/insults_kaggle_conv_bert.json b/deeppavlov/configs/classifiers/insults_kaggle_conv_bert.json deleted file mode 100644 index 01f13affca..0000000000 --- a/deeppavlov/configs/classifiers/insults_kaggle_conv_bert.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "Comment", - "y": "Class", - "data_path": "{DOWNLOADS_PATH}/insults_data" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "bert_preprocessor", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": false, - "max_seq_length": 64, - "in": [ - "x" - ], - "out": [ - "bert_features" - ] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": [ - "y" - ], - "out": [ - "y_ids" - ] - }, - { - "in": [ - "y_ids" - ], - "out": [ - "y_onehot" - ], - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "bert_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "one_hot_labels": true, - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_config.json", - "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_model.ckpt", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 1e-05, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 2.0, - "in": [ - "bert_features" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - 
"y_pred_probas" - ] - }, - { - "in": [ - "y_pred_probas" - ], - "out": [ - "y_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_pred_ids" - ], - "out": [ - "y_pred_labels" - ], - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - }, - "accuracy", - "f1_macro" - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer", - "tensorboard_log_dir": "{MODEL_PATH}/" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_v4" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/conversational_cased_L-12_H-768_A-12.tar.gz", - "subdir": "{DOWNLOADS_PATH}/bert_models" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_v4.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_dstc2.json b/deeppavlov/configs/classifiers/intents_dstc2.json deleted file mode 100644 index 828c01d634..0000000000 --- a/deeppavlov/configs/classifiers/intents_dstc2.json +++ /dev/null @@ -1,156 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DOWNLOADS_PATH}/dstc2" - }, - "dataset_iterator": { - "class_name": "dstc2_intents_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": 
"{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids", - "special_tokens": [""] - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/dstc2_fastText_model.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "id": "my_one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "classes": "#classes_vocab.keys()", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 512, - "optimizer": "Adam", - "learning_rate": 0.1, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "coef_reg_cnn": 1e-4, - "coef_reg_den": 1e-4, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "confidence_threshold": 0.5 - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - }, - { - "ref": "my_one_hotter", - "in": "y_pred_ids", - "out": "y_pred_onehot" - } - ], - "out": [ - "y_pred_labels", - "y_pred_probas" - ] - }, - "train": { - "epochs": 1000, - "batch_size": 64, - "metrics": [ - { - "name": "sets_accuracy", - "inputs": [ - "y", - "y_pred_labels" - ] - }, - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 5, - "log_every_n_batches": 100, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - 
], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_dstc2_v10" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/dstc2_fastText_model.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_dstc2_v10.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_dstc2_bert.json b/deeppavlov/configs/classifiers/intents_dstc2_bert.json deleted file mode 100644 index 0143446b53..0000000000 --- a/deeppavlov/configs/classifiers/intents_dstc2_bert.json +++ /dev/null @@ -1,121 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DOWNLOADS_PATH}/dstc2" - }, - "dataset_iterator": { - "class_name": "dstc2_intents_iterator", - "seed": 42 - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": ["y"], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids", - "special_tokens": [""] - }, - { - "class_name": "bert_preprocessor", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": false, - "max_seq_length": 64, - "in": ["x"], - "out": ["bert_features"] - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "id": "my_one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "bert_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "one_hot_labels": true, - "multilabel": true, - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/bert_config.json", - "pretrained_bert": 
"{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/bert_model.ckpt", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 2e-05, - "learning_rate_drop_patience": 3, - "learning_rate_drop_div": 2.0, - "in": ["bert_features"], - "in_y": ["y_onehot"], - "out": ["y_pred_probas"] - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "confidence_threshold": 0.5 - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - }, - { - "ref": "my_one_hotter", - "in": "y_pred_ids", - "out": "y_pred_onehot" - } - ], - "out": ["y_pred_probas", "y_pred_labels"] - }, - "train": { - "metrics": [ - { - "name": "sets_accuracy", - "inputs": ["y", "y_pred_labels"] - }, - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "show_examples": false, - "batch_size": 32, - "pytest_max_batches": 2, - "validation_patience": 10, - "val_every_n_batches": 100, - "log_every_n_batches": 100, - "validate_best": true, - "test_best": true, - "tensorboard_log_dir": "{MODEL_PATH}/logs" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_dstc2_bert_v0" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip", - "subdir": "{DOWNLOADS_PATH}/bert_models" - }, - { - "url": "http://files.deeppavlov.ai/datasets/dstc2_v2.tar.gz", - "subdir": "{DOWNLOADS_PATH}/dstc2" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_dstc2_bert_v0.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_dstc2_big.json b/deeppavlov/configs/classifiers/intents_dstc2_big.json deleted file mode 100644 index d6a458dcab..0000000000 --- a/deeppavlov/configs/classifiers/intents_dstc2_big.json +++ 
/dev/null @@ -1,155 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DOWNLOADS_PATH}/dstc2" - }, - "dataset_iterator": { - "class_name": "dstc2_intents_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids", - "special_tokens": [""] - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "id": "my_one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "classes": "#classes_vocab.keys()", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 512, - "optimizer": "Adam", - "learning_rate": 0.1, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "coef_reg_cnn": 1e-4, - "coef_reg_den": 1e-4, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "confidence_threshold": 0.5 - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - }, - { - "ref": "my_one_hotter", - "in": "y_pred_ids", - "out": "y_pred_onehot" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 1000, - 
"batch_size": 64, - "metrics": [ - { - "name": "sets_accuracy", - "inputs": [ - "y", - "y_pred_labels" - ] - }, - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 5, - "log_every_n_batches": 100, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_dstc2_v11" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_dstc2_v11.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_sample_csv.json b/deeppavlov/configs/classifiers/intents_sample_csv.json deleted file mode 100644 index 4b01a2d301..0000000000 --- a/deeppavlov/configs/classifiers/intents_sample_csv.json +++ /dev/null @@ -1,160 +0,0 @@ -{ - "dataset": { - "type": "classification", - "format": "csv", - "sep": ",", - "header": 0, - "names": [ - "text", - "classes" - ], - "class_sep": ",", - "train": "sample.csv", - "data_path": "{DOWNLOADS_PATH}/sample", - "x": "text", - "y": "classes", - "url": "http://files.deeppavlov.ai/datasets/snips_intents/train.csv", - "seed": 42, - "field_to_split": "train", - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - 
"class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/dstc2_fastText_model.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 1, - 2, - 3 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.01, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "coef_reg_cnn": 1e-4, - "coef_reg_den": 1e-4, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_v9" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/snips_intents/train.csv", - "subdir": "{DOWNLOADS_PATH}/sample" - }, - { - "url": 
"http://files.deeppavlov.ai/deeppavlov_data/embeddings/dstc2_fastText_model.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_v9.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_sample_json.json b/deeppavlov/configs/classifiers/intents_sample_json.json deleted file mode 100644 index b87d3274be..0000000000 --- a/deeppavlov/configs/classifiers/intents_sample_json.json +++ /dev/null @@ -1,155 +0,0 @@ -{ - "dataset": { - "type": "classification", - "format": "json", - "orient": "records", - "lines": true, - "data_path": "{DOWNLOADS_PATH}/sample", - "train": "sample.json", - "x": "text", - "y": "intents", - "url": "http://files.deeppavlov.ai/datasets/snips_intents/train.json", - "seed": 42, - "field_to_split": "train", - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/dstc2_fastText_model.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": 
"#classes_vocab.len", - "kernel_sizes_cnn": [ - 1, - 2, - 3 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.01, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "coef_reg_cnn": 1e-4, - "coef_reg_den": 1e-4, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_v9" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/snips_intents/train.json", - "subdir": "{DOWNLOADS_PATH}/sample" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/dstc2_fastText_model.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_v9.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_snips.json b/deeppavlov/configs/classifiers/intents_snips.json deleted file mode 100644 index 5f0aa89cd0..0000000000 --- a/deeppavlov/configs/classifiers/intents_snips.json +++ /dev/null @@ -1,141 +0,0 @@ -{ - "dataset_reader": { - "class_name": "snips_reader", - "x": "text", - "y": "intents", - "data_path": "{DOWNLOADS_PATH}/snips" - }, - "dataset_iterator": { - "class_name": 
"snips_intents_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "level": "token", - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/dstc2_fastText_model.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 1, - 2, - 3 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.01, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "coef_reg_cnn": 1e-4, - "coef_reg_den": 1e-4, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 1000, - "batch_size": 64, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 5, - "log_every_n_epochs": 5, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid" - ], - "class_name": "nn_trainer" - 
}, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_v9" - }, - "download": [ -{ - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/dstc2_fastText_model.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_v9.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_snips_big.json b/deeppavlov/configs/classifiers/intents_snips_big.json deleted file mode 100644 index 15b5adc648..0000000000 --- a/deeppavlov/configs/classifiers/intents_snips_big.json +++ /dev/null @@ -1,141 +0,0 @@ -{ - "dataset_reader": { - "class_name": "snips_reader", - "x": "text", - "y": "intents", - "data_path": "{DOWNLOADS_PATH}/snips" - }, - "dataset_iterator": { - "class_name": "snips_intents_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "level": "token", - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - 
"embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.01, - "learning_rate_decay": 0.1, - "loss": "categorical_crossentropy", - "coef_reg_cnn": 1e-4, - "coef_reg_den": 1e-4, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 1000, - "batch_size": 64, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_v10" - }, - "download": [ -{ - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_v10.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_snips_sklearn.json b/deeppavlov/configs/classifiers/intents_snips_sklearn.json deleted file mode 100644 index 7847aa3e15..0000000000 --- a/deeppavlov/configs/classifiers/intents_snips_sklearn.json +++ /dev/null @@ -1,164 +0,0 @@ -{ - "dataset_reader": { - "class_name": "snips_reader", - "x": "text", - "y": "intents", - "data_path": "{DOWNLOADS_PATH}/snips" - }, - "dataset_iterator": { - "class_name": "snips_intents_iterator", - "seed": 42 - }, - 
"chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_vec" - ], - "fit_on": [ - "x", - "y_ids" - ], - "id": "tfidf_vec", - "class_name": "sklearn_component", - "save_path": "{MODEL_PATH}/tfidf.pkl", - "load_path": "{MODEL_PATH}/tfidf.pkl", - "model_class": "sklearn.feature_extraction.text:TfidfVectorizer", - "infer_method": "transform", - "lowercase": true, - "analyzer": "word", - "ngram_range": [ - 1, - 5 - ], - "max_features": 10000, - "norm": null - }, - { - "in": [ - "x_vec" - ], - "out": [ - "x_sel" - ], - "fit_on": [ - "x_vec", - "y_ids" - ], - "id": "selector", - "class_name": "sklearn_component", - "save_path": "{MODEL_PATH}/selectkbest.pkl", - "load_path": "{MODEL_PATH}/selectkbest.pkl", - "model_class": "sklearn.feature_selection:SelectKBest", - "infer_method": "transform", - "score_func": "sklearn.feature_selection:chi2", - "k": 1000 - }, - { - "in": [ - "x_sel" - ], - "out": [ - "x_pca" - ], - "fit_on": [ - "x_sel" - ], - "id": "pca", - "class_name": "sklearn_component", - "save_path": "{MODEL_PATH}/pca.pkl", - "load_path": "{MODEL_PATH}/pca.pkl", - "model_class": "sklearn.decomposition:PCA", - "infer_method": "transform", - "n_components": 300 - }, - { - "class_name": "one_hotter", - "id": "onehotter", - "depth": "#classes_vocab.len", - "in": "y_ids", - "out": "y_onehot", - "single_vector": true - }, - { - "in": [ - "x_pca" - ], - "out": [ - "y_pred_onehot" - ], - "fit_on": [ - "x_pca", - "y_onehot" - ], - "class_name": "sklearn_component", - "main": true, - "save_path": "{MODEL_PATH}/model.pkl", - "load_path": "{MODEL_PATH}/model.pkl", - "model_class": "sklearn.neighbors:KNeighborsClassifier", - "infer_method": "predict", - "ensure_list_output": true - }, - { - "class_name": "proba2labels", 
- "in": "y_pred_onehot", - "out": "y_pred_ids", - "max_proba": true - }, - { - "ref": "classes_vocab", - "in": "y_pred_ids", - "out": "y_pred_labels" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "batch_size": 64, - "metrics": [ - "accuracy" - ], - "show_examples": false, - "evaluation_targets": [ - "train", - "valid" - ], - "class_name": "fit_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_sklearn_v11" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_sklearn_v11.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/intents_snips_tfidf_weighted.json b/deeppavlov/configs/classifiers/intents_snips_tfidf_weighted.json deleted file mode 100644 index b7f4e70712..0000000000 --- a/deeppavlov/configs/classifiers/intents_snips_tfidf_weighted.json +++ /dev/null @@ -1,182 +0,0 @@ -{ - "dataset_reader": { - "class_name": "snips_reader", - "x": "text", - "y": "intents", - "data_path": "{DOWNLOADS_PATH}/snips" - }, - "dataset_iterator": { - "class_name": "snips_intents_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_vec" - ], - "fit_on": [ - "x", - "y_ids" - ], - "id": "my_tfidf_vectorizer", - "class_name": "sklearn_component", - "save_path": "{MODEL_PATH}/tfidf.pkl", - "load_path": "{MODEL_PATH}/tfidf.pkl", - "model_class": "sklearn.feature_extraction.text:TfidfVectorizer", - "infer_method": "transform", - "lowercase": true, - "analyzer": "word" - }, - { - "in": [ - "x_vec" - ], - "out": [ 
- "x_sel" - ], - "fit_on": [ - "x_vec", - "y_ids" - ], - "id": "my_selector", - "class_name": "sklearn_component", - "save_path": "{MODEL_PATH}/selectkbest.pkl", - "load_path": "{MODEL_PATH}/selectkbest.pkl", - "model_class": "sklearn.feature_selection:SelectKBest", - "infer_method": "transform", - "score_func": "sklearn.feature_selection:chi2", - "k": 1000 - }, - { - "in": [ - "x_sel" - ], - "out": [ - "x_pca" - ], - "fit_on": [ - "x_sel" - ], - "id": "my_pca", - "class_name": "sklearn_component", - "save_path": "{MODEL_PATH}/pca.pkl", - "load_path": "{MODEL_PATH}/pca.pkl", - "model_class": "sklearn.decomposition:PCA", - "infer_method": "transform", - "n_components": 300 - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_moses_tokenizer" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin", - "dim": 300 - }, - { - "class_name": "one_hotter", - "id": "my_onehotter", - "depth": "#classes_vocab.len", - "in": "y_ids", - "out": "y_onehot", - "single_vector": true - }, - { - "in": "x_tok", - "out": "x_weighted_emb", - "class_name": "tfidf_weighted", - "id": "my_weighted_embedder", - "embedder": "#my_embedder", - "tokenizer": "#my_tokenizer", - "vectorizer": "#my_tfidf_vectorizer", - "mean": true - }, - { - "in": [ - "x_pca", - "x_weighted_emb" - ], - "out": [ - "y_pred_ids" - ], - "fit_on": [ - "x_pca", - "x_weighted_emb", - "y_ids" - ], - "class_name": "sklearn_component", - "main": true, - "save_path": "{MODEL_PATH}/model.pkl", - "load_path": "{MODEL_PATH}/model.pkl", - "model_class": "sklearn.linear_model:LogisticRegression", - "infer_method": "predict", - "ensure_list_output": true - }, - { - "ref": "classes_vocab", - "in": "y_pred_ids", - "out": "y_pred_labels" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "batch_size": 64, - "metrics": [ - "accuracy" - ], - "show_examples": false, - "evaluation_targets": [ - "train", 
- "valid" - ], - "class_name": "fit_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/intents_snips_sklearn_v12" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_sklearn_v12.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/relation_prediction_rus.json b/deeppavlov/configs/classifiers/relation_prediction_rus.json deleted file mode 100644 index 24f16cc159..0000000000 --- a/deeppavlov/configs/classifiers/relation_prediction_rus.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "Question", - "y": "Class", - "data_path": "{DOWNLOADS_PATH}/relation_prediction" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42 - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": ["y"], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": ["x_tok"], - "class_name": "str_lower", - "out": ["x_lower"] - }, - { - "in": "x_lower", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/ft_native_300_ru_wiki_lenta_nltk_word_tokenize.bin" - }, - { - "in": "y_ids", - "out": ["y_onehot"], - "class_name": "one_hotter", - "depth": "#classes_vocab.len" - }, - { - "in": ["x_emb"], - "in_y": ["y_onehot"], - "out": ["y_pred_probas"], - "main": true, - 
"class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [1, 2, 3], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.05, - "learning_rate_decay": 0.1, - "loss": "categorical_crossentropy", - "last_layer_activation": "softmax", - "text_size": 36, - "coef_reg_cnn": 1e-3, - "coef_reg_den": 1e-2, - "dropout_rate": 0.5, - "dense_size": 300, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "top_n": 5 - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": ["y_pred_probas", "y_pred_labels"] - }, - "train": { - "epochs": 1000, - "batch_size": 64, - "metrics": [ - "sets_accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 5, - "log_every_n_epochs": 5, - "show_examples": false, - "evaluation_targets": ["train", "valid", "test"], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/kbqa_mix_lowercase/relation_prediction" - }, - "labels": { - "telegram_utils": "IntentModel", - "server_utils": "KerasIntentModel" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/relation_prediction_rus.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_wiki_lenta_nltk_word_tokenize/ft_native_300_ru_wiki_lenta_nltk_word_tokenize.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/relation_prediction_rus.tar.gz", - "subdir": "{MODELS_PATH}/kbqa_mix_lowercase/relation_prediction" - } - ] - } -} diff --git 
a/deeppavlov/configs/classifiers/ru_obscenity_classifier.json b/deeppavlov/configs/classifiers/ru_obscenity_classifier.json deleted file mode 100644 index 2344a71b87..0000000000 --- a/deeppavlov/configs/classifiers/ru_obscenity_classifier.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "chainer": { - "in": [ - "text" - ], - "pipe": [ - { - "class_name": "ru_obscenity_classifier", - "data_path": "{DOWNLOADS_PATH}/obscenity_dataset/", - "in": "text", - "out": "flags_obscenity_or_not" - } - ], - "out": [ - "flags_obscenity_or_not" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/models/obscenity_classifier/ru_obscenity_dataset.zip", - "subdir": "{DOWNLOADS_PATH}/obscenity_dataset" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/classifiers/rusentiment_bigru_superconv.json b/deeppavlov/configs/classifiers/rusentiment_bigru_superconv.json deleted file mode 100644 index ceff4b647a..0000000000 --- a/deeppavlov/configs/classifiers/rusentiment_bigru_superconv.json +++ /dev/null @@ -1,165 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "text", - "y": "label", - "data_path": "{DOWNLOADS_PATH}/rusentiment/", - "train": "rusentiment_random_posts.csv", - "test": "rusentiment_test.csv" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42, - "field_to_split": "train", - "split_seed": 23, - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_prep" - ], - "class_name": "dirty_comments_preprocessor", - 
"remove_punctuation": false - }, - { - "in": "x_prep", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/ft_native_300_ru_twitter_nltk_word_tokenize.bin", - "dim": 300, - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "units_gru": 256, - "optimizer": "Adam", - "learning_rate": 0.001, - "learning_rate_decay": "trapezoid", - "learning_rate_decay_batches": 10000, - "fit_batch_size": 64, - "fit_on": ["x_emb", "y_onehot"], - "momentum": [0.95, 0.55], - "momentum_decay": "trapezoid", - "momentum_decay_batches": 10000, - "loss": "categorical_crossentropy", - "last_layer_activation": "softmax", - "coef_reg_gru": 1e-6, - "coef_reg_den": 1e-6, - "dropout_rate": 0.2, - "rec_dropout_rate": 0.2, - "dense_size": 100, - "model_name": "bigru_with_max_aver_pool_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "f1_weighted", - "f1_macro", - "accuracy", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "validate_best": true, - "test_best": true, - "tensorboard_log_dir": 
"{MODEL_PATH}/logs" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_v14" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_twitter_nltk_word_tokenize.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_v14.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/rusentiment_cnn.json b/deeppavlov/configs/classifiers/rusentiment_cnn.json deleted file mode 100644 index 0706d803e7..0000000000 --- a/deeppavlov/configs/classifiers/rusentiment_cnn.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "text", - "y": "label", - "data_path": "{DOWNLOADS_PATH}/rusentiment/", - "train": "rusentiment_random_posts.csv", - "test": "rusentiment_test.csv" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42, - "field_to_split": "train", - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_prep" - ], - "class_name": "dirty_comments_preprocessor" - }, - { - "in": "x_prep", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize.bin", - "pad_zero": true - }, - { 
- "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": [0.01, 1e-4], - "learning_rate_decay": "exponential", - "learning_rate_decay_batches": 5000, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 5.0, - "loss": "binary_crossentropy", - "last_layer_activation": "softmax", - "coef_reg_cnn": 1e-3, - "coef_reg_den": 1e-2, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "f1_weighted", - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "tensorboard_log_dir": "{MODEL_PATH}/logs", - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_v3" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize.bin", - "subdir": 
"{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_v3.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/sentiment_imdb_bert.json b/deeppavlov/configs/classifiers/sentiment_imdb_bert.json deleted file mode 100644 index 8e62aefe8c..0000000000 --- a/deeppavlov/configs/classifiers/sentiment_imdb_bert.json +++ /dev/null @@ -1,142 +0,0 @@ -{ - "dataset_reader": { - "class_name": "imdb_reader", - "data_path": "{DOWNLOADS_PATH}/aclImdb" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42, - "split_seed": 23, - "field_to_split": "train", - "stratify": true, - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "bert_preprocessor", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": false, - "max_seq_length": 450, - "in": [ - "x" - ], - "out": [ - "bert_features" - ] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "bert_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "one_hot_labels": true, - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/bert_config.json", - "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12/bert_model.ckpt", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 1e-05, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 2.0, - "in": [ - "bert_features" - ], - "in_y": [ 
- "y_onehot" - ], - "out": [ - "y_pred_probas" - ] - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "batch_size": 8, - "epochs": 100, - "metrics": [ - "f1_weighted", - "f1_macro", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - } - ], - "show_examples": false, - "pytest_max_batches": 2, - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "tensorboard_log_dir": "{MODEL_PATH}/" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_imdb_bert_v0/" - }, - "labels": { - "telegram_utils": "IntentModel", - "server_utils": "KerasIntentModel" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip", - "subdir": "{DOWNLOADS_PATH}/bert_models" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/sentiment_imdb_conv_bert.json b/deeppavlov/configs/classifiers/sentiment_imdb_conv_bert.json deleted file mode 100644 index 4e1a1287b5..0000000000 --- a/deeppavlov/configs/classifiers/sentiment_imdb_conv_bert.json +++ /dev/null @@ -1,142 +0,0 @@ -{ - "dataset_reader": { - "class_name": "imdb_reader", - "data_path": "{DOWNLOADS_PATH}/aclImdb" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42, - "split_seed": 23, - "field_to_split": "train", - "stratify": true, - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "bert_preprocessor", - "vocab_file": 
"{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": false, - "max_seq_length": 450, - "in": [ - "x" - ], - "out": [ - "bert_features" - ] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "bert_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "one_hot_labels": true, - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_config.json", - "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_model.ckpt", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 1e-05, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 2.0, - "in": [ - "bert_features" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ] - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "batch_size": 8, - "epochs": 100, - "metrics": [ - "f1_weighted", - "f1_macro", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - } - ], - "show_examples": false, - "pytest_max_batches": 2, - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "tensorboard_log_dir": "{MODEL_PATH}/" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": 
"{MODELS_PATH}/classifiers/sentiment_imdb_conv_bert_v0/" - }, - "labels": { - "telegram_utils": "IntentModel", - "server_utils": "KerasIntentModel" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/conversational_cased_L-12_H-768_A-12.tar.gz", - "subdir": "{DOWNLOADS_PATH}/bert_models" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json b/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json deleted file mode 100644 index 95a46ad544..0000000000 --- a/deeppavlov/configs/classifiers/sentiment_sst_multi_bert.json +++ /dev/null @@ -1,135 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "text", - "y": "fine_grained_label", - "data_path": "{DOWNLOADS_PATH}/stanfordSentimentTreebank", - "train": "train_fine_grained.csv", - "valid": "valid_fine_grained.csv", - "test": "test_fine_grained.csv" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "bert_preprocessor", - "vocab_file": "{MODEL_PATH}/vocab.txt", - "do_lower_case": false, - "max_seq_length": 64, - "in": [ - "x" - ], - "out": [ - "bert_features" - ] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "bert_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "one_hot_labels": true, - "bert_config_file": "{MODEL_PATH}/bert_config.json", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 1e-05, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 2.0, - "in": [ - 
"bert_features" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ] - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "accuracy", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - }, - "f1_macro" - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer", - "tensorboard_log_dir": "{MODEL_PATH}/" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_sst_bert_v1" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/stanfordSentimentTreebank.zip", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/sentiment_sst_bert_v1.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json b/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json deleted file mode 100644 index e7b835e9a1..0000000000 --- a/deeppavlov/configs/classifiers/sentiment_twitter_bert_emb.json +++ /dev/null @@ -1,144 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "Twit", - "y": "Class", - "data_path": "{DOWNLOADS_PATH}/sentiment_twitter_data" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": 
"{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "class_name": "transformers_bert_preprocessor", - "vocab_file": "{BERT_PATH}/vocab.txt", - "do_lower_case": false, - "max_seq_length": 512, - "in": ["x"], - "out": ["tokens", "subword_tokens", "subword_tok_ids", "startofword_markers", "attention_mask"] - }, - { - "class_name": "transformers_bert_embedder", - "id": "my_embedder", - "bert_config_path": "{BERT_PATH}/config.json", - "truncate": false, - "load_path": "{BERT_PATH}", - "in": ["subword_tok_ids", "startofword_markers", "attention_mask"], - "out": ["word_emb", "subword_emb", "max_emb", "mean_emb", "pooler_output"] - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "word_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.01, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "last_layer_activation": "softmax", - "coef_reg_cnn": 1e-3, - "coef_reg_den": 1e-2, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - 
"evaluation_targets": [ - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_bert_emb", - "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_pt_v1" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/sentiment_twitter_data.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_pt_v1.tar.gz", - "subdir": "{DOWNLOADS_PATH}/bert_models" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/sentiment_twitter_preproc.json b/deeppavlov/configs/classifiers/sentiment_twitter_preproc.json deleted file mode 100644 index 2d6a250958..0000000000 --- a/deeppavlov/configs/classifiers/sentiment_twitter_preproc.json +++ /dev/null @@ -1,159 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "Twit", - "y": "Class", - "data_path": "{DOWNLOADS_PATH}/sentiment_twitter_data" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_prep" - ], - "class_name": "dirty_comments_preprocessor", - "delete_smile_brackets": true - }, - { - "in": "x_prep", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize.bin", - "pad_zero": 
true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.01, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "last_layer_activation": "softmax", - "coef_reg_cnn": 1e-3, - "coef_reg_den": 1e-2, - "dropout_rate": 0.5, - "dense_size": 100, - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_v7" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/sentiment_twitter_data.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": 
"http://files.deeppavlov.ai/deeppavlov_data/classifiers/sentiment_twitter_v7.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/sentiment_yelp_conv_bert.json b/deeppavlov/configs/classifiers/sentiment_yelp_conv_bert.json deleted file mode 100644 index f1b1a40561..0000000000 --- a/deeppavlov/configs/classifiers/sentiment_yelp_conv_bert.json +++ /dev/null @@ -1,149 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "text", - "y": "label", - "data_path": "{DOWNLOADS_PATH}/yelp_review_full_csv", - "train": "train.csv", - "test": "test.csv", - "header": null, - "names": [ - "label", - "text" - ] - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42, - "split_seed": 23, - "field_to_split": "train", - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "bert_preprocessor", - "vocab_file": "{MODEL_PATH}/vocab.txt", - "do_lower_case": false, - "max_seq_length": 256, - "in": [ - "x" - ], - "out": [ - "bert_features" - ] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "bert_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "one_hot_labels": true, - "bert_config_file": "{MODEL_PATH}/bert_config.json", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 1e-05, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 2.0, - "in": [ - "bert_features" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ] 
- }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 16, - "metrics": [ - "accuracy", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - }, - "f1_macro" - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer", - "tensorboard_log_dir": "{MODEL_PATH}/" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_yelp_bert_v2" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/yelp_review_full_csv.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/sentiment_yelp_bert_v2.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/sentiment_yelp_multi_bert.json b/deeppavlov/configs/classifiers/sentiment_yelp_multi_bert.json deleted file mode 100644 index d18dab7b05..0000000000 --- a/deeppavlov/configs/classifiers/sentiment_yelp_multi_bert.json +++ /dev/null @@ -1,149 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "text", - "y": "label", - "data_path": "{DOWNLOADS_PATH}/yelp_review_full_csv", - "train": "train.csv", - "test": "test.csv", - "header": null, - "names": [ - "label", - "text" - ] - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42, - "split_seed": 23, - "field_to_split": "train", - "split_fields": [ - "train", - "valid" - ], - "split_proportions": [ - 0.9, - 0.1 - ] - }, - "chainer": { - "in": [ - "x" - ], - 
"in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "bert_preprocessor", - "vocab_file": "{MODEL_PATH}/vocab.txt", - "do_lower_case": false, - "max_seq_length": 200, - "in": [ - "x" - ], - "out": [ - "bert_features" - ] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "bert_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "one_hot_labels": true, - "bert_config_file": "{MODEL_PATH}/bert_config.json", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 1e-05, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 2.0, - "in": [ - "bert_features" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ] - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 16, - "metrics": [ - "accuracy", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - }, - "f1_macro" - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer", - "tensorboard_log_dir": "{MODEL_PATH}/" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_yelp_bert_v1" - }, - "download": [ - { - "url": 
"http://files.deeppavlov.ai/datasets/yelp_review_full_csv.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/sentiment_yelp_bert_v1.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/sst_torch_swcnn.json b/deeppavlov/configs/classifiers/sst_torch_swcnn.json deleted file mode 100644 index 9709d333fd..0000000000 --- a/deeppavlov/configs/classifiers/sst_torch_swcnn.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "dataset_reader": { - "class_name": "torchtext_classification_data_reader", - "data_path": "{DOWNLOADS_PATH}", - "dataset_title": "SST" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator", - "seed": 42 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "level": "token", - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": "x", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_ids" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "torch_text_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "model_name": "cnn_model", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 128, - "dropout_rate": 0.5, - "dense_size": 64, - "optimizer": "SGD", - "optimizer_parameters": { - 
"lr": 0.0001, - "momentum": 0.9, - "weight_decay": 0.0001 - }, - "lr_scheduler": "CyclicLR", - "lr_scheduler_parameters": { - "base_lr": 0.0001, - "max_lr": 0.001 - }, - "loss": "CrossEntropyLoss" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "accuracy", - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid" - ], - "class_name": "torch_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/sst_torch_v0" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/sst_torch_v0.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/topic_ag_news.json b/deeppavlov/configs/classifiers/topic_ag_news.json deleted file mode 100644 index 0e56578b55..0000000000 --- a/deeppavlov/configs/classifiers/topic_ag_news.json +++ /dev/null @@ -1,154 +0,0 @@ -{ - "dataset_reader": { - "class_name": "basic_classification_reader", - "x": "text", - "y": "label", - "data_path": "{DOWNLOADS_PATH}/ag_news_data" - }, - "dataset_iterator": { - "class_name": "basic_classification_iterator" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - 
"load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_lower" - ], - "class_name": "str_lower" - }, - { - "in": "x_lower", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_tokenizer", - "tokenizer": "wordpunct_tokenize" - }, - { - "in": "x_tok", - "out": "x_emb", - "id": "my_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": "#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "kernel_sizes_cnn": [ - 3, - 5, - 7 - ], - "filters_cnn": 256, - "optimizer": "Adam", - "learning_rate": 0.01, - "learning_rate_decay": 0.1, - "loss": "binary_crossentropy", - "coef_reg_cnn": 1e-4, - "coef_reg_den": 1e-4, - "dropout_rate": 0.5, - "dense_size": 100, - "last_layer_activation": "softmax", - "model_name": "cnn_model" - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - "accuracy", - "f1_macro", - { - "name": "roc_auc", - "inputs": ["y_onehot", "y_pred_probas"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": 
"{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/topic_ag_news_v3" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/ag_news_data.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/topic_ag_news_v3.tar.gz", - "subdir": "{MODELS_PATH}/classifiers" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/yahoo_convers_vs_info.json b/deeppavlov/configs/classifiers/yahoo_convers_vs_info.json deleted file mode 100644 index ec1cd427b0..0000000000 --- a/deeppavlov/configs/classifiers/yahoo_convers_vs_info.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": "y", - "out": "y_ids" - }, - { - "in": [ - "x" - ], - "out": [ - "x_prep" - ], - "class_name": "dirty_comments_preprocessor", - "remove_punctuation": false - }, - { - "in": "x_prep", - "out": "x_tok", - "id": "my_tokenizer", - "class_name": "nltk_moses_tokenizer" - }, - { - "in": [ - "x_tok" - ], - "out": [ - "x_emb" - ], - "id": "my_embedder", - "class_name": "elmo_embedder", - "elmo_output_names": [ - "elmo" - ], - "mini_batch_size": 32, - "spec": "{DOWNLOADS_PATH}/embeddings/yahooo-sber-questions_epoches_n_15/", - "pad_zero": true - }, - { - "in": "y_ids", - "out": "y_onehot", - "class_name": "one_hotter", - "id": "my_one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "in": [ - "x_emb" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ], - "main": true, - "class_name": "keras_classification_model", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embedding_size": 
"#my_embedder.dim", - "n_classes": "#classes_vocab.len", - "units_gru": 512, - "optimizer": "Adam", - "learning_rate": 0.001, - "learning_rate_decay": 0.001, - "loss": "categorical_crossentropy", - "coef_reg_gru": 1e-4, - "coef_reg_den": 1e-4, - "dropout_rate": 0.5, - "rec_dropout_rate": 0.5, - "dense_size": 100, - "model_name": "bigru_with_max_aver_pool_model", - "last_layer_activation": "softmax", - "restore_lr": false - }, - { - "in": "y_pred_probas", - "out": "y_pred_ids", - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": "y_pred_ids", - "out": "y_pred_labels", - "ref": "classes_vocab" - }, - { - "ref": "my_one_hotter", - "in": "y_pred_ids", - "out": "y_pred_onehot" - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 32, - "metrics": [ - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - "y_pred_probas" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_pred_labels" - ] - }, - { - "name": "f1_macro", - "inputs": [ - "y", - "y_pred_labels" - ] - } - ], - "validation_patience": 20, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": true, - "evaluation_targets": [ - "train", - "valid" - ], - "tensorboard_log_dir": "{MODEL_PATH}/", - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/yahoo_convers_vs_info_v2" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/embeddings/yahooo-sber-questions_epoches_n_15.tar.gz", - "subdir": "{DOWNLOADS_PATH}/embeddings/yahooo-sber-questions_epoches_n_15/" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/yahoo_convers_vs_info_v2.tar.gz", - "subdir": "{MODELS_PATH}/classifiers/" - } - ] - } -} diff --git a/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json b/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json 
deleted file mode 100644 index 7b0e79994d..0000000000 --- a/deeppavlov/configs/classifiers/yahoo_convers_vs_info_bert.json +++ /dev/null @@ -1,160 +0,0 @@ -{ - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "bert_preprocessor", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": false, - "max_seq_length": 64, - "in": [ - "x" - ], - "out": [ - "bert_features" - ] - }, - { - "id": "classes_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "save_path": "{MODEL_PATH}/classes.dict", - "load_path": "{MODEL_PATH}/classes.dict", - "in": [ - "y" - ], - "out": [ - "y_ids" - ] - }, - { - "in": [ - "y_ids" - ], - "out": [ - "y_onehot" - ], - "class_name": "one_hotter", - "id": "my_one_hotter", - "depth": "#classes_vocab.len", - "single_vector": true - }, - { - "class_name": "bert_classifier", - "n_classes": "#classes_vocab.len", - "return_probas": true, - "one_hot_labels": true, - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_config.json", - "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/conversational_cased_L-12_H-768_A-12/bert_model.ckpt", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "keep_prob": 0.5, - "learning_rate": 1e-05, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 2.0, - "in": [ - "bert_features" - ], - "in_y": [ - "y_onehot" - ], - "out": [ - "y_pred_probas" - ] - }, - { - "in": [ - "y_pred_probas" - ], - "out": [ - "y_pred_ids" - ], - "class_name": "proba2labels", - "max_proba": true - }, - { - "in": [ - "y_pred_ids" - ], - "out": [ - "y_pred_labels" - ], - "ref": "classes_vocab" - }, - { - "ref": "my_one_hotter", - "in": [ - "y_pred_ids" - ], - "out": [ - "y_pred_onehot" - ] - } - ], - "out": [ - "y_pred_labels" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - { - "name": "roc_auc", - "inputs": [ - "y_onehot", - 
"y_pred_probas" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_pred_labels" - ] - }, - { - "name": "f1_macro", - "inputs": [ - "y", - "y_pred_labels" - ] - } - ], - "validation_patience": 20, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "evaluation_targets": [ - "train", - "valid" - ], - "tensorboard_log_dir": "{MODEL_PATH}/", - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/yahoo_convers_vs_info_v3" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/conversational_cased_L-12_H-768_A-12.tar.gz", - "subdir": "{DOWNLOADS_PATH}/bert_models" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/yahoo_convers_vs_info_v3.tar.gz", - "subdir": "{MODELS_PATH}/classifiers/" - } - ] - } -} diff --git a/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json b/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json deleted file mode 100644 index 4fe8f63780..0000000000 --- a/deeppavlov/configs/doc_retrieval/en_ranker_tfidf_enwiki20161221.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "dataset_reader": { - "class_name": "odqa_reader", - "data_path": "{DOWNLOADS_PATH}/odqa/enwiki20161221", - "save_path": "{DOWNLOADS_PATH}/odqa/enwiki20161221.db", - "dataset_format": "wiki" - }, - "dataset_iterator": { - "class_name": "sqlite_iterator", - "shuffle": false, - "load_path": "{DOWNLOADS_PATH}/odqa/enwiki20161221.db" - }, - "chainer": { - "in": [ - "docs" - ], - "in_y": [ - "doc_ids", - "doc_nums" - ], - "out": [ - "tfidf_doc_ids" - ], - "pipe": [ - { - "class_name": "hashing_tfidf_vectorizer", - "id": "vectorizer", - "fit_on": [ - "docs", - "doc_ids", - "doc_nums" - ], - "save_path": "{MODELS_PATH}/odqa/enwiki20161221_tfidf_matrix.npz", - "load_path": 
"{MODELS_PATH}/odqa/enwiki20161221_tfidf_matrix.npz", - "tokenizer": { - "class_name": "stream_spacy_tokenizer", - "lemmas": true, - "ngram_range": [ - 1, - 2 - ] - } - }, - { - "class_name": "tfidf_ranker", - "top_n": 25, - "in": [ - "docs" - ], - "out": [ - "tfidf_doc_ids", - "tfidf_doc_scores" - ], - "vectorizer": "#vectorizer" - } - ] - }, - "train": { - "batch_size": 10000, - "evaluation_targets": [], - "class_name": "fit_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/wikipedia/enwiki20161221.tar.gz", - "subdir": "{DOWNLOADS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/en_odqa_enwiki20161221.tar.gz", - "subdir": "{MODELS_PATH}" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/go_bot/database_dstc2.json b/deeppavlov/configs/go_bot/database_dstc2.json deleted file mode 100644 index bcb153a6fd..0000000000 --- a/deeppavlov/configs/go_bot/database_dstc2.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DOWNLOADS_PATH}/dstc2_v3" - }, - "dataset_iterator": { - "class_name": "dialog_db_result_iterator" - }, - "chainer": { - "in": ["db_result"], - "in_y": [], - "out": [], - "pipe": [ - { - "id": "restaurant_database", - "class_name": "sqlite_database", - "fit_on": ["db_result"], - "table_name": "mytable", - "primary_keys": ["name"], - "save_path": "{DOWNLOADS_PATH}/dstc2_v3/resto.sqlite" - } - ] - }, - "train": { - "class_name": "fit_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/dstc2_v3.tar.gz", - "subdir": "{DOWNLOADS_PATH}/dstc2_v3" - } - ] - 
} -} diff --git a/deeppavlov/configs/go_bot/gobot_dstc2.json b/deeppavlov/configs/go_bot/gobot_dstc2.json deleted file mode 100644 index 2611af6f05..0000000000 --- a/deeppavlov/configs/go_bot/gobot_dstc2.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DATA_PATH}" - }, - "dataset_iterator": { - "class_name": "dialog_iterator" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "out": ["y_predicted"], - "pipe": [ - { - "class_name": "dialog_component_wrapper", - "component": { "class_name": "split_tokenizer" }, - "in": ["x"], - "out": ["x_tokens"] - }, - { - "id": "word_vocab", - "class_name": "simple_vocab", - "fit_on": ["x_tokens"], - "save_path": "{MODEL_PATH}/word.dict", - "load_path": "{MODEL_PATH}/word.dict" - }, - { - "class_name": "go_bot", - "load_path": "{MODEL_PATH}/model", - "save_path": "{MODEL_PATH}/model", - "in": ["x"], - "in_y": ["y"], - "out": ["y_predicted"], - "main": true, - "debug": false, - "learning_rate": 0.003, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 10.0, - "momentum": 0.95, - "optimizer": "tensorflow.train:AdamOptimizer", - "clip_norm": 2.0, - "dropout_rate": 0.4, - "l2_reg_coef": 3e-4, - "hidden_size": 128, - "dense_size": 160, - "word_vocab": "#word_vocab", - "database": { - "class_name": "sqlite_database", - "table_name": "mytable", - "primary_keys": ["name"], - "save_path": "{DOWNLOADS_PATH}/dstc2_v3/resto.sqlite" - }, - "nlg_manager": { - "class_name": "gobot_nlg_manager", - "template_path": "{DATA_PATH}/dstc2-templates.txt", - "template_type": "DualTemplate", - "api_call_action": "api_call" - }, - "api_call_action": "api_call", - "use_action_mask": false, - "slot_filler": { - "config_path": "{CONFIGS_PATH}/ner/slotfill_dstc2.json" - }, - "intent_classifier": null, - "embedder": { - "class_name": "glove", - "load_path": "{DOWNLOADS_PATH}/embeddings/glove.6B.100d.txt" - }, - "bow_embedder": { - "class_name": "bow", - "depth": 
"#word_vocab.__len__()", - "with_counts": true - }, - "tokenizer": { - "class_name": "stream_spacy_tokenizer", - "lowercase": false - }, - "tracker": { - "class_name": "featurized_tracker", - "slot_names": ["pricerange", "this", "area", "food", "name"] - } - } - ] - }, - "train": { - "epochs": 200, - "batch_size": 8, - - "metrics": ["per_item_dialog_accuracy"], - "validation_patience": 10, - "val_every_n_batches": 15, - - "log_every_n_batches": 15, - "show_examples": false, - "evaluation_targets": [ - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "DATA_PATH": "{DOWNLOADS_PATH}/dstc2_v3", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/gobot_dstc2" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_v9.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/datasets/dstc2_v3.tar.gz", - "subdir": "{DATA_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_best.json b/deeppavlov/configs/go_bot/gobot_dstc2_best.json deleted file mode 100644 index b13c680f9e..0000000000 --- a/deeppavlov/configs/go_bot/gobot_dstc2_best.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DSTC2_DATA_PATH}" - }, - "dataset_iterator": { - "class_name": "dialog_iterator" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "out": ["y_predicted"], - "pipe": [ - { - "class_name": "dialog_component_wrapper", - "component": { "class_name": "split_tokenizer" }, - "in": ["x"], - "out": ["x_tokens"] - }, - { - "id": "token_vocab", - "fit_on": ["x_tokens"], - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/gobot_dstc2_best/word.dict", - 
"load_path": "{MODELS_PATH}/gobot_dstc2_best/word.dict" - }, - { - "id": "restaurant_database", - "class_name": "sqlite_database", - "table_name": "mytable", - "primary_keys": ["name"], - "save_path": "{DOWNLOADS_PATH}/dstc2_v3/resto.sqlite" - }, - { - "class_name": "go_bot", - "load_path": "{MODELS_PATH}/gobot_dstc2_best/model", - "save_path": "{MODELS_PATH}/gobot_dstc2_best/model", - "in": ["x"], - "in_y": ["y"], - "out": ["y_predicted"], - "main": true, - "debug": false, - "learning_rate": 3e-3, - "learning_rate_drop_patience": 10, - "learning_rate_drop_div": 4.0, - "momentum": 0.95, - "optimizer": "tensorflow.train:AdamOptimizer", - "clip_norm": 2.0, - "dropout_rate": 0.75, - "l2_reg_coef": 9e-4, - "hidden_size": 128, - "dense_size": 128, - "attention_mechanism": { - "type": "general", - "hidden_size": 32, - "action_as_key": true, - "intent_as_key": true, - "max_num_tokens": 100, - "projected_align": false - }, - "word_vocab": "#token_vocab", - "database": "#restaurant_database", - "nlg_manager": { - "class_name": "gobot_nlg_manager", - "template_path": "{DSTC2_DATA_PATH}/dstc2-templates.txt", - "template_type": "DualTemplate", - "api_call_action": "api_call" - }, - "use_action_mask": false, - "slot_filler": { - "config_path": "{CONFIGS_PATH}/ner/slotfill_dstc2.json" - }, - "intent_classifier": { - "config_path": "{CONFIGS_PATH}/classifiers/intents_dstc2.json" - }, - "embedder": { - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin" - }, - "bow_embedder": null, - "tokenizer": { - "class_name": "stream_spacy_tokenizer", - "lowercase": false - }, - "tracker": { - "class_name": "featurized_tracker", - "slot_names": ["pricerange", "this", "area", "food", "name"] - } - } - ] - }, - "train": { - "epochs": 100, - "batch_size": 8, - - "pytest_max_batches": 2, - - "metrics": ["per_item_dialog_accuracy"], - "validation_patience": 15, - "val_every_n_batches": 15, - - "log_every_n_batches": 15, - "show_examples": false, - 
"evaluation_targets": [ - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", - "DSTC2_DATA_PATH": "{DOWNLOADS_PATH}/dstc2_v3" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_best_v4.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/datasets/dstc2_v3.tar.gz", - "subdir": "{DOWNLOADS_PATH}/dstc2_v3" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} diff --git a/deeppavlov/configs/go_bot/gobot_dstc2_best_json_nlg.json b/deeppavlov/configs/go_bot/gobot_dstc2_best_json_nlg.json deleted file mode 100644 index 19202edb74..0000000000 --- a/deeppavlov/configs/go_bot/gobot_dstc2_best_json_nlg.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DSTC2_DATA_PATH}" - }, - "dataset_iterator": { - "class_name": "dialog_iterator" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "out": ["y_predicted"], - "pipe": [ - { - "class_name": "dialog_component_wrapper", - "component": { "class_name": "split_tokenizer" }, - "in": ["x"], - "out": ["x_tokens"] - }, - { - "id": "token_vocab", - "fit_on": ["x_tokens"], - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/gobot_dstc2_best_json/word.dict", - "load_path": "{MODELS_PATH}/gobot_dstc2_best_json/word.dict" - }, - { - "id": "restaurant_database", - "class_name": "sqlite_database", - "table_name": "mytable", - "primary_keys": ["name"], - "save_path": "{DOWNLOADS_PATH}/dstc2/resto.sqlite" - }, - { - "class_name": "go_bot", - "load_path": "{MODELS_PATH}/gobot_dstc2_best_json/model", - "save_path": "{MODELS_PATH}/gobot_dstc2_best_json/model", - "in": ["x"], - "in_y": ["y"], - "out": ["y_predicted"], - 
"main": true, - "debug": false, - "learning_rate": 3e-3, - "learning_rate_drop_patience": 10, - "learning_rate_drop_div": 4.0, - "momentum": 0.95, - "optimizer": "tensorflow.train:AdamOptimizer", - "clip_norm": 2.0, - "dropout_rate": 0.75, - "l2_reg_coef": 9e-4, - "hidden_size": 128, - "dense_size": 128, - "attention_mechanism": { - "type": "general", - "hidden_size": 32, - "action_as_key": true, - "intent_as_key": true, - "max_num_tokens": 100, - "projected_align": false - }, - "word_vocab": "#token_vocab", - "database": "#restaurant_database", - "nlg_manager": { - "class_name": "gobot_json_nlg_manager", - "data_path": "{DSTC2_DATA_PATH}", - "actions2slots_path": "{DSTC2_DATA_PATH}/dstc2-actions2slots.json", - "api_call_action": "api_call" - }, - "use_action_mask": false, - "slot_filler": { - "config_path": "{CONFIGS_PATH}/ner/slotfill_dstc2.json" - }, - "intent_classifier": { - "config_path": "{CONFIGS_PATH}/classifiers/intents_dstc2.json" - }, - "embedder": { - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin" - }, - "bow_embedder": null, - "tokenizer": { - "class_name": "stream_spacy_tokenizer", - "lowercase": false - }, - "tracker": { - "class_name": "featurized_tracker", - "slot_names": ["pricerange", "this", "area", "food", "name"] - } - } - ] - }, - "train": { - "epochs": 100, - "batch_size": 8, - - "pytest_max_batches": 2, - - "metrics": ["per_item_action_accuracy"], - "validation_patience": 15, - "val_every_n_batches": 15, - - "log_every_n_batches": 15, - "show_examples": false, - "evaluation_targets": [ - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", - "DSTC2_DATA_PATH": "{DOWNLOADS_PATH}/dstc2_v3" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_best_v4.tar.gz", - "subdir": 
"{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/datasets/dstc2_v3.tar.gz", - "subdir": "{DSTC2_DATA_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} diff --git a/deeppavlov/configs/go_bot/gobot_simple_dstc2.json b/deeppavlov/configs/go_bot/gobot_simple_dstc2.json deleted file mode 100644 index 52093d0ecb..0000000000 --- a/deeppavlov/configs/go_bot/gobot_simple_dstc2.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DSTC2_DATA_PATH}" - }, - "dataset_iterator": { - "class_name": "dialog_iterator" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "out": ["y_predicted"], - "pipe": [ - { - "class_name": "dialog_component_wrapper", - "component": { "class_name": "split_tokenizer" }, - "in": ["x"], - "out": ["x_tokens"] - }, - { - "id": "word_vocab", - "class_name": "simple_vocab", - "fit_on": ["x_tokens"], - "save_path": "{MODEL_PATH}/word.dict", - "load_path": "{MODEL_PATH}/word.dict" - }, - { - "class_name": "go_bot", - "load_path": "{MODEL_PATH}/model", - "save_path": "{MODEL_PATH}/model", - "in": ["x"], - "in_y": ["y"], - "out": ["y_predicted"], - "main": true, - "debug": false, - "learning_rate": 0.003, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 10.0, - "momentum": 0.95, - "optimizer": "tensorflow.train:AdamOptimizer", - "clip_norm": 2.0, - "dropout_rate": 0.4, - "l2_reg_coef": 3e-4, - "hidden_size": 128, - "dense_size": 160, - "word_vocab": "#word_vocab", - "database": { - "class_name": "sqlite_database", - "table_name": "mytable", - "primary_keys": ["name"], - "save_path": "{DSTC2_DATA_PATH}/resto.sqlite" - }, - "nlg_manager": { - "class_name": "gobot_nlg_manager", - "template_path": "{DSTC2_DATA_PATH}/dstc2-templates.txt", - "template_type": "DualTemplate", - "api_call_action": "api_call" - }, - "api_call_action": "api_call", - "use_action_mask": false, - 
"slot_filler": { - "config_path": "{CONFIGS_PATH}/ner/slotfill_dstc2.json" - }, - "intent_classifier": null, - "embedder": { - "class_name": "glove", - "load_path": "{DOWNLOADS_PATH}/embeddings/glove.6B.100d.txt" - }, - "bow_embedder": { - "class_name": "bow", - "depth": "#word_vocab.__len__()", - "with_counts": true - }, - "tokenizer": { - "class_name": "stream_spacy_tokenizer", - "lowercase": false - }, - "tracker": { - "class_name": "featurized_tracker", - "slot_names": ["pricerange", "this", "area", "food", "name"] - } - } - ] - }, - "train": { - "epochs": 200, - "batch_size": 8, - - "metrics": ["per_item_dialog_accuracy"], - "validation_patience": 10, - "val_every_n_batches": 15, - - "log_every_n_batches": 15, - "show_examples": false, - "evaluation_targets": [ - "valid", - "test" - ], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "DSTC2_DATA_PATH": "{DOWNLOADS_PATH}/dstc2_v3", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/gobot_dstc2" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/gobot_dstc2_v9.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": "http://files.deeppavlov.ai/datasets/dstc2_v3.tar.gz", - "subdir": "{DSTC2_DATA_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ar.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ar.json deleted file mode 100644 index 1abd931d5d..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ar.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "ar", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": 
"morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ar/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ar/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ar/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ar/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ar/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ar/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - 
"inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/ar.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/ar" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/ar.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/ar" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_cs.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_cs.json deleted file mode 100644 index 047a08cf24..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_cs.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "cs", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/cs/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/cs/tag.dict" - }, - { - "id": "char_vocab", - "class_name": 
"simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/cs/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/cs/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/cs/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/cs/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/cs.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/cs" - }, - { - "url": 
"http://files.deeppavlov.ai/datasets/UD2.0_source/cs.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/cs" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_de.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_de.json deleted file mode 100644 index c0c7aa19f1..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_de.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "de", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/de/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/de/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/de/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/de/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/de/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/de/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - 
"char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/de.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/de" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/de.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/de" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json deleted file mode 100644 index dd771f8216..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "en", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - 
"chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/en/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/en/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/en/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/en/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/en/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/en/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - 
] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/en.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/en" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/en.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/en" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_es_ancora.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_es_ancora.json deleted file mode 100644 index ce6c39f736..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_es_ancora.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "es_ancora", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/es_ancora/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/es_ancora/tag.dict" - }, - { - "id": "char_vocab", - 
"class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/es_ancora/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/es_ancora/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/es_ancora/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/es_ancora/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/es_ancora.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/es_ancora" 
- }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/es_ancora.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/es_ancora" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_fr.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_fr.json deleted file mode 100644 index 7c944e807a..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_fr.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "fr", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/fr/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/fr/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/fr/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/fr/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/fr/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/fr/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - 
"char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/fr.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/fr" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/fr.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/fr" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hi.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hi.json deleted file mode 100644 index ff10e2e4ba..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hi.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "hi", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": 
"morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hi/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hi/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hi/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hi/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hi/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hi/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - 
"inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/hi.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/hi" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/hi.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/hi" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hu.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hu.json deleted file mode 100644 index 6e399a3a43..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_hu.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "hu", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hu/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hu/tag.dict" - }, - { - "id": "char_vocab", - "class_name": 
"simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hu/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hu/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hu/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/hu/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/hu.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/hu" - }, - { - "url": 
"http://files.deeppavlov.ai/datasets/UD2.0_source/hu.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/hu" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_it.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_it.json deleted file mode 100644 index a84510a2e2..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_it.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "it", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/it/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/it/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/it/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/it/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/it/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/it/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - 
"char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/it.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/it" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/it.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/it" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus.json deleted file mode 100644 index b5ec00c85e..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "ru_syntagrus", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - 
"class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - 
"batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/ru_syntagrus.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/ru_syntagrus.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/ru_syntagrus" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy.json deleted file mode 100644 index ef67338faa..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy.json +++ /dev/null @@ -1,193 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "ru_syntagrus", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" 
- ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/char.dict" - }, - { - "id": "pymorphy_vectorizer", - "class_name": "pymorphy_vectorizer", - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tags_russian.txt", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tags_russian.txt", - "max_pymorphy_variants": 5, - "in": [ - "x_tokens" - ], - "out": [ - "x_possible_tags" - ] - }, - { - "in": [ - "x_chars_lowered_marked", - "x_possible_tags" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/model_pymorphy.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/model_pymorphy.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01, - "word_vectorizers": [ - [ - "#pymorphy_vectorizer.dim", - 128 - ] - ] - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - 
"inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/ru_syntagrus.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/ru_syntagrus.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/ru_syntagrus" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy_lemmatize.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy_lemmatize.json deleted file mode 100644 index da40a4f2a5..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_ru_syntagrus_pymorphy_lemmatize.json +++ /dev/null @@ -1,201 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "ru_syntagrus", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "language": "russian", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - 
"save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/char.dict" - }, - { - "id": "pymorphy_vectorizer", - "class_name": "pymorphy_vectorizer", - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tags_russian.txt", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tags_russian.txt", - "max_pymorphy_variants": 5, - "in": [ - "x_tokens" - ], - "out": [ - "x_possible_tags" - ] - }, - { - "in": [ - "x_chars_lowered_marked", - "x_possible_tags" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/model_pymorphy.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/model_pymorphy.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01, - "word_vectorizers": [ - [ - "#pymorphy_vectorizer.dim", - 128 - ] - ] - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_lemmas" - ], - "class_name": "UD_pymorphy_lemmatizer", - "end": "\n" - }, - { - "in": [ - "x_tokens", - "y_predicted", - "y_lemmas" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "lemmatized_output_prettifier", - "end": "\n" - 
} - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1 - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/ru_syntagrus.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/ru_syntagrus.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/ru_syntagrus" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_tr.json b/deeppavlov/configs/morpho_tagger/UD2.0/morpho_tr.json deleted file mode 100644 index e7887f1560..0000000000 --- a/deeppavlov/configs/morpho_tagger/UD2.0/morpho_tr.json +++ /dev/null @@ -1,174 +0,0 @@ -{ - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "tr", - "data_types": [ - "train", - "dev", - "test" - ] - }, - "dataset_iterator": { - "class_name": "morphotagger_dataset", - "min_train_fraction": 0.9 - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "id": "char_splitting_lowercase_preprocessor", - "class_name": "char_splitting_lowercase_preprocessor", - "in": [ - "x_tokens" - ], - "out": [ - "x_chars_lowered_marked" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": [ - "y" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": 
"{MODELS_PATH}/morpho_tagger/UD2.0/tr/tag.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/tr/tag.dict" - }, - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": [ - "x_chars_lowered_marked" - ], - "special_tokens": [ - "PAD", - "BEGIN", - "END" - ], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/tr/char.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/tr/char.dict" - }, - { - "in": [ - "x_chars_lowered_marked" - ], - "in_y": [ - "y" - ], - "out": [ - "y_predicted" - ], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/tr/model.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/tr/model.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, - "char_window_size": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "word_lstm_units": 128, - "conv_dropout": 0.0, - "char_conv_layers": 1, - "char_highway_layers": 1, - "highway_dropout": 0.0, - "word_lstm_layers": 1, - "char_filter_multiple": 50, - "intermediate_dropout": 0.0, - "word_dropout": 0.2, - "lstm_dropout": 0.2, - "regularizer": 0.01 - }, - { - "in": [ - "x_tokens", - "y_predicted" - ], - "out": [ - "y_prettified" - ], - "id": "prettifier", - "class_name": "tag_output_prettifier", - "end": "\n" - } - ], - "out": [ - "y_prettified" - ] - }, - "train": { - "epochs": 50, - "batch_size": 32, - "metrics": [ - { - "name": "per_token_accuracy", - "inputs": [ - "y", - "y_predicted" - ] - }, - { - "name": "accuracy", - "inputs": [ - "y", - "y_predicted" - ] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "RESULTS_PATH": "{ROOT_PATH}/results" - }, - "download": [ - { - "url": 
"http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.0/tr.tar.gz", - "subdir": "{MODELS_PATH}/morpho_tagger/UD2.0/tr" - }, - { - "url": "http://files.deeppavlov.ai/datasets/UD2.0_source/tr.tar.gz", - "subdir": "{DOWNLOADS_PATH}/UD2.0_source/tr" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/ner/ner_conll2003_pos.json b/deeppavlov/configs/ner/ner_conll2003_pos.json deleted file mode 100644 index 3ddd6ab55d..0000000000 --- a/deeppavlov/configs/ner/ner_conll2003_pos.json +++ /dev/null @@ -1,189 +0,0 @@ -{ - "dataset_reader": { - "class_name": "conll2003_reader", - "data_path": "{DOWNLOADS_PATH}/conll2003/", - "dataset_name": "conll2003", - "provide_pos": true - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": ["x", "pos"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "class_name": "lazy_tokenizer", - "out": ["x_tokens"] - }, - { - "in": ["x_tokens"], - "class_name": "str_lower", - "out": ["x_lower"] - }, - { - "in": ["x_lower"], - "class_name": "sanitizer", - "nums": true, - "out": ["x_san"] - }, - { - "in": ["x_san"], - "id": "word_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "special_tokens": [""], - "fit_on": ["x_san"], - "save_path": "{MODELS_PATH}/ner_conll2003/word.dict", - "load_path": "{MODELS_PATH}/ner_conll2003/word.dict", - "out": ["x_tok_ind"] - }, - { - "in": ["pos"], - "id": "pos_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "fit_on": ["pos"], - "save_path": "{MODELS_PATH}/ner_conll2003/pos.dict", - "load_path": "{MODELS_PATH}/ner_conll2003/pos.dict", - "out": ["pos_ind"] - }, - { - "in": ["pos_ind"], - "class_name": "one_hotter", - "depth": "#pos_vocab.len", - "pad_zeros": true, - "out": ["pos_one_hot"] - }, - { - "in": ["y"], - "id": "tag_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "fit_on": ["y"], - "save_path": "{MODELS_PATH}/ner_conll2003/tag.dict", - "load_path": 
"{MODELS_PATH}/ner_conll2003/tag.dict", - "out": ["y_ind"] - }, - { - "in": ["x_tokens"], - "class_name": "char_splitter", - "out": ["x_char"] - }, - { - "in": ["x_char"], - "id": "char_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "fit_on": ["x_char"], - "save_path": "{MODELS_PATH}/ner_conll2003/char.dict", - "load_path": "{MODELS_PATH}/ner_conll2003/char.dict", - "out": ["x_char_ind"] - }, - { - "in": ["x_tokens"], - "class_name": "mask", - "out": ["mask"] - }, - { - "in": ["x_san"], - "id": "glove_emb", - "class_name": "glove", - "pad_zero": true, - "load_path": "{DOWNLOADS_PATH}/embeddings/glove.6B.100d.txt", - - "out": ["x_emb"] - }, - { - "id": "embeddings", - "class_name": "emb_mat_assembler", - "embedder": "#glove_emb", - "vocab": "#word_vocab" - }, - { - "id": "embeddings_char", - "class_name": "emb_mat_assembler", - "character_level": true, - "emb_dim": 32, - "embedder": "#glove_emb", - "vocab": "#char_vocab" - }, - { - "id": "capitalization", - "class_name": "capitalization_featurizer", - "in": ["x_tokens"], - "out": ["cap"] - }, - { - "in": ["x_emb", "mask", "x_char_ind", "cap", "pos_one_hot"], - "in_y": ["y_ind"], - "out": ["y_predicted"], - "class_name": "ner", - "main": true, - "token_emb_dim": "#glove_emb.dim", - "n_hidden_list": [128], - "net_type": "rnn", - "cell_type": "lstm", - "use_cudnn_rnn": true, - "n_tags": "#tag_vocab.len", - "capitalization_dim": "#capitalization.dim", - "char_emb_dim": "#embeddings_char.dim", - "pos_features_dim": "#pos_vocab.len", - "save_path": "{MODELS_PATH}/ner_conll2003/model", - "load_path": "{MODELS_PATH}/ner_conll2003/model", - "char_emb_mat": "#embeddings_char.emb_mat", - "two_dense_on_top": true, - "use_crf": true, - "use_batch_norm": true, - "embeddings_dropout": true, - "top_dropout": true, - "intra_layer_dropout": true, - "l2_reg": 0, - "learning_rate": 1e-2, - "dropout_keep_prob": 0.7 - }, - { - "ref": "tag_vocab", - "in": ["y_predicted"], - "out": ["tags"] - } - ], - - "out": 
["x_tokens", "tags"] - }, - "train": { - "epochs": 100, - "batch_size": 64, - - "metrics": [ - { - "name": "ner_f1", - "inputs": ["y", "tags"] - } - ], - "validation_patience": 7, - "val_every_n_epochs": 1, - - "log_every_n_epochs": 1, - "show_examples": false, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/ner/ner_dstc2.json b/deeppavlov/configs/ner/ner_dstc2.json deleted file mode 100644 index 4f35d4b530..0000000000 --- a/deeppavlov/configs/ner/ner_dstc2.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DATA_PATH}" - }, - "dataset_iterator": { - "class_name": "dstc2_ner_iterator", - "slot_values_path": "{SLOT_VALS_PATH}" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "class_name": "lazy_tokenizer", - "out": ["x_tokens"] - }, - { - "in": ["x_tokens"], - "class_name": "str_lower", - "out": ["x_lower"] - }, - { - "in": ["x_lower"], - "id": "word_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "fit_on": ["x_lower"], - "save_path": "{MODEL_PATH}/word.dict", - "load_path": "{MODEL_PATH}/word.dict", - "out": ["x_tok_ind"] - }, - { - "class_name": "random_emb_mat", - "id": "embeddings", - "vocab_len": "#word_vocab.len", - "emb_dim": 100 - }, - { - "in": ["y"], - "id": "tag_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "fit_on": ["y"], - "save_path": "{MODEL_PATH}/tag.dict", - "load_path": "{MODEL_PATH}/tag.dict", - "out": ["y_ind"] - }, - { - "in": ["x_tokens"], - "class_name": "mask", - "out": ["mask"] - }, - { - "in": ["x_tok_ind", "mask"], - 
"in_y": ["y_ind"], - "out": ["y_predicted"], - "class_name": "ner", - "main": true, - "token_emb_mat": "#embeddings.emb_mat", - "n_hidden_list": [64, 64], - "net_type": "cnn", - "n_tags": "#tag_vocab.len", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "embeddings_dropout": true, - "top_dropout": true, - "intra_layer_dropout": false, - "use_batch_norm": true, - "learning_rate": 1e-2, - "dropout_keep_prob": 0.5 - }, - { - "ref": "tag_vocab", - "in": ["y_predicted"], - "out": ["tags"] - } - ], - "out": ["x_tokens", "tags"] - }, - "train": { - "epochs": 100, - "batch_size": 64, - - "metrics": [ - { - "name": "ner_f1", - "inputs": ["y", "tags"] - }, - { - "name": "per_token_accuracy", - "inputs": ["y", "tags"] - } - ], - "validation_patience": 5, - "val_every_n_epochs": 5, - - "log_every_n_batches": 100, - "show_examples": false, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DATA_PATH": "{ROOT_PATH}/downloads/dstc2", - "SLOT_VALS_PATH": "{DATA_PATH}/dstc_slot_vals.json", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/slotfill_dstc2" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", - "subdir": "{DATA_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/slotfill_dstc2.tar.gz", - "subdir": "{MODELS_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/ner/ner_few_shot_ru.json b/deeppavlov/configs/ner/ner_few_shot_ru.json deleted file mode 100644 index ad60b46567..0000000000 --- a/deeppavlov/configs/ner/ner_few_shot_ru.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "deeppavlov_root": ".", - "dataset_reader": { - "class_name": "conll2003_reader", - "data_path": "{DOWNLOADS_PATH}/ner_few_shot_data/" - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": ["x"], - "in_y": ["tags"], - "pipe": [ - { - "in": ["x"], - 
"class_name": "lazy_tokenizer", - "out": ["x_tokens"] - }, - { - "in": ["tags"], - "id": "tag_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": false, - "fit_on": ["tags"], - "save_path": "{MODELS_PATH}/ner_fs/tag.dict", - "load_path": "{MODELS_PATH}/ner_fs/tag.dict", - "out": ["tag_indices"] - }, - { - "class_name": "elmo_embedder", - "elmo_output_names": ["lstm_outputs1", "lstm_outputs2", "word_emb"], - "mini_batch_size": 32, - "in": ["x_tokens"], - "spec": "{DOWNLOADS_PATH}/embeddings/elmo_ru_news", - "out": [ - "tokens_emb" - ] - }, - { - "class_name": "ner_svm", - "in": "tokens_emb", - "out": "tag_indices", - "fit_on": ["tokens_emb", "tag_indices"], - "save_path": "{MODELS_PATH}/ner_fs/model", - "load_path": "{MODELS_PATH}/ner_fs/model" - }, - { - "ref": "tag_vocab", - "in": ["tag_indices"], - "out": ["tags_hat"] - }, - { - "class_name": "ner_bio_converter", - "in": ["tags_hat"], - "out": ["tags_bio_hat"] - }, - { - "class_name": "ner_bio_converter", - "in": ["tags"], - "out": ["tags_bio"] - } - - ], - - "out": ["x_tokens", "tags_bio_hat"] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - { - "name": "ner_f1", - "inputs": [ - "tags_bio", - "tags_bio_hat" - ] - } - ], - "validation_patience": 7, - "val_every_n_epochs": 1, - - "log_every_n_epochs": 1, - "show_examples": false, - "tensorboard_log_dir": "{MODELS_PATH}/ner_fs/logs", - "class_name": "fit_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo_ru-news_wmt11-16_1.5M_steps.tar.gz", - "subdir": "{DOWNLOADS_PATH}/embeddings/elmo_ru_news" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/ner/ner_few_shot_ru_simulate.json b/deeppavlov/configs/ner/ner_few_shot_ru_simulate.json deleted file mode 100644 index 
cb58707224..0000000000 --- a/deeppavlov/configs/ner/ner_few_shot_ru_simulate.json +++ /dev/null @@ -1,140 +0,0 @@ -{ - "dataset_reader": { - "class_name": "conll2003_reader", - "dataset_name": "collection_rus", - "data_path": "{DOWNLOADS_PATH}/ner_few_shot_data/" - }, - "dataset_iterator": { - "class_name": "ner_few_shot_iterator", - "target_tag": "PER" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "tags" - ], - "pipe": [ - { - "in": [ - "x" - ], - "class_name": "lazy_tokenizer", - "out": [ - "x_tokens" - ] - }, - { - "in": [ - "tags" - ], - "id": "tag_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": false, - "fit_on": [ - "tags" - ], - "save_path": "{MODELS_PATH}/ner_fs/tag.dict", - "load_path": "{MODELS_PATH}/ner_fs/tag.dict", - "out": [ - "tag_indices" - ] - }, - { - "class_name": "elmo_embedder", - "elmo_output_names": [ - "lstm_outputs1", - "lstm_outputs2", - "word_emb" - ], - "mini_batch_size": 32, - "in": [ - "x_tokens" - ], - "spec": "{DOWNLOADS_PATH}/embeddings/elmo_ru_news", - "out": [ - "tokens_emb" - ] - }, - { - "class_name": "ner_svm", - "in": "tokens_emb", - "out": "tag_indices", - "fit_on": [ - "tokens_emb", - "tag_indices" - ], - "save_path": "{MODELS_PATH}/ner_fs/model", - "load_path": "{MODELS_PATH}/ner_fs/model" - }, - { - "ref": "tag_vocab", - "in": [ - "tag_indices" - ], - "out": [ - "tags_hat" - ] - }, - { - "class_name": "ner_bio_converter", - "in": [ - "tags_hat" - ], - "out": [ - "tags_bio_hat" - ] - }, - { - "class_name": "ner_bio_converter", - "in": [ - "tags" - ], - "out": [ - "tags_bio" - ] - } - ], - "out": [ - "x_tokens", - "tags_bio_hat" - ] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - { - "name": "ner_f1", - "inputs": [ - "tags_bio", - "tags_bio_hat" - ] - } - ], - "validation_patience": 7, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "tensorboard_log_dir": "{MODELS_PATH}/ner_fs/logs", - "class_name": "fit_trainer", - "evaluation_targets": [ - 
"valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/elmo_ru-news_wmt11-16_1.5M_steps.tar.gz", - "subdir": "{DOWNLOADS_PATH}/embeddings/elmo_ru_news" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/ner/ner_kb_rus.json b/deeppavlov/configs/ner/ner_kb_rus.json deleted file mode 100644 index 1bef6b87b1..0000000000 --- a/deeppavlov/configs/ner/ner_kb_rus.json +++ /dev/null @@ -1,164 +0,0 @@ -{ - "dataset_reader": { - "class_name": "sq_reader", - "data_path": "{DOWNLOADS_PATH}/ner_sq/SQ_rus_dataset_zs.pckl" - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "class_name": "lazy_tokenizer", - "out": ["x_tokens"] - }, - { - "in": ["x_tokens"], - "class_name": "str_lower", - "out": ["x_lower"] - }, - { - "in": ["x_lower"], - "class_name": "sanitizer", - "nums": true, - "out": ["x_san"] - }, - { - "in": ["x_san"], - "id": "word_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "special_tokens": [""], - "fit_on": ["x_san"], - "save_path": "{MODEL_PATH}/ner/word.dict", - "load_path": "{MODEL_PATH}/ner/word.dict", - "out": ["x_tok_ind"] - }, - { - "in": ["y"], - "id": "tag_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "fit_on": ["y"], - "save_path": "{MODEL_PATH}/ner/tag.dict", - "load_path": "{MODEL_PATH}/ner/tag.dict", - "out": ["y_ind"] - }, - { - "in": ["x_tokens"], - "class_name": "char_splitter", - "out": ["x_char"] - }, - { - "in": ["x_char"], - "id": "char_vocab", - "class_name": "simple_vocab", - "pad_with_zeros": true, - "fit_on": ["x_char"], - "save_path": "{MODEL_PATH}/ner/char.dict", - "load_path": "{MODEL_PATH}/ner/char.dict", - "out": ["x_char_ind"] - }, - { - "in": ["x_san"], - "id": "embedder", - 
"class_name": "fasttext", - "pad_zero": true, - "load_path": "{DOWNLOADS_PATH}/embeddings/lenta_lower_100.bin", - "out": ["x_emb"] - }, - { - "in": ["x_tokens"], - "class_name": "mask", - "out": ["mask"] - }, - { - "class_name": "random_emb_mat", - "id": "embeddings", - "vocab_len": "#word_vocab.len", - "emb_dim": 100 - }, - { - "class_name": "random_emb_mat", - "id": "embeddings_char", - "vocab_len": "#char_vocab.len", - "emb_dim": 100 - }, - { - "in": ["x_emb", "mask", "x_char_ind"], - "in_y": ["y_ind"], - "out": ["y_predicted"], - "class_name": "ner", - "main": true, - "n_hidden_list": [128], - "net_type": "rnn", - "cell_type": "lstm", - "use_cudnn_rnn": true, - "n_tags": "#tag_vocab.len", - "token_emb_dim": "#embedder.dim", - "char_emb_dim": 100, - "save_path": "{MODEL_PATH}/ner/model", - "load_path": "{MODEL_PATH}/ner/model", - "char_emb_mat": "#embeddings_char.emb_mat", - "use_crf": true, - "use_batch_norm": true, - "embeddings_dropout": true, - "top_dropout": true, - "intra_layer_dropout": true, - "l2_reg": 0, - "learning_rate": 1e-2, - "dropout_keep_prob": 0.7 - }, - { - "ref": "tag_vocab", - "in": ["y_predicted"], - "out": ["tags"] - } - ], - - "out": ["x_tokens", "tags"] - }, - "train": { - "epochs": 100, - "batch_size": 64, - - "metrics": [ - { - "name": "ner_f1", - "inputs": ["y", "tags"] - } - ], - "validation_patience": 7, - "val_every_n_epochs": 1, - - "log_every_n_epochs": 1, - "show_examples": false, - "tensorboard_log_dir": "{MODEL_PATH}/ner/logs" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/kbqa_mix_lowercase" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_kb_rus.tar.gz", - "subdir": "{MODEL_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/lenta_lower_100.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - }, - { - "url": 
"http://files.deeppavlov.ai/datasets/SQ_rus_dataset_zs.pckl", - "subdir": "{DOWNLOADS_PATH}/ner_sq" - } - ] - } -} - diff --git a/deeppavlov/configs/ner/ner_lcquad_bert_probas.json b/deeppavlov/configs/ner/ner_lcquad_bert_probas.json deleted file mode 100644 index 60c4febd57..0000000000 --- a/deeppavlov/configs/ner/ner_lcquad_bert_probas.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "dataset_reader": { - "class_name": "sq_reader", - "data_path": "{DOWNLOADS_PATH}/lcquad/entity_detection_dataset.pickle" - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "class_name": "bert_ner_preprocessor", - "vocab_file": "{BERT_PATH}/vocab.txt", - "do_lower_case": false, - "max_seq_length": 512, - "max_subword_length": 15, - "token_maksing_prob": 0.0, - "in": ["x"], - "out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "pred_subword_mask"] - }, - { - "class_name": "mask", - "in": ["x_subword_tokens"], - "out": ["x_subword_mask"] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "unk_token": ["O"], - "pad_with_zeros": true, - "save_path": "{NER_PATH}/tag.dict", - "load_path": "{NER_PATH}/tag.dict", - "fit_on": ["y"], - "in": ["y"], - "out": ["y_ind"] - }, - { - "class_name": "bert_sequence_tagger", - "n_tags": "#tag_vocab.len", - "keep_prob": 0.1, - "bert_config_file": "{BERT_PATH}/bert_config.json", - "pretrained_bert": "{BERT_PATH}/bert_model.ckpt", - "attention_probs_keep_prob": 0.5, - "use_crf": false, - "return_probas": true, - "ema_decay": 0.9, - "encoder_layer_ids": [-1], - "optimizer": "tf.train:AdamOptimizer", - "learning_rate": 1e-3, - "bert_learning_rate": 2e-5, - "min_learning_rate": 1e-7, - "learning_rate_drop_patience": 30, - "learning_rate_drop_div": 1.5, - "load_before_drop": true, - "clip_norm": 1.0, - "save_path": "{NER_PATH}/model", - "load_path": "{NER_PATH}/model", - "in": ["x_subword_tok_ids", "x_subword_mask", "pred_subword_mask"], - "in_y": ["y_ind"], 
- "out": ["y_pred_ind"] - } - ], - "out": ["x_tokens", "y_pred_ind"] - }, - "train": { - "epochs": 30, - "batch_size": 16, - "metrics": [ - { - "name": "ner_f1", - "inputs": ["y", "y_pred"] - }, - { - "name": "ner_token_f1", - "inputs": ["y", "y_pred"] - } - ], - "validation_patience": 10, - "val_every_n_batches": 400, - - "log_every_n_batches": 400, - "tensorboard_log_dir": "{NER_PATH}/logs", - "show_examples": false, - "pytest_max_batches": 2, - "pytest_batch_size": 8, - "evaluation_targets": ["valid", "test"], - "class_name": "nn_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "BERT_PATH": "{DOWNLOADS_PATH}/bert_models/cased_L-12_H-768_A-12", - "NER_PATH": "{MODELS_PATH}/ner_lcquad" - }, - "labels": { - "telegram_utils": "NERCoNLL2003Model", - "server_utils": "NER" - }, - "download": [ - - { - "url": "http://files.deeppavlov.ai/kbqa/datasets/entity_detection_dataset.pickle", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/cased_L-12_H-768_A-12.zip", - "subdir": "{DOWNLOADS_PATH}/bert_models" - }, - { - "url": "http://files.deeppavlov.ai/kbqa/models/ner_lcquad.tar.gz", - "subdir": "{MODELS_PATH}/ner_lcquad" - } - ] - } -} diff --git a/deeppavlov/configs/ner/ner_ontonotes_m1.json b/deeppavlov/configs/ner/ner_ontonotes_m1.json deleted file mode 100644 index 6e4e85d66e..0000000000 --- a/deeppavlov/configs/ner/ner_ontonotes_m1.json +++ /dev/null @@ -1,131 +0,0 @@ -{ - "dataset_reader": { - "class_name": "conll2003_reader", - "data_path": "{DOWNLOADS_PATH}/ontonotes/", - "dataset_name": "ontonotes", - "provide_pos": false, - "provide_chunk": false, - "iobes": true - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "out": ["x_tokens"], - "class_name": "lazy_tokenizer" - }, - { - "in": ["x_tokens"], - 
"out": ["x_lower", "sent_lengths", "x_tokens_elmo"], - "class_name": "ner_preprocessor", - "get_x_padded_for_elmo": true - }, - { - "in": ["x_lower"], - "out": ["x_tok_ind"], - "fit_on": ["x_lower"], - "class_name": "ner_vocab", - "id": "word_vocab", - "save_path": "{MODEL_PATH}/word.dict", - "load_path": "{MODEL_PATH}/word.dict" - }, - { - "in": ["y"], - "out": ["y_ind"], - "fit_on": ["y"], - "class_name": "ner_vocab", - "id": "tag_vocab", - "save_path": "{MODEL_PATH}/tag.dict", - "load_path": "{MODEL_PATH}/tag.dict" - }, - { - "in": ["x_tokens"], - "out": ["x_char_ind"], - "fit_on": ["x_tokens"], - "class_name": "ner_vocab", - "char_level": true, - "id": "char_vocab", - "save_path": "{MODEL_PATH}/char.dict", - "load_path": "{MODEL_PATH}/char.dict" - }, - { - "in":[ - "sent_lengths", - "x_tok_ind", - "x_char_ind", - "x_tokens_elmo" - ], - "in_y": ["y_ind"], - "out": ["y_predicted"], - "class_name": "hybrid_ner_model", - "n_tags": "#tag_vocab.len", - "word_emb_path": "{DOWNLOADS_PATH}/embeddings/glove.6B.100d.txt", - "word_emb_name": "glove", - "word_dim": 100, - "word_vocab": "#word_vocab", - "char_vocab_size": "#char_vocab.len", - "char_dim": 100, - "elmo_dim": 128, - "lstm_hidden_size": 256, - "save_path": "{MODEL_PATH}/ontonotes", - "load_path": "{MODEL_PATH}/ontonotes", - "learning_rate": 1e-3, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 10, - "dropout_keep_prob": 0.7 - }, - { - "in": ["y_predicted"], - "out": ["tags"], - "class_name": "convert_ids2tags", - "id2tag": "#tag_vocab.i2t" - } - ], - "out": ["x_tokens", "tags"] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - { - "name": "ner_f1", - "inputs": ["y", "tags"] - }, - { - "name": "ner_token_f1", - "inputs": ["y", "tags"] - } - ], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - 
"ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODEL_PATH": "{ROOT_PATH}/models/ontonotes" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_ontonotes_m1.tar.gz", - "subdir": "{MODEL_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} diff --git a/deeppavlov/configs/ner/ner_rus_bert.json b/deeppavlov/configs/ner/ner_rus_bert.json index 9a00116886..8a4c51ff5f 100644 --- a/deeppavlov/configs/ner/ner_rus_bert.json +++ b/deeppavlov/configs/ner/ner_rus_bert.json @@ -9,103 +9,146 @@ "class_name": "data_learning_iterator" }, "chainer": { - "in": ["x"], - "in_y": ["y"], + "in": [ + "x" + ], + "in_y": [ + "y" + ], "pipe": [ { - "class_name": "bert_ner_preprocessor", - "vocab_file": "{BERT_PATH}/vocab.txt", + "class_name": "torch_transformers_ner_preprocessor", + "vocab_file": "{TRANSFORMER}", "do_lower_case": false, "max_seq_length": 512, "max_subword_length": 15, "token_masking_prob": 0.0, - "in": ["x"], - "out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask"] + "in": [ + "x" + ], + "out": [ + "x_tokens", + "x_subword_tokens", + "x_subword_tok_ids", + "startofword_markers", + "attention_mask" + ] }, { "id": "tag_vocab", "class_name": "simple_vocab", - "unk_token": ["O"], + "unk_token": [ + "O" + ], "pad_with_zeros": true, - "save_path": "{NER_PATH}/tag.dict", - "load_path": "{NER_PATH}/tag.dict", - "fit_on": ["y"], - "in": ["y"], - "out": ["y_ind"] + "save_path": "{MODEL_PATH}/tag.dict", + "load_path": "{MODEL_PATH}/tag.dict", + "fit_on": [ + "y" + ], + "in": [ + "y" + ], + "out": [ + "y_ind" + ] }, { - "class_name": "bert_sequence_tagger", + "class_name": "torch_transformers_sequence_tagger", "n_tags": "#tag_vocab.len", - "keep_prob": 0.1, - "bert_config_file": "{BERT_PATH}/bert_config.json", - "pretrained_bert": "{BERT_PATH}/bert_model.ckpt", + "pretrained_bert": 
"{TRANSFORMER}", "attention_probs_keep_prob": 0.5, - "use_crf": true, - "ema_decay": 0.9, "return_probas": false, - "encoder_layer_ids": [-1], - "optimizer": "tf.train:AdamOptimizer", - "learning_rate": 1e-3, - "bert_learning_rate": 2e-5, - "min_learning_rate": 1e-7, + "encoder_layer_ids": [ + -1 + ], + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2e-05, + "weight_decay": 1e-06, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-06 + }, + "clip_norm": 1.0, + "min_learning_rate": 1e-07, "learning_rate_drop_patience": 30, "learning_rate_drop_div": 1.5, "load_before_drop": true, - "clip_norm": null, - "save_path": "{NER_PATH}/model", - "load_path": "{NER_PATH}/model", - "in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"], - "in_y": ["y_ind"], - "out": ["y_pred_ind"] + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "in": [ + "x_subword_tok_ids", + "attention_mask", + "startofword_markers" + ], + "in_y": [ + "y_ind" + ], + "out": [ + "y_pred_ind" + ] }, { "ref": "tag_vocab", - "in": ["y_pred_ind"], - "out": ["y_pred"] + "in": [ + "y_pred_ind" + ], + "out": [ + "y_pred" + ] } ], - "out": ["x_tokens", "y_pred"] + "out": [ + "x_tokens", + "y_pred" + ] }, "train": { "epochs": 30, - "batch_size": 16, + "batch_size": 10, "metrics": [ { "name": "ner_f1", - "inputs": ["y", "y_pred"] + "inputs": [ + "y", + "y_pred" + ] }, { "name": "ner_token_f1", - "inputs": ["y", "y_pred"] + "inputs": [ + "y", + "y_pred" + ] } ], "validation_patience": 100, "val_every_n_batches": 20, - "log_every_n_batches": 20, - "tensorboard_log_dir": "{NER_PATH}/logs", "show_examples": false, "pytest_max_batches": 2, "pytest_batch_size": 8, - "evaluation_targets": ["valid", "test"], - "class_name": "nn_trainer" + "evaluation_targets": [ + "valid", + "test" + ], + "class_name": "torch_trainer" }, "metadata": { "variables": { "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models", - "BERT_PATH": 
"{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1", - "NER_PATH": "{MODELS_PATH}/ner_rus_bert" + "TRANSFORMER": "DeepPavlov/rubert-base-cased", + "MODEL_PATH": "{MODELS_PATH}/ner_rus_bert_torch" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_rus_bert_v1.tar.gz", + "url": "http://files.deeppavlov.ai/0.16/ner/ner_rus_bert_torch.tar.gz", "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v1.tar.gz", - "subdir": "{DOWNLOADS_PATH}/bert_models" } ] } diff --git a/deeppavlov/configs/ner/ner_rus_bert_torch.json b/deeppavlov/configs/ner/ner_rus_bert_torch.json deleted file mode 100644 index 8a4c51ff5f..0000000000 --- a/deeppavlov/configs/ner/ner_rus_bert_torch.json +++ /dev/null @@ -1,155 +0,0 @@ -{ - "dataset_reader": { - "class_name": "conll2003_reader", - "data_path": "{DOWNLOADS_PATH}/total_rus/", - "dataset_name": "collection_rus", - "provide_pos": false - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": [ - "x" - ], - "in_y": [ - "y" - ], - "pipe": [ - { - "class_name": "torch_transformers_ner_preprocessor", - "vocab_file": "{TRANSFORMER}", - "do_lower_case": false, - "max_seq_length": 512, - "max_subword_length": 15, - "token_masking_prob": 0.0, - "in": [ - "x" - ], - "out": [ - "x_tokens", - "x_subword_tokens", - "x_subword_tok_ids", - "startofword_markers", - "attention_mask" - ] - }, - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "unk_token": [ - "O" - ], - "pad_with_zeros": true, - "save_path": "{MODEL_PATH}/tag.dict", - "load_path": "{MODEL_PATH}/tag.dict", - "fit_on": [ - "y" - ], - "in": [ - "y" - ], - "out": [ - "y_ind" - ] - }, - { - "class_name": "torch_transformers_sequence_tagger", - "n_tags": "#tag_vocab.len", - "pretrained_bert": "{TRANSFORMER}", - "attention_probs_keep_prob": 0.5, - "return_probas": false, - "encoder_layer_ids": [ - -1 - ], - "optimizer": "AdamW", - 
"optimizer_parameters": { - "lr": 2e-05, - "weight_decay": 1e-06, - "betas": [ - 0.9, - 0.999 - ], - "eps": 1e-06 - }, - "clip_norm": 1.0, - "min_learning_rate": 1e-07, - "learning_rate_drop_patience": 30, - "learning_rate_drop_div": 1.5, - "load_before_drop": true, - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "in": [ - "x_subword_tok_ids", - "attention_mask", - "startofword_markers" - ], - "in_y": [ - "y_ind" - ], - "out": [ - "y_pred_ind" - ] - }, - { - "ref": "tag_vocab", - "in": [ - "y_pred_ind" - ], - "out": [ - "y_pred" - ] - } - ], - "out": [ - "x_tokens", - "y_pred" - ] - }, - "train": { - "epochs": 30, - "batch_size": 10, - "metrics": [ - { - "name": "ner_f1", - "inputs": [ - "y", - "y_pred" - ] - }, - { - "name": "ner_token_f1", - "inputs": [ - "y", - "y_pred" - ] - } - ], - "validation_patience": 100, - "val_every_n_batches": 20, - "log_every_n_batches": 20, - "show_examples": false, - "pytest_max_batches": 2, - "pytest_batch_size": 8, - "evaluation_targets": [ - "valid", - "test" - ], - "class_name": "torch_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "TRANSFORMER": "DeepPavlov/rubert-base-cased", - "MODEL_PATH": "{MODELS_PATH}/ner_rus_bert_torch" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/0.16/ner/ner_rus_bert_torch.tar.gz", - "subdir": "{MODELS_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/ner/slotfill_dstc2.json b/deeppavlov/configs/ner/slotfill_dstc2.json deleted file mode 100644 index e1df2f26fd..0000000000 --- a/deeppavlov/configs/ner/slotfill_dstc2.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DATA_PATH}" - }, - "dataset_iterator": { - "class_name": "dstc2_ner_iterator", - "slot_values_path": "{SLOT_VALS_PATH}" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "class_name": 
"lazy_tokenizer", - "out": ["x_tokens"] - }, - { - "in": ["x_tokens"], - "config_path": "{NER_CONFIG_PATH}", - "out": ["x_tokens", "tags"] - }, - - { - "in": ["x_tokens", "tags"], - "class_name": "dstc_slotfilling", - "threshold": 0.8, - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "out": ["slots"] - } - ], - "out": ["slots"] - }, - "train": { - "metrics": ["slots_accuracy"], - "class_name": "fit_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "NER_CONFIG_PATH": "{DEEPPAVLOV_PATH}/configs/ner/ner_dstc2.json", - "DATA_PATH": "{ROOT_PATH}/downloads/dstc2", - "SLOT_VALS_PATH": "{DATA_PATH}/dstc_slot_vals.json", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/slotfill_dstc2" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", - "subdir": "{DATA_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/slotfill_dstc2.tar.gz", - "subdir": "{MODELS_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/ner/slotfill_dstc2_raw.json b/deeppavlov/configs/ner/slotfill_dstc2_raw.json deleted file mode 100644 index 9138d99c01..0000000000 --- a/deeppavlov/configs/ner/slotfill_dstc2_raw.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "dataset_reader": { - "class_name": "dstc2_reader", - "data_path": "{DATA_PATH}" - }, - "dataset_iterator": { - "class_name": "dstc2_ner_iterator", - "slot_values_path": "{SLOT_VALS_PATH}" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "class_name": "lazy_tokenizer", - "out": ["x_tokens"] - }, - { - "in": ["x_tokens"], - "class_name": "str_lower", - "out": ["x_lower"] - }, - { - "in": ["x_lower"], - "class_name": "slotfill_raw", - "save_path": "{SLOT_VALS_PATH}", - "load_path": "{SLOT_VALS_PATH}", - "out": ["slots"] - } - ], - "out": ["slots"] - }, - "train": { - "metrics": ["slots_accuracy"], - "evaluation_targets": [ - "valid", 
- "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DATA_PATH": "{ROOT_PATH}/downloads/dstc2", - "SLOT_VALS_PATH": "{DATA_PATH}/dstc_slot_vals.json" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", - "subdir": "{DATA_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/ner/slotfill_simple_dstc2_raw.json b/deeppavlov/configs/ner/slotfill_simple_dstc2_raw.json deleted file mode 100644 index d6f9750e34..0000000000 --- a/deeppavlov/configs/ner/slotfill_simple_dstc2_raw.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "dataset_reader": { - "class_name": "simple_dstc2_reader", - "data_path": "{DATA_PATH}" - }, - "dataset_iterator": { - "class_name": "dstc2_ner_iterator", - "slot_values_path": "{SLOT_VALS_PATH}" - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "class_name": "lazy_tokenizer", - "out": ["x_tokens"] - }, - { - "in": ["x_tokens"], - "class_name": "str_lower", - "out": ["x_lower"] - }, - { - "in": ["x_lower"], - "class_name": "slotfill_raw", - "save_path": "{SLOT_VALS_PATH}", - "load_path": "{SLOT_VALS_PATH}", - "out": ["slots"] - } - ], - "out": ["slots"] - }, - "train": { - "metrics": ["slots_accuracy"], - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DATA_PATH": "{ROOT_PATH}/downloads/simple-dstc2", - "SLOT_VALS_PATH": "{DATA_PATH}/dstc_slot_vals.json" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", - "subdir": "{DATA_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/ner/slotfill_simple_rasa_raw.json b/deeppavlov/configs/ner/slotfill_simple_rasa_raw.json deleted file mode 100644 index 1365ebe7f4..0000000000 --- a/deeppavlov/configs/ner/slotfill_simple_rasa_raw.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "class_name": "lazy_tokenizer", - 
"out": ["x_tokens"] - }, - { - "in": ["x_tokens"], - "class_name": "str_lower", - "out": ["x_lower"] - }, - { - "in": ["x_lower"], - "class_name": "slotfill_raw_rasa", - "save_path": "{DATA_PATH}", - "load_path": "{DATA_PATH}", - "out": ["slots"] - } - ], - "out": ["slots"] - }, - "train": { - "metrics": [], - "evaluation_targets": [] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "DATA_PATH": "{DOWNLOADS_PATH}/rasa_configs_reader" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/dp_minimal_rasa_demo.tar.gz", - "subdir": "{DATA_PATH}" - } - ] - } -} diff --git a/deeppavlov/configs/ner/vlsp2016_full.json b/deeppavlov/configs/ner/vlsp2016_full.json deleted file mode 100644 index ec8d10ffbe..0000000000 --- a/deeppavlov/configs/ner/vlsp2016_full.json +++ /dev/null @@ -1,170 +0,0 @@ -{ - "dataset_reader": { - "class_name": "conll2003_reader", - "data_path": "{DOWNLOADS_PATH}/vlsp2016/", - "dataset_name": "vlsp2016", - "provide_pos": true, - "provide_chunk": true, - "iobes": true - }, - "dataset_iterator": { - "class_name": "data_learning_iterator" - }, - "chainer": { - "in": ["x", "pos", "chunk"], - "in_y": ["y"], - "pipe": [ - { - "in": ["x"], - "out": ["x_tokens"], - "class_name": "lazy_tokenizer" - }, - { - "in": ["pos"], - "out": ["pos_tokens"], - "class_name": "lazy_tokenizer" - }, - { - "in": ["chunk"], - "out": ["chunk_tokens"], - "class_name": "lazy_tokenizer" - }, - { - "in": ["x_tokens"], - "out": ["x_lower", "sent_lengths"], - "class_name": "ner_preprocessor", - "id": "ner_preprocessor", - "get_x_padded_for_elmo": false, - "get_x_cap_padded": false - }, - { - "in": ["x_lower"], - "out": ["x_tok_ind"], - "fit_on": ["x_lower"], - "class_name": "ner_vocab", - "id": "word_vocab", - "save_path": "{MODELS_PATH}/word.dict", - "load_path": "{MODELS_PATH}/word.dict" - }, - { - "in": ["pos_tokens"], - "out": ["pos_ind"], - "fit_on": ["pos_tokens"], - "class_name": 
"ner_vocab", - "id": "pos_vocab", - "save_path": "{MODELS_PATH}/pos.dict", - "load_path": "{MODELS_PATH}/pos.dict" - }, - { - "in": ["chunk_tokens"], - "out": ["chunk_ind"], - "fit_on": ["chunk_tokens"], - "class_name": "ner_vocab", - "id": "chunk_vocab", - "save_path": "{MODELS_PATH}/chunk.dict", - "load_path": "{MODELS_PATH}/chunk.dict" - }, - { - "in": ["y"], - "out": ["y_ind"], - "fit_on": ["y"], - "class_name": "ner_vocab", - "id": "tag_vocab", - "save_path": "{MODELS_PATH}/tag.dict", - "load_path": "{MODELS_PATH}/tag.dict" - }, - { - "in": ["x_tokens"], - "out": ["x_char"], - "class_name": "char_splitter" - }, - { - "in": ["x_tokens"], - "out": ["x_char_ind"], - "fit_on": ["x_tokens"], - "class_name": "ner_vocab", - "char_level": true, - "id": "char_vocab", - "save_path": "{MODELS_PATH}/char.dict", - "load_path": "{MODELS_PATH}/char.dict" - }, - { - "in":[ - "sent_lengths", - "x_tok_ind", - "pos_ind", - "chunk_ind", - "x_char_ind" - ], - "in_y": ["y_ind"], - "out": ["y_predicted"], - "class_name": "hybrid_ner_model", - "n_tags": "#tag_vocab.len", - "word_emb_path": "{DOWNLOADS_PATH}/embeddings/baomoi.bin", - "word_emb_name": "baomoi", - "word_dim": 300, - "word_vocab": "#word_vocab", - "char_vocab_size": "#char_vocab.len", - "pos_vocab_size": "#pos_vocab.len", - "chunk_vocab_size": "#chunk_vocab.len", - "pos_dim": 40, - "chunk_dim": 40, - "char_dim": 100, - "lstm_hidden_size": 256, - "save_path": "{MODELS_PATH}/vlsp2016_full", - "load_path": "{MODELS_PATH}/vlsp2016_full", - "learning_rate": 1e-3, - "learning_rate_drop_patience": 5, - "learning_rate_drop_div": 10, - "dropout_keep_prob": 0.7 - }, - { - "in": ["y_predicted"], - "out": ["tags"], - "class_name": "convert_ids2tags", - "id2tag": "#tag_vocab.i2t" - } - ], - "out": ["x_tokens", "tags"] - }, - "train": { - "epochs": 100, - "batch_size": 64, - "metrics": [ - { - "name": "ner_f1", - "inputs": ["y", "tags"] - }, - { - "name": "ner_token_f1", - "inputs": ["y", "tags"] - } - ], - "validation_patience": 10, 
- "val_every_n_epochs": 1, - "log_every_n_epochs": 1, - "show_examples": false, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models/vlsp2016_full" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/ner_vlsp2016_full.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/baomoi.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} diff --git a/deeppavlov/configs/odqa/en_odqa_infer_enwiki20161221.json b/deeppavlov/configs/odqa/en_odqa_infer_enwiki20161221.json deleted file mode 100644 index 7b011f13d4..0000000000 --- a/deeppavlov/configs/odqa/en_odqa_infer_enwiki20161221.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "chainer": { - "in": [ - "question_raw" - ], - "out": [ - "best_answer" - ], - "pipe": [ - { - "config_path": "{CONFIGS_PATH}/doc_retrieval/en_ranker_tfidf_enwiki20161221.json", - "in": [ - "question_raw" - ], - "out": [ - "tfidf_doc_ids" - ] - }, - { - "class_name": "wiki_sqlite_vocab", - "in": [ - "tfidf_doc_ids" - ], - "out": [ - "tfidf_doc_text" - ], - "join_docs": false, - "shuffle": false, - "load_path": "{DOWNLOADS_PATH}/odqa/enwiki20161221.db" - }, - { - "class_name": "document_chunker", - "in": ["tfidf_doc_text"], - "out": ["chunks"], - "flatten_result": true, - "paragraphs": true - }, - { - "class_name": "string_multiplier", - "in": ["question_raw", "chunks"], - "out":["questions"] - }, - { - "class_name": "logit_ranker", - "batch_size": 10, - "squad_model": {"config_path": "{CONFIGS_PATH}/squad/multi_squad_noans_infer.json"}, - "sort_noans": true, - "in": [ - "chunks", - "questions" - ], - "out": [ - "best_answer", - "best_answer_score" - ] - } - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", 
- "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" - }, - "download": [ - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser.json b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser.json deleted file mode 100644 index d9cb4c5ec6..0000000000 --- a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "dataset_reader": { - "class_name": "paraphraser_reader", - "data_path": "{DOWNLOADS_PATH}/paraphraser_data" - }, - "dataset_iterator": { - "class_name": "siamese_iterator", - "seed": 243 - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "id": "preproc", - "class_name": "siamese_preprocessor", - "use_matrix": false, - "max_sequence_length": 28, - "fit_on": ["x"], - "in": ["x"], - "out": ["x_proc"], - "sent_vocab": { - "id": "siam_sent_vocab", - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/paraphraser_vocabs/sent.dict", - "load_path": "{MODELS_PATH}/paraphraser_vocabs/sent.dict" - }, - "tokenizer": { - "class_name": "nltk_tokenizer" - }, - "vocab": { - "id": "siam_vocab", - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/paraphraser_vocabs/tok.dict", - "load_path": "{MODELS_PATH}/paraphraser_vocabs/tok.dict" - }, - "embedder": { - "id": "siam_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/ft_native_300_ru_wiki_lenta_lower_case.bin" - } - }, - { - "id": "embeddings", - "class_name": "emb_mat_assembler", - "embedder": "#siam_embedder", - "vocab": "#siam_vocab" - }, - { - "in": ["x_proc"], - "in_y": ["y"], - "out": ["y_predicted"], - "class_name": "mpm_nn", - "len_vocab": "#siam_vocab.len", - "use_matrix": "#preproc.use_matrix", - "attention": true, - "max_sequence_length": "#preproc.max_sequence_length", - "emb_matrix": "#embeddings.emb_mat", - "embedding_dim": "#siam_embedder.dim", - "seed": 243, - "hidden_dim": 200, - "learning_rate": 1e-3, - "triplet_loss": false, - "batch_size": 256, - 
"save_path": "{MODELS_PATH}/paraphraser_model/model_weights.h5", - "load_path": "{MODELS_PATH}/paraphraser_model/model_weights.h5", - "preprocess": "#preproc.__call__" - } - ], - "out": ["y_predicted"] - }, - "train": { - "epochs": 200, - "batch_size": 256, - "pytest_max_batches": 2, - "train_metrics": ["f1", "acc", "log_loss"], - "metrics": ["f1", "acc", "log_loss"], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_batches": 24, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", - "subdir": "{DOWNLOADS_PATH}/paraphraser_data" - }, - { - "url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip", - "subdir": "{DOWNLOADS_PATH}/paraphraser_data" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_wiki_lenta_lower_case/ft_native_300_ru_wiki_lenta_lower_case.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_interact.json b/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_interact.json deleted file mode 100644 index 4c6e3fa28b..0000000000 --- a/deeppavlov/configs/ranking/paraphrase_ident_paraphraser_interact.json +++ /dev/null @@ -1,121 +0,0 @@ -{ - "dataset_reader": { - "class_name": "paraphraser_reader", - "data_path": "{DOWNLOADS_PATH}/paraphraser_data" - }, - "dataset_iterator": { - "class_name": "siamese_iterator", - "seed": 243 - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "id": "preproc", - "class_name": "siamese_preprocessor", - "use_matrix": false, - "max_sequence_length": 28, - "fit_on": ["x"], - "in": ["x"], - "out": ["x_proc"], - "sent_vocab": { - "id": "siam_sent_vocab", - "class_name": "simple_vocab", 
- "save_path": "{MODELS_PATH}/paraphraser_vocabs/sent.dict", - "load_path": "{MODELS_PATH}/paraphraser_vocabs/sent.dict" - }, - "tokenizer": { - "class_name": "nltk_tokenizer" - }, - "vocab": { - "id": "siam_vocab", - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/paraphraser_vocabs/tok.dict", - "load_path": "{MODELS_PATH}/paraphraser_vocabs/tok.dict" - }, - "embedder": { - "id": "siam_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/ft_native_300_ru_wiki_lenta_lower_case.bin" - } - }, - { - "id": "embeddings", - "class_name": "emb_mat_assembler", - "embedder": "#siam_embedder", - "vocab": "#siam_vocab" - }, - { - "id": "model", - "class_name": "mpm_nn", - "len_vocab": "#siam_vocab.len", - "use_matrix": "#preproc.use_matrix", - "attention": true, - "max_sequence_length": "#preproc.max_sequence_length", - "emb_matrix": "#embeddings.emb_mat", - "embedding_dim": "#siam_embedder.dim", - "seed": 243, - "hidden_dim": 200, - "learning_rate": 1e-3, - "triplet_loss": false, - "batch_size": 256, - "save_path": "{MODELS_PATH}/paraphraser_model/model_weights.h5", - "load_path": "{MODELS_PATH}/paraphraser_model/model_weights.h5", - "preprocess": "#preproc.__call__" - }, - { - "in": ["x_proc"], - "in_y": ["y"], - "out": ["y_predicted"], - "class_name": "siamese_predictor", - "model": "#model", - "ranking": false, - "attention": true, - "batch_size": "#model.batch_size", - "preproc_func": "#preproc.__call__" - } - ], - "out": ["y_predicted"] - }, - "train": { - "epochs": 200, - "batch_size": 256, - "pytest_max_batches": 2, - "train_metrics": ["f1", "acc", "log_loss"], - "metrics": ["f1", "acc", "log_loss"], - "validation_patience": 10, - "val_every_n_epochs": 5, - "log_every_n_batches": 12, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - 
"download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/paraphrase_ident_paraphraser.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/datasets/paraphraser.zip", - "subdir": "{DOWNLOADS_PATH}/paraphraser_data" - }, - { - "url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip", - "subdir": "{DOWNLOADS_PATH}/paraphraser_data" - }, - { - "url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_wiki_lenta_lower_case/ft_native_300_ru_wiki_lenta_lower_case.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt.json deleted file mode 100644 index 499bd3d3dc..0000000000 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "dataset_reader": { - "class_name": "ubuntu_v2_mt_reader", - "data_path": "{DOWNLOADS_PATH}/ubuntu_v2_data", - "num_context_turns": "{NUM_CONTEXT_TURNS}" - }, - "dataset_iterator": { - "class_name": "siamese_iterator", - "seed": 243 - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "id": "preproc", - "class_name": "siamese_preprocessor", - "use_matrix": true, - "num_ranking_samples": 10, - "num_context_turns": "{NUM_CONTEXT_TURNS}", - "max_sequence_length": 50, - "fit_on": ["x"], - "in": ["x"], - "out": ["x_proc"], - "sent_vocab": { - "id": "siam_sent_vocab", - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/ubuntu_v2_vocabs/sent.dict", - "load_path": "{MODELS_PATH}/ubuntu_v2_vocabs/sent.dict" - }, - "tokenizer": { - "class_name": "nltk_tokenizer" - }, - "vocab": { - "id": "siam_vocab", - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/ubuntu_v2_mt_vocabs/tok.dict", - "load_path": "{MODELS_PATH}/ubuntu_v2_mt_vocabs/tok.dict" - }, - "embedder": { - "id": "siam_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin" - } 
- }, - { - "id": "embeddings", - "class_name": "emb_mat_assembler", - "embedder": "#siam_embedder", - "vocab": "#siam_vocab" - }, - { - "in": ["x_proc"], - "in_y": ["y"], - "out": ["y_predicted"], - "class_name": "bilstm_gru_nn", - "use_matrix": "#preproc.use_matrix", - "num_context_turns": "{NUM_CONTEXT_TURNS}", - "len_vocab": "#siam_vocab.len", - "max_sequence_length": "#preproc.max_sequence_length", - "embedding_dim": "#siam_embedder.dim", - "emb_matrix": "#embeddings.emb_mat", - "seed": 243, - "hidden_dim": 300, - "learning_rate": 1e-3, - "triplet_loss": false, - "batch_size": 256, - "save_path": "{MODELS_PATH}/ubuntu_v2_mt_model/model_weights.h5", - "load_path": "{MODELS_PATH}/ubuntu_v2_mt_model/model_weights.h5" - } - ], - "out": ["y_predicted"] - }, - "train": { - "epochs": 200, - "batch_size": 256, - "pytest_max_batches": 2, - "train_metrics": [], - "metrics": ["r@1", "rank_response"], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_batches": 1000, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "NUM_CONTEXT_TURNS": 10 - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", - "subdir": "{DOWNLOADS_PATH}/ubuntu_v2_data" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_interact.json b/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_interact.json deleted file mode 100644 index 3ece2399dd..0000000000 --- a/deeppavlov/configs/ranking/ranking_ubuntu_v2_mt_interact.json +++ /dev/null @@ -1,121 +0,0 @@ -{ - "dataset_reader": { - "class_name": "ubuntu_v2_mt_reader", - "data_path": "{DOWNLOADS_PATH}/ubuntu_v2_data", - "num_context_turns": 
"{NUM_CONTEXT_TURNS}" - }, - "dataset_iterator": { - "class_name": "siamese_iterator", - "seed": 243 - }, - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "id": "preproc", - "class_name": "siamese_preprocessor", - "use_matrix": true, - "num_ranking_samples": 10, - "num_context_turns": "{NUM_CONTEXT_TURNS}", - "max_sequence_length": 50, - "fit_on": ["x"], - "in": ["x"], - "out": ["x_proc"], - "sent_vocab": { - "id": "siam_sent_vocab", - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/ubuntu_v2_vocabs/sent.dict", - "load_path": "{MODELS_PATH}/ubuntu_v2_vocabs/sent.dict" - }, - "tokenizer": { - "class_name": "nltk_tokenizer" - }, - "vocab": { - "id": "siam_vocab", - "class_name": "simple_vocab", - "save_path": "{MODELS_PATH}/ubuntu_v2_mt_vocabs/tok.dict", - "load_path": "{MODELS_PATH}/ubuntu_v2_mt_vocabs/tok.dict" - }, - "embedder": { - "id": "siam_embedder", - "class_name": "fasttext", - "load_path": "{DOWNLOADS_PATH}/embeddings/wiki.en.bin" - } - }, - { - "id": "embeddings", - "class_name": "emb_mat_assembler", - "embedder": "#siam_embedder", - "vocab": "#siam_vocab" - }, - { - "id": "model", - "class_name": "bilstm_gru_nn", - "use_matrix": "#preproc.use_matrix", - "num_context_turns": "{NUM_CONTEXT_TURNS}", - "len_vocab": "#siam_vocab.len", - "max_sequence_length": "#preproc.max_sequence_length", - "embedding_dim": "#siam_embedder.dim", - "emb_matrix": "#embeddings.emb_mat", - "seed": 243, - "hidden_dim": 300, - "learning_rate": 1e-3, - "triplet_loss": false, - "batch_size": 256, - "save_path": "{MODELS_PATH}/ubuntu_v2_mt_model/model_weights.h5", - "load_path": "{MODELS_PATH}/ubuntu_v2_mt_model/model_weights.h5" - }, - { - "in": ["x_proc"], - "in_y": ["y"], - "out": ["y_predicted"], - "class_name": "siamese_predictor", - "model": "#model", - "num_context_turns": "{NUM_CONTEXT_TURNS}", - "batch_size": "#model.batch_size", - "responses": "#siam_sent_vocab", - "preproc_func": "#preproc.__call__" - } - ], - "out": ["y_predicted"] - }, - 
"train": { - "epochs": 200, - "batch_size": 256, - "pytest_max_batches": 2, - "train_metrics": [], - "metrics": ["r@1", "rank_response"], - "validation_patience": 10, - "val_every_n_epochs": 1, - "log_every_n_batches": 1000, - "class_name": "nn_trainer", - "evaluation_targets": [ - "valid", - "test" - ] - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "NUM_CONTEXT_TURNS": 10 - - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/ubuntu_v2_mt_ranking.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/datasets/ubuntu_v2_data.tar.gz", - "subdir": "{DOWNLOADS_PATH}/ubuntu_v2_data" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin", - "subdir": "{DOWNLOADS_PATH}/embeddings" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json deleted file mode 100644 index d24b70d8e4..0000000000 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "dataset_reader": { - "class_name": "typos_kartaslov_reader", - "data_path": "{DOWNLOADS_PATH}" - }, - "dataset_iterator": { - "class_name": "typos_iterator", - "test_ratio": 0.02 - }, - "chainer":{ - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "class_name": "str_lower", - "id": "lower", - "in": ["x"], - "out": ["x_lower"] - }, - { - "class_name": "nltk_moses_tokenizer", - "id": "tokenizer", - "in": ["x_lower"], - "out": ["x_tokens"] - }, - { - "ref": "tokenizer", - "in": ["y"], - "out": ["y_tokens"] - }, - { - "fit_on": ["x_tokens", "y_tokens"], - "in": ["x_tokens"], - "out": ["tokens_candidates"], - "class_name": "spelling_error_model", - "window": 1, - "candidates_count": 4, - "dictionary": { - "class_name": "russian_words_vocab", - "data_dir": 
"{DOWNLOADS_PATH}/vocabs" - }, - "save_path": "{MODELS_PATH}/error_model/error_model_ru.tsv", - "load_path": "{MODELS_PATH}/error_model/error_model_ru.tsv" - }, - { - "class_name": "kenlm_elector", - "in": ["tokens_candidates"], - "out": ["y_predicted_tokens"], - "load_path": "{DOWNLOADS_PATH}/language_models/ru_wiyalen_no_punkt.arpa.binary" - }, - { - "ref": "tokenizer", - "in": ["y_predicted_tokens"], - "out": ["y_predicted"] - } - ], - "out": ["y_predicted"] - }, - "train": { - "evaluation_targets": [ - "test" - ], - "class_name": "fit_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/error_model.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/lang_models/ru_wiyalen_no_punkt.arpa.binary.gz", - "subdir": "{DOWNLOADS_PATH}/language_models" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json deleted file mode 100644 index 46694d2205..0000000000 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_custom_vocab.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "dataset_reader": { - "class_name": "typos_kartaslov_reader", - "data_path": "{DOWNLOADS_PATH}" - }, - "dataset_iterator": { - "class_name": "typos_iterator", - "test_ratio": 0.02 - }, - "chainer":{ - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "class_name": "str_lower", - "id": "lower", - "in": ["x"], - "out": ["x_lower"] - }, - { - "class_name": "nltk_moses_tokenizer", - "id": "tokenizer", - "in": ["x_lower"], - "out": ["x_tokens"] - }, - { - "ref": "tokenizer", - "in": ["y"], - "out": ["y_tokens"] - }, - { - "fit_on": ["x_tokens", "y_tokens"], - "in": ["x_tokens"], - "out": ["tokens_candidates"], - "class_name": 
"spelling_error_model", - "window": 1, - "candidates_count": 4, - "dictionary": { - "class_name": "static_dictionary", - "dictionary_name": "compreno_words", - "data_dir": "{DOWNLOADS_PATH}/vocabs", - "raw_dictionary_path": "./compreno_wordforms.txt" - }, - "save_path": "{MODELS_PATH}/error_model/error_model_ru.tsv", - "load_path": "{MODELS_PATH}/error_model/error_model_ru.tsv" - }, - { - "class_name": "kenlm_elector", - "in": ["tokens_candidates"], - "out": ["y_predicted_tokens"], - "load_path": "{DOWNLOADS_PATH}/language_models/ru_wiyalen_no_punkt.arpa.binary" - }, - { - "ref": "tokenizer", - "in": ["y_predicted_tokens"], - "out": ["y_predicted"] - } - ], - "out": ["y_predicted"] - }, - "train": { - "evaluation_targets": [ - "test" - ], - "class_name": "fit_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/error_model.tar.gz", - "subdir": "{MODELS_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/lang_models/ru_wiyalen_no_punkt.arpa.binary.gz", - "subdir": "{DOWNLOADS_PATH}/language_models" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json b/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json deleted file mode 100644 index 6aa7de9c85..0000000000 --- a/deeppavlov/configs/spelling_correction/brillmoore_kartaslov_ru_nolm.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "dataset_reader": { - "class_name": "typos_kartaslov_reader", - "data_path": "{DOWNLOADS_PATH}" - }, - "dataset_iterator": { - "class_name": "typos_iterator", - "test_ratio": 0.02 - }, - "chainer":{ - "in": ["x"], - "in_y": ["y"], - "pipe": [ - { - "class_name": "str_lower", - "id": "lower", - "in": ["x"], - "out": ["x_lower"] - }, - { - "class_name": "nltk_moses_tokenizer", - "id": "tokenizer", - "in": ["x_lower"], - 
"out": ["x_tokens"] - }, - { - "ref": "tokenizer", - "in": ["y"], - "out": ["y_tokens"] - }, - { - "fit_on": ["x_tokens", "y_tokens"], - "in": ["x_tokens"], - "out": ["tokens_candidates"], - "class_name": "spelling_error_model", - "window": 1, - "candidates_count": 1, - "dictionary": { - "class_name": "russian_words_vocab", - "data_dir": "{DOWNLOADS_PATH}/vocabs" - }, - "save_path": "{MODELS_PATH}/error_model/error_model_ru.tsv", - "load_path": "{MODELS_PATH}/error_model/error_model_ru.tsv" - }, - { - "class_name": "top1_elector", - "in": ["tokens_candidates"], - "out": ["y_predicted_tokens"] - }, - { - "ref": "tokenizer", - "in": ["y_predicted_tokens"], - "out": ["y_predicted"] - } - ], - "out": ["y_predicted"] - }, - "train": { - "evaluation_targets": [ - "test" - ], - "class_name": "fit_trainer" - }, - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/error_model.tar.gz", - "subdir": "{MODELS_PATH}" - } - ] - } -} \ No newline at end of file diff --git a/deeppavlov/configs/squad/squad_bert_uncased.json b/deeppavlov/configs/squad/squad_bert_uncased.json deleted file mode 100644 index 5542458965..0000000000 --- a/deeppavlov/configs/squad/squad_bert_uncased.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "dataset_reader": { - "class_name": "squad_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/squad/" - }, - "dataset_iterator": { - "class_name": "squad_iterator", - "seed": 1337, - "shuffle": true - }, - "chainer": { - "in": ["context_raw", "question_raw"], - "in_y": ["ans_raw", "ans_raw_start"], - "pipe": [ - { - "class_name": "bert_preprocessor", - "id": "bert_preprocessor", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/uncased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": "{lowercase}", - "max_seq_length": 384, - "in": ["question_raw", "context_raw"], - "out": ["bert_features"] - }, - { - 
"class_name": "squad_bert_mapping", - "do_lower_case": "{lowercase}", - "in": ["context_raw", "bert_features"], - "out": ["subtok2chars", "char2subtoks"] - }, - { - "class_name": "squad_bert_ans_preprocessor", - "do_lower_case": "{lowercase}", - "in": ["ans_raw", "ans_raw_start","char2subtoks"], - "out": ["ans", "ans_start", "ans_end"] - }, - { - "class_name": "squad_bert_model", - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/uncased_L-12_H-768_A-12/bert_config.json", - "pretrained_bert": "{DOWNLOADS_PATH}/bert_models/uncased_L-12_H-768_A-12/bert_model.ckpt", - "save_path": "{MODELS_PATH}/squad_bert/uncased_model", - "load_path": "{MODELS_PATH}/squad_bert/uncased_model", - "keep_prob": 0.5, - "learning_rate": 2e-05, - "learning_rate_drop_patience": 2, - "learning_rate_drop_div": 2.0, - "in": ["bert_features"], - "in_y": ["ans_start", "ans_end"], - "out": ["ans_start_predicted", "ans_end_predicted", "logits"] - }, - { - "class_name": "squad_bert_ans_postprocessor", - "in": ["ans_start_predicted", "ans_end_predicted", "context_raw", "bert_features", "subtok2chars"], - "out": ["ans_predicted", "ans_start_predicted", "ans_end_predicted"] - } - ], - "out": ["ans_predicted", "ans_start_predicted", "logits"] - }, - "train": { - "show_examples": false, - "test_best": false, - "validate_best": true, - "log_every_n_batches": 250, - "val_every_n_batches": 500, - "batch_size": 10, - "pytest_max_batches": 2, - "pytest_batch_size": 5, - "validation_patience": 10, - "metrics": [ - { - "name": "squad_v1_f1", - "inputs": ["ans", "ans_predicted"] - }, - { - "name": "squad_v1_em", - "inputs": ["ans", "ans_predicted"] - }, - { - "name": "squad_v2_f1", - "inputs": ["ans", "ans_predicted"] - }, - { - "name": "squad_v2_em", - "inputs": ["ans", "ans_predicted"] - } - ], - "tensorboard_log_dir": "{MODELS_PATH}/squad_bert/uncased_logs" - }, - "metadata": { - "variables": { - "lowercase": true, - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": 
"{ROOT_PATH}/models" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/uncased_L-12_H-768_A-12.zip", - "subdir": "{DOWNLOADS_PATH}/bert_models" - } - ] - } -} - diff --git a/deeppavlov/configs/squad/squad_ru_bert.json b/deeppavlov/configs/squad/squad_ru_bert.json index 7b105b47ef..029777626e 100644 --- a/deeppavlov/configs/squad/squad_ru_bert.json +++ b/deeppavlov/configs/squad/squad_ru_bert.json @@ -11,98 +11,165 @@ "shuffle": true }, "chainer": { - "in": ["context_raw", "question_raw"], - "in_y": ["ans_raw", "ans_raw_start"], + "in": [ + "context_raw", + "question_raw" + ], + "in_y": [ + "ans_raw", + "ans_raw_start" + ], "pipe": [ { - "class_name": "bert_preprocessor", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": "{lowercase}", + "class_name": "torch_squad_transformers_preprocessor", + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{LOWERCASE}", "max_seq_length": 384, - "in": ["question_raw", "context_raw"], - "out": ["bert_features"] + "return_tokens": true, + "in": [ + "question_raw", + "context_raw" + ], + "out": [ + "bert_features", + "subtokens" + ] }, { "class_name": "squad_bert_mapping", - "do_lower_case": "{lowercase}", - "in": ["context_raw", "bert_features"], - "out": ["subtok2chars", "char2subtoks"] + "do_lower_case": "{LOWERCASE}", + "in": [ + "context_raw", + "bert_features", + "subtokens" + ], + "out": [ + "subtok2chars", + "char2subtoks" + ] }, { "class_name": "squad_bert_ans_preprocessor", - "do_lower_case": "{lowercase}", - "in": ["ans_raw", "ans_raw_start","char2subtoks"], - "out": ["ans", "ans_start", "ans_end"] + "do_lower_case": "{LOWERCASE}", + "in": [ + "ans_raw", + "ans_raw_start", + "char2subtoks" + ], + "out": [ + "ans", + "ans_start", + "ans_end" + ] }, { - "class_name": "squad_bert_model", - "bert_config_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_config.json", - "pretrained_bert": 
"{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/bert_model.ckpt", - "save_path": "{MODELS_PATH}/squad_ru_bert/model_multi", - "load_path": "{MODELS_PATH}/squad_ru_bert/model_multi", - "keep_prob": 0.5, - "learning_rate": 2e-05, + "class_name": "torch_transformers_squad", + "pretrained_bert": "{TRANSFORMER}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2e-05, + "weight_decay": 0.01, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-06 + }, "learning_rate_drop_patience": 3, "learning_rate_drop_div": 2.0, - "in": ["bert_features"], - "in_y": ["ans_start", "ans_end"], - "out": ["ans_start_predicted", "ans_end_predicted", "logits", "score"] + "in": [ + "bert_features" + ], + "in_y": [ + "ans_start", + "ans_end" + ], + "out": [ + "ans_start_predicted", + "ans_end_predicted", + "logits" + ] }, { "class_name": "squad_bert_ans_postprocessor", - "in": ["ans_start_predicted", "ans_end_predicted", "context_raw", "bert_features", "subtok2chars"], - "out": ["ans_predicted", "ans_start_predicted", "ans_end_predicted"] + "in": [ + "ans_start_predicted", + "ans_end_predicted", + "context_raw", + "bert_features", + "subtok2chars", + "subtokens" + ], + "out": [ + "ans_predicted", + "ans_start_predicted", + "ans_end_predicted" + ] } ], - "out": ["ans_predicted", "ans_start_predicted", "logits"] + "out": [ + "ans_predicted", + "ans_start_predicted", + "logits" + ] }, "train": { "show_examples": false, - "test_best": false, - "validate_best": true, + "evaluation_targets": [ + "valid" + ], "log_every_n_batches": 250, "val_every_n_batches": 500, "batch_size": 10, - "pytest_max_batches": 2, - "pytest_batch_size": 5, "validation_patience": 10, "metrics": [ { - "name": "squad_v2_f1", - "inputs": ["ans", "ans_predicted"] + "name": "squad_v1_f1", + "inputs": [ + "ans", + "ans_predicted" + ] }, { - "name": "squad_v2_em", - "inputs": ["ans", "ans_predicted"] + "name": "squad_v1_em", + "inputs": [ + 
"ans", + "ans_predicted" + ] }, { - "name": "squad_v1_f1", - "inputs": ["ans", "ans_predicted"] + "name": "squad_v2_f1", + "inputs": [ + "ans", + "ans_predicted" + ] }, { - "name": "squad_v1_em", - "inputs": ["ans", "ans_predicted"] + "name": "squad_v2_em", + "inputs": [ + "ans", + "ans_predicted" + ] } ], - "tensorboard_log_dir": "{MODELS_PATH}/squad_ru_bert/logs" + "class_name": "torch_trainer" }, "metadata": { "variables": { - "lowercase": false, + "LOWERCASE": false, + "TRANSFORMER": "DeepPavlov/rubert-base-cased", "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models" + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_torch_bert/{TRANSFORMER}" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip", - "subdir": "{DOWNLOADS_PATH}/bert_models" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_bert.tar.gz", + "url": "http://files.deeppavlov.ai/v1/squad/squad_ru_torch_bert.tar.gz", "subdir": "{MODELS_PATH}" } - ] + ] } } - diff --git a/deeppavlov/configs/squad/squad_ru_bert_infer.json b/deeppavlov/configs/squad/squad_ru_bert_infer.json index 83cc2cdd68..1dcb8e5d2e 100644 --- a/deeppavlov/configs/squad/squad_ru_bert_infer.json +++ b/deeppavlov/configs/squad/squad_ru_bert_infer.json @@ -11,68 +11,87 @@ "shuffle": true }, "chainer": { - "in": ["context_raw", "question_raw"], - "in_y": ["ans_raw", "ans_raw_start"], + "in": [ + "context_raw", + "question_raw" + ], + "in_y": [ + "ans_raw", + "ans_raw_start" + ], "pipe": [ { - "class_name": "squad_bert_infer", - "lang": "ru", + "class_name": "torch_transformers_squad_infer", "batch_size": 10, "squad_model_config": "{CONFIGS_PATH}/squad/squad_ru_bert.json", - "vocab_file": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12/vocab.txt", - "do_lower_case": false, - "max_seq_length": 512, + "vocab_file": "{TRANSFORMER}", + "do_lower_case": "{LOWERCASE}", + 
"max_seq_length": 384, "in": ["context_raw", "question_raw"], - "out": ["ans_predicted", "ans_start_predicted", "logits", "score"] + "out": ["ans_predicted", "ans_start_predicted", "logits"] } ], - "out": ["ans_predicted", "ans_start_predicted", "logits"] + "out": [ + "ans_predicted", + "ans_start_predicted", + "logits" + ] }, "train": { "show_examples": false, - "test_best": false, - "validate_best": true, + "evaluation_targets": [ + "valid" + ], "log_every_n_batches": 250, "val_every_n_batches": 500, "batch_size": 10, - "pytest_max_batches": 2, "validation_patience": 10, "metrics": [ { "name": "squad_v1_f1", - "inputs": ["ans_raw", "ans_predicted"] + "inputs": [ + "ans_raw", + "ans_predicted" + ] }, { "name": "squad_v1_em", - "inputs": ["ans_raw", "ans_predicted"] + "inputs": [ + "ans_raw", + "ans_predicted" + ] }, { "name": "squad_v2_f1", - "inputs": ["ans_raw", "ans_predicted"] + "inputs": [ + "ans_raw", + "ans_predicted" + ] }, { "name": "squad_v2_em", - "inputs": ["ans_raw", "ans_predicted"] + "inputs": [ + "ans_raw", + "ans_predicted" + ] } ] }, "metadata": { "variables": { + "LOWERCASE": false, + "TRANSFORMER": "DeepPavlov/rubert-base-cased", "ROOT_PATH": "~/.deeppavlov", "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/squad_ru_torch_bert/{TRANSFORMER}", "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs" }, "download": [ { - "url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip", - "subdir": "{DOWNLOADS_PATH}/bert_models" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/squad_ru_bert.tar.gz", + "url": "http://files.deeppavlov.ai/v1/squad/squad_ru_torch_bert.tar.gz", "subdir": "{MODELS_PATH}" } - ] + ] } } - diff --git a/deeppavlov/configs/squad/squad_ru_torch_bert.json b/deeppavlov/configs/squad/squad_ru_torch_bert.json deleted file mode 100644 index 029777626e..0000000000 --- a/deeppavlov/configs/squad/squad_ru_torch_bert.json +++ /dev/null @@ -1,175 +0,0 
@@ -{ - "dataset_reader": { - "class_name": "squad_dataset_reader", - "dataset": "SberSQuADClean", - "url": "http://files.deeppavlov.ai/datasets/sber_squad_clean-v1.1.tar.gz", - "data_path": "{DOWNLOADS_PATH}/squad_ru_clean/" - }, - "dataset_iterator": { - "class_name": "squad_iterator", - "seed": 1337, - "shuffle": true - }, - "chainer": { - "in": [ - "context_raw", - "question_raw" - ], - "in_y": [ - "ans_raw", - "ans_raw_start" - ], - "pipe": [ - { - "class_name": "torch_squad_transformers_preprocessor", - "vocab_file": "{TRANSFORMER}", - "do_lower_case": "{LOWERCASE}", - "max_seq_length": 384, - "return_tokens": true, - "in": [ - "question_raw", - "context_raw" - ], - "out": [ - "bert_features", - "subtokens" - ] - }, - { - "class_name": "squad_bert_mapping", - "do_lower_case": "{LOWERCASE}", - "in": [ - "context_raw", - "bert_features", - "subtokens" - ], - "out": [ - "subtok2chars", - "char2subtoks" - ] - }, - { - "class_name": "squad_bert_ans_preprocessor", - "do_lower_case": "{LOWERCASE}", - "in": [ - "ans_raw", - "ans_raw_start", - "char2subtoks" - ], - "out": [ - "ans", - "ans_start", - "ans_end" - ] - }, - { - "class_name": "torch_transformers_squad", - "pretrained_bert": "{TRANSFORMER}", - "save_path": "{MODEL_PATH}/model", - "load_path": "{MODEL_PATH}/model", - "optimizer": "AdamW", - "optimizer_parameters": { - "lr": 2e-05, - "weight_decay": 0.01, - "betas": [ - 0.9, - 0.999 - ], - "eps": 1e-06 - }, - "learning_rate_drop_patience": 3, - "learning_rate_drop_div": 2.0, - "in": [ - "bert_features" - ], - "in_y": [ - "ans_start", - "ans_end" - ], - "out": [ - "ans_start_predicted", - "ans_end_predicted", - "logits" - ] - }, - { - "class_name": "squad_bert_ans_postprocessor", - "in": [ - "ans_start_predicted", - "ans_end_predicted", - "context_raw", - "bert_features", - "subtok2chars", - "subtokens" - ], - "out": [ - "ans_predicted", - "ans_start_predicted", - "ans_end_predicted" - ] - } - ], - "out": [ - "ans_predicted", - "ans_start_predicted", - 
"logits" - ] - }, - "train": { - "show_examples": false, - "evaluation_targets": [ - "valid" - ], - "log_every_n_batches": 250, - "val_every_n_batches": 500, - "batch_size": 10, - "validation_patience": 10, - "metrics": [ - { - "name": "squad_v1_f1", - "inputs": [ - "ans", - "ans_predicted" - ] - }, - { - "name": "squad_v1_em", - "inputs": [ - "ans", - "ans_predicted" - ] - }, - { - "name": "squad_v2_f1", - "inputs": [ - "ans", - "ans_predicted" - ] - }, - { - "name": "squad_v2_em", - "inputs": [ - "ans", - "ans_predicted" - ] - } - ], - "class_name": "torch_trainer" - }, - "metadata": { - "variables": { - "LOWERCASE": false, - "TRANSFORMER": "DeepPavlov/rubert-base-cased", - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/squad_ru_torch_bert/{TRANSFORMER}" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/v1/squad/squad_ru_torch_bert.tar.gz", - "subdir": "{MODELS_PATH}" - } - ] - } -} diff --git a/deeppavlov/core/common/registry.json b/deeppavlov/core/common/registry.json index 2ed9d8e217..a870e00a75 100644 --- a/deeppavlov/core/common/registry.json +++ b/deeppavlov/core/common/registry.json @@ -17,7 +17,6 @@ "bert_sequence_network": "deeppavlov.models.bert.bert_sequence_tagger:BertSequenceNetwork", "bert_sequence_tagger": "deeppavlov.models.bert.bert_sequence_tagger:BertSequenceTagger", "bert_syntax_parser": "deeppavlov.models.syntax_parser.network:BertSyntaxParser", - "bilstm_gru_nn": "deeppavlov.models.ranking.bilstm_gru_siamese_network:BiLSTMGRUSiameseNetwork", "bilstm_nn": "deeppavlov.models.ranking.bilstm_siamese_network:BiLSTMSiameseNetwork", "boolqa_reader": "deeppavlov.dataset_readers.boolqa_reader:BoolqaReader", "bow": "deeppavlov.models.embedders.bow_embedder:BoWEmbedder", @@ -79,10 +78,8 @@ "logit_ranker": "deeppavlov.models.doc_retrieval.logit_ranker:LogitRanker", "mask": "deeppavlov.models.preprocessors.mask:Mask", "md_yaml_dialogs_reader": 
"deeppavlov.dataset_readers.md_yaml_dialogs_reader:MD_YAML_DialogsDatasetReader", - "morpho_tagger": "deeppavlov.models.morpho_tagger.morpho_tagger:MorphoTagger", "morphotagger_dataset": "deeppavlov.dataset_iterators.morphotagger_iterator:MorphoTaggerDatasetIterator", "morphotagger_dataset_reader": "deeppavlov.dataset_readers.morphotagging_dataset_reader:MorphotaggerDatasetReader", - "mpm_nn": "deeppavlov.models.ranking.mpm_siamese_network:MPMSiameseNetwork", "mt_bert": "deeppavlov.models.multitask_bert.multitask_bert:MultiTaskBert", "mt_bert_classification_task": "deeppavlov.models.multitask_bert.multitask_bert:MTBertClassificationTask", "mt_bert_reuser": "deeppavlov.models.multitask_bert.multitask_bert:MTBertReUser", @@ -97,7 +94,6 @@ "ner": "deeppavlov.models.ner.network:NerNetwork", "ner_bio_converter": "deeppavlov.models.ner.bio:BIOMarkupRestorer", "ner_chunker": "deeppavlov.models.kbqa.entity_linking:NerChunker", - "ner_few_shot_iterator": "deeppavlov.dataset_iterators.ner_few_shot_iterator:NERFewShotIterator", "ner_preprocessor": "deeppavlov.models.preprocessors.ner_preprocessor:NerPreprocessor", "ner_svm": "deeppavlov.models.ner.svm:SVMTagger", "ner_vocab": "deeppavlov.models.preprocessors.ner_preprocessor:NerVocab", @@ -124,7 +120,6 @@ "re_classifier": "deeppavlov.models.relation_extraction.relation_extraction_bert:REBertModel", "response_base_loader": "deeppavlov.models.preprocessors.response_base_loader:ResponseBaseLoader", "ru_adj_to_noun": "deeppavlov.models.kbqa.tree_to_sparql:RuAdjToNoun", - "ru_obscenity_classifier": "deeppavlov.models.classifiers.ru_obscenity_classifier:RuObscenityClassifier", "ru_sent_tokenizer": "deeppavlov.models.tokenizers.ru_sent_tokenizer:RuSentTokenizer", "ru_tokenizer": "deeppavlov.models.tokenizers.ru_tokenizer:RussianTokenizer", "rured_reader": "deeppavlov.dataset_readers.rured_reader:RuREDDatasetReader", @@ -132,7 +127,6 @@ "sanitizer": "deeppavlov.models.preprocessors.sanitizer:Sanitizer", "sentseg_restore_sent": 
"deeppavlov.models.preprocessors.sentseg_preprocessor:SentSegRestoreSent", "siamese_iterator": "deeppavlov.dataset_iterators.siamese_iterator:SiameseIterator", - "siamese_predictor": "deeppavlov.models.ranking.siamese_predictor:SiamesePredictor", "siamese_preprocessor": "deeppavlov.models.preprocessors.siamese_preprocessor:SiamesePreprocessor", "siamese_reader": "deeppavlov.dataset_readers.siamese_reader:SiameseReader", "simple_dstc2_reader": "deeppavlov.dataset_readers.dstc2_reader:SimpleDSTC2DatasetReader", @@ -141,9 +135,6 @@ "slotfill_raw": "deeppavlov.models.slotfill.slotfill_raw:SlotFillingComponent", "slotfill_raw_rasa": "deeppavlov.models.slotfill.slotfill_raw:RASA_SlotFillingComponent", "smn_nn": "deeppavlov.models.ranking.sequential_matching_network:SMNNetwork", - "snips_intents_iterator": "deeppavlov.dataset_iterators.snips_intents_iterator:SnipsIntentIterator", - "snips_ner_iterator": "deeppavlov.dataset_iterators.snips_ner_iterator:SnipsNerIterator", - "snips_reader": "deeppavlov.dataset_readers.snips_reader:SnipsReader", "spelling_error_model": "deeppavlov.models.spelling_correction.brillmoore.error_model:ErrorModel", "spelling_levenshtein": "deeppavlov.models.spelling_correction.levenshtein.searcher_component:LevenshteinSearcherComponent", "split_tokenizer": "deeppavlov.models.tokenizers.split_tokenizer:SplitTokenizer", diff --git a/deeppavlov/core/common/requirements_registry.json b/deeppavlov/core/common/requirements_registry.json index 7d4f1bc84f..05b2350d75 100644 --- a/deeppavlov/core/common/requirements_registry.json +++ b/deeppavlov/core/common/requirements_registry.json @@ -1,7 +1,6 @@ { "UD_pymorphy_lemmatizer": [ - "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" + "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt" ], "bert_classifier": [ "{DEEPPAVLOV_PATH}/requirements/tf.txt", @@ -62,8 +61,7 @@ "{DEEPPAVLOV_PATH}/requirements/tf.txt" ], "dictionary_vectorizer": [ - 
"{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" + "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt" ], "dstc_slotfilling": [ "{DEEPPAVLOV_PATH}/requirements/rapidfuzz.txt" @@ -105,12 +103,6 @@ "lemmatized_output_prettifier": [ "{DEEPPAVLOV_PATH}/requirements/tf.txt" ], - "morpho_tagger": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], - "mpm_nn": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "mt_bert": [ "{DEEPPAVLOV_PATH}/requirements/tf.txt", "{DEEPPAVLOV_PATH}/requirements/bert_dp.txt" @@ -131,8 +123,7 @@ "{DEEPPAVLOV_PATH}/requirements/tf.txt" ], "pymorphy_vectorizer": [ - "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt", - "{DEEPPAVLOV_PATH}/requirements/tf.txt" + "{DEEPPAVLOV_PATH}/requirements/morpho_tagger.txt" ], "rel_ranker": [ "{DEEPPAVLOV_PATH}/requirements/tf.txt" @@ -140,9 +131,6 @@ "rel_ranking_infer": [ "{DEEPPAVLOV_PATH}/requirements/tf.txt" ], - "siamese_predictor": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "smn_nn": [ "{DEEPPAVLOV_PATH}/requirements/tf.txt" ], @@ -167,9 +155,6 @@ "two_sentences_emb": [ "{DEEPPAVLOV_PATH}/requirements/tf.txt" ], - "bilstm_gru_nn": [ - "{DEEPPAVLOV_PATH}/requirements/tf.txt" - ], "wiki_parser": [ "{DEEPPAVLOV_PATH}/requirements/hdt.txt" ], @@ -206,10 +191,6 @@ "tree_to_sparql": [ "{DEEPPAVLOV_PATH}/requirements/udapi.txt" ], - "torch_squad_bert_model": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ], "torch_transformers_preprocessor": [ "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", "{DEEPPAVLOV_PATH}/requirements/transformers.txt" @@ -262,10 +243,6 @@ "typos_kartaslov_reader": [ "{DEEPPAVLOV_PATH}/requirements/lxml.txt" ], - "torch_squad_bert_infer": [ - "{DEEPPAVLOV_PATH}/requirements/pytorch16.txt", - "{DEEPPAVLOV_PATH}/requirements/transformers.txt" - ], "nemo_asr": [ "{DEEPPAVLOV_PATH}/requirements/pytorch14.txt", "{DEEPPAVLOV_PATH}/requirements/nemo.txt", diff --git 
a/deeppavlov/dataset_iterators/ner_few_shot_iterator.py b/deeppavlov/dataset_iterators/ner_few_shot_iterator.py deleted file mode 100644 index 52e1fa38c1..0000000000 --- a/deeppavlov/dataset_iterators/ner_few_shot_iterator.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -from typing import List, Dict, Tuple, Any, Iterator, Optional - -import numpy as np - -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.data_learning_iterator import DataLearningIterator - - -@register('ner_few_shot_iterator') -class NERFewShotIterator(DataLearningIterator): - """Dataset iterator for simulating few-shot Named Entity Recognition setting. - - Args: - data: list of (x, y) pairs for every data type in ``'train'``, ``'valid'`` and ``'test'`` - seed: random seed for data shuffling - shuffle: whether to shuffle data during batching - target_tag: the tag of interest. For this tag the few-shot setting will be simulated - filter_bi: whether to filter BIO markup or not - n_train_samples: number of training samples in the few shot setting. The validation and the test sets will be - the same - remove_not_targets: whether to replace all non target tags with `O` tag or not. 
- """ - - def __init__(self, - data: Dict[str, List[Tuple[Any, Any]]], - seed: int = None, - shuffle: bool = True, - target_tag: str = None, - filter_bi: bool = True, - n_train_samples: int = 20, - remove_not_targets: bool = True, - *args, **kwargs) -> None: - super(NERFewShotIterator, self).__init__(data=data, seed=seed, shuffle=shuffle) - self.target_tag = target_tag - self.filter_bi = filter_bi - self.n_train_samples = n_train_samples - self.remove_not_targets = remove_not_targets - if self.target_tag is None: - raise RuntimeError('You must provide a target tag to NERFewShotIterator!') - - self.n_samples = len(self.train) - - if self.remove_not_targets: - self._remove_not_target_tags() - - if self.filter_bi: - for key in self.data: - for n, (x, y) in enumerate(self.data[key]): - self.data[key][n] = [x, [re.sub('(B-|I-)', '', tag) for tag in y]] - - self.tag_map = np.zeros(self.n_samples, dtype=bool) - for n, (toks, tags) in enumerate(self.data['train']): - if self.filter_bi: - self.tag_map[n] = any(self.target_tag == tag for tag in tags if len(tag) > 2) - else: - self.tag_map[n] = any(self.target_tag == tag[2:] for tag in tags if len(tag) > 2) - - self.marked_nums = None - self.unmarked_nums = None - self._sample_marked() - - def _sample_marked(self): - np.zeros(len(self.data['train']), dtype=bool) - n_marked = 0 - self.marked_mask = np.zeros(self.n_samples, dtype=bool) - while n_marked < self.n_train_samples: - is_picked = True - while is_picked: - n = np.random.randint(self.n_samples) - if not self.marked_mask[n]: - is_picked = False - self.marked_mask[n] = True - if self.tag_map[n]: - n_marked += 1 - - self.marked_nums = np.arange(self.n_samples)[self.marked_mask] - self.unmarked_nums = np.arange(self.n_samples)[~self.marked_mask] - - def _remove_not_target_tags(self): - if self.remove_not_targets: - for key in self.data: - for n, (x, y) in enumerate(self.data[key]): - tags = [] - for tag in y: - if tag.endswith('-' + self.target_tag): - tags.append(tag) - 
else: - tags.append('O') - self.data[key][n] = [x, tags] - - def get_instances(self, data_type: str = 'train') -> Tuple[List[List[str]], List[List[str]]]: - """Get all data for a selected data type - - Args: - data_type (str): can be either ``'train'``, ``'test'``, ``'valid'`` or ``'all'`` - - Returns: - a tuple of all inputs for a data type and all expected outputs for a data type - """ - - if data_type == 'train': - samples = [self.data[data_type][i] for i in self.marked_nums] - else: - samples = self.data[data_type][:] - - x, y = list(zip(*samples)) - - return x, y - - def gen_batches(self, batch_size: int, - data_type: str = 'train', - shuffle: Optional[bool] = None) -> Iterator[Tuple[List[List[str]], List[List[str]]]]: - x, y = self.get_instances(data_type) - data_len = len(x) - - if data_len == 0: - return - - order = list(range(data_len)) - if shuffle is None and self.shuffle: - self.random.shuffle(order) - elif shuffle: - self.random.shuffle(order) - - if batch_size < 0: - batch_size = data_len - - for i in range((data_len - 1) // batch_size + 1): - yield tuple(zip(*[(x[o], y[o]) for o in order[i * batch_size:(i + 1) * batch_size]])) diff --git a/deeppavlov/dataset_iterators/snips_intents_iterator.py b/deeppavlov/dataset_iterators/snips_intents_iterator.py deleted file mode 100644 index 2a881634ac..0000000000 --- a/deeppavlov/dataset_iterators/snips_intents_iterator.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2019 Alexey Romanov -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from overrides import overrides - -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.data_learning_iterator import DataLearningIterator - - -@register('snips_intents_iterator') -class SnipsIntentIterator(DataLearningIterator): - @overrides - def preprocess(self, data, *args, **kwargs): - result = [] - for query in data: - text = ''.join(part['text'] for part in query['data']) - intent = query['intent'] - result.append((text, intent)) - return result diff --git a/deeppavlov/dataset_iterators/snips_ner_iterator.py b/deeppavlov/dataset_iterators/snips_ner_iterator.py deleted file mode 100644 index 2186ebbaa9..0000000000 --- a/deeppavlov/dataset_iterators/snips_ner_iterator.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2019 Alexey Romanov -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import nltk -from overrides import overrides - -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.data_learning_iterator import DataLearningIterator - - -@register('snips_ner_iterator') -class SnipsNerIterator(DataLearningIterator): - @overrides - def preprocess(self, data, *args, **kwargs): - result = [] - for query in data: - query = query['data'] - words = [] - slots = [] - for part in query: - part_words = nltk.tokenize.wordpunct_tokenize(part['text']) - entity = part.get('entity', None) - if entity: - slots.append('B-' + entity) - slots += ['I-' + entity] * (len(part_words) - 1) - else: - slots += ['O'] * len(part_words) - words += part_words - - result.append((words, slots)) - return result diff --git a/deeppavlov/dataset_readers/snips_reader.py b/deeppavlov/dataset_readers/snips_reader.py deleted file mode 100644 index 7041df6aa7..0000000000 --- a/deeppavlov/dataset_readers/snips_reader.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2019 Alexey Romanov -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -from logging import getLogger -from pathlib import Path -from typing import List, Dict, Any, Optional - -from overrides import overrides - -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.dataset_reader import DatasetReader -from deeppavlov.core.data.utils import download_decompress, mark_done, is_done - -log = getLogger(__name__) - - -@register('snips_reader') -class SnipsReader(DatasetReader): - """The class to download and read Snips NLU Benchmark dataset (custom intents section). - - See https://github.com/snipsco/nlu-benchmark. - """ - - # noinspection PyAttributeOutsideInit - @overrides - def read(self, data_path: str, queries_per_intent: Optional[int] = None, test_validate_split: float = 0.5, - *args, **kwargs) -> \ - Dict[str, List[Dict[str, Any]]]: - """ - Each query in the output has the following form: - { 'intent': intent_name, - 'data': [ { 'text': text, ('entity': slot_name)? } ] - } - - Args: - data_path: A path to a folder with dataset files. - queries_per_intent: Number of queries to load for each intent. None to load all. - If the requested number is greater than available in file, all queries are returned. - test_validate_split: Proportion of `_validate` files to be used as test dataset (since Snips - is split into training and validation sets without a separate test set). 
- """ - data_path = Path(data_path) - intents = ['AddToPlaylist', 'BookRestaurant', 'GetWeather', 'PlayMusic', - 'RateBook', 'SearchCreativeWork', 'SearchScreeningEvent'] - - if not is_done(data_path): - url = 'http://files.deeppavlov.ai/datasets/snips.tar.gz' - log.info('[downloading data from {} to {}]'.format(url, data_path)) - download_decompress(url, data_path) - mark_done(data_path) - - use_full_file = queries_per_intent is None or queries_per_intent > 70 - training_data = [] - validation_data = [] - test_data = [] - - for intent in intents: - intent_path = data_path / intent - train_file_name = f"train_{intent}{'_full' if use_full_file else ''}.json" - validate_file_name = f"validate_{intent}.json" - - train_queries = self._load_file(intent_path / train_file_name, intent, queries_per_intent) - validate_queries = self._load_file(intent_path / validate_file_name, intent, queries_per_intent) - num_test_queries = round(len(validate_queries) * test_validate_split) - - training_data.extend(train_queries) - validation_data.extend(validate_queries[num_test_queries:]) - test_data.extend(validate_queries[:num_test_queries]) - - return {'train': training_data, 'valid': validation_data, 'test': test_data} - - @staticmethod - def _load_file(path: Path, intent: str, num_queries: Optional[int]): - with path.open(encoding='latin_1') as f: - data = json.load(f) - - # restrict number of queries - queries = data[intent][:num_queries] - for query in queries: - query['intent'] = intent - return queries diff --git a/deeppavlov/models/classifiers/ru_obscenity_classifier.py b/deeppavlov/models/classifiers/ru_obscenity_classifier.py deleted file mode 100644 index 6c17ae2ae8..0000000000 --- a/deeppavlov/models/classifiers/ru_obscenity_classifier.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import re -from logging import getLogger -from pathlib import Path -from typing import List, Union - -import pymorphy2 - -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.registry import register -from deeppavlov.core.models.estimator import Component - -log = getLogger(__name__) - - -@register("ru_obscenity_classifier") -class RuObscenityClassifier(Component): - """Rule-Based model that decides whether the sentence is obscene or not, - for Russian language - - Args: - data_path: a directory where the required files are stored. - next files are required: - -'obscenity_words.json' — file that stores list of obscenity words - -'obscenity_words_exception.json' — file that stores list of not obscenity words, - but which are detects by algorithm as obscenity(for fixing this situation) - -'obscenity_words_extended.json' — file that stores list of obscenity words, - in which user can add additional obscenity words - - Attributes: - obscenity_words: list of russian obscenity words - obscenity_words_extended: list of russian obscenity words - obscenity_words_exception: list of words on that model makes mistake that they are obscene - regexp: reg exp that finds various obscene words - regexp2: reg exp that finds various obscene words - morph: pymorphy2.MorphAnalyzer object - word_pattern: reg exp that finds words in text - """ - - def _get_patterns(self): - PATTERN_1 = r''.join(( - r'\w{0,5}[хx]([хx\s\!@#\$%\^&*+-\|\/]{0,6})', - r'[уy]([уy\s\!@#\$%\^&*+-\|\/]{0,6})[ёiлeеюийя]\w{0,7}|\w{0,6}[пp]', - 
r'([пp\s\!@#\$%\^&*+-\|\/]{0,6})[iие]([iие\s\!@#\$%\^&*+-\|\/]{0,6})', - r'[3зс]([3зс\s\!@#\$%\^&*+-\|\/]{0,6})[дd]\w{0,10}|[сcs][уy]', - r'([уy\!@#\$%\^&*+-\|\/]{0,6})[4чkк]\w{1,3}|\w{0,4}[bб]', - r'([bб\s\!@#\$%\^&*+-\|\/]{0,6})[lл]([lл\s\!@#\$%\^&*+-\|\/]{0,6})', - r'[yя]\w{0,10}|\w{0,8}[её][bб][лске@eыиаa][наи@йвл]\w{0,8}|\w{0,4}[еe]', - r'([еe\s\!@#\$%\^&*+-\|\/]{0,6})[бb]([бb\s\!@#\$%\^&*+-\|\/]{0,6})', - r'[uу]([uу\s\!@#\$%\^&*+-\|\/]{0,6})[н4ч]\w{0,4}|\w{0,4}[еeё]', - r'([еeё\s\!@#\$%\^&*+-\|\/]{0,6})[бb]([бb\s\!@#\$%\^&*+-\|\/]{0,6})', - r'[нn]([нn\s\!@#\$%\^&*+-\|\/]{0,6})[уy]\w{0,4}|\w{0,4}[еe]', - r'([еe\s\!@#\$%\^&*+-\|\/]{0,6})[бb]([бb\s\!@#\$%\^&*+-\|\/]{0,6})', - r'[оoаa@]([оoаa@\s\!@#\$%\^&*+-\|\/]{0,6})[тnнt]\w{0,4}|\w{0,10}[ё]', - r'([ё\!@#\$%\^&*+-\|\/]{0,6})[б]\w{0,6}|\w{0,4}[pп]', - r'([pп\s\!@#\$%\^&*+-\|\/]{0,6})[иeеi]([иeеi\s\!@#\$%\^&*+-\|\/]{0,6})', - r'[дd]([дd\s\!@#\$%\^&*+-\|\/]{0,6})[oоаa@еeиi]', - r'([oоаa@еeиi\s\!@#\$%\^&*+-\|\/]{0,6})[рr]\w{0,12}', - )) - - PATTERN_2 = r'|'.join(( - r"(\b[сs]{1}[сsц]{0,1}[uуy](?:[ч4]{0,1}[иаakк][^ц])\w*\b)", - r"(\b(?!пло|стра|[тл]и)(\w(?!(у|пло)))*[хx][уy](й|йа|[еeё]|и|я|ли|ю)(?!га)\w*\b)", - r"(\b(п[oо]|[нз][аa])*[хx][eе][рp]\w*\b)", - r"(\b[мm][уy][дd]([аa][кk]|[oо]|и)\w*\b)", - r"(\b\w*д[рp](?:[oо][ч4]|[аa][ч4])(?!л)\w*\b)", - r"(\b(?!(?:кило)?[тм]ет)(?!смо)[а-яa-z]*(? 
None: - log.info(f"Initializing `{self.__class__.__name__}`") - - data_path = expand_path(data_path) - with open(data_path / 'obscenity_words.json', encoding="utf-8") as f: - self.obscenity_words = set(json.load(f)) - with open(data_path / 'obscenity_words_exception.json', encoding="utf-8") as f: - self.obscenity_words_exception = set(json.load(f)) - if (data_path / 'obscenity_words_extended.json').exists(): - with open(data_path / 'obscenity_words_extended.json', encoding="utf-8") as f: - self.obscenity_words_extended = set(json.load(f)) - self.obscenity_words.update(self.obscenity_words_extended) - - PATTERN_1, PATTERN_2 = self._get_patterns() - self.regexp = re.compile(PATTERN_1, re.U | re.I) - self.regexp2 = re.compile(PATTERN_2, re.U | re.I) - self.morph = pymorphy2.MorphAnalyzer() - self.word_pattern = re.compile(r'[А-яЁё]+') - - def _check_obscenity(self, text: str) -> bool: - for word in self.word_pattern.findall(text): - if len(word) < 3: - continue - word = word.lower() - word.replace('ё', 'е') - normal_word = self.morph.parse(word)[0].normal_form - if normal_word in self.obscenity_words_exception \ - or word in self.obscenity_words_exception: - continue - if normal_word in self.obscenity_words \ - or word in self.obscenity_words \ - or bool(self.regexp.findall(normal_word)) \ - or bool(self.regexp.findall(word)) \ - or bool(self.regexp2.findall(normal_word)) \ - or bool(self.regexp2.findall(word)): - return True - return False - - def __call__(self, texts: List[str]) -> List[bool]: - """It decides whether text is obscene or not - - Args: - texts: list of texts, for which it needs to decide they are obscene or not - - Returns: - list of bool: True is for obscene text, False is for not obscene text - """ - decisions = list(map(self._check_obscenity, texts)) - return decisions diff --git a/deeppavlov/models/morpho_tagger/common_tagger.py b/deeppavlov/models/morpho_tagger/common_tagger.py index dfc7e330aa..572747fc4d 100644 --- 
a/deeppavlov/models/morpho_tagger/common_tagger.py +++ b/deeppavlov/models/morpho_tagger/common_tagger.py @@ -16,7 +16,6 @@ from typing import Union, Optional, Tuple -from tensorflow.keras import backend as K import numpy as np EPS = 1e-15 @@ -35,11 +34,6 @@ def to_one_hot(x, k): return unit[x] -def repeat_(x, k): - tile_factor = [1, k] + [1] * (K.ndim(x) - 1) - return K.tile(x[:, None, :], tile_factor) - - def make_pos_and_tag(tag: str, sep: str = ",", return_mode: Optional[str] = None) -> Tuple[str, Union[str, list, dict, tuple]]: """ diff --git a/deeppavlov/models/morpho_tagger/morpho_tagger.py b/deeppavlov/models/morpho_tagger/morpho_tagger.py deleted file mode 100644 index 45a6bb2379..0000000000 --- a/deeppavlov/models/morpho_tagger/morpho_tagger.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from logging import getLogger -from pathlib import Path -from typing import List, Optional, Union, Tuple - -import numpy as np -import tensorflow.keras.backend as K -from tensorflow.keras import Model -from tensorflow.keras.layers import (Input, Dense, Lambda, Concatenate, Conv2D, Dropout, LSTM, Bidirectional, - TimeDistributed) -from tensorflow.keras.optimizers import Nadam -from tensorflow.keras.regularizers import l2 - -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.simple_vocab import SimpleVocabulary -from deeppavlov.core.models.keras_model import KerasModel -from .cells import Highway -from .common_tagger import to_one_hot - -log = getLogger(__name__) - -MAX_WORD_LENGTH = 30 - - -@register("morpho_tagger") -class MorphoTagger(KerasModel): - """A class for character-based neural morphological tagger - - Parameters: - symbols: character vocabulary - tags: morphological tags vocabulary - save_path: the path where model is saved - load_path: the path from where model is loaded - mode: usage mode - - word_rnn: the type of character-level network (only `cnn` implemented) - char_embeddings_size: the size of character embeddings - char_conv_layers: the number of convolutional layers on character level - char_window_size: the width of convolutional filter (filters). - It can be a list if several parallel filters are applied, for example, [2, 3, 4, 5]. - char_filters: the number of convolutional filters for each window width. - It can be a number, a list (when there are several windows of different width - on a single convolution layer), a list of lists, if there - are more than 1 convolution layers, or **None**. - If **None**, a layer with width **width** contains - min(**char_filter_multiple** * **width**, 200) filters. 
- - char_filter_multiple: the ratio between filters number and window width - char_highway_layers: the number of highway layers on character level - conv_dropout: the ratio of dropout between convolutional layers - highway_dropout: the ratio of dropout between highway layers, - intermediate_dropout: the ratio of dropout between convolutional - and highway layers on character level - lstm_dropout: dropout ratio in word-level LSTM - word_vectorizers: list of parameters for additional word-level vectorizers, - for each vectorizer it stores a pair of vectorizer dimension and - the dimension of the corresponding word embedding - word_lstm_layers: the number of word-level LSTM layers - word_lstm_units: hidden dimensions of word-level LSTMs - word_dropout: the ratio of dropout before word level (it is applied to word embeddings) - regularizer: l2 regularization parameter - verbose: the level of verbosity - - A subclass of :class:`~deeppavlov.core.models.keras_model.KerasModel` - """ - def __init__(self, - symbols: SimpleVocabulary, - tags: SimpleVocabulary, - save_path: Optional[Union[str, Path]] = None, - load_path: Optional[Union[str, Path]] = None, - mode: str = 'infer', - word_rnn: str = "cnn", - char_embeddings_size: int = 16, - char_conv_layers: int = 1, - char_window_size: Union[int, List[int]] = 5, - char_filters: Union[int, List[int]] = None, - char_filter_multiple: int = 25, - char_highway_layers: int = 1, - conv_dropout: float = 0.0, - highway_dropout: float = 0.0, - intermediate_dropout: float = 0.0, - lstm_dropout: float = 0.0, - word_vectorizers: List[Tuple[int, int]] = None, - word_lstm_layers: int = 1, - word_lstm_units: Union[int, List[int]] = 128, - word_dropout: float = 0.0, - regularizer: float = None, - verbose: int = 1, **kwargs): - # Calls parent constructor. 
Results in creation of save_folder if it doesn't exist - super().__init__(save_path=save_path, load_path=load_path, mode=mode, **kwargs) - self.symbols = symbols - self.tags = tags - self.word_rnn = word_rnn - self.char_embeddings_size = char_embeddings_size - self.char_conv_layers = char_conv_layers - self.char_window_size = char_window_size - self.char_filters = char_filters - self.char_filter_multiple = char_filter_multiple - self.char_highway_layers = char_highway_layers - self.conv_dropout = conv_dropout - self.highway_dropout = highway_dropout - self.intermediate_dropout = intermediate_dropout - self.lstm_dropout = lstm_dropout - self.word_dropout = word_dropout - self.word_vectorizers = word_vectorizers # a list of additional vectorizer dimensions - self.word_lstm_layers = word_lstm_layers - self.word_lstm_units = word_lstm_units - self.regularizer = regularizer - self.verbose = verbose - self._initialize() - self.model_ = None - self.build() - - # Tries to load the model from model `load_path`, if it is available - self.load() - - def load(self) -> None: - """ - Checks existence of the model file, loads the model if the file exists - Loads model weights from a file - """ - - # Checks presence of the model files - if self.load_path.exists(): - path = str(self.load_path.resolve()) - log.info('[loading model from {}]'.format(path)) - self.model_.load_weights(path) - - def save(self) -> None: - """ - Saves model weights to the save_path, provided in config. 
The directory is - already created by super().__init__, which is called in __init__ of this class""" - path = str(self.save_path.absolute()) - log.info('[saving model to {}]'.format(path)) - self.model_.save_weights(path) - - def _initialize(self): - if isinstance(self.char_window_size, int): - self.char_window_size = [self.char_window_size] - if self.char_filters is None or isinstance(self.char_filters, int): - self.char_filters = [self.char_filters] * len(self.char_window_size) - if len(self.char_window_size) != len(self.char_filters): - raise ValueError("There should be the same number of window sizes and filter sizes") - if isinstance(self.word_lstm_units, int): - self.word_lstm_units = [self.word_lstm_units] * self.word_lstm_layers - if len(self.word_lstm_units) != self.word_lstm_layers: - raise ValueError("There should be the same number of lstm layer units and lstm layers") - if self.word_vectorizers is None: - self.word_vectorizers = [] - if self.regularizer is not None: - self.regularizer = l2(self.regularizer) - if self.verbose > 0: - log.info("{} symbols, {} tags in CharacterTagger".format(len(self.symbols), len(self.tags))) - - def build(self): - """Builds the network using Keras. 
- """ - word_inputs = Input(shape=(None, MAX_WORD_LENGTH+2), dtype="int32") - inputs = [word_inputs] - word_outputs = self._build_word_cnn(word_inputs) - if len(self.word_vectorizers) > 0: - additional_word_inputs = [Input(shape=(None, input_dim), dtype="float32") - for input_dim, dense_dim in self.word_vectorizers] - inputs.extend(additional_word_inputs) - additional_word_embeddings = [Dense(dense_dim)(additional_word_inputs[i]) - for i, (_, dense_dim) in enumerate(self.word_vectorizers)] - word_outputs = Concatenate()([word_outputs] + additional_word_embeddings) - outputs, lstm_outputs = self._build_basic_network(word_outputs) - compile_args = {"optimizer": Nadam(lr=0.002, clipnorm=5.0), - "loss": "categorical_crossentropy", "metrics": ["accuracy"]} - self.model_ = Model(inputs, outputs) - self.model_.compile(**compile_args) - if self.verbose > 0: - self.model_.summary(print_fn=log.info) - return self - - def _build_word_cnn(self, inputs): - """Builds word-level network - """ - inputs = Lambda(K.one_hot, arguments={"num_classes": len(self.symbols)}, - output_shape=lambda x: tuple(x) + (len(self.symbols),))(inputs) - char_embeddings = Dense(self.char_embeddings_size, use_bias=False)(inputs) - conv_outputs = [] - self.char_output_dim_ = 0 - for window_size, filters_number in zip(self.char_window_size, self.char_filters): - curr_output = char_embeddings - curr_filters_number = (min(self.char_filter_multiple * window_size, 200) - if filters_number is None else filters_number) - for _ in range(self.char_conv_layers - 1): - curr_output = Conv2D(curr_filters_number, (1, window_size), - padding="same", activation="relu", - data_format="channels_last")(curr_output) - if self.conv_dropout > 0.0: - curr_output = Dropout(self.conv_dropout)(curr_output) - curr_output = Conv2D(curr_filters_number, (1, window_size), - padding="same", activation="relu", - data_format="channels_last")(curr_output) - conv_outputs.append(curr_output) - self.char_output_dim_ += curr_filters_number - 
if len(conv_outputs) > 1: - conv_output = Concatenate(axis=-1)(conv_outputs) - else: - conv_output = conv_outputs[0] - highway_input = Lambda(K.max, arguments={"axis": -2})(conv_output) - if self.intermediate_dropout > 0.0: - highway_input = Dropout(self.intermediate_dropout)(highway_input) - for i in range(self.char_highway_layers - 1): - highway_input = Highway(activation="relu")(highway_input) - if self.highway_dropout > 0.0: - highway_input = Dropout(self.highway_dropout)(highway_input) - highway_output = Highway(activation="relu")(highway_input) - return highway_output - - def _build_basic_network(self, word_outputs): - """ - Creates the basic network architecture, - transforming word embeddings to intermediate outputs - """ - if self.word_dropout > 0.0: - lstm_outputs = Dropout(self.word_dropout)(word_outputs) - else: - lstm_outputs = word_outputs - for j in range(self.word_lstm_layers-1): - lstm_outputs = Bidirectional( - LSTM(self.word_lstm_units[j], return_sequences=True, - dropout=self.lstm_dropout))(lstm_outputs) - lstm_outputs = Bidirectional( - LSTM(self.word_lstm_units[-1], return_sequences=True, - dropout=self.lstm_dropout))(lstm_outputs) - pre_outputs = TimeDistributed( - Dense(len(self.tags), activation="softmax", - activity_regularizer=self.regularizer), - name="p")(lstm_outputs) - return pre_outputs, lstm_outputs - - # noinspection PyPep8Naming - def _transform_batch(self, data, labels=None, transform_to_one_hot=True): - data, additional_data = data[0], data[1:] - L = max(len(x) for x in data) - X = np.array([self._make_sent_vector(x, L) for x in data]) - X = [X] + [np.array(x) for x in additional_data] - if labels is not None: - Y = np.array([self._make_tags_vector(y, L) for y in labels]) - if transform_to_one_hot: - Y = to_one_hot(Y, len(self.tags)) - return X, Y - else: - return X - - def train_on_batch(self, *args) -> None: - """Trains the model on a single batch. - - Args: - *args: the list of network inputs. 
Last element of `args` is the batch of targets, - all previous elements are training data batches - """ - # data: List[Iterable], labels: Iterable[list] - # Args: - # data: a batch of word sequences - # labels: a batch of correct tag sequences - *data, labels = args - # noinspection PyPep8Naming - X, Y = self._transform_batch(data, labels) - self.model_.train_on_batch(X, Y) - - # noinspection PyPep8Naming - def predict_on_batch(self, data: Union[List[np.ndarray], Tuple[np.ndarray]], - return_indexes: bool = False) -> List[List[str]]: - """ - Makes predictions on a single batch - - Args: - data: model inputs for a single batch, data[0] contains input character encodings - and is the only element of data for mist models. Subsequent elements of data - include the output of additional vectorizers, e.g., dictionary-based one. - return_indexes: whether to return tag indexes in vocabulary or the tags themselves - - Returns: - a batch of label sequences - """ - X = self._transform_batch(data) - objects_number, lengths = len(X[0]), [len(elem) for elem in data[0]] - Y = self.model_.predict_on_batch(X) - labels = np.argmax(Y, axis=-1) - answer: List[Optional[List[str]]] = [None] * objects_number - for i, (elem, length) in enumerate(zip(labels, lengths)): - elem = elem[:length] - answer[i] = elem if return_indexes else self.tags.idxs2toks(elem) - return answer - - def __call__(self, *x_batch: np.ndarray, **kwargs) -> Union[List, np.ndarray]: - """ - Predicts answers on batch elements. - - Args: - x_batch: a batch to predict answers on. It can be either a single array - for basic model or a sequence of arrays for a complex one ( - :config:`configuration file ` - or its lemmatized version). - """ - return self.predict_on_batch(x_batch, **kwargs) - - def _make_sent_vector(self, sent: List, bucket_length: int = None) -> np.ndarray: - """Transforms a sentence to Numpy array, which will be the network input. 
- - Args: - sent: input sentence - bucket_length: the width of the bucket - - Returns: - A 3d array, answer[i][j][k] contains the index of k-th letter - in j-th word of i-th input sentence. - """ - bucket_length = bucket_length or len(sent) - answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH+2), dtype=np.int32) - for i, word in enumerate(sent): - answer[i, 0] = self.tags["BEGIN"] - m = min(len(word), MAX_WORD_LENGTH) - for j, x in enumerate(word[-m:]): - answer[i, j+1] = self.symbols[x] - answer[i, m+1] = self.tags["END"] - answer[i, m+2:] = self.tags["PAD"] - return answer - - def _make_tags_vector(self, tags, bucket_length=None) -> np.ndarray: - """Transforms a sentence of tags to Numpy array, which will be the network target. - - Args: - tags: input sentence of tags - bucket_length: the width of the bucket - - Returns: - A 2d array, answer[i][j] contains the index of j-th tag in i-th input sentence. - """ - bucket_length = bucket_length or len(tags) - answer = np.zeros(shape=(bucket_length,), dtype=np.int32) - for i, tag in enumerate(tags): - answer[i] = self.tags[tag] - return answer diff --git a/deeppavlov/models/ranking/bilstm_gru_siamese_network.py b/deeppavlov/models/ranking/bilstm_gru_siamese_network.py deleted file mode 100644 index fe243598a9..0000000000 --- a/deeppavlov/models/ranking/bilstm_gru_siamese_network.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from logging import getLogger - -from tensorflow.keras import backend as K -from tensorflow.keras.layers import Input, GlobalMaxPooling1D, Lambda, Dense, GRU -from tensorflow.keras.models import Model - -from deeppavlov.core.common.registry import register -from deeppavlov.models.ranking.bilstm_siamese_network import BiLSTMSiameseNetwork - -log = getLogger(__name__) - - -@register('bilstm_gru_nn') -class BiLSTMGRUSiameseNetwork(BiLSTMSiameseNetwork): - """The class implementing a siamese neural network with BiLSTM, GRU and max pooling. - - GRU is used to take into account multi-turn dialogue ``context``. - - Args: - len_vocab: A size of the vocabulary to build embedding layer. - seed: Random seed. - shared_weights: Whether to use shared weights in the model to encode ``contexts`` and ``responses``. - embedding_dim: Dimensionality of token (word) embeddings. - reccurent: A type of the RNN cell. Possible values are ``lstm`` and ``bilstm``. - hidden_dim: Dimensionality of the hidden state of the RNN cell. If ``reccurent`` equals ``bilstm`` - ``hidden_dim`` should be doubled to get the actual dimensionality. - max_pooling: Whether to use max-pooling operation to get ``context`` (``response``) vector representation. - If ``False``, the last hidden state of the RNN will be used. - triplet_loss: Whether to use a model with triplet loss. - If ``False``, a model with crossentropy loss will be used. - margin: A margin parameter for triplet loss. Only required if ``triplet_loss`` is set to ``True``. - hard_triplets: Whether to use hard triplets sampling to train the model - i.e. to choose negative samples close to positive ones. - If set to ``False`` random sampling will be used. - Only required if ``triplet_loss`` is set to ``True``. 
- """ - - def create_model(self) -> Model: - input = [] - if self.use_matrix: - for i in range(self.num_context_turns + 1): - input.append(Input(shape=(self.max_sequence_length,))) - context = input[:self.num_context_turns] - response = input[-1] - emb_layer = self.embedding_layer() - emb_c = [emb_layer(el) for el in context] - emb_r = emb_layer(response) - else: - for i in range(self.num_context_turns + 1): - input.append(Input(shape=(self.max_sequence_length, self.embedding_dim,))) - context = input[:self.num_context_turns] - response = input[-1] - emb_c = context - emb_r = response - lstm_layer = self.lstm_layer() - lstm_c = [lstm_layer(el) for el in emb_c] - lstm_r = lstm_layer(emb_r) - pooling_layer = GlobalMaxPooling1D(name="pooling") - lstm_c = [pooling_layer(el) for el in lstm_c] - lstm_r = pooling_layer(lstm_r) - lstm_c = [Lambda(lambda x: K.expand_dims(x, 1))(el) for el in lstm_c] - lstm_c = Lambda(lambda x: K.concatenate(x, 1))(lstm_c) - gru_layer = GRU(2 * self.hidden_dim, name="gru") - gru_c = gru_layer(lstm_c) - - if self.triplet_mode: - dist = Lambda(self._pairwise_distances)([gru_c, lstm_r]) - else: - dist = Lambda(self._diff_mult_dist)([gru_c, lstm_r]) - dist = Dense(1, activation='sigmoid', name="score_model")(dist) - model = Model(context + [response], dist) - return model - - def create_score_model(self) -> Model: - cr = self.model.inputs - if self.triplet_mode: - emb_c = self.model.get_layer("gru").output - emb_r = self.model.get_layer("pooling").get_output(-1) - dist_score = Lambda(lambda x: self.euclidian_dist(x), name="score_model") - score = dist_score([emb_c, emb_r]) - else: - score = self.model.get_layer("score_model").output - score = Lambda(lambda x: 1. - K.squeeze(x, -1))(score) - score = Lambda(lambda x: 1. 
- x)(score) - model = Model(cr, score) - return model - - def create_context_model(self) -> Model: - m = Model(self.model.inputs[:-1], - self.model.get_layer("gru").output) - return m - - def create_response_model(self) -> Model: - m = Model(self.model.inputs[-1], - self.model.get_layer("pooling").get_output_at(-1)) - return m diff --git a/deeppavlov/models/ranking/mpm_siamese_network.py b/deeppavlov/models/ranking/mpm_siamese_network.py deleted file mode 100644 index cccc26f508..0000000000 --- a/deeppavlov/models/ranking/mpm_siamese_network.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from logging import getLogger - -from tensorflow.keras import backend as K -from tensorflow.keras.initializers import glorot_uniform, Orthogonal -from tensorflow.keras.layers import Input, LSTM, Lambda, Dense, Dropout, Bidirectional -from tensorflow.keras.models import Model - -from deeppavlov.core.common.registry import register -from deeppavlov.core.layers.keras_layers import AttentiveMatchingLayer, MaxattentiveMatchingLayer -from deeppavlov.core.layers.keras_layers import FullMatchingLayer, MaxpoolingMatchingLayer -from deeppavlov.models.ranking.bilstm_siamese_network import BiLSTMSiameseNetwork - -log = getLogger(__name__) - - -@register('mpm_nn') -class MPMSiameseNetwork(BiLSTMSiameseNetwork): - """The class implementing a siamese neural network with bilateral multi-Perspective matching. - - The network architecture is based on https://arxiv.org/abs/1702.03814. - - Args: - dense_dim: Dimensionality of the dense layer. - perspective_num: Number of perspectives in multi-perspective matching layers. - aggregation dim: Dimensionality of the hidden state in the second BiLSTM layer. - inpdrop_val: Float between 0 and 1. A dropout value for the linear transformation of the inputs. - recdrop_val: Float between 0 and 1. A dropout value for the linear transformation of the recurrent state. - ldrop_val: A dropout value of the dropout layer before the second BiLSTM layer. - dropout_val: A dropout value of the dropout layer after the second BiLSTM layer. 
- """ - - def __init__(self, - dense_dim: int = 50, - perspective_num: int = 20, - aggregation_dim: int = 200, - recdrop_val: float = 0.0, - inpdrop_val: float = 0.0, - ldrop_val: float = 0.0, - dropout_val: float = 0.0, - *args, - **kwargs) -> None: - - self.dense_dim = dense_dim - self.perspective_num = perspective_num - self.aggregation_dim = aggregation_dim - self.ldrop_val = ldrop_val - self.recdrop_val = recdrop_val - self.inpdrop_val = inpdrop_val - self.dropout_val = dropout_val - self.seed = kwargs.get("triplet_loss") - self.triplet_mode = kwargs.get("triplet_loss") - - super(MPMSiameseNetwork, self).__init__(*args, **kwargs) - - def create_lstm_layer_1(self): - ker_in = glorot_uniform(seed=self.seed) - rec_in = Orthogonal(seed=self.seed) - bioutp = Bidirectional(LSTM(self.hidden_dim, - input_shape=(self.max_sequence_length, self.embedding_dim,), - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - recurrent_dropout=self.recdrop_val, - dropout=self.inpdrop_val, - kernel_initializer=ker_in, - recurrent_initializer=rec_in, - return_sequences=True), merge_mode=None) - return bioutp - - def create_lstm_layer_2(self): - ker_in = glorot_uniform(seed=self.seed) - rec_in = Orthogonal(seed=self.seed) - bioutp = Bidirectional(LSTM(self.aggregation_dim, - input_shape=(self.max_sequence_length, 8 * self.perspective_num,), - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - recurrent_dropout=self.recdrop_val, - dropout=self.inpdrop_val, - kernel_initializer=ker_in, - recurrent_initializer=rec_in, - return_sequences=False), - merge_mode='concat', - name="sentence_embedding") - return bioutp - - def create_model(self) -> Model: - if self.use_matrix: - context = Input(shape=(self.max_sequence_length,)) - response = Input(shape=(self.max_sequence_length,)) - emb_layer = self.embedding_layer() - emb_c = emb_layer(context) - emb_r = emb_layer(response) - 
else: - context = Input(shape=(self.max_sequence_length, self.embedding_dim,)) - response = Input(shape=(self.max_sequence_length, self.embedding_dim,)) - emb_c = context - emb_r = response - lstm_layer = self.create_lstm_layer_1() - lstm_a = lstm_layer(emb_c) - lstm_b = lstm_layer(emb_r) - - f_layer_f = FullMatchingLayer(self.perspective_num) - f_layer_b = FullMatchingLayer(self.perspective_num) - f_a_forw = f_layer_f([lstm_a[0], lstm_b[0]])[0] - f_a_back = f_layer_b([Lambda(lambda x: K.reverse(x, 1))(lstm_a[1]), - Lambda(lambda x: K.reverse(x, 1))(lstm_b[1])])[0] - f_a_back = Lambda(lambda x: K.reverse(x, 1))(f_a_back) - f_b_forw = f_layer_f([lstm_b[0], lstm_a[0]])[0] - f_b_back = f_layer_b([Lambda(lambda x: K.reverse(x, 1))(lstm_b[1]), - Lambda(lambda x: K.reverse(x, 1))(lstm_a[1])])[0] - f_b_back = Lambda(lambda x: K.reverse(x, 1))(f_b_back) - - mp_layer_f = MaxpoolingMatchingLayer(self.perspective_num) - mp_layer_b = MaxpoolingMatchingLayer(self.perspective_num) - mp_a_forw = mp_layer_f([lstm_a[0], lstm_b[0]])[0] - mp_a_back = mp_layer_b([lstm_a[1], lstm_b[1]])[0] - mp_b_forw = mp_layer_f([lstm_b[0], lstm_a[0]])[0] - mp_b_back = mp_layer_b([lstm_b[1], lstm_a[1]])[0] - - at_layer_f = AttentiveMatchingLayer(self.perspective_num) - at_layer_b = AttentiveMatchingLayer(self.perspective_num) - at_a_forw = at_layer_f([lstm_a[0], lstm_b[0]])[0] - at_a_back = at_layer_b([lstm_a[1], lstm_b[1]])[0] - at_b_forw = at_layer_f([lstm_b[0], lstm_a[0]])[0] - at_b_back = at_layer_b([lstm_b[1], lstm_a[1]])[0] - - ma_layer_f = MaxattentiveMatchingLayer(self.perspective_num) - ma_layer_b = MaxattentiveMatchingLayer(self.perspective_num) - ma_a_forw = ma_layer_f([lstm_a[0], lstm_b[0]])[0] - ma_a_back = ma_layer_b([lstm_a[1], lstm_b[1]])[0] - ma_b_forw = ma_layer_f([lstm_b[0], lstm_a[0]])[0] - ma_b_back = ma_layer_b([lstm_b[1], lstm_a[1]])[0] - - concat_a = Lambda(lambda x: K.concatenate(x, axis=-1))([f_a_forw, f_a_back, - mp_a_forw, mp_a_back, - at_a_forw, at_a_back, - ma_a_forw, 
ma_a_back]) - concat_b = Lambda(lambda x: K.concatenate(x, axis=-1))([f_b_forw, f_b_back, - mp_b_forw, mp_b_back, - at_b_forw, at_b_back, - ma_b_forw, ma_b_back]) - - concat_a = Dropout(self.ldrop_val)(concat_a) - concat_b = Dropout(self.ldrop_val)(concat_b) - - lstm_layer_agg = self.create_lstm_layer_2() - agg_a = lstm_layer_agg(concat_a) - agg_b = lstm_layer_agg(concat_b) - - agg_a = Dropout(self.dropout_val)(agg_a) - agg_b = Dropout(self.dropout_val)(agg_b) - - reduced = Lambda(lambda x: K.concatenate(x, axis=-1))([agg_a, agg_b]) - - if self.triplet_mode: - dist = Lambda(self._pairwise_distances)([agg_a, agg_b]) - else: - ker_in = glorot_uniform(seed=self.seed) - dense = Dense(self.dense_dim, kernel_initializer=ker_in)(reduced) - dist = Dense(1, activation='sigmoid', name="score_model")(dense) - model = Model([context, response], dist) - return model diff --git a/deeppavlov/models/ranking/siamese_predictor.py b/deeppavlov/models/ranking/siamese_predictor.py deleted file mode 100644 index a42dccc22b..0000000000 --- a/deeppavlov/models/ranking/siamese_predictor.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from logging import getLogger -from typing import List, Iterable, Callable, Union - -import numpy as np - -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.simple_vocab import SimpleVocabulary -from deeppavlov.core.models.component import Component -from deeppavlov.models.ranking.keras_siamese_model import SiameseModel - -log = getLogger(__name__) - - -@register('siamese_predictor') -class SiamesePredictor(Component): - """The class for ranking or paraphrase identification using the trained siamese network in the ``interact`` mode. - - Args: - batch_size: A size of a batch. - num_context_turns: A number of ``context`` turns in data samples. - ranking: Whether to perform ranking. - If it is set to ``False`` paraphrase identification will be performed. - attention: Whether any attention mechanism is used in the siamese network. - If ``False`` then calculated in advance vectors of ``responses`` - will be used to obtain similarity score for the input ``context``; - Otherwise the whole siamese architecture will be used - to obtain similarity score for the input ``context`` and each particular ``response``. - The parameter will be used if the ``ranking`` is set to ``True``. - responses: A instance of :class:`~deeppavlov.core.data.simple_vocab.SimpleVocabulary` - with all possible ``responses`` to perform ranking. - Will be used if the ``ranking`` is set to ``True``. - preproc_func: A ``__call__`` function of the - :class:`~deeppavlov.models.preprocessors.siamese_preprocessor.SiamesePreprocessor`. - interact_pred_num: The number of the most relevant ``responses`` which will be returned. - Will be used if the ``ranking`` is set to ``True``. - **kwargs: Other parameters. 
- """ - - def __init__(self, - model: SiameseModel, - batch_size: int, - num_context_turns: int = 1, - ranking: bool = True, - attention: bool = False, - responses: SimpleVocabulary = None, - preproc_func: Callable = None, - interact_pred_num: int = 3, - *args, **kwargs) -> None: - - super().__init__() - - self.batch_size = batch_size - self.num_context_turns = num_context_turns - self.ranking = ranking - self.attention = attention - self.preproc_responses = [] - self.response_embeddings = None - self.preproc_func = preproc_func - self.interact_pred_num = interact_pred_num - self.model = model - if self.ranking: - self.responses = {el[1]: el[0] for el in responses.items()} - self._build_preproc_responses() - if not self.attention: - self._build_response_embeddings() - - def __call__(self, batch: Iterable[List[np.ndarray]]) -> List[Union[List[str], str]]: - context = next(batch) - try: - next(batch) - log.error("It is not intended to use the `%s` with the batch size greater then 1." % self.__class__) - except StopIteration: - pass - - if self.ranking: - if len(context) == self.num_context_turns: - scores = [] - if self.attention: - for i in range(len(self.preproc_responses) // self.batch_size + 1): - responses = self.preproc_responses[i * self.batch_size: (i + 1) * self.batch_size] - b = [context + el for el in responses] - b = self.model._make_batch(b) - sc = self.model._predict_on_batch(b) - scores += list(sc) - else: - b = self.model._make_batch([context]) - context_emb = self.model._predict_context_on_batch(b) - context_emb = np.squeeze(context_emb, axis=0) - scores = context_emb @ self.response_embeddings.T - ids = np.flip(np.argsort(scores), -1) - return [[self.responses[el] for el in ids[:self.interact_pred_num]]] - else: - return ["Please, provide contexts separated by '&' in the number equal to that used while training."] - - else: - if len(context) == 2: - b = self.model._make_batch([context]) - sc = self.model._predict_on_batch(b)[0] - if sc > 0.5: - 
return ["This is a paraphrase."] - else: - return ["This is not a paraphrase."] - else: - return ["Please, provide two sentences separated by '&'."] - - def reset(self) -> None: - pass - - def process_event(self) -> None: - pass - - def _build_response_embeddings(self) -> None: - resp_vecs = [] - for i in range(len(self.preproc_responses) // self.batch_size + 1): - resp_preproc = self.preproc_responses[i * self.batch_size: (i + 1) * self.batch_size] - resp_preproc = self.model._make_batch(resp_preproc) - resp_preproc = resp_preproc - resp_vecs.append(self.model._predict_response_on_batch(resp_preproc)) - self.response_embeddings = np.vstack(resp_vecs) - - def _build_preproc_responses(self) -> None: - responses = list(self.responses.values()) - for i in range(len(responses) // self.batch_size + 1): - el = self.preproc_func(responses[i * self.batch_size: (i + 1) * self.batch_size]) - self.preproc_responses += list(el) - - def rebuild_responses(self, candidates) -> None: - self.attention = True - self.interact_pred_num = 1 - self.preproc_responses = list() - self.responses = {idx: sentence for idx, sentence in enumerate(candidates)} - self._build_preproc_responses() diff --git a/docs/apiref/models/morpho_tagger.rst b/docs/apiref/models/morpho_tagger.rst index 8e73a7a9ce..dcfac248fa 100644 --- a/docs/apiref/models/morpho_tagger.rst +++ b/docs/apiref/models/morpho_tagger.rst @@ -1,11 +1,6 @@ deeppavlov.models.morpho_tagger =============================== -.. autoclass:: deeppavlov.models.morpho_tagger.morpho_tagger.MorphoTagger - :members: - - .. automethod:: __call__ - .. autofunction:: deeppavlov.models.morpho_tagger.common.predict_with_model .. autoclass:: deeppavlov.models.morpho_tagger.lemmatizer.UDPymorphyLemmatizer diff --git a/docs/apiref/models/ranking.rst b/docs/apiref/models/ranking.rst index e9331289af..276488b3b8 100644 --- a/docs/apiref/models/ranking.rst +++ b/docs/apiref/models/ranking.rst @@ -7,12 +7,8 @@ Ranking classes. .. 
autoclass:: deeppavlov.models.ranking.bilstm_siamese_network.BiLSTMSiameseNetwork -.. autoclass:: deeppavlov.models.ranking.bilstm_gru_siamese_network.BiLSTMGRUSiameseNetwork - .. autoclass:: deeppavlov.models.ranking.keras_siamese_model.KerasSiameseModel -.. autoclass:: deeppavlov.models.ranking.mpm_siamese_network.MPMSiameseNetwork - .. autoclass:: deeppavlov.models.ranking.siamese_model.SiameseModel .. automethod:: load @@ -20,7 +16,3 @@ Ranking classes. .. automethod:: train_on_batch .. automethod:: __call__ .. automethod:: reset - -.. autoclass:: deeppavlov.models.ranking.siamese_predictor.SiamesePredictor - - diff --git a/docs/features/models/bert.rst b/docs/features/models/bert.rst index dbb86ed5e7..6a06075f85 100644 --- a/docs/features/models/bert.rst +++ b/docs/features/models/bert.rst @@ -92,8 +92,8 @@ you can use or modify a :config:`BERT embedder configuration ` -and :config:`NER Ontonotes ` configuration files. +Examples of using these embeddings in model training pipelines can be found in +:config:`NER Ontonotes ` configuration file. BERT for Classification diff --git a/docs/features/models/classifiers.rst b/docs/features/models/classifiers.rst index 64905b4803..b84d9faa04 100644 --- a/docs/features/models/classifiers.rst +++ b/docs/features/models/classifiers.rst @@ -6,7 +6,7 @@ which are implemented as a number of different **neural networks** or **sklearn Models can be used for binary, multi-class or multi-label classification. List of available classifiers (more info see below): -* **BERT classifier** (see :doc:`here `) builds BERT [8]_ architecture for classification problem on **TensorFlow** or on **PyTorch**. +* **BERT classifier** (see :doc:`here `) builds BERT [5]_ architecture for classification problem on **TensorFlow** or on **PyTorch**. * **Keras classifier** (see :doc:`here `) builds neural network on Keras with tensorflow backend. 
@@ -27,18 +27,18 @@ Command line python -m deeppavlov install where ```` is a path to one of the :config:`provided config files ` -or its name without an extension, for example :config:`"intents_snips" `. +or its name without an extension, for example :config:`"insults_kaggle_bert" `. To download pre-trained models, vocabs, embeddings on the dataset of interest one should run the following command providing corresponding name of the config file (see above) -or provide flag ``-d`` for commands like ``interact``, ``telegram``, ``train``, ``evaluate``.: +or provide flag ``-d`` for commands like ``interact``, ``telegram``, ``train``, ``evaluate``: .. code:: bash python -m deeppavlov download where ```` is a path to one of the :config:`provided config files ` -or its name without an extension, for example :config:`"intents_snips" `. +or its name without an extension, for example :config:`"insults_kaggle_bert" `. When using KerasClassificationModel for **Windows** platforms one have to set `KERAS_BACKEND` to `tensorflow`: @@ -53,7 +53,7 @@ When using KerasClassificationModel for **Windows** platforms one have to set `K python -m deeppavlov interact [-d] where ```` is a path to one of the :config:`provided config files ` -or its name without an extension, for example :config:`"intents_snips" `. +or its name without an extension, for example :config:`"insults_kaggle_bert" `. With the optional ``-d`` parameter all the data required to run selected pipeline will be **downloaded**. **TRAIN** After preparing the config file (including change of dataset, pipeline elements or parameters) @@ -73,7 +73,7 @@ Then training can be run in the following way: python -m deeppavlov train where ```` is a path to one of the :config:`provided config files ` -or its name without an extension, for example :config:`"intents_snips" `. +or its name without an extension, for example :config:`"insults_kaggle_bert" `. 
With the optional ``-d`` parameter all the data required to run selected pipeline will be **downloaded**. Python code @@ -94,34 +94,30 @@ Then one can build and interact a model from configuration file: .. code:: python - from deeppavlov import build_model, configs + from deeppavlov import build_model - CONFIG_PATH = configs.classifiers.intents_snips # could also be configuration dictionary or string path or `pathlib.Path` instance + model = build_model('insults_kaggle_bert', download=True) # in case of necessity to download some data - model = build_model(CONFIG_PATH, download=True) # in case of necessity to download some data + model = build_model('insults_kaggle_bert', download=False) # otherwise - model = build_model(CONFIG_PATH, download=False) # otherwise + print(model(["You are dumb", "He lay flat on the brown, pine-needled floor of the forest"])) - print(model(["What is the weather in Boston today?"])) - - >>> [['GetWeather']] + >>> ['Insult', 'Not Insult'] **TRAIN** Also training can be run in the following way: .. code:: python - from deeppavlov import train_model, configs - - CONFIG_PATH = configs.classifiers.intents_snips # could also be configuration dictionary or string path or `pathlib.Path` instance + from deeppavlov import train_model - model = train_model(CONFIG_PATH, download=True) # in case of necessity to download some data + model = train_model('insults_kaggle_bert', download=True) # in case of necessity to download some data - model = train_model(CONFIG_PATH, download=False) # otherwise + model = train_model('insults_kaggle_bert', download=False) # otherwise BERT models ----------- -BERT (Bidirectional Encoder Representations from Transformers) [8]_ is a Transformer pre-trained on masked language model +BERT (Bidirectional Encoder Representations from Transformers) [5]_ is a Transformer pre-trained on masked language model and next sentence prediction tasks. 
This approach showed state-of-the-art results on a wide range of NLP tasks in English. @@ -188,7 +184,7 @@ Therefore, for sklearn component classifier one should set ``ensure_list_output` Pre-trained models ------------------ -We also provide with **pre-trained models** for classification on DSTC 2 dataset, SNIPS dataset, "AG News" dataset, +We also provide with **pre-trained models** for classification on DSTC 2 dataset, "AG News" dataset, "Detecting Insults in Social Commentary", Twitter sentiment in Russian dataset. `DSTC 2 dataset `__ does not initially contain information about **intents**, @@ -226,24 +222,6 @@ In the original dataset this user reply has characteristics This message contains two intents ``(thankyou, bye)``. Train, valid and test division is the same as on web-site. -`SNIPS dataset `__ -contains **intent classification** task for 7 intents (approximately 2.4 -samples per intent): - -- GetWeather -- BookRestaurant -- PlayMusic -- AddToPlaylist -- RateBook -- SearchScreeningEvent -- SearchCreativeWork - -Initially, classification model on SNIPS dataset [7]_ was trained only as an -example of usage that is why we provide pre-trained model for SNIPS with -embeddings trained on DSTC-2 dataset that is not the best choice for -this task. Train set is divided to train and validation sets to -illustrate ``basic_classification_iterator`` work. - `Detecting Insults in Social Commentary dataset `__ contains binary classification task for **detecting insults** for participants of conversation. Train, valid and test division is the same @@ -291,69 +269,24 @@ of sentences. Each sentence were initially labelled with floating point value fr the floating point labels are converted to integer labels according to the intervals `[0, 0.2], (0.2, 0.4], (0.4, 0.6], (0.6, 0.8], (0.8, 1.0]` corresponding to `very negative`, `negative`, `neutral`, `positive`, `very positive` classes. -`Yelp Reviews `__ contains 5-classes **sentiment classification** of product reviews. 
-The labels are `1`, `2`, `3`, `4`, `5` corresponding to `very negative`, `negative`, `neutral`, `positive`, `very positive` classes. -The reviews are long enough (cut up to 200 subtokens). - +------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ | Task | Dataset | Lang | Model | Metric | Valid | Test | Downloads | +==================+====================+======+=================================================================================================+=============+========+========+===========+ -| 28 intents | `DSTC 2`_ | En | :config:`DSTC 2 emb ` | Accuracy | 0.7613 | 0.7733 | 800 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9629 | 0.9617 | 8.5 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`BERT ` | | 0.9673 | 0.9636 | 800 Mb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| 7 intents | `SNIPS-2017`_ [7]_ | | :config:`DSTC 2 emb ` | F1-macro | 0.8591 | -- | 800 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9820 | -- | 8.5 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Tfidf + SelectKBest + PCA + Wiki emb ` | | 0.9673 | -- | 8.6 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Wiki emb 
weighted by Tfidf ` | | 0.9786 | -- | 8.5 Gb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Insult detection | `Insults`_ | | :config:`Reddit emb ` | ROC-AUC | 0.9263 | 0.8556 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English BERT ` | | 0.9255 | 0.8612 | 1200 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English Conversational BERT ` | | 0.9389 | 0.8941 | 1200 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`English BERT on PyTorch ` | | 0.9329 | 0.877 | 1.1 Gb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| 5 topics | `AG News`_ | | :config:`Wiki emb ` | Accuracy | 0.8922 | 0.9059 | 8.5 Gb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Intent |`Yahoo-L31`_ | | :config:`Yahoo-L31 on conversational BERT ` | ROC-AUC | 0.9436 | -- | 1200 Mb | +| Insult detection | `Insults`_ | En | :config:`English BERT ` | ROC-AUC | 0.9327 | 0.8602 | 1.1 Gb | +------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ | Sentiment |`SST`_ | | :config:`5-classes SST on conversational BERT ` | Accuracy | 0.6456 | 0.6715 | 400 Mb | -+ + + 
+-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`5-classes SST on multilingual BERT ` | | 0.5738 | 0.6024 | 660 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`3-classes SST SWCNN on PyTorch ` | | 0.7379 | 0.6312 | 4.3 Mb | -+ +--------------------+ +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| |`Yelp`_ | | :config:`5-classes Yelp on conversational BERT ` | | 0.6925 | 0.6842 | 400 Mb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`5-classes Yelp on multilingual BERT ` | | 0.5896 | 0.5874 | 660 Mb | +------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ | Sentiment |`Twitter mokoron`_ | Ru | :config:`RuWiki+Lenta emb w/o preprocessing ` | | 0.9965 | 0.9961 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`RuWiki+Lenta emb with preprocessing ` | | 0.7823 | 0.7759 | 6.2 Gb | + +--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| |`RuSentiment`_ | | :config:`RuWiki+Lenta emb ` | F1-weighted | 0.6541 | 0.7016 | 6.2 Gb | -+ + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`Twitter emb super-convergence ` [6]_ | | 0.7301 | 0.7576 | 3.4 Gb | -+ + + 
+-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ -| | | | :config:`ELMo ` | | 0.7519 | 0.7875 | 700 Mb | +| |`RuSentiment`_ | | :config:`ELMo ` | F1-weighted | 0.7519 | 0.7875 | 700 Mb | + + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ | | | | :config:`Multi-language BERT ` | | 0.6809 | 0.7193 | 1900 Mb | + + + +-------------------------------------------------------------------------------------------------+ +--------+--------+-----------+ | | | | :config:`Conversational RuBERT ` | | 0.7548 | 0.7742 | 657 Mb | -+------------------+--------------------+ +-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ -| Intent |Ru like`Yahoo-L31`_ | | :config:`Conversational vs Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | +------------------+--------------------+------+-------------------------------------------------------------------------------------------------+-------------+--------+--------+-----------+ .. _`DSTC 2`: http://camdial.org/~mh521/dstc/ -.. _`SNIPS-2017`: https://github.com/snipsco/nlu-benchmark/tree/master/2017-06-custom-intent-engines .. _`Insults`: https://www.kaggle.com/c/detecting-insults-in-social-commentary .. _`AG News`: https://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html .. _`Twitter mokoron`: http://study.mokoron.com/ @@ -362,7 +295,6 @@ The reviews are long enough (cut up to 200 subtokens). .. _`Yahoo-L31`: https://webscope.sandbox.yahoo.com/catalog.php?datatype=l .. _`Yahoo-L6`: https://webscope.sandbox.yahoo.com/catalog.php?datatype=l .. _`SST`: https://nlp.stanford.edu/sentiment/index.html -.. 
_`Yelp`: https://www.yelp.com/dataset GLUE Benchmark -------------- @@ -422,43 +354,13 @@ Then training process can be run in the same way: python -m deeppavlov train -Comparison ----------- - -The comparison of the presented model is given on **SNIPS** dataset [7]_. The -evaluation of model scores was conducted in the same way as in [3]_ to -compare with the results from the report of the authors of the dataset. -The results were achieved with tuning of parameters and embeddings -trained on Reddit dataset. - -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -| Model | AddToPlaylist | BookRestaurant | GetWheather | PlayMusic | RateBook | SearchCreativeWork | SearchScreeningEvent | -+========================+=================+==================+===============+==============+==============+======================+========================+ -| api.ai | 0.9931 | 0.9949 | 0.9935 | 0.9811 | 0.9992 | 0.9659 | 0.9801 | -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -| ibm.watson | 0.9931 | 0.9950 | 0.9950 | 0.9822 | 0.9996 | 0.9643 | 0.9750 | -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -| microsoft.luis | 0.9943 | 0.9935 | 0.9925 | 0.9815 | 0.9988 | 0.9620 | 0.9749 | -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -| wit.ai | 0.9877 | 0.9913 | 0.9921 | 0.9766 | 0.9977 | 0.9458 | 0.9673 | -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -| snips.ai | 0.9873 | 0.9921 | 0.9939 | 0.9729 | 0.9985 | 0.9455 | 0.9613 | 
-+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -| recast.ai | 0.9894 | 0.9943 | 0.9910 | 0.9660 | 0.9981 | 0.9424 | 0.9539 | -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -| amazon.lex | 0.9930 | 0.9862 | 0.9825 | 0.9709 | 0.9981 | 0.9427 | 0.9581 | -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ -| Shallow-and-wide CNN | **0.9956** | **0.9973** | **0.9968** | **0.9871** | **0.9998** | **0.9752** | **0.9854** | -+------------------------+-----------------+------------------+---------------+--------------+--------------+----------------------+------------------------+ - How to improve the performance ------------------------------ -- One can use FastText [4]_ to train embeddings that are better suited +- One can use FastText [3]_ to train embeddings that are better suited for considered datasets. - One can use some custom preprocessing to clean texts. -- One can use ELMo [5]_ or BERT [8]_. +- One can use ELMo [4]_ or BERT [5]_. - All the parameters should be tuned on the validation set. References @@ -468,14 +370,8 @@ References .. [2] Ю. В. Рубцова. Построение корпуса текстов для настройки тонового классификатора // Программные продукты и системы, 2015, №1(109), –С.72-78 -.. [3] https://www.slideshare.net/KonstantinSavenkov/nlu-intent-detection-benchmark-by-intento-august-2017 - -.. [4] P. Bojanowski\ *, E. Grave*, A. Joulin, T. Mikolov, Enriching Word Vectors with Subword Information. - -.. [5] Peters, Matthew E., et al. "Deep contextualized word representations." 
arXiv preprint arXiv:1802.05365 (2018). - -.. [6] Smith L. N., Topin N. Super-convergence: Very fast training of residual networks using large learning rates. – 2018. +.. [3] P. Bojanowski\ *, E. Grave*, A. Joulin, T. Mikolov, Enriching Word Vectors with Subword Information. -.. [7] Coucke A. et al. Snips voice platform: an embedded spoken language understanding system for private-by-design voice interfaces //arXiv preprint arXiv:1805.10190. – 2018. +.. [4] Peters, Matthew E., et al. "Deep contextualized word representations." arXiv preprint arXiv:1802.05365 (2018). -.. [8] Devlin J. et al. Bert: Pre-training of deep bidirectional transformers for language understanding //arXiv preprint arXiv:1810.04805. – 2018. +.. [5] Devlin J. et al. Bert: Pre-training of deep bidirectional transformers for language understanding //arXiv preprint arXiv:1810.04805. – 2018. diff --git a/docs/features/models/morphotagger.rst b/docs/features/models/morphotagger.rst index e8e7769cd5..dd21787edf 100644 --- a/docs/features/models/morphotagger.rst +++ b/docs/features/models/morphotagger.rst @@ -2,60 +2,19 @@ Neural Morphological Tagging ============================ It is an implementation of neural morphological tagger. -As for now (November, 2019) we have two types of models: -the BERT-based ones (available only for Russian) and -the character-based bidirectional LSTM. The BERT model -includes only a dense layer on the top of BERT embedder. +The model includes only a dense layer on the top of BERT embedder. See the `BERT paper `__ for a more complete description, as well as the `BERT section `__ of the documentation. -We plan to release more BERT-based models in near future. -Most of our models follow -`Heigold et al., 2017. An extensive empirical evaluation of -character-based morphological tagging for 14 -languages `__. -They also achieve the state-of-the-art performance among open source -systems. 
- -The BERT-based model is trained on `Universal -Dependencies corpora `__ -(version 2.3), while all the other models were trained -on Universal Dependencies 2.0 corpora. +The model is trained on `Universal Dependencies corpora `__ +(version 2.3). +----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| Language | Code | UDPipe accuracy | UDPipe Future accuracy [#f1]_ | Our top accuracy | Model size (MB)| +| Language | Code | UDPipe accuracy | UDPipe Future accuracy | Our top accuracy | Model size (MB)| +================+==============+=================+===============================+==================+================+ -| Arabic | ar | 88.31 | | 90.85 | 23.7 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| Czech | cs | 91.86 | | 94.35 | 41.8 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| English | en | 92.53 | | 93.00 | 16.9 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| French | fr | 95.25 | | 95.45 | 19.0 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| German | de | 76.65 | | 83.83 | 18.6 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| Hindi | hi | 87.74 | | 90.01 | 21.9 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| Hungarian | hu | 69.52 | | 75.34 | 15.4 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| Italian | it | 96.33 | | 96.47 | 32.0 | 
-+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| Russian | ru_syntagrus | 93.57 | | 96.23 | 48.7 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ | Russian (UD2.3)| ru_syntagrus | 93.5 | 96.90 | 97.83 | 661 | +----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| Spanish | es_ancora | 96.88 | | 97.00 | 20.8 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ -| Turkish | tr | 86.98 | | 88.03 | 16.1 | -+----------------+--------------+-----------------+-------------------------------+------------------+----------------+ - -.. rubric:: Footnotes - -.. [#f1] No models available, only the source code. The scores are taken from - `Straka. UDPipe 2.0 Prototype at CoNLL 2018 UD Shared Task. `__. - =========================== Usage examples. @@ -65,7 +24,7 @@ Before using the model make sure that all required packages are installed using .. code:: bash - python -m deeppavlov install morpho_ru_syntagrus_pymorphy + python -m deeppavlov install morpho_ru_syntagrus_bert For Windows platform one has to set `KERAS_BACKEND` to `tensorflow` (it could be done only once): @@ -84,82 +43,10 @@ For Windows platform if one did not set `KERAS_BACKEND` to `tensorflow` from com os.environ["KERAS_BACKEND"] = "tensorflow" - -.. code:: python - - from deeppavlov import build_model, configs - model = build_model(configs.morpho_tagger.UD2_0.morpho_ru_syntagrus_pymorphy, download=True) - sentences = ["Я шёл домой по незнакомой улице.", "Девушка пела в церковном хоре о всех уставших в чужом краю."] - for parse in model(sentences): - print(parse) - -If you want to use the obtained tags further in Python, just split the output using tabs and newlines. 
- -You may also pass the tokenized sentences instead of raw ones: - -.. code:: python - - sentences = [["Я", "шёл", "домой", "по", "незнакомой", "улице", "."]] - for parse in model(sentences): - print(parse) - -If your data is large, you can call -:meth:`~deeppavlov.core.common.chainer.Chainer.batched_call` method of the model, which will additionally -separate you list of sentences into small batches. - -.. code:: python - - from deeppavlov import build_model, configs - model = build_model(configs.morpho_tagger.UD2_0.morpho_ru_syntagrus_pymorphy, download=True) - sentences = ["Я шёл домой по незнакомой улице.", "Девушка пела в церковном хоре о всех уставших в чужом краю."] - for parse in model.batched_call(sentences, batch_size=16): - print(parse) - -:: - - 1 Я PRON,Case=Nom|Number=Sing|Person=1 _ - 2 шёл VERB,Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act _ - 3 домой ADV,Degree=Pos _ - 4 по ADP _ - 5 незнакомой ADJ,Case=Dat|Degree=Pos|Gender=Fem|Number=Sing _ - 6 улице NOUN,Animacy=Inan|Case=Dat|Gender=Fem|Number=Sing _ - 7 . PUNCT _ - - 1 Девушка NOUN,Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing _ - 2 пела VERB,Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act _ - 3 в ADP _ - 4 церковном ADJ,Case=Loc|Degree=Pos|Gender=Masc|Number=Sing _ - 5 хоре NOUN,Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing _ - 6 о ADP _ - 7 всех PRON,Animacy=Anim|Case=Loc|Number=Plur _ - 8 уставших VERB,Aspect=Perf|Case=Loc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act _ - 9 в ADP _ - 10 чужом ADJ,Case=Loc|Degree=Pos|Gender=Masc|Number=Sing _ - 11 краю NOUN,Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing _ - 12 . PUNCT _ - -If you want the output in UD format, try setting ``"data_format": ud`` in the ``tag_output_prettifier`` section -of :config:`configuration file ` -you import. - -Advanced models (BERT and lemmatized models). ---------------------------------------------- - -#. For Russian you can use the BERT-based model. 
It has much higher performance (97.8% instead of 96.2), - however, you need a more powerful GPU (ideally, 16 GB) to train it. However, the speed - of inference and training on such GPU is comparable with character-based model. - -#. Exclusively for Russian language you can obtain lemmatized UD output by using either the - :config:`BERT model ` - :config:`augmented version ` - of Pymorphy model. Both models select the Pymorphy lemma whose tag correspond to the tag - predicted by the tagger. - .. code:: python from deeppavlov import build_model, configs - model = build_model(configs.morpho_tagger.BERT.morpho_ru_syntagrus_bert, download=True) - # model = build_model(configs.morpho_tagger.UD2_0.morpho_ru_syntagrus_pymorphy_lemmatize, download=True) + model = build_model('morpho_ru_syntagrus_bert', download=True) sentences = ["Я шёл домой по незнакомой улице.", "Девушка пела в церковном хоре о всех уставших в чужом краю."] for parse in model(sentences): print(parse) @@ -187,129 +74,14 @@ Advanced models (BERT and lemmatized models). 11 краю край NOUN _ Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing _ _ _ _ 12 . . PUNCT _ _ _ _ _ _ -Command line: ----------------- - -If you want to use our models from scratch, do the following -(all the examples are for ru\_syntagrus\_pymorphy model, -change the filenames accordingly to invoke models for other languages): - -#. Download data - - .. code:: bash - - python -m deeppavlov download morpho_ru_syntagrus_pymorphy - - To perform all downloads in runtime you can also run all subsequent - commands with ``-d`` key, - -#. To apply a pre-trained ru\_syntagrus\_pymorphy model to ru\_syntagrus test - data provided it was downloaded using the previous command, run - - .. code:: bash - - python -m deeppavlov.models.morpho_tagger morpho_ru_syntagrus_pymorphy \ - > -f ~/.deeppavlov/downloads/UD2.0_source/ru_syntagrus/ru_syntagrus-ud-test.conllu - - ``-f`` argument points to the path to the test data. 
If you do not pass it the model expects data from ``stdin``. - This command writes the output to stdout, you can redirect it using standard ``>`` notation. - - - By default the ``deeppavlov.models.morpho_tagger`` script expects the data to be in CoNLL-U format, - however, you can specify input format by using the `-i` key. For example, your input can be in one word per line - format, in this case you set this key to ``"vertical"``. Note also that you can pass the data from - - .. code:: bash - - echo -e "Мама\nмыла\nраму\n.\n\nВаркалось\n,\nхливкие\nшорьки\nпырялись\nпо\nнаве\n." \ - > | python -m deeppavlov.models.morpho_tagger morpho_ru_syntagrus_pymorphy -i "vertical" - - :: - - 1 Мама NOUN Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing - 2 мыла VERB Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act - 3 раму NOUN Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing - 4 . PUNCT _ - - 1 Варкалось NOUN Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing - 2 , PUNCT _ - 3 хливкие ADJ Case=Nom|Degree=Pos|Number=Plur - 4 шорьки NOUN Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur - 5 пырялись VERB Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Mid - 6 по ADP _ - 7 наве NOUN Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing - 8 . PUNCT _ - - - - Untokenized sentences (one sentence per line) can be tagged as well, in this case input format should be ``"text"`` - - .. code:: bash - - echo -e "Мама мыла раму.\nВаркалось, хливкие шорьки пырялись по наве." \ - > | python -m deeppavlov.models.morpho_tagger morpho_ru_syntagrus_pymorphy -i "text" - - :: - 1 Мама NOUN Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing - 2 мыла VERB Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act - 3 раму NOUN Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing - 4 . 
PUNCT _ - - 1 Варкалось NOUN Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing - 2 , PUNCT _ - 3 хливкие ADJ Case=Nom|Degree=Pos|Number=Plur - 4 шорьки NOUN Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur - 5 пырялись VERB Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Mid - 6 по ADP _ - 7 наве NOUN Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing - 8 . PUNCT _ - - - You can also obtain the output in CoNLL-U format by passing the ``-o ud`` argument: - - .. code:: bash - - echo -e "Мама мыла раму.\nВаркалось, хливкие шорьки пырялись по наве." \ - > | python -m deeppavlov.models.morpho_tagger morpho_ru_syntagrus_pymorphy -i "text" -o "ud" - - :: - - 1 Мама _ NOUN _ Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing _ _ _ _ - 2 мыла _ VERB _ Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act _ _ _ _ - 3 раму _ NOUN _ Animacy=Inan|Case=Acc|Gender=Fem|Number=Sing _ _ _ _ - 4 . _ PUNCT _ _ _ _ _ _ - - 1 Варкалось _ NOUN _ Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing _ _ _ _ - 2 , _ PUNCT _ _ _ _ _ _ - 3 хливкие _ ADJ _ Case=Nom|Degree=Pos|Number=Plur _ _ _ _ - 4 шорьки _ NOUN _ Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur _ _ _ _ - 5 пырялись _ VERB _ Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|Voice=Mid _ _ _ _ - 6 по _ ADP _ _ _ _ _ _ - 7 наве _ NOUN _ Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing _ _ _ _ - 8 . _ PUNCT _ _ _ _ _ _ - - -#. To evaluate ru\_syntagrus model on ru\_syntagrus test subset, run - - .. code:: bash - - python -m deeppavlov evaluate morpho_ru_syntagrus_pymorphy - -#. To retrain model on ru\_syntagrus dataset, run one of the following - (the first is for Pymorphy-enriched model) - - .. code:: bash - - python -m deeppavlov train morpho_ru_syntagrus_pymorphy - python -m deeppavlov train morpho_ru_syntagrus - - Be careful, one epoch takes 2-60 minutes depending on your GPU. - -#. To tag Russian sentences from stdin, run - - .. 
code:: bash +You may also pass the tokenized sentences instead of raw ones: - python -m deeppavlov interact morpho_ru_syntagrus_pymorphy +.. code:: python -Read the detailed readme below. + sentences = [["Я", "шёл", "домой", "по", "незнакомой", "улице", "."]] + for parse in model(sentences): + print(parse) Task description ---------------- @@ -349,336 +121,3 @@ project `__. 6 were AUX Mood=Ind|Tense=Past|VerbForm=Fin 7 married VERB Tense=Past|VerbForm=Part|Voice=Pass 8 . PUNCT _ - -The full UD format (see below) includes more columns including lemma and -syntactic information. - -Training data -~~~~~~~~~~~~~ - -Our tagger accepts the data in `CONLL-U -format `__: - -:: - - 1 Four four NUM CD NumType=Card 2 nummod _ _ - 2 months month NOUN NNS Number=Plur 3 obl:npmod _ _ - 3 later later ADV RB _ 7 advmod _ SpaceAfter=No - 4 , , PUNCT , _ 7 punct _ _ - 5 we we PRON PRP Case=Nom|Number=Plur|Person=1|PronType=Prs 7 nsubj:pass _ _ - 6 were be AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 7 aux:pass _ _ - 7 married marry VERB VBN Tense=Past|VerbForm=Part|Voice=Pass 0 root _ SpaceAfter=No - 8 . . PUNCT . _ 7 punct _ _ - -It does not take into account the contents except the columns number -2, 4, 6 -(the word itself, POS label and morphological tag), however, in the -default setting the reader -expects the word to be in column 2, the POS label in column 4 and the -detailed tag description -in column 6. - -Test data -~~~~~~~~~ - -When annotating unlabeled text, our model expects the data in -10-column UD format as well. However, it does not pay attention to any column except the first one, -which should be a number, and the second, which must contain a word. -You can also pass only the words with exactly one word on each line -by adding ``"from_words": True`` to ``dataset_reader`` section. -Sentences are separated with blank lines. - -You can also pass the unlemmatized text as input. In this case it is preliminarly lemmatized using the -NLTK ``word_tokenize`` function. 
- -Algorithm description ---------------------- - -We adopt a neural model for morphological tagging from -`Heigold et al., 2017. An extensive empirical evaluation of -character-based morphological tagging for 14 -languages `__. -We refer the reader to the paper for complete description of the -algorithm. The tagger consists -of two parts: a character-level network which creates embeddings for -separate words and word-level -recurrent network which transforms these embeddings to morphological -tags. - -The character-level part implements the model from -`Kim et al., 2015. Character-aware language -models `__. -First it embeds the characters into dense vectors, then passes these -vectors through multiple -parallel convolutional layers and concatenates the output of these -convolutions. The convolution -output is propagated through a highway layer to obtain the final word -representation. - -You can optionally use a morphological dictionary during tagging. In -this case our model collects -a 0/1 vector with ones corresponding to the dictionary tags of a -current word. This vector is -passed through a one-layer perceptron to obtain an embedding of -dictionary information. -This embedding is concatenated with the output of character-level -network. - -As a word-level network we utilize a Bidirectional LSTM, its outputs -are projected through a dense -layer with a softmax activation. In principle, several BiLSTM layers -may be stacked as well -as several convolutional or highway layers on character level; -however, we did not observed -any sufficient gain in performance and use shallow architecture -therefore. - -Model configuration. --------------------- - -Training configuration -~~~~~~~~~~~~~~~~~~~~~~ - -We distribute pre-trained models for 11 languages trained on Universal Dependencies data. 
-Configuration files for reproducible training are also available in -:config:`deeppavlov/configs/morpho_tagger/UD2.0 `, for -example -:config:`deeppavlov/configs/morpho_tagger/UD2.0/morpho_en.json `. -The configuration file consists of several parts: - -Dataset Reader -^^^^^^^^^^^^^^ - -The dataset reader describes the instance of -:class:`~deeppavlov.dataset_readers.morphotagging_dataset_reader.MorphotaggerDatasetReader` class. - -:: - - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": "{DOWNLOADS_PATH}/UD2.0_source", - "language": "en", "data_types": ["train", "dev", "test"] - } - -``class_name`` field refers to the class MorphotaggerDatasetReader, -``data_path`` contains the path to data directory, the ``language`` -field is used to derive the name of training and development file. -Alternatively, you can specify these files separately by full (or absolute) paths -like - -:: - - "dataset_reader": { - "class_name": "morphotagger_dataset_reader", - "data_path": ["{DOWNLOADS_PATH}/UD2.0_source/en-ud-train.conllu", - "{DOWNLOADS_PATH}/UD2.0_source/en-ud-dev.conllu", - "{DOWNLOADS_PATH}/UD2.0_source/en-ud-test.conllu"] - "data_types": ["train", "dev", "test"] - } - -By default you need only the train file, the dev file is used to -validate -your model during training and the test file is for model evaluation -after training. Since you need some validation data anyway, without -the dev part -you need to resplit your data as described in `Dataset -Iterator <#dataset-iterator>`__ section. - -Your data should be in CONLL-U format. It refers to ``predict`` mode also, but in this case only word -column is taken into account. If your data is in single word per line format and you do not want to -reformat it, add ``"from_words": True`` to ``dataset_reader`` section. You can also specify -which columns contain words, tags and detailed tags, for documentation see -:func:`Documentation `. 
- -Dataset iterator -^^^^^^^^^^^^^^^^ - -:class:`Dataset iterator ` class -performs simple batching and shuffling. - -:: - - "dataset_iterator": { - "class_name": "morphotagger_dataset" - } - -By default it has no parameters, but if your training and validation -data -are in the same file, you may specify validation split here: - -:: - - "dataset_iterator": { - "class_name": "morphotagger_dataset", - "validation_split": 0.2 - } - -Chainer -^^^^^^^ - -The ``chainer`` part of the configuration file contains the -specification of the neural network model and supplementary things such as vocabularies. -Chainer refers to an instance of :class:`~deeppavlov.core.common.chainer.Chainer`, see -:doc:`configuration ` for a complete description. - -The major part of ``chainer`` is ``pipe``. The ``pipe`` contains -vocabularies and the network itself as well -as some pre- and post- processors. The first part lowercases the input -and normalizes it (see :class:`~deeppavlov.models.preprocessors.capitalization.CapitalizationPreprocessor`). - -:: - - "pipe": [ - { - "id": "lowercase_preprocessor", - "class_name": "lowercase_preprocessor", - "in": ["x"], - "out": ["x_processed"] - }, - -The second part is the tag vocabulary which transforms tag labels the -model should predict to tag indexes. - -:: - - { - "id": "tag_vocab", - "class_name": "simple_vocab", - "fit_on": ["y"], - "special_tokens": ["PAD", "BEGIN", "END"], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/tag_en.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/tag_en.dict" - }, - -The third part is the character vocabulary used to represent words as sequences of indexes. Only the -symbols which occur at least ``min_freq`` times in the training set are kept. 
- -:: - - { - "id": "char_vocab", - "class_name": "simple_vocab", - "min_freq": 3, - "fit_on": ["x_processed"], - "special_tokens": ["PAD", "BEGIN", "END"], - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/char_en.dict", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/char_en.dict" - }, - - -If you want to utilize external morphological knowledge, you can do it in two ways. -The first is to use :class:`~deeppavlov.models.vectorizers.word_vectorizer.DictionaryVectorizer`. -:class:`~deeppavlov.models.vectorizers.word_vectorizer.DictionaryVectorizer` is instantiated from a dictionary file. -Each line of a dictionary file contains two columns: -a word and a space-separated list of its possible tags. Tags can be in any possible format. The config part for -:class:`~deeppavlov.models.vectorizers.word_vectorizer.DictionaryVectorizer` looks as - -:: - - { - "id": "dictionary_vectorizer", - "class_name": "dictionary_vectorizer", - "load_path": PATH_TO_YOUR_DICTIONARY_FILE, - "save_path": PATH_TO_YOUR_DICTIONARY_FILE, - "in": ["x"], - "out": ["x_possible_tags"] - } - - -The second variant for external morphological dictionary, available only for Russian, -is `Pymorphy2 `_. In this case the vectorizer list all Pymorphy2 tags -for a given word and transforms them to UD2.0 format using -`russian-tagsets `_ library. Possible UD2.0 tags -are listed in a separate distributed with the library. This part of the config look as -(see :config:`config `)) - -:: - - { - "id": "pymorphy_vectorizer", - "class_name": "pymorphy_vectorizer", - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tags_russian.txt", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ru_syntagrus/tags_russian.txt", - "max_pymorphy_variants": 5, - "in": ["x"], - "out": ["x_possible_tags"] - } - -The next part performs the tagging itself. Together with general parameters it describes -the input parameters of :class:`~deeppavlov.models.morpho_tagger.morpho_tagger.MorphoTagger`) class. 
- -:: - - { - "in": ["x_processed"], - "in_y": ["y"], - "out": ["y_predicted"], - "class_name": "morpho_tagger", - "main": true, - "save_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ud_en.hdf5", - "load_path": "{MODELS_PATH}/morpho_tagger/UD2.0/ud_en.hdf5", - "tags": "#tag_vocab", - "symbols": "#char_vocab", - "verbose": 1, - "char_embeddings_size": 32, "char_window_size": [1, 2, 3, 4, 5, 6, 7], - "word_lstm_units": 128, "conv_dropout": 0.0, "char_conv_layers": 1, - "char_highway_layers": 1, "highway_dropout": 0.0, "word_lstm_layers": 1, - "char_filter_multiple": 50, "intermediate_dropout": 0.0, "word_dropout": 0.2, - "lstm_dropout": 0.3, "regularizer": 0.01, "lm_dropout": 0.3 - } - - -When an additional vectorizer is used, the first line is changed to -``"in": ["x_processed", "x_possible_tags"]`` and an additional parameter -``"word_vectorizers": [["#pymorphy_vectorizer.dim", 128]]`` is appended. - -Config includes general parameters of :class:`~deeppavlov.core.models.component.Component` class, -described in the :doc:`configuration ` and specific -:class:`~deeppavlov.models.morpho_tagger.morpho_tagger.MorphoTagger` -parameters. The latter include - -- ``tags`` - tag vocabulary. ``#tag_vocab`` refers to an already defined model with ``"id" = "tag_vocab"``. -- ``symbols`` - character vocabulary. ``#char_vocab`` refers to an already defined model with ``"id" = "char_vocab"``. - -and other specific parameters of the network, available in :class:`~deeppavlov.models.morpho_tagger.morpho_tagger.MorphoTagger` documentation. - -The ``"train"`` section of ``"chainer"`` contains training parameters, such as number of epochs, -batch_size and logging frequency, see general readme for more details. - -**chainer** also includes the ``"prettifier"`` subsection, which describes the parameters -of :class:`~deeppavlov.core.models.morpho_tagger.common.TagOutputPrettifier` -which transforms the predictions of the tagger to a readable form. 
- -:: - - { - "in": ["x", "y_predicted"], - "out": ["y_prettified"], - "class_name": "tag_output_prettifier", - "end": "\\n" - } - - -It takes two inputs — source sequence of words and predicted sequence of tags -and produces the output of the format - -:: - - 1 Это PRON Animacy=Inan|Case=Acc|Gender=Neut|Number=Sing - 2 чутко ADV Degree=Pos - 3 фиксируют VERB - Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act - 4 энциклопедические ADJ Case=Nom|Degree=Pos|Number=Plur - 5 издания NOUN Animacy=Inan|Case=Nom|Gender=Neut|Number=Plur - 6 . PUNCT _ - - 1 Four NUM NumType=Card - 2 months NOUN Number=Plur - 3 later ADV * - 4 , PUNCT * - 5 we PRON Case=Nom|Number=Plur|Person=1|PronType=Prs - 6 were AUX Mood=Ind|Tense=Past|VerbForm=Fin - 7 married VERB Tense=Past|VerbForm=Part|Voice=Pass - 8 . PUNCT _ - -To generate output in 10 column CONLL-U format add ``"format_mode": "ud"`` to the described section. diff --git a/docs/features/models/multitask_bert.rst b/docs/features/models/multitask_bert.rst index 3f0f33021c..71e49c8622 100644 --- a/docs/features/models/multitask_bert.rst +++ b/docs/features/models/multitask_bert.rst @@ -25,7 +25,6 @@ insults, analyzes sentiment, and recognises named entities. Multi-task BERT conf :config:`mt_bert_train_tutorial.json ` and for inference :config:`mt_bert_inference_tutorial.json ` are based on configs :config:`insults_kaggle_bert.json `, -:config:`sentiment_sst_multi_bert.json `, :config:`ner_conll2003_bert.json `. We start with the ``metadata`` field of the configuration file. 
Multi-task BERT model is saved in diff --git a/docs/features/models/ner.rst b/docs/features/models/ner.rst index 3663bb84ef..b096a00a3e 100644 --- a/docs/features/models/ner.rst +++ b/docs/features/models/ner.rst @@ -34,7 +34,7 @@ Here is the list of all available configs: +------------------------------------------------------------------------+--------------------+----------+-----------------+------------+------------+ | Model | Dataset | Language | Embeddings Size | Model Size | F1 score | +========================================================================+====================+==========+=================+============+============+ - | :config:`ner_rus_bert_torch ` | Collection3 [1]_ | Ru | 700 MB | 2.0 GB | **97.7** | + | :config:`ner_rus_bert ` | Collection3 [1]_ | Ru | 700 MB | 2.0 GB | **97.7** | +------------------------------------------------------------------------+ + +-----------------+------------+------------+ | :config:`ner_collection3_m1 ` | | | 1.1 GB | 1 GB | 97.8 | +------------------------------------------------------------------------+ + +-----------------+------------+------------+ @@ -44,8 +44,6 @@ Here is the list of all available configs: +------------------------------------------------------------------------+ +----------+-----------------+------------+------------+ | :config:`ner_ontonotes_bert_torch ` | | En | 400 MB | 1.3 GB | 87.9 | +------------------------------------------------------------------------+ + +-----------------+------------+------------+ - | :config:`ner_ontonotes_m1 ` | | | 347 MB | 379.4 MB | 87.7 | - +------------------------------------------------------------------------+ + +-----------------+------------+------------+ | :config:`ner_ontonotes ` | | | 331 MB | 7.8 MB | 86.7 | +------------------------------------------------------------------------+--------------------+ +-----------------+------------+------------+ | :config:`ner_conll2003_bert ` | CoNLL-2003 | | 400 MB | 850 MB | 91.7 | @@ -55,10 +53,6 @@ 
Here is the list of all available configs: | :config:`ner_conll2003 ` | | | 331 MB | 3.1 MB | 89.9 | +------------------------------------------------------------------------+ + +-----------------+------------+------------+ | :config:`conll2003_m1 ` | | | 339 MB | 359.7 MB | **91.9** | - +------------------------------------------------------------------------+--------------------+ +-----------------+------------+------------+ - | :config:`ner_dstc2 ` | DSTC2 | | --- | 626 KB | 97.1 | - +------------------------------------------------------------------------+--------------------+----------+-----------------+------------+------------+ - | :config:`vlsp2016_full ` | VLSP-2016 | Vi | 520 MB | 37.2 MB | 93.4 | +------------------------------------------------------------------------+--------------------+----------+-----------------+------------+------------+ Models can be used from Python using the following code: @@ -265,81 +259,6 @@ quality. Typical partition of a dataset into train, validation, and test are 80%, 10%, 10%, respectively. - -Few-shot Language-Model based ------------------------------ - -It is possible to get a cold-start baseline from just a few samples of labeled data in a couple of seconds. The solution -is based on a Language Model trained on open domain corpus. On top of the LM a SVM classification layer is placed. It is -possible to start from as few as 10 sentences containing entities of interest. - -The data for training this model should be collected in the following way. Given a collection of `N` sentences without -markup, sequentially markup sentences until the total number of sentences with entity of interest become equal -`K`. During the training both sentences with and without markup are used. 
- - -Mean chunk-wise F1 scores for Russian language on 10 sentences with entities : - -+---------+-------+ -|PER | 84.85 | -+---------+-------+ -|LOC | 68.41 | -+---------+-------+ -|ORG | 32.63 | -+---------+-------+ - -(the total number of training sentences is bigger and defined by the distribution of sentences with / without entities). - -The model can be trained using CLI: - -.. code:: bash - - python -m deeppavlov train ner_few_shot_ru - -you have to provide the `train.txt`, `valid.txt`, and `test.txt` files in the format described in the `Training data`_ -section. The files must be in the `ner_few_shot_data` folder as described in the `dataset_reader` part of the config -:config:`ner/ner_few_shot_ru_train.json ` . - -To train and use the model from python code the following snippet can be used: - -.. code:: python - - from deeppavlov import configs, train_model - - ner_model = train_model(configs.ner.ner_few_shot_ru, download=True) - - ner_model(['Example sentence']) - -Warning! This model can take a lot of time and memory if the number of sentences is greater than 1000! - -If a lot of data is available the few-shot setting can be simulated with special `dataset_iterator`. For this purpose -the config -:config:`ner/ner_few_shot_ru_train.json ` . The following code can be used for this -simulation: - -.. code:: python - - from deeppavlov import configs, train_model - - ner_model = train_model(configs.ner.ner_few_shot_ru_simulate, download=True) - -In this config the `Collection dataset `__ is used. However, if -there are files `train.txt`, `valid.txt`, and `test.txt` in the `ner_few_shot_data` folder they will be used instead. - - -To use existing few-shot model use the following python interface can be used: - -.. 
code:: python - - from deeppavlov import configs, build_model - - ner_model = build_model(configs.ner.ner_few_shot_ru) - - ner_model([['Example', 'sentence']]) - ner_model(['Example sentence']) - - - NER-based Model for Sentence Boundary Detection Task ---------------------------------------------------- diff --git a/docs/features/models/neural_ranking.rst b/docs/features/models/neural_ranking.rst index bef692cb83..e4a480889f 100644 --- a/docs/features/models/neural_ranking.rst +++ b/docs/features/models/neural_ranking.rst @@ -104,7 +104,7 @@ As an example of configuration file see :config:`ranking_ubuntu_v2_mt_word2vec_smn.json `. If the model with multi-turn context is used -(such as :class:`~deeppavlov.models.ranking.bilstm_gru_siamese_network.BiLSTMGRUSiameseNetwork` +(such as :class:`~deeppavlov.models.ranking.sequential_matching_network.SMNNetwork` with the parameter ``num_context_turns`` set to the value higher than 1 in the configuration JSON file) then the ``context`` to evaluate should consist of ``num_context_turns`` strings connected by the ampersand. Some of these strings can be empty, i.e. equal to ``''``. @@ -120,15 +120,15 @@ Before using the model make sure that all required packages are installed runnin .. code:: bash - python -m deeppavlov install paraphrase_ident_paraphraser + python -m deeppavlov install paraphraser_bert -To train the model on the `paraphraser.ru`_ dataset with fasttext embeddings one can use the following code in python: +To train the model on the `paraphraser.ru`_ dataset one can use the following code in python: .. code:: python from deeppavlov import configs, train_model - para_model = train_model(configs.ranking.paraphrase_ident_paraphraser, download=True) + para_model = train_model('paraphraser_bert', download=True) Training and inference on your own data --------------------------------------- @@ -144,14 +144,13 @@ three separate files in the default data format described below: binary, i.e. 
1 or 0 corresponding to the correct or incorrect ``response`` for the given ``context``, or it can be multi-class label. In the latter case, each unique ``context`` has the unique class ``label`` and the only correct ``response`` is indicated for each ``context``. Currently, all ranking and paraphrase identification models support `cross-entropy loss` training with binary labels. -Some models, such as :class:`~deeppavlov.models.ranking.bilstm_siamese_network.BiLSTMSiameseNetwork`, -:class:`~deeppavlov.models.ranking.bilstm_gru_siamese_network.BiLSTMGRUSiameseNetwork` -and :class:`~deeppavlov.models.ranking.mpm_siamese_network.MPMSiameseNetwork` support also training with `triplet loss` -(the parameter ``triplet_loss`` should be set to ``true`` for the model in the configuration JSON file in this case) -which can give potentially few percent of performance over the `cross-entropy loss` training. +Some models, such as :class:`~deeppavlov.models.ranking.bilstm_siamese_network.BiLSTMSiameseNetwork`, +also support training with `triplet loss` (the parameter ``triplet_loss`` should be set to ``true`` for the model +in the configuration JSON file in this case), which can potentially give a few percent better performance +than the `cross-entropy loss` training. If the model with multi-turn context is used -(such as :class:`~deeppavlov.models.ranking.bilstm_gru_siamese_network.BiLSTMGRUSiameseNetwork` +(such as :class:`~deeppavlov.models.ranking.sequential_matching_network.SMNNetwork` with the parameter ``num_context_turns`` set to the value higher than 1 in the configuration JSON file) then the ``context`` should be specified with ``num_context_turns`` strings separated by the tab key instead of a single string. Some of these strings can be empty, i.e. equal to ``''``. 
diff --git a/docs/features/models/slot_filling.rst b/docs/features/models/slot_filling.rst deleted file mode 100644 index 39b6e0c230..0000000000 --- a/docs/features/models/slot_filling.rst +++ /dev/null @@ -1,264 +0,0 @@ -Neural Named Entity Recognition and Slot Filling -================================================ - -This model solves Slot-Filling task using Levenshtein search and different neural network architectures for NER. -To read about NER without slot filling please address :doc:`NER documentation `. -This model serves for solving DSTC 2 Slot-Filling task. In most of the cases, NER task can be formulated as: - -*Given a sequence of tokens (words, and maybe punctuation symbols) -provide a tag from a predefined set of tags for each token in the -sequence.* - -For NER task there are some common types of entities used as tags: - -- persons -- locations -- organizations -- expressions of time -- quantities -- monetary values - -Furthermore, to distinguish adjacent entities with the same tag many -applications use BIO tagging scheme. Here "B" denotes beginning of an -entity, "I" stands for "inside" and is used for all words comprising the -entity except the first one, and "O" means the absence of entity. -Example with dropped punctuation: - -:: - - Restaraunt O - in O - the O - west B-LOC - of O - the O - city O - serving O - modern B-FOOD - european I-FOOD - cuisine O - -In the example above, ``FOOD`` means food tag, ``LOC`` means location -tag, and "B-" and "I-" are prefixes identifying beginnings and -continuations of the entities. - -Slot Filling is a typical step after the NER. It can be formulated as: - -*Given an entity of a certain type and a set of all possible values of -this entity type provide a normalized form of the entity.* - -In this model, the Slot Filling task is solved by Levenshtein -Distance search across all known entities of a given type. - -For example, there is an entity of "food" type: - -*chainese* - -It is definitely misspelled. 
The set of all known food entities is -{'chinese', 'russian', 'european'}. The nearest known entity from the -given set is *chinese*. So the output of the Slot Filling system will be -*chinese*. - -Configuration of the model --------------------------- - -Configuration of the model can be performed in code or in JSON configuration file. -To train the model you need to specify four groups of parameters: - -- ``dataset_reader`` -- ``dataset_iterator`` -- ``chainer`` -- ``train`` - -In the subsequent text we show the parameter specification in config -file. However, the same notation can be used to specify parameters in -code by replacing the JSON with python dictionary. - -Dataset Reader -~~~~~~~~~~~~~~ - -The dataset reader is a class which reads and parses the data. It -returns a dictionary with three fields: "train", "test", and "valid". -The basic dataset reader is "ner\_dataset\_reader." The dataset reader -config part with "ner\_dataset\_reader" should look like: - -:: - - "dataset_reader": { - "class_name": "dstc2_datasetreader", - "data_path": "dstc2" - } - -where ``class_name`` refers to the basic ner dataset reader class and ``data_path`` -is the path to the folder with DSTC 2 dataset. - -Dataset Iterator -~~~~~~~~~~~~~~~~ - -For simple batching and shuffling you can use "dstc2\_ner\_iterator". -The part of the configuration file for the dataset iterator looks like: -``"dataset_iterator": { "class_name": "dstc2_ner_iterator" }`` - -There are no additional parameters in this part. - -Chainer -~~~~~~~ - -The chainer part of the configuration file contains the specification of -the neural network model and supplementary things such as vocabularies. -The chainer part must have the following form: - -:: - - "chainer": { - "in": ["x"], - "in_y": ["y"], - "pipe": [ - ... - ], - "out": ["y_predicted"] - } - -The inputs and outputs must be specified in the pipe. "in" means regular -input that is used for inference and train mode. 
"in\_y" is used for -training and usually contains ground truth answers. "out" field stands -for model prediction. The model inside the pipe must have output -variable with name "y\_predicted" so that "out" knows where to get -predictions. - -The major part of "chainer" is "pipe". The "pipe" contains the -pre-processing modules, vocabularies and model. However, we can use -existing pipelines: - -:: - - "pipe": [ - { - "in": ["x"], - "class_name": "lazy_tokenizer", - "out": ["x"] - }, - { - "in": ["x"], - "config_path": "../deeppavlov/configs/ner/ner_dstc2.json", - "out": ["tags"] - }, - ... - ] - -This part will initialize already existing pre-trained NER module. The -only thing need to be specified is path to existing config. The -preceding lazy tokenizer serves to extract tokens for raw string of -text. - -The following component in the pipeline is the ``slotfiller``: - -:: - - "pipe": [ - { - "in": ["x_lower", "tags"], - "class_name": "dstc_slotfilling", - "save_path": "slotfill_dstc2/dstc_slot_vals.json", - "load_path": "slotfill_dstc2/dstc_slot_vals.json", - "out": ["slots"] - } - -The ``slotfiller`` takes the tags and tokens to perform normalization of -extracted entities. The normalization is performed via fuzzy Levenshtein -search in dstc\_slot\_vals dictionary. The output of this component is -dictionary of slot values found in the input utterances. - -The main part of the ``dstc_slotfilling`` componet is the slot values -dictionary. The dicttionary has the following structure: - -:: - - { - "entity_type_0": { - "entity_value_0": [ - "entity_value_0_variation_0", - "entity_value_0_variation_1", - "entity_value_0_variation_2" - ], - "entity_value_1": [ - "entity_value_1_variation_0" - ], - ... - } - "entity_type_1": { - ... - -Slotfiller will perform fuzzy search through the all variations of all -entity values of given entity type. The entity type is determined by the -NER component. 
- -The last part of the config is metadata: - -:: - - "metadata": { - "variables": { - "ROOT_PATH": "~/.deeppavlov", - "NER_CONFIG_PATH": "{DEEPPAVLOV_PATH}/configs/ner/ner_dstc2.json", - "DATA_PATH": "{ROOT_PATH}/downloads/dstc2", - "SLOT_VALS_PATH": "{DATA_PATH}/dstc_slot_vals.json", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/slotfill_dstc2" - }, - "download": [ - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz", - "subdir": "{DATA_PATH}" - }, - { - "url": "http://files.deeppavlov.ai/deeppavlov_data/slotfill_dstc2.tar.gz", - "subdir": "{MODELS_PATH}" - } - ] - } - -It contains information for deployment of the model and urls for -download pre-trained models. - -You can see all parts together in ``deeeppavlov/configs/ner/slotfill_dstc2.json`` - -Usage of the model ------------------- - -Please see an example of training a Slot Filling model and using it for -prediction: - -.. code:: python - - from deeppavlov import build_model, configs - - PIPELINE_CONFIG_PATH = configs.ner.slotfill_dstc2 - slotfill_model = build_model(PIPELINE_CONFIG_PATH, download=True) - slotfill_model(['I would like some chinese food', 'The west part of the city would be nice']) - -This example assumes that the working directory is the root of the -project. - -Slotfilling without NER ------------------------ - -An alternative approach to Slot Filling problem could be fuzzy search -for each instance of each slot value inside the text. This approach is -realized in ``slotfill_raw`` component. The component uses needle in -haystack - -The main advantage of this approach is elimination of a separate Named -Entity Recognition module. However, absence of NER module make this -model less robust to noise (words with similar spelling) especially for -long utterances. - -Usage example: - -.. 
code:: python - - from deeppavlov import build_model, configs - - PIPELINE_CONFIG_PATH = configs.ner.slotfill_dstc2_raw - slotfill_model = build_model(PIPELINE_CONFIG_PATH, download=True) - slotfill_model(['I would like some chinese food', 'The west part of the city would be nice']) diff --git a/docs/features/models/spelling_correction.rst b/docs/features/models/spelling_correction.rst index e5d16ba3db..43827a506b 100644 --- a/docs/features/models/spelling_correction.rst +++ b/docs/features/models/spelling_correction.rst @@ -49,7 +49,7 @@ lines to stdout: from deeppavlov import build_model, configs - CONFIG_PATH = configs.spelling_correction.brillmoore_kartaslov_ru + CONFIG_PATH = configs.spelling_correction.levenshtein_corrector_ru model = build_model(CONFIG_PATH, download=True) for line in sys.stdin: @@ -185,14 +185,9 @@ on Automatic Spelling Correction for Russian: +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ | :config:`Damerau Levenshtein 1 + lm` | 59.38 | 53.44 | 56.25 | 39.3 | +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ -| :config:`Brill Moore top 4 + lm` | 51.92 | 53.94 | 52.91 | 0.6 | -+-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ | Hunspell + lm | 41.03 | 48.89 | 44.61 | 2.1 | +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ | JamSpell | 44.57 | 35.69 | 39.64 | 136.2 | +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ -| :config:`Brill Moore top 1 ` | 41.29 | 37.26 | 39.17 | 2.4 | 
-+-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ | Hunspell | 30.30 | 34.02 | 32.06 | 20.3 | +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ - diff --git a/docs/features/models/squad.rst b/docs/features/models/squad.rst index 128ea07627..65922201d2 100644 --- a/docs/features/models/squad.rst +++ b/docs/features/models/squad.rst @@ -225,7 +225,7 @@ Link to SDSJ Task B dataset: http://files.deeppavlov.ai/datasets/sber_squad-v1.1 +========================================================================+================+=================+ | :config:`DeepPavlov RuBERT ` | 66.30+-0.24 | 84.60+-0.11 | +------------------------------------------------------------------------+----------------+-----------------+ -| :config:`DeepPavlov multilingual BERT `| 64.35+-0.39 | 83.39+-0.08 | +| :config:`DeepPavlov multilingual BERT `| 66.24 | 84.71 | +------------------------------------------------------------------------+----------------+-----------------+ | :config:`DeepPavlov R-Net ` | 60.62 | 80.04 | +------------------------------------------------------------------------+----------------+-----------------+ diff --git a/docs/features/models/tfidf_ranking.rst b/docs/features/models/tfidf_ranking.rst index d594b16b12..699c0d1b22 100644 --- a/docs/features/models/tfidf_ranking.rst +++ b/docs/features/models/tfidf_ranking.rst @@ -161,9 +161,7 @@ Scores for **TF-IDF Ranker** model: | Model | Dataset | Recall@5 | +------------------------------------------------------------------------------+----------------+-----------------+ | :config:`enwiki20180211 ` | | 75.6 | -+------------------------------------------------------------------------------+ +-----------------+ -| :config:`enwiki20161221 ` | SQuAD (dev) | 76.2 | 
-+------------------------------------------------------------------------------+ +-----------------+ ++------------------------------------------------------------------------------+ SQuAD (dev) +-----------------+ | `DrQA`_ enwiki20161221 | | 77.8 | +------------------------------------------------------------------------------+----------------+-----------------+ diff --git a/docs/features/overview.rst b/docs/features/overview.rst index 7a515a40cb..722405774a 100644 --- a/docs/features/overview.rst +++ b/docs/features/overview.rst @@ -23,7 +23,7 @@ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01 +---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ | Dataset | Lang | Model | Test F1 | +=========================================================+=======+============================================================================================+=============+ -| Persons-1000 dataset with additional LOC and ORG markup | Ru | :config:`ner_rus_bert.json ` | 98.1 | +| Persons-1000 dataset with additional LOC and ORG markup | Ru | :config:`ner_rus_bert.json ` | 97.7 | + + +--------------------------------------------------------------------------------------------+-------------+ | (Collection 3) | | :config:`ner_rus.json ` | 95.1 | + + +--------------------------------------------------------------------------------------------+-------------+ @@ -42,23 +42,8 @@ which is inspired by Bi-LSTM+CRF architecture from https://arxiv.org/pdf/1603.01 | | | :config:`ner_conll2003_torch_bert.json ` | 88.6 | + + +--------------------------------------------------------------------------------------------+-------------+ | | | :config:`ner_conll2003.json ` | 89.9 | -+---------------------------------------------------------+ +--------------------------------------------------------------------------------------------+-------------+ -| 
DSTC2 | | :config:`ner_dstc2.json ` | 97.1 | +---------------------------------------------------------+-------+--------------------------------------------------------------------------------------------+-------------+ -Slot filling models :doc:`[docs] ` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Based on fuzzy Levenshtein search to extract normalized slot values from text. The models either rely on NER results -or perform needle in haystack search. - -+---------------------------------------------------------------------------------------------------------------------------+------------------+ -| Dataset | Slots Accuracy | -+===========================================================================================================================+==================+ -| :config:`DSTC 2 ` | 98.85 | -+---------------------------------------------------------------------------------------------------------------------------+------------------+ - - Classification model :doc:`[docs] ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -70,51 +55,13 @@ Several pre-trained models are available and presented in Table below. 
+------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ | Task | Dataset | Lang | Model | Metric | Valid | Test | Downloads | +==================+=====================+======+====================================================================================================+=============+==================+=================+===========+ -| 28 intents | `DSTC 2`_ | En | :config:`DSTC 2 emb ` | Accuracy | 0.7613 | 0.7733 | 800 Mb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9629 | 0.9617 | 8.5 Gb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`BERT ` | | 0.9673 | 0.9636 | 800 Mb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| 7 intents | `SNIPS-2017`_ [1]_ | | :config:`DSTC 2 emb ` | F1-macro | 0.8591 | -- | 800 Mb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Wiki emb ` | | 0.9820 | -- | 8.5 Gb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Tfidf + SelectKBest + PCA + Wiki emb ` | | 0.9673 | -- | 8.6 Gb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Wiki emb weighted by 
Tfidf ` | | 0.9786 | -- | 8.5 Gb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Insult detection | `Insults`_ | | :config:`Reddit emb ` | ROC-AUC | 0.9263 | 0.8556 | 6.2 Gb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`English BERT ` | | 0.9255 | 0.8612 | 1200 Mb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`English Conversational BERT ` | | 0.9389 | 0.8941 | 1200 Mb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`English BERT on PyTorch ` | | 0.9329 | 0.877 | 1.1 Gb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| 5 topics | `AG News`_ | | :config:`Wiki emb ` | Accuracy | 0.8922 | 0.9059 | 8.5 Gb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Intent | `Yahoo-L31`_ | | :config:`Yahoo-L31 on conversational BERT ` | ROC-AUC | 0.9436 | -- | 1200 Mb | +| Insult detection | `Insults`_ | En | :config:`English BERT` | ROC-AUC | 0.9327 | 0.8602 | 1.1 Gb | +------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ | Sentiment | `SST`_ | | 
:config:`5-classes SST on conversational BERT ` | Accuracy | 0.6456 | 0.6715 | 400 Mb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`5-classes SST on multilingual BERT ` | | 0.5738 | 0.6024 | 660 Mb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`3-classes SST SWCNN on PyTorch ` | | 0.7379 | 0.6312 | 4.3 Mb | -+ +---------------------+ +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | `Yelp`_ | | :config:`5-classes Yelp on conversational BERT ` | | 0.6925 | 0.6842 | 400 Mb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`5-classes Yelp on multilingual BERT ` | | 0.5896 | 0.5874 | 660 Mb | +------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ | Sentiment | `Twitter mokoron`_ | Ru | :config:`RuWiki+Lenta emb w/o preprocessing ` | | 0.9965 | 0.9961 | 6.2 Gb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`RuWiki+Lenta emb with preprocessing ` | | 0.7823 | 0.7759 | 6.2 Gb | + +---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| | `RuSentiment`_ | | :config:`RuWiki+Lenta emb ` | F1-weighted | 0.6541 | 0.7016 | 6.2 Gb | -+ + + 
+----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`Twitter emb super-convergence ` [2]_ | | 0.7301 | 0.7576 | 3.4 Gb | -+ + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ -| | | | :config:`ELMo ` | | 0.7519 | 0.7875 | 700 Mb | +| | `RuSentiment`_ | | :config:`ELMo ` | F1-weighted | 0.7519 | 0.7875 | 700 Mb | + + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ | | | | :config:`Multi-language BERT ` | | 0.6809 | 0.7193 | 1900 Mb | + + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ @@ -123,13 +70,8 @@ Several pre-trained models are available and presented in Table below. | | | | :config:`Conversational DistilRuBERT-tiny ` | | 0.703 ± 0.0031 | 0.7348 ± 0.0028 | 690 Mb | + + + +----------------------------------------------------------------------------------------------------+ +------------------+-----------------+-----------+ | | | | :config:`Conversational DistilRuBERT-base ` | | 0.7376 ± 0.0045 | 0.7645 ± 0.035 | 1.0 Gb | -+------------------+---------------------+ +----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -| Intent | Ru like`Yahoo-L31`_ | | :config:`Conversational vs Informational on ELMo ` | ROC-AUC | 0.9412 | -- | 700 Mb | +------------------+---------------------+------+----------------------------------------------------------------------------------------------------+-------------+------------------+-----------------+-----------+ -.. [1] Coucke A. et al. 
Snips voice platform: an embedded spoken language understanding system for private-by-design voice interfaces //arXiv preprint arXiv:1805.10190. – 2018. -.. [2] Smith L. N., Topin N. Super-convergence: Very fast training of residual networks using large learning rates. – 2018. - .. _`DSTC 2`: http://camdial.org/~mh521/dstc/ .. _`SNIPS-2017`: https://github.com/snipsco/nlu-benchmark/tree/master/2017-06-custom-intent-engines .. _`Insults`: https://www.kaggle.com/c/detecting-insults-in-social-commentary @@ -139,7 +81,6 @@ Several pre-trained models are available and presented in Table below. .. _`Yahoo-L31`: https://webscope.sandbox.yahoo.com/catalog.php?datatype=l .. _`Yahoo-L6`: https://webscope.sandbox.yahoo.com/catalog.php?datatype=l .. _`SST`: https://nlp.stanford.edu/sentiment/index.html -.. _`Yelp`: https://www.yelp.com/dataset As no one had published intent recognition for DSTC-2 data, the comparison of the presented model is given on **SNIPS** dataset. The @@ -192,14 +133,10 @@ on Automatic Spelling Correction for Russian: +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ | :config:`Damerau Levenshtein 1 + lm` | 53.26 | 53.74 | 53.50 | 29.3 | +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ -| :config:`Brill Moore top 4 + lm` | 51.92 | 53.94 | 52.91 | 0.6 | -+-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ | Hunspell + lm | 41.03 | 48.89 | 44.61 | 2.1 | +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ | JamSpell | 44.57 | 35.69 | 39.64 | 136.2 | 
+-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ -| :config:`Brill Moore top 1 ` | 41.29 | 37.26 | 39.17 | 2.4 | -+-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ | Hunspell | 30.30 | 34.02 | 32.06 | 20.3 | +-----------------------------------------------------------------------------------------+-----------+--------+-----------+---------------------+ @@ -245,8 +182,6 @@ Available pre-trained models for ranking: +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_bert_sep ` | 66.5 | 66.5 | -- | -- | 396 MB | +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ - | `Ubuntu V2`_ | :config:`ranking_ubuntu_v2_mt_interact ` | 59.2 | 58.7 | -- | -- | 8906 MB | - +-------------------+----------------------------------------------------------------------------------------------------------------------+-----------+-------+-------+-------+-----------+ .. 
_`Ubuntu V2`: https://github.com/rkadlec/ubuntu-ranking-dataset-creator @@ -258,8 +193,6 @@ Available pre-trained models for paraphrase identification: +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ | Dataset | Model config | Val (accuracy) | Test (accuracy) | Val (F1) | Test (F1) | Val (log_loss) | Test (log_loss) | Downloads | +========================+======================================================================================================+================+=================+============+============+================+=================+===========+ - | `paraphraser.ru`_ | :config:`paraphrase_ident_paraphraser_ft ` | 83.8 | 75.4 | 87.9 | 80.9 | 0.468 | 0.616 | 5938M | - +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ | `paraphraser.ru`_ | :config:`paraphrase_bert_multilingual ` | 87.4 | 79.3 | 90.2 | 83.4 | -- | -- | 1330M | +------------------------+------------------------------------------------------------------------------------------------------+----------------+-----------------+------------+------------+----------------+-----------------+-----------+ | `paraphraser.ru`_ | :config:`paraphrase_rubert ` | 90.2 | 84.9 | 92.3 | 87.9 | -- | -- | 1325M | @@ -340,9 +273,9 @@ R-Net model is based on `R-NET: Machine Reading Comprehension with Self-matching +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ | `SQuAD-v1.1`_ | :config:`DeepPavlov R-Net ` | en | 71.49 | 80.34 | ~2.5Gb | 
+----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SDSJ Task B`_ | :config:`DeepPavlov RuBERT ` | ru | 66.30 ± 0.24 | 84.60 ± 0.11 | 1325Mb | +| `SDSJ Task B`_ | :config:`DeepPavlov RuBERT ` | ru | 66.30 ± 0.24 | 84.60 ± 0.11 | 1325Mb | +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ -| `SDSJ Task B`_ | :config:`DeepPavlov multilingual BERT ` | ru | 64.35 ± 0.39 | 83.39 ± 0.08 | 1323Mb | +| `SDSJ Task B`_ | :config:`DeepPavlov multilingual BERT ` | ru | 66.24 | 84.71 | 1.6 Gb | +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ | `SDSJ Task B`_ | :config:`DeepPavlov R-Net ` | ru | 60.62 | 80.04 | ~5Gb | +----------------+---------------------------------------------------------------------------------------------+-------+----------------+-----------------+-----------------+ @@ -362,14 +295,9 @@ model. This model outputs empty string in case if there is no answer in context. Morphological tagging model :doc:`[docs] ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We have a BERT-based model for Russian and character-based models for 11 languages. -The character model is based on `Heigold et al., 2017. An extensive empirical evaluation of -character-based morphological tagging for 14 languages `__. -It is a state-of-the-art model for Russian and near state of the art for several other languages. +We have a BERT-based model for Russian language. Model takes as input tokenized sentences and outputs the corresponding sequence of morphological labels in `UD format `__. -The table below contains word and sentence accuracy on UD2.0 datasets. 
-For more scores see :doc:`full table `. .. table:: :widths: auto @@ -383,32 +311,8 @@ For more scores see :doc:`full table `. | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ | | :config:`BERT-based model ` | 97.83 | 72.02 | 661 | +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | | `Pymorphy`_ + `russian_tagsets`_ (first tag) | 60.93 | 0.00 | | - + +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | `UD2.0`_ (Russian) | `UD Pipe 1.2`_ (Straka et al., 2017) | 93.57 | 43.04 | | - + +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | | :config:`Basic model ` | 95.17 | 50.58 | 48.7 | - + +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | | :config:`Pymorphy-enhanced model ` | **96.23** | 58.00 | 48.7 | - +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | `UD2.0`_ (Czech) | `UD Pipe 1.2`_ (Straka et al., 2017) | 91.86 | 42.28 | | - | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | | :config:`Basic model ` | **94.35** | 51.56 | 41.8 | - 
+----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | `UD2.0`_ (English) | `UD Pipe 1.2`_ (Straka et al., 2017) | 92.89 | 55.75 | | - | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | | :config:`Basic model ` | **93.00** | 55.18 | 16.9 | - +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | `UD2.0`_ (German) | `UD Pipe 1.2`_ (Straka et al., 2017) | 76.65 | 10.24 | | - | +--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ - | | :config:`Basic model ` | **83.83** | 15.25 | 18.6 | - +----------------------+--------------------------------------------------------------------------------------------------------------+---------------+----------------+--------------------+ -.. _`Pymorphy`: https://pymorphy2.readthedocs.io/en/latest/ -.. _`russian_tagsets`: https://github.com/kmike/russian-tagsets -.. _`UD2.0`: https://lindat.mff.cuni.cz/repository/xmlui/handle/11234/1-1983 .. _`UD2.3`: http://hdl.handle.net/11234/1-2895 -.. _`UD Pipe 1.2`: http://ufal.mff.cuni.cz/udpipe .. _`UD Pipe 2.3`: http://ufal.mff.cuni.cz/udpipe .. _`UD Pipe Future`: https://github.com/CoNLL-UD-2018/UDPipe-Future @@ -452,33 +356,12 @@ Skills Goal-oriented bot :doc:`[docs] ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Based on Hybrid Code Networks (HCNs) architecture from `Jason D. Williams, Kavosh Asadi, -Geoffrey Zweig, Hybrid Code Networks: practical and efficient end-to-end dialog control +Based on Hybrid Code Networks (HCNs) architecture from `Jason D. 
Williams, Kavosh Asadi, +Geoffrey Zweig, Hybrid Code Networks: practical and efficient end-to-end dialog control with supervised and reinforcement learning – 2017 `__. It allows to predict responses in a goal-oriented dialog. The model is customizable: embeddings, slot filler and intent classifier can be switched on and off on demand. -Available pre-trained models and their comparison with existing benchmarks: - -+-----------------------------------+------+------------------------------------------------------------------------------------+---------------+-----------+---------------+ -| Dataset | Lang | Model | Metric | Test | Downloads | -+===================================+======+====================================================================================+===============+===========+===============+ -| `DSTC 2`_ | En | :config:`basic bot ` | Turn Accuracy | 0.380 | 10 Mb | -+ (:ref:`modified `) + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | :config:`bot with slot filler ` | | 0.542 | 400 Mb | -+ + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | :config:`bot with slot filler, intents & attention ` | | **0.553** | 8.5 Gb | -+-----------------------------------+ +------------------------------------------------------------------------------------+ +-----------+---------------+ -| `DSTC 2`_ | | Bordes and Weston (2016) | | 0.411 | -- | -+ + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | Eric and Manning (2017) | | 0.480 | -- | -+ + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | Perez and Liu (2016) | | 0.487 | -- | -+ + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | Williams et 
al. (2017) | | **0.556** | -- | -+-----------------------------------+------+------------------------------------------------------------------------------------+---------------+-----------+---------------+ - - ODQA :doc:`[docs] ` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -491,8 +374,6 @@ based on its Wikipedia knowledge. +================+====================================================================+=======================+========+===========+ | `SQuAD-v1.1`_ | :config:`ODQA ` | enwiki (2018-02-11) | 35.89 | 9.7Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ -| `SQuAD-v1.1`_ | :config:`ODQA ` | enwiki (2016-12-21) | 37.83 | 9.3Gb | -+----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ | `SDSJ Task B`_ | :config:`ODQA ` | ruwiki (2018-04-01) | 28.56 | 7.7Gb | +----------------+--------------------------------------------------------------------+-----------------------+--------+-----------+ | `SDSJ Task B`_ | :config:`ODQA with RuBERT ` | ruwiki (2018-04-01) | 37.83 | 4.3Gb | @@ -522,47 +403,29 @@ Word vectors for the Russian language trained on joint `Russian Wikipedia - -- Run goal-oriented bot with console interface: - - .. code-block:: bash - - python -m deeppavlov interact gobot_dstc2 -d - -- Run goal-oriented bot with REST API: - - .. code-block:: bash - - python -m deeppavlov riseapi gobot_dstc2 -d - -- Run slot-filling model with Telegram interface: +- Run insults detection model with Telegram interface: .. code-block:: bash - python -m deeppavlov telegram slotfill_dstc2 -d -t + python -m deeppavlov telegram insults_kaggle_bert -d -t -- Run slot-filling model with console interface: +- Run insults detection model with console interface: .. 
code-block:: bash - python -m deeppavlov interact slotfill_dstc2 -d + python -m deeppavlov interact insults_kaggle_bert -d -- Run slot-filling model with REST API: +- Run insults detection model with REST API: .. code-block:: bash - python -m deeppavlov riseapi slotfill_dstc2 -d + python -m deeppavlov riseapi insults_kaggle_bert -d -- Predict intents on every line in a file: +- Predict whether it is an insult on every line in a file: .. code-block:: bash - python -m deeppavlov predict intents_snips -d --batch-size 15 < /data/in.txt > /data/out.txt + python -m deeppavlov predict insults_kaggle_bert -d --batch-size 15 < /data/in.txt > /data/out.txt View `video demo `__ of deployment of a diff --git a/docs/features/skills/go_bot.rst b/docs/features/skills/go_bot.rst index e585ab8e55..7a3e81ccaa 100644 --- a/docs/features/skills/go_bot.rst +++ b/docs/features/skills/go_bot.rst @@ -4,13 +4,18 @@ Go-Bot Framework Overview ******** -Go-Bot is an ML-driven framework designed to enable development of the goal-oriented skills for DeepPavlov Dream AI Assistant Platform. +Go-Bot is an ML-driven framework designed to enable development of the goal-oriented skills for +DeepPavlov Dream AI Assistant Platform. -These goal-oriented skills can be written in Python (enabling using their corresponding Go-Bot-trained models natively) or in any other programming language (requiring running their corresponding Go-Bot-trained models as microservices). +These goal-oriented skills can be written in Python (enabling using their corresponding Go-Bot-trained models natively) +or in any other programming language (requiring running their corresponding Go-Bot-trained models as microservices). 
-To build a Go-Bot-based goal-oriented skill, you need to provide Go-Bot framework with a dataset (in RASA v1 or DSTC2 formats), train model, download it, and then use it by either calling them natively from Python or by rising them as microservices and then calling them via its standard DeepPavlov REST API. +To build a Go-Bot-based goal-oriented skill, you need to provide Go-Bot framework with a dataset (in RASA v1 format), +train a model, download it, and then use it by either calling it natively from Python or by running it as a microservice +and then calling it via the standard DeepPavlov REST API. -Currently, we support two different approaches to define domain model and behavior of a given goal-oriented skill - using either a subset of the v1 of the RASA DSLs (domain.yml, nlu.md, stories.md) or a DSTC2 format. As of the latest release, the following subset of functionality is supported: +Currently we support a subset of the v1 of the RASA DSLs (domain.yml, nlu.md, stories.md) to define domain model and +behavior of a given goal-oriented skill. As of the latest release, the following subset of functionality is supported: * Intents * Slots (simple slots requiring custom classifiers for custom data types) @@ -18,28 +23,34 @@ Currently, we support two different approaches to define domain model and behavi * Templated Responses (w/o variables) * **Form-Filling** (basic, added in **v0.14 release**) -In the future, we will expand support for RASA DSLs where appropriate to enable backward compatibility, add integration with the upcoming Intent Catcher component available as part of the DeepPavlov component library, and so on. +In the future, we will expand support for RASA DSLs where appropriate to enable backward compatibility, add integration +with the upcoming Intent Catcher component available as part of the DeepPavlov component library, and so on.
-To experiment with the Go-Bot you can follow tutorials for using RASA DSLs, or pick one of the two available pre-trained models designed around the DSTSC2 dataset (English). +To experiment with the Go-Bot you can follow tutorials for using RASA DSLs. RASA DSLs Format Support ************************ Overview ======== -While DSTC-2 schema format is quite rich, preparing this kind of dataset with all required annotations might be challenging. To simplify the process of building goal-oriented bots using DeepPavlov technology, in `v0.12.0 `_ we have introduced a (limited) support for defining them using RASA DSLs. +To simplify the process of building goal-oriented bots using DeepPavlov technology, +in `v0.12.0 `_ we have introduced +(limited) support for defining them using RASA DSLs. .. note:: DSLs, known as Domain-Specific Languages, provide a rich mechanism to define the behavior, or "the what", while the underlying system uses the parser to transform these definitions into commands that implement this behavior, or "the how" using the system's components. -RASA.ai is an another well-known Open Source Conversational AI Framework. Their approach to defining the domain model and behavior of the goal-oriented bots is quite simple for building simple goal-oriented bots. In this section you will learn how to use key parts of RASA DSLs (configuration files) to build your own goal-oriented skill based on the DeepPavlov's Go-Bot framework. +RASA.ai is another well-known Open Source Conversational AI Framework. Their approach to defining the domain model +and behavior of the goal-oriented bots is quite simple for building simple goal-oriented bots. +In this section you will learn how to use key parts of RASA DSLs (configuration files) +to build your own goal-oriented skill based on DeepPavlov's Go-Bot framework.
- - -While there are several configuration files used by the RASA platform, each with their own corresponding DSL (mostly re-purposed Markdown and YAML), for now only three essential files: ``stories.md``, +While there are several configuration files used by the RASA platform, each with their own +corresponding DSL (mostly re-purposed Markdown and YAML), for now only three essential files: ``stories.md``, ``nlu.md``, ``domain.yml`` are supported by the DeepPavlov Go-Bot Framework. -These files allows you to define user stories that match intents and bot actions, intents with slots and entities, as well as the training data for the NLU components. +These files allow you to define user stories that match intents and bot actions, intents with slots and entities, +as well as the training data for the NLU components. .. note:: As mentioned in our `blog post `__, **this is the very beginning of our work** focused on supporting RASA DSLs as a way to configure DeepPavlov-based goal-oriented chatbots.
@@ -220,9 +231,10 @@ How Do I: Build Go-Bot Skill with RASA DSLs (v1) Tutorials ^^^^^^^^^ -We encourage you to explore the tutorials below to get better understanding of how to build basic and more advanced goal-oriented skills with these RASA DSLs: +We encourage you to explore the tutorials below to get better understanding of how to build basic and more advanced +goal-oriented skills with these RASA DSLs: -* `Original Tutorial Notebook Featuring Simple and DSTC2-based Skills `_ +* `Original Tutorial Notebook Featuring Simple and DSTC2-based Skills `_ * `Tutorial Notebook Featuring Harvesters Maintenance Go-Bot Skill from Deepy 3000 Demo `_ @@ -230,7 +242,8 @@ We encourage you to explore the tutorials below to get better understanding of h How Do I: Integrate Go-Bot-based Goal-Oriented Skill into DeepPavlov Deepy ============================================================================ -To integrate your Go-Bot-based goal-oriented skill into your Multiskill AI Assistant built using DeepPavlov Conversational AI Stack, follow the following instructions: +To integrate your Go-Bot-based goal-oriented skill into your Multiskill AI Assistant built +using DeepPavlov Conversational AI Stack, follow the following instructions: 1. Clone `Deepy repository `_ 2. Replace ``docker-compose.yml`` in the root of the repository and ``pipeline_conf.json`` in the ``/agent/`` subdirectory with the corresponding files from the `deepy_gobot_base `_ **Deepy Distribution** @@ -252,150 +265,7 @@ Tutorials Follow this tutorial to experiment with the Form-Filling functionality in Go-Bot-based goal-oriented skills built using RASA DSLs (v1): -* `Tutorial Notebook Featuring Basic Form-Filling `_ - - -DSTC2 Format Support -********************** - -Overview -========== - -The DeepPavlov Go-Bot Framework is based on [1]_ which introduces -Hybrid Code Networks (HCNs) that combine an RNN with domain-specific -knowledge and system action templates. 
Originally, the DSTC2 format was used for the dataset to train a Go-Bot-based goal-oriented skills upon. - -|alt text| **Diagram 1.** Dotted lines correspond to unrequired -(optional) modules, black squares to trained models, trapezes are -modules that depend on a dataset and must be provided by software -developer. - -Here is a simple example of interaction with a trained goal-oriented skill -(available for download): - -.. note:: - - Models that rely on fasttext word embeddings will require 8.5 GB of disk space, those that use only one-hot encoding of words will require less than 300Mb of disk space. - -.. code:: bash - - >> Hello, welcome to the Cambridge restaurant system. You can ask for restaurants by area, price range or food type. How may I help you? - x::hi, i want cheap food - 2018-05-16 12:28:48.459 INFO in 'deeppavlov.models.go_bot.bot'['bot'] at line 346: Made api_call with {'pricerange': 'cheap'}, got 22 results. - >> The lucky star serves chinese food in the cheap price range. - x::is there anything with french food? - 2018-05-16 12:29:02.795 INFO in 'deeppavlov.models.go_bot.bot'['bot'] at line 346: Made api_call with {'pricerange': 'cheap', 'food': 'french'}, got 0 results. - >> Sorry there is no french restaurant in the cheap price range. - x::ok, maybe there is something in expensive pricerange? - 2018-05-16 12:29:23.337 INFO in 'deeppavlov.models.go_bot.bot'['bot'] at line 346: Made api_call with {'pricerange': 'expensive', 'food': 'french'}, got 2 results. - >> Cote serves french food in the expensive price range. - x::in what part of town is it? - 2018-05-16 12:29:31.193 INFO in 'deeppavlov.models.go_bot.bot'['bot'] at line 346: Made api_call with {'pricerange': 'expensive', 'food': 'french'}, got 2 results. - >> Restaurant two two is a nice place in the north of town serving tasty french food. - x::ok, give me their phone number - >> The phone number of restaurant two two is 01223 351880. 
- x::address and post code too - >> Restaurant two two is on 22 chesterton road chesterton, c.b 4, 3 a.x. - x::ok, thank you! - >> Restaurant two two serves french food in the expensive price range. - x::and if i wanted an italian restuarant? - 2018-05-16 12:30:15.98 INFO in 'deeppavlov.models.go_bot.bot'['bot'] at line 346: Made api_call with {'pricerange': 'expensive', 'food': 'italian'}, got 4 results. - >> Frankie and bennys serves italian food in the expensive price range. - x::more cheaply priced? - 2018-05-16 12:30:36.560 INFO in 'deeppavlov.models.go_bot.bot'['bot'] at line 346: Made api_call with {'pricerange': 'cheap', 'food': 'italian'}, got 5 results. - >> Da vinci pizzeria serves italian food in the cheap price range. - x::ok, give me their phone number and address, please - >> The phone number of da vinci pizzeria is 01223 351707 and it is on 20 milton road chesterton. - x::thanks - >> Da vinci pizzeria serves italian food in the cheap price range. - x::bye - >> You are welcome! - - -Quick Demo -============ - -To quickly try out the Go-Bot capabilities you can use one of the two available pretrained models for DSTC2 dataset (English). Check them out by running this code: - -.. code:: python - - from deeppavlov import build_model, configs - - bot1 = build_model(configs.go_bot.gobot_dstc2, download=True) - - bot1(['hi, i want restaurant in the cheap pricerange']) - bot1(['bye']) - - bot2 = build_model(configs.go_bot.gobot_dstc2_best, download=True) - - bot2(['hi, i want chinese restaurant']) - bot2(['bye']) - -If some required packages are missing, install all the requirements by running in command line: - -.. code:: bash - - python -m deeppavlov install gobot_dstc2 - -How Do I: Build Go-Bot with DSTC2 -=================================== -DSTC is a set of competitions originally known as "Dialog State Tracking Challenges" (DSTC, for short). First challenge -was organized in 2012-2013. 
Starting as an initiative to provide a common testbed for the task of Dialog State Tracking, -the first Dialog State Tracking Challenge (DSTC) was organized in 2013, followed by DSTC2&3 in 2014, DSTC4 in 2015, -and DSTC5 in 2016. Given the remarkable success of the first five editions, and understanding both, the complexity -of the dialog phenomenon and the interest of the research community in a wider variety of dialog related problems, -the DSTC rebranded itself as "Dialog System Technology Challenges" for its sixth edition. Then, DSTC6 and DSTC7 have -been completed in 2017 and 2018, respectively. - -DSTC-2 released a large number of training dialogs related to restaurant search. Compared to DSTC (which was in the bus -timetables domain), DSTC 2 introduced changing user goals, tracking 'requested slots' as well as the new Restaurants domain. - -Historically, DeepPavlov's Go-Bot used this DSTC-2 approach to defining domain model and behavior of the goal-oriented bots. -In this section you will learn how to use this approach to build a DSTC-2-based Go-Bot. - -Requirements -^^^^^^^^^^^^ - -**TO TRAIN** a go\_bot model you should have: - -1. (*optional, but recommended*) pretrained named entity recognition model (NER) - - - config :config:`configs/ner/slotfill_dstc2.json ` is recommended -2. (*optional, but recommended*) pretrained intents classifier model - - - config :config:`configs/classifiers/intents_dstc2_big.json ` is recommended -3. (*optional*) any sentence (word) embeddings for english - - - fasttext embeddings can be downloaded - - - via link https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.zip - - or using deeppavlov with :code:`python3 -m deeppavlov download `, - where ```` is one of the :config:`provided config files `. - -**TO INFER** from a go\_bot model you should **additionally** have: - -4. pretrained vocabulary of dataset utterance tokens - - - it is trained in the same config as go\_bot model - -5. 
pretrained goal-oriented bot model - - - config :config:`configs/go_bot/gobot_dstc2.json ` is recommended - - ``slot_filler`` section of go\_bot's config should match NER's configuration - - ``intent_classifier`` section of go\_bot's config should match classifier's configuration - -Configs -^^^^^^^ - -For a working exemplary config see -:config:`configs/go_bot/gobot_dstc2.json ` (model without embeddings). - -A minimal model without ``slot_filler``, ``intent_classifier`` and ``embedder`` is configured -in :config:`configs/go_bot/gobot_dstc2_minimal.json `. - -The best state-of-the-art model (with attention mechanism, relies on ``embedder`` and -does not use bag-of-words) is configured in -:config:`configs/go_bot/gobot_dstc2_best.json `. +* `Tutorial Notebook Featuring Basic Form-Filling `_ Usage example ^^^^^^^^^^^^^ @@ -417,12 +287,7 @@ You can also train your own model by running: The ``-d`` parameter downloads - data required to train your model (embeddings, etc.); - - a pretrained model if available (provided not for all configs). - -**Pretrained for DSTC2** models are available for - - - :config:`configs/go_bot/gobot_dstc2.json ` and - - :config:`configs/go_bot/gobot_dstc2.json `. + - a pretrained model if available (provided not for all configs). After downloading required files you can use the configs in your python code. To infer from a pretrained model with config path equal to ````: @@ -442,132 +307,9 @@ To infer from a pretrained model with config path equal to ````: Config parameters ^^^^^^^^^^^^^^^^^ -To configure your own pipelines that contain a ``"go_bot"`` component, refer to documentation for :class:`~deeppavlov.models.go_bot.bot.GoalOrientedBot` and :class:`~deeppavlov.models.go_bot.network.GoalOrientedBotNetwork` classes. - -Datasets -======== - -.. _dstc2_dataset: - -DSTC2 -^^^^^ - -The Hybrid Code Network model was trained and evaluated on a modification of a dataset from Dialogue State Tracking -Challenge 2 [2]_. 
The modifications were as follows: - -- **new turns with api calls** - - - added api\_calls to restaurant database (example: - ``{"text": "api_call area=\"south\" food=\"dontcare\" pricerange=\"cheap\"", "dialog_acts": ["api_call"]}``) - -- **new actions** - - - bot dialog actions were concatenated into one action (example: - ``{"dialog_acts": ["ask", "request"]}`` -> - ``{"dialog_acts": ["ask_request"]}``) - - if a slot key was associated with the dialog action, the new act - was a concatenation of an act and a slot key (example: - ``{"dialog_acts": ["ask"], "slot_vals": ["area"]}`` -> - ``{"dialog_acts": ["ask_area"]}``) - -- **new train/dev/test split** - - - original dstc2 consisted of three different MDP policies, the original train - and dev datasets (consisting of two policies) were merged and - randomly split into train/dev/test - -- **minor fixes** - - - fixed several dialogs, where actions were wrongly annotated - - uppercased first letter of bot responses - - unified punctuation for bot responses - -See :class:`deeppavlov.dataset_readers.dstc2_reader.DSTC2DatasetReader` for implementation. - -Your data -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Dialogs -""""""" - -If your model uses DSTC2 and relies on ``"dstc2_reader"`` -(:class:`~deeppavlov.dataset_readers.dstc2_reader.DSTC2DatasetReader`), -all needed files, if not present in the -:attr:`DSTC2DatasetReader.data_path ` directory, -will be downloaded from web. - -If your model needs to be trained on different data, you have several ways of -achieving that (sorted by increase in the amount of code): - -1. Use ``"dialog_iterator"`` in dataset iterator config section and - ``"dstc2_reader"`` in dataset reader config section - (**the simplest, but not the best way**): - - - set ``dataset_reader.data_path`` to your data directory; - - your data files should have the same format as expected in - :meth:`DSTC2DatasetReader.read() ` - method. - -2. 
Use ``"dialog_iterator"`` in dataset iterator config section and - ``"your_dataset_reader"`` in dataset reader config section (**recommended**): - - - clone :class:`deeppavlov.dataset_readers.dstc2_reader.DSTC2DatasetReader` to - ``YourDatasetReader``; - - register as ``"your_dataset_reader"``; - - rewrite so that it implements the same interface as the origin. - Particularly, ``YourDatasetReader.read()`` must have the same output as - :meth:`DSTC2DatasetReader.read() `. - - - ``train`` — training dialog turns consisting of tuples: - - - first tuple element contains first user's utterance info - (as dictionary with the following fields): - - - ``text`` — utterance string - - ``intents`` — list of string intents, associated with user's utterance - - ``db_result`` — a database response *(optional)* - - ``episode_done`` — set to ``true``, if current utterance is - the start of a new dialog, and ``false`` (or skipped) otherwise *(optional)* - - - second tuple element contains second user's response info - - - ``text`` — utterance string - - ``act`` — an act, associated with the user's utterance - - - ``valid`` — validation dialog turns in the same format - - ``test`` — test dialog turns in the same format - -3. Use your own dataset iterator and dataset reader (**if 2. doesn't work for you**): - - - your ``YourDatasetIterator.gen_batches()`` class method output should match the - input format for chainer from - :config:`configs/go_bot/gobot_dstc2.json `. - -Templates -""""""""" - -You should provide a maping from actions to text templates in the format - -.. code:: text - - action1template1 - action2template2 - ... - actionNtemplateN - -where filled slots in templates should start with "#" and mustn't contain whitespaces. - -For example, - -.. code:: text - - bye You are welcome! - canthear Sorry, I can't hear you. - expl-conf_area Did you say you are looking for a restaurant in the #area of town? 
- inform_area+inform_food+offer_name #name is a nice place in the #area of town serving tasty #food food. - -It is recommended to use ``"DefaultTemplate"`` value for ``template_type`` parameter. - +To configure your own pipelines that contain a ``"go_bot"`` component, refer to documentation +for :class:`~deeppavlov.models.go_bot.bot.GoalOrientedBot` +and :class:`~deeppavlov.models.go_bot.network.GoalOrientedBotNetwork` classes. Database (Optional) ===================== @@ -579,62 +321,5 @@ Otherwise, you should 1. provide sql table with requested items or 2. construct such table from provided in train samples ``db_result`` items. - This can be done with the following script: - - - .. code:: bash - - python -m deeppavlov train configs/go_bot/database_.json - - where ``configs/go_bot/database_.json`` is a copy - of ``configs/go_bot/database_dstc2.json`` with configured - ``save_path``, ``primary_keys`` and ``unknown_value``. - -Comparison -************ - -Scores for different modifications of our bot model and comparison with existing benchmarks: - -+-----------------------------------+------+------------------------------------------------------------------------------------+---------------+-----------+---------------+ -| Dataset | Lang | Model | Metric | Test | Downloads | -+===================================+======+====================================================================================+===============+===========+===============+ -| `DSTC 2`_ | En | :config:`basic bot ` | Turn Accuracy | 0.380 | 10 Mb | -+ (:ref:`modified `) + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | :config:`bot with slot filler ` | | 0.542 | 400 Mb | -+ + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | :config:`bot with slot filler, intents & attention ` | | **0.553** | 8.5 Gb | -+-----------------------------------+ 
+------------------------------------------------------------------------------------+ +-----------+---------------+ -| `DSTC 2`_ | | Bordes and Weston (2016) [3]_ | | 0.411 | -- | -+ + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | Eric and Manning (2017) [4]_ | | 0.480 | -- | -+ + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | Perez and Liu (2016) [5]_ | | 0.487 | -- | -+ + +------------------------------------------------------------------------------------+ +-----------+---------------+ -| | | Williams et al. (2017) [1]_ | | **0.556** | -- | -+-----------------------------------+------+------------------------------------------------------------------------------------+---------------+-----------+---------------+ - -.. _`DSTC 2`: http://camdial.org/~mh521/dstc/ - -References -************ - -.. [1] `Jason D. Williams, Kavosh Asadi, Geoffrey Zweig "Hybrid Code - Networks: practical and efficient end-to-end dialog control with - supervised and reinforcement learning" – - 2017 `_ - -.. [2] `Dialog State Tracking Challenge 2 - dataset `_ - -.. [3] `Antoine Bordes, Y-Lan Boureau & Jason Weston "Learning end-to-end - goal-oriented dialog" - 2017 `_ - -.. [4] `Mihail Eric, Christopher D. Manning "A Copy-Augmented - Sequence-to-Sequence Architecture Gives Good Performance on - Task-Oriented Dialogue" - 2017 `_ - -.. [5] `Fei Liu, Julien Perez "Gated End-to-end Memory Networks" - - 2016 `_ - .. |alt text| image:: ../../_static/gobot_diagram.png diff --git a/docs/features/skills/odqa.rst b/docs/features/skills/odqa.rst index d71c1f1504..199e65f94b 100644 --- a/docs/features/skills/odqa.rst +++ b/docs/features/skills/odqa.rst @@ -121,10 +121,6 @@ There are several ODQA configs available: | | of TF-IDF ranker and reader. Searches for an | | | answer in ``enwiki20180211`` Wikipedia dump. 
| +----------------------------------------------------------------------------------------+-------------------------------------------------+ -|:config:`en_odqa_infer_enwiki20161221 ` | Basic config for **English** language. Consists | -| | of TF-IDF ranker and reader. Searches for an | -| | answer in ``enwiki20161221`` Wikipedia dump. | -+----------------------------------------------------------------------------------------+-------------------------------------------------+ |:config:`ru_odqa_infer_wiki ` | Basic config for **Russian** language. Consists | | | of TF-IDF ranker and reader. Searches for an | | | answer in ``ruwiki20180401`` Wikipedia dump. | @@ -140,23 +136,25 @@ Comparison Scores for **ODQA** skill: -+-------------------------------------------------------------------------------------+------+----------------------+----------------+---------------------+---------------------+ -| | | | | Ranker@5 | Ranker@25 | -| | | | +----------+----------+-----------+---------+ -| Model | Lang | Dataset | WikiDump | F1 | EM | F1 | EM | -+-------------------------------------------------------------------------------------+------+----------------------+----------------+----------+----------+-----------+---------+ -|:config:`DeppPavlov ` | | | enwiki20180211 | 35.89 | 29.21 | 39.96 | 32.64 | -+-------------------------------------------------------------------------------------+ + +----------------+----------+----------+-----------+---------+ -|:config:`DeepPavlov ` | En | SQuAD (dev) | | **37.83**|**31.26** | 41.86 | 34.73 | -+-------------------------------------------------------------------------------------+ + + +----------+----------+-----------+---------+ -|`DrQA`_ | | | | \- | 27.1 | \- | \- | -+-------------------------------------------------------------------------------------+ + + +----------+----------+-----------+---------+ -|`R3`_ | | | enwiki20161221 | 37.5 | 29.1 | \- | \- | 
-+-------------------------------------------------------------------------------------+------+----------------------+----------------+----------+----------+-----------+---------+ -|:config:`DeepPavlov with RuBERT reader ` | | | | **42.02**|**29.56** | \- | \- | -+-------------------------------------------------------------------------------------+ Ru + SDSJ Task B (dev) + ruwiki20180401 +----------+----------+-----------+---------+ -|:config:`DeepPavlov ` | | | | 28.56 | 18.17 | \- | \- | -+-------------------------------------------------------------------------------------+------+----------------------+----------------+----------+----------+-----------+---------+ ++----------------------------------------------------------------------------------------------------------------------------------+------+----------------------+----------------+---------------------+---------------------+ +| | | | | Ranker@5 | Ranker@25 | +| | | | +----------+----------+-----------+---------+ +| Model | Lang | Dataset | WikiDump | F1 | EM | F1 | EM | ++----------------------------------------------------------------------------------------------------------------------------------+------+----------------------+----------------+----------+----------+-----------+---------+ +|:config:`DeppPavlov ` | | | enwiki20180211 | 35.89 | 29.21 | 39.96 | 32.64 | ++----------------------------------------------------------------------------------------------------------------------------------+ + +----------------+----------+----------+-----------+---------+ +|`DeepPavlov `_ [*]_ | En | SQuAD (dev) | | **37.83**|**31.26** | 41.86 | 34.73 | ++----------------------------------------------------------------------------------------------------------------------------------+ + + +----------+----------+-----------+---------+ +|`DrQA`_ | | | | \- | 27.1 | \- | \- | ++----------------------------------------------------------------------------------------------------------------------------------+ + + 
+----------+----------+-----------+---------+ +|`R3`_ | | | enwiki20161221 | 37.5 | 29.1 | \- | \- | ++----------------------------------------------------------------------------------------------------------------------------------+------+----------------------+----------------+----------+----------+-----------+---------+ +|:config:`DeepPavlov with RuBERT reader ` | | | | **42.02**|**29.56** | \- | \- | ++----------------------------------------------------------------------------------------------------------------------------------+ Ru + SDSJ Task B (dev) + ruwiki20180401 +----------+----------+-----------+---------+ +|:config:`DeepPavlov ` | | | | 28.56 | 18.17 | \- | \- | ++----------------------------------------------------------------------------------------------------------------------------------+------+----------------------+----------------+----------+----------+-----------+---------+ + +.. [*] Model is removed in DeepPavlov 0.18.0. Use older library versions to run the model. EM stands for "exact-match accuracy". Metrics are counted for top 5 and top 25 documents returned by retrieval module. diff --git a/docs/index.rst b/docs/index.rst index 723d7c8539..d6ca9489b3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -35,7 +35,6 @@ Welcome to DeepPavlov's documentation! Morphological Tagger Named Entity Recognition Neural Ranking - Slot filling Speech recognition and synthesis Spelling Correction Syntactic Parser diff --git a/docs/integrations/rest_api.rst b/docs/integrations/rest_api.rst index 12245ab073..422321024d 100644 --- a/docs/integrations/rest_api.rst +++ b/docs/integrations/rest_api.rst @@ -72,7 +72,7 @@ to the model by ``server_utils`` label in ``metadata`` section of the model config. Value of ``server_utils`` label from model config should match with properties key from ``model_defaults`` section of ``server_config.json``. 
-For example, adding ``metadata/server_utils`` key to ``go_bot/gobot_dstc2.json`` +For example, adding ``metadata/server_utils`` key to ``go_bot/gobot_dstc2_minimal.json`` with value *GoalOrientedBot* will initiate the search of *GoalOrientedBot* tag at ``model_defaults`` section of ``server_config.json``. Therefore, if this section is present, all parameters with non empty (i.e. not ``""``, diff --git a/docs/integrations/socket_api.rst b/docs/integrations/socket_api.rst index 48214a6196..eb827a83fc 100644 --- a/docs/integrations/socket_api.rst +++ b/docs/integrations/socket_api.rst @@ -42,7 +42,7 @@ to the model by ``server_utils`` label in ``metadata`` section of the model config. Value of ``server_utils`` label from model config should match with properties key from ``model_defaults`` section of ``server_config.json``. -For example, adding ``metadata/server_utils`` key to ``go_bot/gobot_dstc2.json`` +For example, adding ``metadata/server_utils`` key to ``go_bot/gobot_dstc2_minimal.json`` with value *GoalOrientedBot* will initiate the search of *GoalOrientedBot* tag at ``model_defaults`` section of ``server_config.json``. Therefore, if this section is present, all parameters with non empty (i.e. not ``""``, diff --git a/docs/intro/choose_framework.rst b/docs/intro/choose_framework.rst index aa208cb7c0..b1ed1f693f 100644 --- a/docs/intro/choose_framework.rst +++ b/docs/intro/choose_framework.rst @@ -23,8 +23,8 @@ The full list of available models is :doc:`here `. python -m deeppavlov install -d - where ```` is path to the chosen model's config file (e.g. ``deeppavlov/configs/ner/slotfill_dstc2.json``) - or just the file name without the `.json` extension (e.g. ``slotfill_dstc2``); + where ```` is path to the chosen model's config file (e.g. ``deeppavlov/configs/classifiers/insults_kaggle_bert.json``) + or just the file name without the `.json` extension (e.g. 
``insults_kaggle_bert``); ``-d`` downloads required data -- pretrained model files and embeddings (optional). Trainer diff --git a/docs/intro/configuration.rst b/docs/intro/configuration.rst index 9f873c5e9c..429180abd0 100644 --- a/docs/intro/configuration.rst +++ b/docs/intro/configuration.rst @@ -83,7 +83,7 @@ from ``metadata.variables`` element: { "in": ["x"], "out": ["y_predicted"], - "config_path": "{CONFIGS_PATH}/classifiers/intents_snips.json" + "config_path": "{CONFIGS_PATH}/classifiers/insults_kaggle_bert.json" } ], "out": ["y_predicted"] @@ -177,10 +177,7 @@ and ``train``: Simplified version of training pipeline contains two elements: ``dataset`` and ``train``. The ``dataset`` element -currently can be used for train from classification data in ``csv`` and ``json`` formats. You can find complete examples -of how to use simplified training pipeline in -:config:`intents_sample_csv.json ` and -:config:`intents_sample_json.json ` config files. +currently can be used for train from classification data in ``csv`` and ``json`` formats. Train Parameters diff --git a/docs/intro/quick_start.rst b/docs/intro/quick_start.rst index 45c3859ab4..7d0c8d5469 100644 --- a/docs/intro/quick_start.rst +++ b/docs/intro/quick_start.rst @@ -27,8 +27,8 @@ Before making choice of an interface, install model's package requirements python -m deeppavlov install * where ```` is path to the chosen model's config file (e.g. - ``deeppavlov/configs/ner/slotfill_dstc2.json``) or just name without - `.json` extension (e.g. ``slotfill_dstc2``) + ``deeppavlov/configs/classifiers/insults_kaggle_bert.json``) or just name without + `.json` extension (e.g. ``insults_kaggle_bert``) Command line interface (CLI) @@ -252,7 +252,7 @@ related to. 
+----------+------------------------------------------------------------------------------------------------+-------------------------------------------+ | Language | DeepPavlov config | Demo | +==========+================================================================================================+===========================================+ - | En | :config:`insults_kaggle_conv_bert ` | https://demo.deeppavlov.ai/#/en/insult | + | En | :config:`insults_kaggle_bert ` | https://demo.deeppavlov.ai/#/en/insult | +----------+------------------------------------------------------------------------------------------------+-------------------------------------------+ diff --git a/examples/Pseudo-labeling for classification.ipynb b/examples/Pseudo-labeling for classification.ipynb deleted file mode 100644 index 8d01922069..0000000000 --- a/examples/Pseudo-labeling for classification.ipynb +++ /dev/null @@ -1,210 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from pathlib import Path\n", - "import numpy as np\n", - "from copy import deepcopy\n", - "import pandas as pd\n", - "\n", - "from deeppavlov.core.commands.train import read_data_by_config, train_evaluate_model_from_config\n", - "from deeppavlov.core.commands.infer import interact_model, build_model\n", - "from deeppavlov.core.commands.utils import expand_path, parse_config\n", - "from deeppavlov.core.common.params import from_params\n", - "from deeppavlov.core.common.errors import ConfigError" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read unlabelled data for label propagation\n", - "def read_unlabelled_data(UNLABELLED_DATA_PATH):\n", - " with open(UNLABELLED_DATA_PATH, \"r\") as f:\n", - " unlabelled_data = f.read().splitlines()\n", - " unlabelled_data = [x for x in unlabelled_data if x != '']\n", - " return unlabelled_data" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "def make_pl_config(CONFIG_PATH):\n", - " config_path_pl = Path(CONFIG_PATH).parent / Path(Path(CONFIG_PATH).stem + \"_pl.json\")\n", - "\n", - " with open(CONFIG_PATH, \"r\") as f:\n", - " config = json.load(f)\n", - " \n", - " config_pl = deepcopy(config)\n", - " config_pl[\"dataset_reader\"][\"train\"] = Path(config_pl[\"dataset_reader\"].get(\"train\", \"train.csv\")).stem + \"_pl.csv\"\n", - " \n", - " with open(config_path_pl, \"w\") as f:\n", - " json.dump(config_pl, f, indent=2)\n", - " \n", - " return config, config_pl" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def save_extended_data(config, samples, labels, new_config = None):\n", - " train_data = read_data_by_config(deepcopy(config))\n", - " \n", - " for i in range(len(samples)):\n", - " train_data[\"train\"].append((samples[i], labels[i]))\n", - " df = pd.DataFrame(train_data[\"train\"], \n", - " columns=[config[\"dataset_reader\"][\"x\"], \n", - " config[\"dataset_reader\"][\"y\"]])\n", - " df[config[\"dataset_reader\"][\"y\"]] = df[config[\"dataset_reader\"][\"y\"]].apply(\n", - " lambda x: config[\"dataset_reader\"].get(\"class_sep\", \",\").join(x))\n", - " \n", - " if new_config is not None:\n", - " config = new_config\n", - " file = expand_path(Path(config[\"dataset_reader\"][\"data_path\"]) / \n", - " Path(config[\"dataset_reader\"][\"train\"]))\n", - "\n", - " if config[\"dataset_reader\"].get(\"format\", \"csv\") == \"csv\":\n", - " keys = ('sep', 'header', 'names')\n", - " df.to_csv(file, \n", - " index=False,\n", - " sep=config[\"dataset_reader\"].get(\"sep\", \",\")\n", - " )\n", - " elif config[\"dataset_reader\"].get(\"format\", \"csv\") == \"json\":\n", - " keys = ('orient', 'lines')\n", - " df.to_json(file, \n", - " index=False,\n", - " orient=config[\"dataset_reader\"].get(\"orient\", None),\n", 
- " lines=config[\"dataset_reader\"].get(\"lines\", False)\n", - " )\n", - " else:\n", - " raise ConfigError(\"Can not work with current data format\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# manually given parameters for pseudo-labeling\n", - "\n", - "# path to config file\n", - "CONFIG_PATH = \"../deeppavlov/configs/classifiers/convers_vs_info.json\"\n", - "# read config, compose new one, save it\n", - "config, config_pl = make_pl_config(CONFIG_PATH)\n", - "config, config_pl = parse_config(config), parse_config(config_pl)\n", - "config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# path to file with unlabelled data\n", - "UNLABELLED_DATA_PATH = expand_path(Path(config[\"dataset_reader\"][\"data_path\"])) / Path(\"question_L6.txt\")\n", - "# number of samples that are going to be labelled during one iteration of label propagation\n", - "ONE_ITERATION_PORTION = 100\n", - "# number of iterations\n", - "N_ITERATIONS = 10\n", - "CLASSES_VOCAB_ID_IN_PIPE = 0\n", - "CONFIDENT_PROBA = 0.9" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# read unlabelled dataset\n", - "unlabelled_data = read_unlabelled_data(UNLABELLED_DATA_PATH)\n", - "\n", - "# save initial dataset as extended\n", - "save_extended_data(config, [], [], new_config=config_pl)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "available_unlabelled_ids = np.arange(len(unlabelled_data))\n", - "\n", - "np.random.seed(42)\n", - "\n", - "for i in range(N_ITERATIONS):\n", - " samples = []\n", - " labels = []\n", - " \n", - " ids_to_label = available_unlabelled_ids[\n", - " np.random.randint(low=0, \n", - " high=len(available_unlabelled_ids), \n", - " size=ONE_ITERATION_PORTION)]\n", - " 
available_unlabelled_ids = np.delete(available_unlabelled_ids, ids_to_label)\n", - " train_evaluate_model_from_config(deepcopy(config_pl))\n", - " model = build_model(deepcopy(config_pl))\n", - " classes = np.array(list(from_params(\n", - " deepcopy(config_pl[\"chainer\"][\"pipe\"][CLASSES_VOCAB_ID_IN_PIPE])).keys()))\n", - "\n", - " for j, sample_id in enumerate(ids_to_label):\n", - " prediction = model([unlabelled_data[sample_id]])[0]\n", - " if len(np.where(np.array(prediction) > CONFIDENT_PROBA)[0]):\n", - " samples.append(unlabelled_data[sample_id])\n", - " labels.append(classes[np.where(np.array(prediction) > CONFIDENT_PROBA)])\n", - " \n", - " print(\"Iteration {}: add {} samples to train dataset\".format(i, len(samples)))\n", - " save_extended_data(config_pl, samples, labels)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "accelerator": "GPU", - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index e594ae7003..0000000000 --- a/examples/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Examples & Tutorials - -* Tutorial for simple bot [[notebook]](gobot_tutorial.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/gobot_tutorial.ipynb) - -* Tutorial for advanced goal-oriented bot [[notebook]](gobot_extended_tutorial.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/gobot_extended_tutorial.ipynb) - -* Tutorial for intent classifier [[notebook]](classification_tutorial.ipynb) 
[[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/classification_tutorial.ipynb) - -* Morphotagger model usage example [[notebook]](morphotagger_example.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/morphotagger_example.ipynb) - -* Pseudo-labeling for classication task [[notebook]](Pseudo-labeling%20for%20classification.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/Pseudo-labeling%20for%20classification.ipynb) - -* Optimal learning rate search in DeepPavlov [[notebook]](super_convergence_tutorial.ipynb) [[colab]](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/super_convergence_tutorial.ipynb) - -# Links - -More examples are available: -* [github.com/deepmipt/dp_tutorials/](https://github.com/deepmipt/dp_tutorials) -* [github.com/deepmipt/db_notebooks/](https://github.com/deepmipt/dp_notebooks). diff --git a/examples/classification_tutorial.ipynb b/examples/classification_tutorial.ipynb deleted file mode 100644 index e7792ccd1e..0000000000 --- a/examples/classification_tutorial.ipynb +++ /dev/null @@ -1,2961 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## You can also run the notebook in [COLAB](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/classification_tutorial.ipynb)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "!pip3 install deeppavlov" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Classification on DeepPavlov" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Task**:\n", - "Intent recognition on SNIPS dataset: https://github.com/snipsco/nlu-benchmark/tree/master/2017-06-custom-intent-engines that has already been recomposed to `csv` format and can be downloaded from http://files.deeppavlov.ai/datasets/snips_intents/train.csv\n", - "\n", - "FastText English word embeddings ~8Gb: http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Plan of the notebook with documentation links:\n", - "\n", - "1. [Data aggregation](#Data-aggregation)\n", - " * [DatasetReader](#DatasetReader): [docs link](https://deeppavlov.readthedocs.io/en/latest/apiref/dataset_readers.html)\n", - " * [DatasetIterator](#DatasetIterator): [docs link](https://deeppavlov.readthedocs.io/en/latest/apiref/dataset_iterators.html)\n", - "2. [Data preprocessing](#Data-preprocessing): [docs link](https://deeppavlov.readthedocs.io/en/latest/components/data_processors.html)\n", - " * [Lowercasing](#Lowercasing)\n", - " * [Tokenization](#Tokenization)\n", - " * [Vocabulary](#Vocabulary)\n", - "3. [Featurization](#Featurization): [docs link](https://deeppavlov.readthedocs.io/en/latest/components/data_processors.html), [pre-trained embeddings link](https://deeppavlov.readthedocs.io/en/latest/intro/pretrained_vectors.html)\n", - " * [Bag-of-words embedder](#Bag-of-words)\n", - " * [TF-IDF vectorizer](#TF-IDF-Vectorizer)\n", - " * [GloVe embedder](#GloVe-embedder)\n", - " * [Mean GloVe embedder](#Mean-GloVe-embedder)\n", - " * [GloVe weighted by TF-IDF embedder](#GloVe-weighted-by-TF-IDF-embedder)\n", - "4. 
[Models](#Models): [docs link](https://deeppavlov.readthedocs.io/en/latest/components/classifiers.html)\n", - " * [Building models in python](#Models-in-python)\n", - " - [Sklearn component classifiers](#SklearnComponent-classifier-on-Tfidf-features-in-python)\n", - " - [Keras classification model on GloVe emb](#KerasClassificationModel-on-GloVe-embeddings-in-python)\n", - " - [Sklearn component classifier on GloVe weighted emb](#SklearnComponent-classifier-on-GloVe-weighted-by-TF-IDF-embeddings-in-python)\n", - " * [Building models from configs](#Models-from-configs)\n", - " - [Sklearn component classifiers](#SklearnComponent-classifier-on-Tfidf-features-from-config)\n", - " - [Keras classification model](#KerasClassificationModel-on-fastText-embeddings-from-config)\n", - " - [Sklearn component classifier on GloVe weighted emb](#SklearnComponent-classifier-on-GloVe-weighted-by-TF-IDF-embeddings-from-config)\n", - " * [Bonus: pre-trained CNN model in DeepPavlov](#Bonus:-pre-trained-CNN-model-in-DeepPavlov)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data aggregation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First of all, let's download and look into data we will work with." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:21.101 INFO in 'deeppavlov.core.data.utils'['utils'] at line 63: Downloading from http://files.deeppavlov.ai/datasets/snips_intents/train.csv to snips/train.csv\n", - "100%|██████████| 981k/981k [00:00<00:00, 63.5MB/s]\n" - ] - } - ], - "source": [ - "from deeppavlov.core.data.utils import simple_download\n", - "\n", - "#download train data file for SNIPS\n", - "simple_download(url=\"http://files.deeppavlov.ai/datasets/snips_intents/train.csv\", \n", - " destination=\"./snips/train.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "text,intents\r\n", - "Add another song to the Cita RomГЎntica playlist. ,AddToPlaylist\r\n", - "add clem burke in my playlist Pre-Party R&B Jams,AddToPlaylist\r\n", - "Add Live from Aragon Ballroom to Trapeo,AddToPlaylist\r\n", - "add Unite and Win to my night out,AddToPlaylist\r\n", - "Add track to my Digster Future Hits,AddToPlaylist\r\n", - "add the piano bar to my Cindy Wilson,AddToPlaylist\r\n", - "Add Spanish Harlem Incident to cleaning the house,AddToPlaylist\r\n", - "add The Greyest of Blue Skies in Indie EspaГ±ol my playlist,AddToPlaylist\r\n", - "Add the name kids in the street to the plylist New Indie Mix,AddToPlaylist\r\n", - "add album radar latino,AddToPlaylist\r\n", - "Add Tranquility to the Latin Pop Rising playlist. ,AddToPlaylist\r\n", - "Add d flame to the Dcode2016 playlist.,AddToPlaylist\r\n", - "Add album to my fairy tales,AddToPlaylist\r\n", - "I need another artist in the New Indie Mix playlist. ,AddToPlaylist\r\n" - ] - } - ], - "source": [ - "! 
head -n 15 snips/train.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### DatasetReader\n", - "\n", - "Read data using `BasicClassificationDatasetReader` из DeepPavlov" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "from deeppavlov.dataset_readers.basic_classification_reader import BasicClassificationDatasetReader" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:23.376 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 96: Cannot find snips/valid.csv file\n", - "2019-02-12 12:14:23.376 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 96: Cannot find snips/test.csv file\n" - ] - } - ], - "source": [ - "# read data from particular columns of `.csv` file\n", - "dr = BasicClassificationDatasetReader().read(\n", - " data_path='./snips/',\n", - " train='train.csv',\n", - " x = 'text',\n", - " y = 'intents'\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We don't have a ready train/valid/test split." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('train', 15884), ('valid', 0), ('test', 0)]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# check train/valid/test sizes\n", - "[(k, len(dr[k])) for k in dr.keys()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### DatasetIterator\n", - "\n", - "Use `BasicClassificationDatasetIterator` to split `train` on `train` and `valid` and to generate batches of samples." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from deeppavlov.dataset_iterators.basic_classification_iterator import BasicClassificationDatasetIterator" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:23.557 INFO in 'deeppavlov.dataset_iterators.basic_classification_iterator'['basic_classification_iterator'] at line 73: Splitting field <> to new fields <<['train', 'valid']>>\n" - ] - } - ], - "source": [ - "# initialize data iterator splitting `train` field to `train` and `valid` in proportion 0.8/0.2\n", - "train_iterator = BasicClassificationDatasetIterator(\n", - " data=dr,\n", - " field_to_split='train', # field that will be splitted\n", - " split_fields=['train', 'valid'], # fields to which the fiald above will be splitted\n", - " split_proportions=[0.8, 0.2], #proportions for splitting\n", - " split_seed=23, # seed for splitting dataset\n", - " seed=42) # seed for iteration over dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's look into training samples. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "x: Is it freezing in Offerman, California?\n", - "y: ['GetWeather']\n", - "=================\n", - "x: put this song in the playlist Trap Land\n", - "y: ['AddToPlaylist']\n", - "=================\n", - "x: show me a textbook with a rating of 2 and a maximum rating of 6 that is current\n", - "y: ['RateBook']\n", - "=================\n", - "x: Will the weather be okay in Northern Luzon Heroes Hill National Park 4 and a half months from now?\n", - "y: ['GetWeather']\n", - "=================\n", - "x: Rate the current album a four\n", - "y: ['RateBook']\n", - "=================\n" - ] - } - ], - "source": [ - "# one can get train instances (or any other data type including `all`)\n", - "x_train, y_train = train_iterator.get_instances(data_type='train')\n", - "for x, y in list(zip(x_train, y_train))[:5]:\n", - " print('x:', x)\n", - " print('y:', y)\n", - " print('=================')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data preprocessing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will be using lowercasing and tokenization as data preparation. \n", - "\n", - "DeepPavlov also contains several other preprocessors and tokenizers." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Lowercasing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`str_lower` lowercases texts." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package punkt to /home/vimary/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package stopwords to /home/vimary/nltk_data...\n", - "[nltk_data] Package stopwords is already up-to-date!\n", - "[nltk_data] Downloading package perluniprops to\n", - "[nltk_data] /home/vimary/nltk_data...\n", - "[nltk_data] Package perluniprops is already up-to-date!\n", - "[nltk_data] Downloading package nonbreaking_prefixes to\n", - "[nltk_data] /home/vimary/nltk_data...\n", - "[nltk_data] Package nonbreaking_prefixes is already up-to-date!\n" - ] - } - ], - "source": [ - "from deeppavlov.models.preprocessors.str_lower import str_lower" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['is it freezing in offerman, california?']" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "str_lower(['Is it freezing in Offerman, California?'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Tokenization\n", - "\n", - "`NLTKTokenizer` can split string to tokens." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "from deeppavlov.models.tokenizers.nltk_moses_tokenizer import NLTKMosesTokenizer" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[['Is', 'it', 'freezing', 'in', 'Offerman', ',', 'California', '?']]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tokenizer = NLTKMosesTokenizer()\n", - "tokenizer(['Is it freezing in Offerman, California?'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's preprocess all `train` part of the dataset." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "train_x_lower_tokenized = str_lower(tokenizer(train_iterator.get_instances(data_type='train')[0]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Vocabulary\n", - "\n", - "Now we are ready to use `vocab`. They are very usefull for:\n", - "* extracting class labels and converting labels to indices and vice versa,\n", - "* building of characters or tokens vocabularies." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "from deeppavlov.core.data.simple_vocab import SimpleVocabulary" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# initialize simple vocabulary to collect all appeared in the dataset classes\n", - "classes_vocab = SimpleVocabulary(\n", - " save_path='./snips/classes.dict',\n", - " load_path='./snips/classes.dict')" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:25.35 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 89: [saving vocabulary to /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n" - ] - } - ], - "source": [ - "classes_vocab.fit((train_iterator.get_instances(data_type='train')[1]))\n", - "classes_vocab.save()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's see what classes the dataset contains and their indices in the vocabulary." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('GetWeather', 0),\n", - " ('PlayMusic', 1),\n", - " ('SearchScreeningEvent', 2),\n", - " ('BookRestaurant', 3),\n", - " ('RateBook', 4),\n", - " ('SearchCreativeWork', 5),\n", - " ('AddToPlaylist', 6)]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(classes_vocab.items())" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# also one can collect vocabulary of textual tokens appeared 2 and more times in the dataset\n", - "token_vocab = SimpleVocabulary(\n", - " save_path='./snips/tokens.dict',\n", - " load_path='./snips/tokens.dict',\n", - " min_freq=2,\n", - " special_tokens=('', '',),\n", - " unk_token='')" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:25.157 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 89: [saving vocabulary to /home/vimary/ipavlov/Pilot/examples/tutorials/snips/tokens.dict]\n" - ] - } - ], - "source": [ - "token_vocab.fit(train_x_lower_tokenized)\n", - "token_vocab.save()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4564" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# number of tokens in dictionary\n", - "len(token_vocab)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('the', 6953),\n", - " ('a', 3917),\n", - " ('in', 3265),\n", - " ('to', 3203),\n", - " ('for', 2814),\n", - " ('of', 2401),\n", - " ('.', 2400),\n", - " ('i', 2079),\n", - " ('at', 1935),\n", - " ('play', 1703)]" - ] - }, - 
"execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 10 most common words and number of times their appeared\n", - "token_vocab.freqs.most_common()[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[13, 36, 244, 4, 1, 29, 996, 20]]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "token_ids = token_vocab(str_lower(tokenizer(['Is it freezing in Offerman, California?'])))\n", - "token_ids" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['is it freezing in , california?']" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tokenizer(token_vocab(token_ids))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Featurization\n", - "\n", - "This part contains several possible ways of featurization of text samples. One can chose any appropriate vectorizer/embedder according to available resources and given task.\n", - "\n", - "Bag-of-words (BoW) and TF-IDF vectorizers converts text samples to vectors (one vector per sample) while fastText, GloVe, fastText weighted by TF-IDF embedders either produce an embedding vector per token or an embedding vector per text sample (if `mean` set to True)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Bag-of-words\n", - "\n", - "Matches a vector to each text sample indicating which words appeared in the given sample: text -> binary vector $v$: \\[0, 1, 0, 0, 0, 1, ..., ...1, 0, 1\\]. \n", - "\n", - "Dimensionality of vector $v$ is equal to vocabulary size.\n", - "\n", - "$v_i$ == 1, if word $i$ is in the text,\n", - "\n", - "$v_i$ == 0, else." 
- ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from deeppavlov.models.embedders.bow_embedder import BoWEmbedder" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[array([0, 1, 0, ..., 0, 0, 0], dtype=int32)]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# initialize bag-of-words embedder giving total number of tokens\n", - "bow = BoWEmbedder(depth=token_vocab.len)\n", - "# it assumes indexed tokenized samples\n", - "bow(token_vocab(str_lower(tokenizer(['Is it freezing in Offerman, California?']))))" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "8" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# all 8 tokens are in the vocabulary\n", - "sum(bow(token_vocab(str_lower(tokenizer(['Is it freezing in Offerman, California?']))))[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### TF-IDF Vectorizer\n", - "\n", - "Matches a vector to each text sample: text -> vector $v$ from $R^N$ where $N$ is a vocabulary size.\n", - "\n", - "$TF-IDF(token, document) = TF(token, document) * IDF(token, document)$\n", - "\n", - "$TF$ is a term frequency:\n", - "\n", - "$TF(token, document) = \\frac{n_{token}}{\\sum_{k}n_k}.$\n", - "\n", - "$IDF$ is a inverse document frequency:\n", - "\n", - "$IDF(token, all\\_documents) = \\frac{Total\\ number\\ of\\ documents}{number\\ of\\ documents\\ where\\ token\\ appeared}.$" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`SklearnComponent` in DeepPavlov is a universal wrapper for any vecotirzer/estimator from `sklearn` package. 
The only requirement to specify component usage is following: model class and name of infer method should be passed as parameters." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "from deeppavlov.models.sklearn import SklearnComponent" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:25.268 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 218: Cannot load model from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v0.pkl\n", - "2019-02-12 12:14:25.269 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 165: Initializing model sklearn.feature_extraction.text:TfidfVectorizer from scratch\n" - ] - } - ], - "source": [ - "# initialize TF-IDF vectorizer sklearn component with `transform` as infer method\n", - "tfidf = SklearnComponent(\n", - " model_class=\"sklearn.feature_extraction.text:TfidfVectorizer\",\n", - " infer_method=\"transform\",\n", - " save_path='./tfidf_v0.pkl',\n", - " load_path='./tfidf_v0.pkl',\n", - " mode='train')" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:25.296 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 108: Fitting model sklearn.feature_extraction.text:TfidfVectorizer\n", - "2019-02-12 12:14:25.395 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v0.pkl\n" - ] - } - ], - "source": [ - "# fit on textual train instances and save it\n", - "tfidf.fit(str_lower(train_iterator.get_instances(data_type='train')[0]))\n", - "tfidf.save()" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - 
"outputs": [ - { - "data": { - "text/plain": [ - "<1x10709 sparse matrix of type ''\n", - "\twith 6 stored elements in Compressed Sparse Row format>" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tfidf(str_lower(['Is it freezing in Offerman, California?']))" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "10709" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# number of tokens in the TF-IDF vocabulary\n", - "len(tfidf.model.vocabulary_)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### GloVe embedder\n", - "\n", - "[GloVe](https://nlp.stanford.edu/projects/glove/) is an unsupervised learning algorithm for obtaining vector representations for words. Training is performed on aggregated global word-word co-occurrence statistics from a corpus, and the resulting representations showcase interesting linear substructures of the word vector space." 
- ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "from deeppavlov.models.embedders.glove_embedder import GloVeEmbedder" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's download GloVe embedding file" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:26.153 INFO in 'deeppavlov.core.data.utils'['utils'] at line 63: Downloading from http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt to glove.6B.100d.txt\n", - "347MB [00:06, 50.0MB/s] \n" - ] - } - ], - "source": [ - "simple_download(url=\"http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt\", \n", - " destination=\"./glove.6B.100d.txt\")" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:33.99 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n" - ] - } - ], - "source": [ - "embedder = GloVeEmbedder(load_path='./glove.6B.100d.txt',\n", - " dim=100, pad_zero=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1, 8, (100,))" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# output shape is (batch_size x max_num_tokens_in_the_batch x embedding_dim)\n", - "embedded_batch = embedder(str_lower(tokenizer(['Is it freezing in Offerman, California?']))) \n", - "len(embedded_batch), len(embedded_batch[0]), embedded_batch[0][0].shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "### Mean GloVe embedder" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Embedder returns a vector per token while we want to get a vector per text sample. Therefore, let's calculate mean vector of embeddings of tokens. \n", - "For that we can either init `GloVeEmbedder` with `mean=True` parameter (`mean=false` by default), or pass `mean=true` while calling function (this way `mean` value is assigned only for this call)." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1, (100,))" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# output shape is (batch_size x embedding_dim)\n", - "embedded_batch = embedder(str_lower(tokenizer(['Is it freezing in Offerman, California?'])), mean=True) \n", - "len(embedded_batch), embedded_batch[0].shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### GloVe weighted by TF-IDF embedder\n", - "\n", - "One of the possible ways to combine TF-IDF vectorizer and any token embedder is to weigh token embeddings by TF-IDF coefficients (therefore, `mean` set to True is obligatory to obtain embeddings of interest while it still **by default** returns embeddings of tokens." 
- ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "from deeppavlov.models.embedders.tfidf_weighted_embedder import TfidfWeightedEmbedder" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "weighted_embedder = TfidfWeightedEmbedder(\n", - " embedder=embedder, # our GloVe embedder instance\n", - " tokenizer=tokenizer, # our tokenizer instance\n", - " mean=True, # to return one vector per sample\n", - " vectorizer=tfidf # our TF-IDF vectorizer\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1, (100,))" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# output shape is (batch_size x embedding_dim)\n", - "embedded_batch = weighted_embedder(str_lower(tokenizer(['Is it freezing in Offerman, California?']))) \n", - "len(embedded_batch), embedded_batch[0].shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Models" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "from deeppavlov.metrics.accuracy import sets_accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "# get all train and valid data from iterator\n", - "x_train, y_train = train_iterator.get_instances(data_type=\"train\")\n", - "x_valid, y_valid = train_iterator.get_instances(data_type=\"valid\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Models in python" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### SklearnComponent classifier on Tfidf-features in python" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - 
"output_type": "stream", - "text": [ - "2019-02-12 12:14:53.75 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 218: Cannot load model from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v0.pkl\n", - "2019-02-12 12:14:53.75 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 165: Initializing model sklearn.linear_model:LogisticRegression from scratch\n" - ] - } - ], - "source": [ - "# initialize sklearn classifier, all parameters for classifier could be passed\n", - "cls = SklearnComponent(\n", - " model_class=\"sklearn.linear_model:LogisticRegression\",\n", - " infer_method=\"predict\",\n", - " save_path='./logreg_v0.pkl',\n", - " load_path='./logreg_v0.pkl',\n", - " C=1,\n", - " mode='train')" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:53.591 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 108: Fitting model sklearn.linear_model:LogisticRegression\n", - "2019-02-12 12:14:53.756 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v0.pkl\n" - ] - } - ], - "source": [ - "# fit sklearn classifier and save it\n", - "cls.fit(tfidf(x_train), y_train)\n", - "cls.save()" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "y_valid_pred = cls(tfidf(x_valid))" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Text sample: I need seating at Floating restaurant in Tennessee for a group of 9\n", - "True label: ['BookRestaurant']\n", - "Predicted label: BookRestaurant\n" - ] - } - ], - "source": [ - "# Let's look into obtained result\n", - "print(\"Text sample: 
{}\".format(x_valid[0]))\n", - "print(\"True label: {}\".format(y_valid[0]))\n", - "print(\"Predicted label: {}\".format(y_valid_pred[0]))" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.982373308152345" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# let's calculate sets accuracy (because each element is a list of labels)\n", - "sets_accuracy(np.squeeze(y_valid), y_valid_pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### KerasClassificationModel on GloVe embeddings in python" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "from deeppavlov.models.classifiers.keras_classification_model import KerasClassificationModel\n", - "from deeppavlov.models.preprocessors.one_hotter import OneHotter\n", - "from deeppavlov.models.classifiers.proba2labels import Proba2Labels" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:14:54.421 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 272: [initializing `KerasClassificationModel` from scratch as cnn_model]\n", - "2019-02-12 12:14:54.818 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 136: Model was successfully initialized!\n", - "Model summary:\n", - "__________________________________________________________________________________________________\n", - "Layer (type) Output Shape Param # Connected to \n", - "==================================================================================================\n", - "input_1 (InputLayer) (None, 15, 100) 0 \n", - 
"__________________________________________________________________________________________________\n", - "conv1d_1 (Conv1D) (None, 15, 128) 38528 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_2 (Conv1D) (None, 15, 128) 64128 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_3 (Conv1D) (None, 15, 128) 89728 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_1 (BatchNor (None, 15, 128) 512 conv1d_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_2 (BatchNor (None, 15, 128) 512 conv1d_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_3 (BatchNor (None, 15, 128) 512 conv1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_1 (Activation) (None, 15, 128) 0 batch_normalization_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_2 (Activation) (None, 15, 128) 0 batch_normalization_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_3 (Activation) (None, 15, 128) 0 batch_normalization_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_1 (GlobalM (None, 128) 0 activation_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_2 (GlobalM (None, 128) 0 activation_2[0][0] \n", - 
"__________________________________________________________________________________________________\n", - "global_max_pooling1d_3 (GlobalM (None, 128) 0 activation_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "concatenate_1 (Concatenate) (None, 384) 0 global_max_pooling1d_1[0][0] \n", - " global_max_pooling1d_2[0][0] \n", - " global_max_pooling1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_1 (Dropout) (None, 384) 0 concatenate_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_1 (Dense) (None, 100) 38500 dropout_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_4 (BatchNor (None, 100) 400 dense_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_4 (Activation) (None, 100) 0 batch_normalization_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_2 (Dropout) (None, 100) 0 activation_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_2 (Dense) (None, 7) 707 dropout_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_5 (BatchNor (None, 7) 28 dense_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_5 (Activation) (None, 7) 0 batch_normalization_5[0][0] \n", - "==================================================================================================\n", - "Total params: 233,555\n", - "Trainable params: 232,573\n", - "Non-trainable 
params: 982\n", - "__________________________________________________________________________________________________\n" - ] - } - ], - "source": [ - "# Intialize `KerasClassificationModel` that composes CNN shallow-and-wide network \n", - "# (name here as`cnn_model`)\n", - "cls = KerasClassificationModel(save_path=\"./cnn_model_v0\", \n", - " load_path=\"./cnn_model_v0\", \n", - " embedding_size=embedder.dim,\n", - " n_classes=classes_vocab.len,\n", - " model_name=\"cnn_model\",\n", - " text_size=15, # number of tokens\n", - " kernel_sizes_cnn=[3, 5, 7],\n", - " filters_cnn=128,\n", - " dense_size=100,\n", - " optimizer=\"Adam\",\n", - " learning_rate=0.1,\n", - " learning_rate_decay=0.01,\n", - " loss=\"categorical_crossentropy\")" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [], - "source": [ - "# `KerasClassificationModel` assumes one-hotted distribution of classes per sample.\n", - "# `OneHotter` converts indices to one-hot vectors representation.\n", - "# To obtain indices we can use our `classes_vocab` intialized and fitted above\n", - "onehotter = OneHotter(depth=classes_vocab.len, single_vector=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "# Train for 10 epochs\n", - "for ep in range(10):\n", - " for x, y in train_iterator.gen_batches(batch_size=64, \n", - " data_type=\"train\"):\n", - " x_embed = embedder(tokenizer(str_lower(x)))\n", - " y_onehot = onehotter(classes_vocab(y))\n", - " cls.train_on_batch(x_embed, y_onehot)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:15:22.184 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v0_opt.json]\n" - ] - } - ], - "source": [ - "# Save 
model weights and parameters\n", - "cls.save()" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "# Infering on validation data we get probability distribution on given data.\n", - "y_valid_pred = cls(embedder(tokenizer(str_lower(x_valid))))" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "# To convert probability distribution to labels, \n", - "# we first need to convert probabilities to indices,\n", - "# and then using vocabulary `classes_vocab` convert indices to labels.\n", - "# \n", - "# `Proba2Labels` converts probabilities to indices and supports three different modes:\n", - "# if `max_proba` is true, returns indices of the highest probabilities\n", - "# if `confidence_threshold` is given, returns indices with probabiltiies higher than threshold\n", - "# if `top_n` is given, returns `top_n` indices with highest probabilities\n", - "prob2labels = Proba2Labels(max_proba=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Text sample: I need seating at Floating restaurant in Tennessee for a group of 9\n", - "True label: ['BookRestaurant']\n", - "Predicted probability distribution: {'GetWeather': 4.443174475454725e-05, 'PlayMusic': 0.0002085473679471761, 'SearchScreeningEvent': 6.492184911621734e-05, 'BookRestaurant': 0.9995043277740479, 'RateBook': 0.00021818796813022345, 'SearchCreativeWork': 0.0013526129769161344, 'AddToPlaylist': 8.029041782720014e-05}\n", - "Predicted label: ['BookRestaurant']\n" - ] - } - ], - "source": [ - "# Let's look into obtained result\n", - "print(\"Text sample: {}\".format(x_valid[0]))\n", - "print(\"True label: {}\".format(y_valid[0]))\n", - "print(\"Predicted probability distribution: {}\".format(dict(zip(classes_vocab.keys(), \n", - " y_valid_pred[0]))))\n", - "print(\"Predicted label: 
{}\".format(classes_vocab(prob2labels(y_valid_pred))[0]))" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.982373308152345" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# calculate sets accuracy\n", - "sets_accuracy(y_valid, classes_vocab(prob2labels(y_valid_pred)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### SklearnComponent classifier on GloVe weighted by TF-IDF embeddings in python" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:15:22.961 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 218: Cannot load model from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v1.pkl\n", - "2019-02-12 12:15:22.962 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 165: Initializing model sklearn.linear_model:LogisticRegression from scratch\n" - ] - } - ], - "source": [ - "# initialize sklearn classifier, all parameters for classifier could be passed\n", - "cls = SklearnComponent(\n", - " model_class=\"sklearn.linear_model:LogisticRegression\",\n", - " infer_method=\"predict\",\n", - " save_path='./logreg_v1.pkl',\n", - " load_path='./logreg_v1.pkl',\n", - " C=1,\n", - " mode='train')" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:15:44.521 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 108: Fitting model sklearn.linear_model:LogisticRegression\n", - "2019-02-12 12:15:46.59 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to 
/home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v1.pkl\n" - ] - } - ], - "source": [ - "# fit sklearn classifier and save it\n", - "cls.fit(weighted_embedder(str_lower(tokenizer(x_train))), y_train)\n", - "cls.save()" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [], - "source": [ - "y_valid_pred = cls(weighted_embedder(str_lower(tokenizer(x_valid))))" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Text sample: I need seating at Floating restaurant in Tennessee for a group of 9\n", - "True label: ['BookRestaurant']\n", - "Predicted label: BookRestaurant\n" - ] - } - ], - "source": [ - "# Let's look into obtained result\n", - "print(\"Text sample: {}\".format(x_valid[0]))\n", - "print(\"True label: {}\".format(y_valid[0]))\n", - "print(\"Predicted label: {}\".format(y_valid_pred[0]))" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9184765502045955" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# let's calculate sets accuracy (because each element is a list of labels)\n", - "sets_accuracy(np.squeeze(y_valid), y_valid_pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Let's free our memory from embeddings and models" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "embedder.reset()\n", - "cls.reset()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Models from configs" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [], - "source": [ - "from deeppavlov import build_model\n", - "from deeppavlov import train_model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"#### SklearnComponent classifier on Tfidf-features from config" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [], - "source": [ - "logreg_config = {\n", - " \"dataset_reader\": {\n", - " \"class_name\": \"basic_classification_reader\",\n", - " \"x\": \"text\",\n", - " \"y\": \"intents\",\n", - " \"data_path\": \"./snips\"\n", - " },\n", - " \"dataset_iterator\": {\n", - " \"class_name\": \"basic_classification_iterator\",\n", - " \"seed\": 42,\n", - " \"split_seed\": 23,\n", - " \"field_to_split\": \"train\",\n", - " \"split_fields\": [\n", - " \"train\",\n", - " \"valid\"\n", - " ],\n", - " \"split_proportions\": [\n", - " 0.9,\n", - " 0.1\n", - " ]\n", - " },\n", - " \"chainer\": {\n", - " \"in\": [\n", - " \"x\"\n", - " ],\n", - " \"in_y\": [\n", - " \"y\"\n", - " ],\n", - " \"pipe\": [\n", - " {\n", - " \"id\": \"classes_vocab\",\n", - " \"class_name\": \"simple_vocab\",\n", - " \"fit_on\": [\n", - " \"y\"\n", - " ],\n", - " \"save_path\": \"./snips/classes.dict\",\n", - " \"load_path\": \"./snips/classes.dict\",\n", - " \"in\": \"y\",\n", - " \"out\": \"y_ids\"\n", - " },\n", - " {\n", - " \"in\": [\n", - " \"x\"\n", - " ],\n", - " \"out\": [\n", - " \"x_vec\"\n", - " ],\n", - " \"fit_on\": [\n", - " \"x\",\n", - " \"y_ids\"\n", - " ],\n", - " \"id\": \"tfidf_vec\",\n", - " \"class_name\": \"sklearn_component\",\n", - " \"save_path\": \"tfidf_v1.pkl\",\n", - " \"load_path\": \"tfidf_v1.pkl\",\n", - " \"model_class\": \"sklearn.feature_extraction.text:TfidfVectorizer\",\n", - " \"infer_method\": \"transform\"\n", - " },\n", - " {\n", - " \"in\": \"x\",\n", - " \"out\": \"x_tok\",\n", - " \"id\": \"my_tokenizer\",\n", - " \"class_name\": \"nltk_moses_tokenizer\",\n", - " \"tokenizer\": \"wordpunct_tokenize\"\n", - " },\n", - " {\n", - " \"in\": [\n", - " \"x_vec\"\n", - " ],\n", - " \"out\": [\n", - " \"y_pred\"\n", - " ],\n", - " \"fit_on\": [\n", - " \"x_vec\",\n", - " \"y\"\n", - " ],\n", - " \"class_name\": 
\"sklearn_component\",\n", - " \"main\": True,\n", - " \"save_path\": \"logreg_v2.pkl\",\n", - " \"load_path\": \"logreg_v2.pkl\",\n", - " \"model_class\": \"sklearn.linear_model:LogisticRegression\",\n", - " \"infer_method\": \"predict\",\n", - " \"ensure_list_output\": True\n", - " }\n", - " ],\n", - " \"out\": [\n", - " \"y_pred\"\n", - " ]\n", - " },\n", - " \"train\": {\n", - " \"batch_size\": 64,\n", - " \"metrics\": [\n", - " \"accuracy\"\n", - " ],\n", - " \"validate_best\": True,\n", - " \"test_best\": False\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:15:52.310 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 96: Cannot find /home/vimary/ipavlov/Pilot/examples/tutorials/snips/valid.csv file\n", - "2019-02-12 12:15:52.310 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 96: Cannot find /home/vimary/ipavlov/Pilot/examples/tutorials/snips/test.csv file\n", - "2019-02-12 12:15:52.311 INFO in 'deeppavlov.dataset_iterators.basic_classification_iterator'['basic_classification_iterator'] at line 73: Splitting field <> to new fields <<['train', 'valid']>>\n", - "2019-02-12 12:15:52.314 WARNING in 'deeppavlov.core.commands.train'['train'] at line 108: \"validate_best\" and \"test_best\" parameters are deprecated. 
Please, use \"evaluation_targets\" list instead\n", - "2019-02-12 12:15:52.322 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:15:52.339 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 89: [saving vocabulary to /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:15:52.340 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 218: Cannot load model from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v1.pkl\n", - "2019-02-12 12:15:52.341 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 165: Initializing model sklearn.feature_extraction.text:TfidfVectorizer from scratch\n", - "2019-02-12 12:15:52.389 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 108: Fitting model sklearn.feature_extraction.text:TfidfVectorizer\n", - "2019-02-12 12:15:52.493 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v1.pkl\n", - "2019-02-12 12:15:52.510 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 218: Cannot load model from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v2.pkl\n", - "2019-02-12 12:15:52.510 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 165: Initializing model sklearn.linear_model:LogisticRegression from scratch\n", - "2019-02-12 12:15:53.67 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 108: Fitting model sklearn.linear_model:LogisticRegression\n", - "2019-02-12 12:15:53.254 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v2.pkl\n", - "2019-02-12 
12:15:53.255 WARNING in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 295: Using NNTrainer for a pipeline without batched training\n", - "2019-02-12 12:15:53.256 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v2.pkl\n", - "2019-02-12 12:15:53.257 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:15:53.258 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.feature_extraction.text:TfidfVectorizer from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v1.pkl\n", - "2019-02-12 12:15:53.263 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.feature_extraction.textTfidfVectorizer loaded with parameters\n", - "2019-02-12 12:15:53.264 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n", - "2019-02-12 12:15:53.266 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.linear_model:LogisticRegression from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v2.pkl\n", - "2019-02-12 12:15:53.266 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.linear_model.logisticLogisticRegression loaded with parameters\n", - "2019-02-12 12:15:53.267 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. 
Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n", - "2019-02-12 12:15:53.346 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:15:53.347 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.feature_extraction.text:TfidfVectorizer from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v1.pkl\n", - "2019-02-12 12:15:53.352 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.feature_extraction.textTfidfVectorizer loaded with parameters\n", - "2019-02-12 12:15:53.352 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n", - "2019-02-12 12:15:53.354 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.linear_model:LogisticRegression from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v2.pkl\n", - "2019-02-12 12:15:53.354 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.linear_model.logisticLogisticRegression loaded with parameters\n", - "2019-02-12 12:15:53.355 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. 
Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"accuracy\": 0.983}, \"time_spent\": \"0:00:01\"}}\n" - ] - } - ], - "source": [ - "# we can train and evaluate model from config\n", - "m = train_model(logreg_config)" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:15:53.359 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:15:53.360 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.feature_extraction.text:TfidfVectorizer from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v1.pkl\n", - "2019-02-12 12:15:53.366 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.feature_extraction.textTfidfVectorizer loaded with parameters\n", - "2019-02-12 12:15:53.367 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. 
Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n", - "2019-02-12 12:15:53.368 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.linear_model:LogisticRegression from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v2.pkl\n", - "2019-02-12 12:15:53.369 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.linear_model.logisticLogisticRegression loaded with parameters\n", - "2019-02-12 12:15:53.369 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n" - ] - } - ], - "source": [ - "# or we can just load pre-trained model (conicides with what we did above)\n", - "m = build_model(logreg_config)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[['GetWeather']]" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m([\"Is it freezing in Offerman, California?\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### KerasClassificationModel on GloVe embeddings from config" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "cnn_config = {\n", - " \"dataset_reader\": {\n", - " \"class_name\": \"basic_classification_reader\",\n", - " \"x\": \"text\",\n", - " \"y\": \"intents\",\n", - " \"data_path\": \"snips\"\n", - " },\n", - " \"dataset_iterator\": {\n", - " \"class_name\": \"basic_classification_iterator\",\n", - " \"seed\": 42,\n", - " \"split_seed\": 23,\n", - " \"field_to_split\": \"train\",\n", - " \"split_fields\": [\n", - " \"train\",\n", - " \"valid\"\n", - " ],\n", - " 
\"split_proportions\": [\n", - " 0.9,\n", - " 0.1\n", - " ]\n", - " },\n", - " \"chainer\": {\n", - " \"in\": [\n", - " \"x\"\n", - " ],\n", - " \"in_y\": [\n", - " \"y\"\n", - " ],\n", - " \"pipe\": [\n", - " {\n", - " \"id\": \"classes_vocab\",\n", - " \"class_name\": \"simple_vocab\",\n", - " \"fit_on\": [\n", - " \"y\"\n", - " ],\n", - " \"level\": \"token\",\n", - " \"save_path\": \"./snips/classes.dict\",\n", - " \"load_path\": \"./snips/classes.dict\",\n", - " \"in\": \"y\",\n", - " \"out\": \"y_ids\"\n", - " },\n", - " {\n", - " \"in\": \"x\",\n", - " \"out\": \"x_tok\",\n", - " \"id\": \"my_tokenizer\",\n", - " \"class_name\": \"nltk_tokenizer\",\n", - " \"tokenizer\": \"wordpunct_tokenize\"\n", - " },\n", - " {\n", - " \"in\": \"x_tok\",\n", - " \"out\": \"x_emb\",\n", - " \"id\": \"my_embedder\",\n", - " \"class_name\": \"glove\",\n", - " \"load_path\": \"./glove.6B.100d.txt\",\n", - " \"dim\": 100,\n", - " \"pad_zero\": True\n", - " },\n", - " {\n", - " \"in\": \"y_ids\",\n", - " \"out\": \"y_onehot\",\n", - " \"class_name\": \"one_hotter\",\n", - " \"depth\": \"#classes_vocab.len\",\n", - " \"single_vector\": True\n", - " },\n", - " {\n", - " \"in\": [\n", - " \"x_emb\"\n", - " ],\n", - " \"in_y\": [\n", - " \"y_onehot\"\n", - " ],\n", - " \"out\": [\n", - " \"y_pred_probas\"\n", - " ],\n", - " \"main\": True,\n", - " \"class_name\": \"keras_classification_model\",\n", - " \"save_path\": \"./cnn_model_v1\",\n", - " \"load_path\": \"./cnn_model_v1\",\n", - " \"embedding_size\": \"#my_embedder.dim\",\n", - " \"n_classes\": \"#classes_vocab.len\",\n", - " \"kernel_sizes_cnn\": [\n", - " 1,\n", - " 2,\n", - " 3\n", - " ],\n", - " \"filters_cnn\": 256,\n", - " \"optimizer\": \"Adam\",\n", - " \"learning_rate\": 0.01,\n", - " \"learning_rate_decay\": 0.1,\n", - " \"loss\": \"categorical_crossentropy\",\n", - " \"coef_reg_cnn\": 1e-4,\n", - " \"coef_reg_den\": 1e-4,\n", - " \"dropout_rate\": 0.5,\n", - " \"dense_size\": 100,\n", - " \"model_name\": 
\"cnn_model\"\n", - " },\n", - " {\n", - " \"in\": \"y_pred_probas\",\n", - " \"out\": \"y_pred_ids\",\n", - " \"class_name\": \"proba2labels\",\n", - " \"max_proba\": True\n", - " },\n", - " {\n", - " \"in\": \"y_pred_ids\",\n", - " \"out\": \"y_pred_labels\",\n", - " \"ref\": \"classes_vocab\"\n", - " }\n", - " ],\n", - " \"out\": [\n", - " \"y_pred_labels\"\n", - " ]\n", - " },\n", - " \"train\": {\n", - " \"epochs\": 10,\n", - " \"batch_size\": 64,\n", - " \"metrics\": [\n", - " \"sets_accuracy\",\n", - " \"f1_macro\",\n", - " {\n", - " \"name\": \"roc_auc\",\n", - " \"inputs\": [\"y_onehot\", \"y_pred_probas\"]\n", - " }\n", - " ],\n", - " \"validation_patience\": 5,\n", - " \"val_every_n_epochs\": 1,\n", - " \"log_every_n_epochs\": 1,\n", - " \"show_examples\": True,\n", - " \"validate_best\": True,\n", - " \"test_best\": False\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:15:54.311 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 96: Cannot find /home/vimary/ipavlov/Pilot/examples/tutorials/snips/valid.csv file\n", - "2019-02-12 12:15:54.312 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 96: Cannot find /home/vimary/ipavlov/Pilot/examples/tutorials/snips/test.csv file\n", - "2019-02-12 12:15:54.313 INFO in 'deeppavlov.dataset_iterators.basic_classification_iterator'['basic_classification_iterator'] at line 73: Splitting field <> to new fields <<['train', 'valid']>>\n", - "2019-02-12 12:15:54.316 WARNING in 'deeppavlov.core.commands.train'['train'] at line 108: \"validate_best\" and \"test_best\" parameters are deprecated. 
Please, use \"evaluation_targets\" list instead\n", - "2019-02-12 12:15:54.319 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:15:54.335 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 89: [saving vocabulary to /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:15:54.337 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n", - "2019-02-12 12:16:14.207 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 272: [initializing `KerasClassificationModel` from scratch as cnn_model]\n", - "2019-02-12 12:16:14.548 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 136: Model was successfully initialized!\n", - "Model summary:\n", - "__________________________________________________________________________________________________\n", - "Layer (type) Output Shape Param # Connected to \n", - "==================================================================================================\n", - "input_1 (InputLayer) (None, None, 100) 0 \n", - "__________________________________________________________________________________________________\n", - "conv1d_1 (Conv1D) (None, None, 256) 25856 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_2 (Conv1D) (None, None, 256) 51456 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_3 (Conv1D) (None, None, 256) 77056 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - 
"batch_normalization_1 (BatchNor (None, None, 256) 1024 conv1d_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_2 (BatchNor (None, None, 256) 1024 conv1d_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_3 (BatchNor (None, None, 256) 1024 conv1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_1 (Activation) (None, None, 256) 0 batch_normalization_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_2 (Activation) (None, None, 256) 0 batch_normalization_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_3 (Activation) (None, None, 256) 0 batch_normalization_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_1 (GlobalM (None, 256) 0 activation_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_2 (GlobalM (None, 256) 0 activation_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_3 (GlobalM (None, 256) 0 activation_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "concatenate_1 (Concatenate) (None, 768) 0 global_max_pooling1d_1[0][0] \n", - " global_max_pooling1d_2[0][0] \n", - " global_max_pooling1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_1 (Dropout) (None, 768) 0 concatenate_1[0][0] \n", - 
"__________________________________________________________________________________________________\n", - "dense_1 (Dense) (None, 100) 76900 dropout_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_4 (BatchNor (None, 100) 400 dense_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_4 (Activation) (None, 100) 0 batch_normalization_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_2 (Dropout) (None, 100) 0 activation_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_2 (Dense) (None, 7) 707 dropout_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_5 (BatchNor (None, 7) 28 dense_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_5 (Activation) (None, 7) 0 batch_normalization_5[0][0] \n", - "==================================================================================================\n", - "Total params: 235,475\n", - "Trainable params: 233,725\n", - "Non-trainable params: 1,750\n", - "__________________________________________________________________________________________________\n", - "/home/vimary/tensorflow/lib/python3.6/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.\n", - " 'precision', 'predicted', average, warn_for)\n", - "2019-02-12 12:16:14.932 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.1479\n", - "2019-02-12 12:16:14.932 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 
165: Saving model\n", - "2019-02-12 12:16:14.933 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.1479, \"f1_macro\": 0.044, \"roc_auc\": 0.5499}, \"time_spent\": \"0:00:01\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": 
[\"GetWeather\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. at Loews Cineplex Entertainment.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie 
schedule for Caribbean Cinemas\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 0, \"batches_seen\": 0, \"train_examples_seen\": 0, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:19.387 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.9434\n", - "2019-02-12 
12:16:19.388 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 165: Saving model\n", - "2019-02-12 12:16:19.388 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9375, \"f1_macro\": 0.9421, \"roc_auc\": 0.9938}, \"time_spent\": \"0:00:05\", \"examples\": [{\"x\": \"Please find me the work, Instrumental Directions.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"What weather will it be in Battlement Mesa?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"play theme by Yanni on Vimeo\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"rate the Beyond Black saga a one\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find the schedule for The Tooth Will Out at sunrise.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Rate Lords of the Rim zero stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"play an Masaki Aiba tune\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I need a table for 5 at the restaurant I ate at last Oct.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"book a table at a restaurant in Lucerne Valley that serves chicken nugget\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"add the tune to my viajes playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Play some thrash metal.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Need to find the soundtrack called Fire in 
the Valley\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Get Jump Down painting\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Is it chillier in Baconton KY\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find I Could Fall in Love.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Evolution and the Theory of Games gets a five out of 6.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Is Outlaw of Gor showing at thenearest movie house at 5 A.M.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Rate Pillar of Fire and Other Plays a three\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Play something by Holly Cole on lastfm \", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I need some ambient music. 
\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Rate Steps two out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the movie times at the Loews Cineplex\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Find THUNDER IN THE EAST.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this album zero stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"play music from the sixties\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Find a television show called Twisted.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Show The Late Great Townes Van Zandt\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Will it get hotter around elevenses in KS?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play a cohesive playlist for me\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will there be rainfall at one PM in Catahoula\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play the greatest record by Leroi Moore\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"this essay should get 1 of the points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"book a table in Connecticut in Robinette for one second from now\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook 1 of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"how's the weather going to be at fourteen o'clock in Falkland Islands\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"She me the Sons of Satan Praise the Lord picture\", \"y_predicted\": 
[\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Please book a Uncommon Grounds Coffeehouse restaurant \", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Make me a reservation at Illinois Central Railroad Freight Depot in Singapore with vickie rodriguez, lila reyes and ruby\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Is Love Is a Ball playing right now?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"The Far Side of the World chronicle deserves three out of 6 points.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Book a restaurant for one in AL.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"add tune to my instrumental funk playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate my current book 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Which films are playing at the closest movie house?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate this book series zero out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a painting called Beyond the Neighbourhood.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is Longwave going to be playing?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Show me Dangers of the Canadian Mounted\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"around here find movie schedule for films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I want to see Teeny Little Super Guy at Malco Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], 
\"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Will it be windy in NM?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Locate the best pub in Apache Junction\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"party for 2 in Cleveland\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"find Dickinson Theatres showing From Bondage to Freedom\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add potje met vet to my electronic gaming playlis\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I'm looking for the movie called The Beast that Shouted Love at the Heart of the World.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Play White Noise.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Find Just South of Heaven\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Put Al Jarreau on the ConcentraciГіn playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"add banking violence and the inner life today to my retro gaming playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"find a movie house with Colic: The Movie that is nearest\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I am looking for any creative work with the title of Journal of Pharmacy and Pharmacology\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"add the album by Cham to my Cloud Rap playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Find an album called List of Re: Hamatora episodes.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}], 
\"epochs_done\": 1, \"batches_seen\": 224, \"train_examples_seen\": 14295, \"loss\": 1.386025922106845}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9434, \"f1_macro\": 0.9427, \"roc_auc\": 0.9965}, \"time_spent\": \"0:00:05\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": 
[\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": 
[\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 1, \"batches_seen\": 224, \"train_examples_seen\": 14295, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:21.734 INFO in 
'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.9515\n", - "2019-02-12 12:16:21.735 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 165: Saving model\n", - "2019-02-12 12:16:21.735 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9688, \"f1_macro\": 0.9623, \"roc_auc\": 0.999}, \"time_spent\": \"0:00:08\", \"examples\": [{\"x\": \"She me movie times\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I'd like a table in a smoking room in a taverna on sep. 23, 2023\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"find a movie called No More Sadface\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"A rating of 5 of 6 points goes to Dickson McCunn trilogy\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"For the book The Mirrored Heavens I give one of a possiable 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Patagonia, South Africa?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Where can I watch the trailer for Home Economics\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Book a restaurant on san jacinto day in Anderson for me and my colleagues.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"What time is The Man Who Dared playing at the movie theatre?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"What will the 
temperature be at midnight in NE\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"I need a reservation for two at a diner in Venezuela\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me the forecast for the distant area of ME at three pm\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"I want to eat at the Trout Creek restaurant for 9 people for bougatsa that is the best\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current novel four out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What will the weather be at Noon in Durbin OH?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find the album Follow That Camel\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Can you play some Andrew Cash music on Slacker\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I need a table right now for four in ME\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"play Peja tunes\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"add this track by clem burke to my atmospheric black metal playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to play the game Show Me the Wonder\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need a table for four at a restaurant in AL\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"what is the forecast for Orienta for hotter weather\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"when are animated movies playing at Goodrich Quality Theaters\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": 
\"Will there be a blizzard in Egypt?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find me the soundtrack called Enter the Chicken\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"find the video game called Turnin Me On\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Check movie schedules and find which animated movies are being aired in the neighborhood\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"renee sanders, marlene and jewel want to go to a gastropub in the spa\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I want to watch The Original Recordings\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"book a table in Fort Loudon at a restaurant for 5\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"in 1 week is there going to be a depression in Washington\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find the schedule for Grand Canyon Trail.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Where Can I watch Chaos and Desire?\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add the album to the Six string peacefulness playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What movies are playing at the closest cinema\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Search for the Halfway Home TV show\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a painting called The Book of Folly.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"What is the weather 
forecast here?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"play In The Arms Of God on Zvooq by Nimal Mendis\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it get colder in Cape Fair\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play some seventies track from top Rie Tomosaka\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be temperate near Neylandville\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"play the most popular album on Google Music by sasu ripatti\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"rate the book series Sons of Destiny a five\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What's the movie schedule for B&B Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Show the movie schedules at KB Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Put the bill berry track on elrow Guest List\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Tell me the weather forecast in 4 years and a half in GA\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"will the weather be warm far from Niger at 15 o'clock\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"find the closest cinema for films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I am giving this current book album 0 out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add song to my Pop Brasil\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Find a reservation at a brasserie restaurant nearby SC for a party of ten\", \"y_predicted\": [\"BookRestaurant\"], 
\"y_true\": [\"BookRestaurant\"]}, {\"x\": \"can you find Leadership in my library, please?\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"give four stars out of 6 to current book\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Let's hear something from Elena Risteska\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Include dschiwan gasparjan in beth's rare groove playlist. \", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Book a table at a brasserie type restaurant that serves jain for a party of 8\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Book PM Park, Clear Lake, Iowa at 5 am for 6 people.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Tell me the weather forecast for Molino, Washington\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"I want to add a tune to my spanish metalblood playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Can you find me the Back When I Knew It All album?\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate the current novel four of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 2, \"batches_seen\": 448, \"train_examples_seen\": 28590, \"loss\": 1.2655515175844942}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9515, \"f1_macro\": 0.9509, \"roc_auc\": 0.9973}, \"time_spent\": \"0:00:08\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", 
\"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. 
at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 2, \"batches_seen\": 448, \"train_examples_seen\": 28590, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:24.94 INFO 
in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.9553\n", - "2019-02-12 12:16:24.94 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 165: Saving model\n", - "2019-02-12 12:16:24.95 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9844, \"f1_macro\": 0.9859, \"roc_auc\": 0.9998}, \"time_spent\": \"0:00:10\", \"examples\": [{\"x\": \"find the trailer for Hit the Ice\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Can I get the movies showtimes for the closest movie house.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I want to give this book zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"give The Creator zero points out of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find the movie schedules for Cineplex Odeon Corporation.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Get soundtrack of Comprehensive Knowledge Archive Network\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"play Pandora tunes from the fourties\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"A Sport and a Pastime is a solid 5 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I give Life During Wartime a one out of 6.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Rate this essay two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What is the cloud 
coverage in my current place\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find the movie schedule for animated movies in the neighborhood.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add ali lohan songs in Club Hits\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Play some alternative music on Vimeo\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Tell me if it'll be freezing here in 21 seconds\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add inconfundible to the piano in the background playlist. \", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Are there any animated movies playing in the neighborhood?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Showtimes for animated movies in Malco Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Show me the movie schedule for movies opening today close by\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Rate this album a 3\\n\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Can you add this tune to the night out playlist?\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"rate this textbook 3 out 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Rate the Michel Strogoff saga four of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to listen to the soundtrack Bed of Roses\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"What weather will HI have will there be hail twenty one minutes from now\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"what is the 
forecast for in 1 second at Monte Sereno for freezing temps\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"give Jackass Investing a three\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Give this album a three\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"play 1951 tunes\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Can you please find me Season of Glass?\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"play Going Down To The River on Pandora\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play 2004 on pandora\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Add radhae unakku kobam aagathadi to my Women of Metal playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Play Alone, Again from Mike Viola\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"For the current essay I rate 1 out of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"will there be a cloud next year in Kewanee\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"what is the weather of Sri Lanka\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"I give the Knife of Dreams saga a 0 of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to give The Plague Lords of Ruel 0 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Can I see Standing on the Edge of the Noise in the nearest cinema\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Play the top music from Epic Mazur.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play Suite Sudarmoricaine by Afi on itunes\", \"y_predicted\": [\"PlayMusic\"], 
\"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a reservation for 6 at a restaurant in Deersville\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"play laura love songs from 1959\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"what is the MT forecast for 22\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play the top caleigh peters.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"rate this novel five points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"book a table for me and bettye at Washington, D.C. Jewish Community Center in Montana\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"add Puzzles Like You in my playlist Reggae\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"book for 3 in U.S. Virgin Islands\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play the top fifty record from Alan Jardine\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a reservation for three at a top-rated sicilian restaurant in Portugal\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"book cornelia and bettie a table at a brasserie restaurant in Colombia\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Will it get warmer in Berkley\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"add the artist to my emotron playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add in the heart of the world to the Epic Gaming playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Find me the movie times\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you give me a local and 
current movie schedule \", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current chronicle 2 stars.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add the name the magnificent tree to playlist this is Rosana\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Play a new ballad by Valy on Iheart\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book spot for 7 at NH Theressa\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Add Jermaine Fagan to spring music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I need a table for 7 people at a bar that specialises in being a protein bar.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}], \"epochs_done\": 3, \"batches_seen\": 672, \"train_examples_seen\": 42885, \"loss\": 1.2179965520543712}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9553, \"f1_macro\": 0.9546, \"roc_auc\": 0.9977}, \"time_spent\": \"0:00:10\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets 
three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. 
at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 3, \"batches_seen\": 672, \"train_examples_seen\": 42885, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:26.435 INFO 
in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.9566\n", - "2019-02-12 12:16:26.435 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 165: Saving model\n", - "2019-02-12 12:16:26.436 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9531, \"f1_macro\": 0.9521, \"roc_auc\": 0.999}, \"time_spent\": \"0:00:12\", \"examples\": [{\"x\": \"Book a northeastern brazilian restaurant for 10 am\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate The Life and Loves of a She-Devil 5 out of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"what is the forecast for Montana at dinner\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Where is The Toxic Avenger II playing\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Play some music on Last Fm\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Tell me the weather forecast one year from now in Kulpsville, Togo\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"show freezing forcast now within the same area in North Dakota\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play some music from 1993 on Itunes.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Which animated movies are playing at the nearest movie house?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I want a table for 4 in Florida\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": 
[\"BookRestaurant\"]}, {\"x\": \"book spot for my mother in law and I at 18 o'clock\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play some House music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a spot for me and sonja at a popular pizzeria\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I'm in the mood to listen to meditative music.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play a new tune by louis silvers.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"rate this series 5 out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to listen to the soundtrack Bed of Roses\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Will the sun be out close-by Admiralty Island National Monument?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play Wynton Kelly music on Netflix sort by popular\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Rate Who Moved My Cheese? 
a one\\n\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Please find me Glass Cloud – Single.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Show me The Courts of Chaos\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Look for Hail Satanas We Are The Black Legions\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Play the greatest soundtrack by Nhat Son on Last Fm.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Can you find me the trailer of the Hippocratic Oath?\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"what movies are at the nearest movie house\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Play Barbra Streisand music from 1997.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Please use pandora to play a record from 1993\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play the theme music from 1963 by Yuki Koyanagi\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy in Tequesta?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Weather for Coaldale Arkansas \", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Where can I find the album The Adventures of Lolo II\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add Buddy DeSylva to my this is j balvin playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie times for movies premiering in the neighbourhood \", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Curse song to my playlist Guest List 
Engadget\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"book a table 1 year from now for corinne, tisha and I at a restaurant in Guernsey that is top-rated\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I'd like to watch Wish You Were Dead\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Add to my list the tune summer of love\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to listen to Roger Daltrey from the sixties on slacker\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"rate this novel a two\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"book me a reservation at a highly rated tavern in Hornersville\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Give the current chronicle 2 stars.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Show me the Caribbean Blue television show\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Play a song from the thirties by Bruno Pelletier\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Open Vimeo and play music.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Rate the current album a 5 out of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Book me a table for one at Blue Ribbon Barbecue\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I need to book a restaurant with a smoking room in AL\\n\\n\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Will it be hotter neighboring ME on august eighteenth, 2025?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Will it get colder in Alaska?\", \"y_predicted\": 
[\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Will it snow in AMy\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find the schedule for Vanishing of the Bees at a movie house.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"find To Each His Own Cinema, an album\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"add Funtwo to disco fever track\\n\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Can you play some music from my road trip album\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Can I get the movie schedule for the Bow Tie Cinemas.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"What is the movie schedules for films in the neighborhood\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate this book a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Show me the album Til the Morning\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add album to my Country Hits\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"give one rank to this album\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add this Roy Orbison song onto Women of Comedy\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Play the newest Roger Troutman track possible\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a restaurant at sixteen o'clock in SC\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}], \"epochs_done\": 4, \"batches_seen\": 896, \"train_examples_seen\": 57180, \"loss\": 1.1904606600957257}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": 
{\"sets_accuracy\": 0.9566, \"f1_macro\": 0.9559, \"roc_auc\": 0.9978}, \"time_spent\": \"0:00:12\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny 
Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 4, \"batches_seen\": 896, \"train_examples_seen\": 57180, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:28.776 INFO 
in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.9585\n", - "2019-02-12 12:16:28.776 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 165: Saving model\n", - "2019-02-12 12:16:28.777 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9688, \"f1_macro\": 0.9702, \"roc_auc\": 1.0}, \"time_spent\": \"0:00:15\", \"examples\": [{\"x\": \"Play Pandora on Last Fm\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"add this artist to my SinfonГ­a Hipster\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play some movement by Franky Gee\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"When and where is Nefertiti, Queen of the Nile playing?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"What movies are playing at Loews Cineplex?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Include hohenfriedberger marsch to my Novedades Pop list.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Find movie schedules at IMAX Corporation\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Rate A Tale of Love and Darkness 0 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Play ClГЎsicos del Hip Hop EspaГ±ol\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book for 10 in a restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the Return to Grace saga\", 
\"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"What is the movie schedule 1 second from now\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add tune to my this is animal collective\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Play new track from the fifties\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"add Blag Dahlia to Pura Vida\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Book a restasurant in Pohick Delaware.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"show creative photograph of Icewind Dale: Heart of Winter\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I want to book a delicatessen serving testaroli in Somalia for 7/25/2027.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Book spot in Fults in Federated States Of Micronesia\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"can you find me a showing for Before the Music Dies in one second ?\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"find Remedial Chaos Theory, a soundtrack\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Show me the photograph A Woman from the Street\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add this artist to my This Is Philip Glass playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"i need to book a table for three in Lesotho\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Book me a restaurant reservation at 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": 
\"Need a table for sep. first in Haiti for a party of three\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"book a restaurant for three on feb. 18\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I'd like to watch movies at Amco Entertainment\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"play the Gary Chaw album\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Add this tomoyasu hotei song to my concentraciГіn playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate the current textbook 1 of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to listen to Space music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"find Dickinson Theatres showing From Bondage to Freedom\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"will it rain on Jan. 
18th, 2029 in Kanopolis Arkansas\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find the schedule for animated movies nearby\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"find Plitt Theatres movie schedules\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Add this tune by Rafet el Roman to my Latin Pop Rising playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Book a reservation for a southern brazilian restaurant for 10 people within walking distance of Broadway-Lafayette St\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play music from the list Indie Electronics\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Give this book a rating of four out of 6.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"my feelin' good playlist needs some Mai Selim in it. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like to book a restaurant in Poncha Springs for 8 at 00:32 am\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate this current essay a 5.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a restaurant for marylou and I within walking distance of my mum's hotel\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"find a movie theatre showing The Tailor of Panama\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show creative game Elements of Life: Remixed\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Play the best music by Arthur Johnston.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"add Diarios de Bicicleta to my la la playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to add michelle heaton to this is chopin\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Is it going to be warm here for brunch\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add an album to my week end playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please play something that's freak folk on Google Music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play the most popular Johnny Clarke on Deezer.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Please find me the work, Instrumental Directions.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find me the novel called Ressha Sentai ToQger\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"what animated movies are at the 
nearest movie house\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Play breed the killers on Itunes\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a restaurant that serves rolled oyster in Merkel\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"What will the humidity be like on june eighteenth in my current location\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Tell me when it'll be cloudy in Woodport\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"show movie times\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what time is Bordertown Trail showing\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a table for one in a bar serving saucisse for meal in Calistoga CO\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Will there be snowfall in KY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}], \"epochs_done\": 5, \"batches_seen\": 1120, \"train_examples_seen\": 71475, \"loss\": 1.1707027325672763}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9585, \"f1_macro\": 0.9579, \"roc_auc\": 0.998}, \"time_spent\": \"0:00:15\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song 
from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. 
at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 5, \"batches_seen\": 1120, \"train_examples_seen\": 71475, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:31.141 INFO 
in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.9604\n", - "2019-02-12 12:16:31.141 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 165: Saving model\n", - "2019-02-12 12:16:31.142 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9844, \"f1_macro\": 0.9808, \"roc_auc\": 0.9935}, \"time_spent\": \"0:00:17\", \"examples\": [{\"x\": \"Will it be freezing on 4/20/2038 in AMerican Beach NC\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"put live and rare into dancehall official\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What is the weather like in Wyatte\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate The Descendants two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find the movie schedule for animated movies in the neighbourhood.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"What time is The Bride’s Journey playing at Star Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Help me find the saga titled The Eternal Return\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"add iemand als jij to my playlist named In The Name Of Blues\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Make me a reservation in Hardesty at a joint the is indoor\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play a song from 2003\", \"y_predicted\": 
[\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"play Disney Sing It! – High School Musical 3: Senior Year\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I want to book a restaurant in 40 weeks in Iowa.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Will it be hot in Keachi\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate this textbook a one\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What time is A Man for Burning playing\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate this book three points out of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"give Heartland chronicle four points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Turn on Spotify to Tiny Tim ep\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"The Postman is awful and only gets a 1 out of 6. 
\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Will the weather this week be warmer in Crystal River?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate this novel four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"In Wynnedale AK will it blizzard\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Please give me the movie schedule for Pacific Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Find me the novel of A Dictionary of Slang and Unconventional English\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Tell me when sunrise is in Tennessee\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Please search for Columbia Records 1958–1986.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find the movie schedule close by\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you play some music by andrew diamond\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Is The Strange Case of the End of Civilization as We Know It playing at the movie theatre\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Rate The Astonishing Life of Octavian Nothing, Traitor to the Nation, Volume II: The Kingdom on the Waves series 2 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to see Wenn Lucy springt now at a movie theatre.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"give 0 out of 6 points to current book\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What will the weather be in Rwanda?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": 
[\"GetWeather\"]}, {\"x\": \"add the artist a j pero to my Country Gold playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"For this series I give the rating of four of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Locate the Koi to Senkyo to Chocolate television show\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"What's the forecast for Pipe Spring National Monument?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"this winter, meredith, betty and erika want to food at a gastropub that is in the same area as fran's location.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Give me the schedule now at the nearest movie house\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I want to book a restaurant in Reily VT.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"How cloudy is it in Morrisonville, Kentucky\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play some noise music on Netflix.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Rate this textbook 4 out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What time are movies showing at Megaplex Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a spot at a highly rated afghan restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"What time is The Bride Wore Boots playing\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I give Ruled Britannia a rating of five out of 6.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to play the game The Carny\", 
\"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"add agua y sal in Classic Jazz Funk\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Give the current album 1 star\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a book called The Mad Magician\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"What's the weather in Gabon\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Can you play a top song from a chyi chin concerto\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play the album 21st Century Live by Chet Lam on Itunes.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I need some Hardcore Hip Hop\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Put gregory douglass in Halloween Teens please\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Find the movie schedule for ArcLight Hollywood.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Book something for my girlfriend and I at a food truck that has pizzas in Brookwood on October fifteenth, 2020\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Can you put Musiri Subramania Iyer's song onto the lo-fi love soundtrack?\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"what is the weather forecast for Cuba at eleven am\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Go to the movie The Best of Pirates of the Mississippi\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"search for a photograph of Road Hogs\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Give The Turning Point a 0 
out of 6.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find the movie schedule\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}], \"epochs_done\": 6, \"batches_seen\": 1344, \"train_examples_seen\": 85770, \"loss\": 1.153284895100764}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9604, \"f1_macro\": 0.9598, \"roc_auc\": 0.9981}, \"time_spent\": \"0:00:17\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song 
called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 6, \"batches_seen\": 1344, \"train_examples_seen\": 85770, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:33.547 INFO 
in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.9622\n", - "2019-02-12 12:16:33.548 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 165: Saving model\n", - "2019-02-12 12:16:33.548 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9375, \"f1_macro\": 0.9374, \"roc_auc\": 0.997}, \"time_spent\": \"0:00:19\", \"examples\": [{\"x\": \"How much wind will there be in NM on november 11th\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"find The Many Loves of Dobie Gillis\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Put Jazzy B on Lazy Chill Afternoon playlist\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What time is The Bride from Hell playing at Malco Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I am giving the book After Henry a rating of 0 out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I need to add an artist to one of my playlists, Classical New Releases Spotify Picks.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Will it be warm here in one hour\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Is it freezing in Kelso\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"give The Story of the Last Thought a five\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add Nazad, nazad, Kalino mome to Escapada\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": 
[\"AddToPlaylist\"]}, {\"x\": \"Add outside the dream syndicate to millicent's fresh electronic playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate this album two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Book a reservation for me and my step sister in Nebraska in two seconds\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I want to play music from 1979 on Groove Shark.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a restaurant for tortelloni for eight\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Add Shooter Jennings to the All Out 70s playlist. \", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"find the book Metallica Through the Never\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add Hanging On to my just dance by aftercluv playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play a good John Maher record with Netflix\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I want to rate my current book three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Which animated movies are showing close by?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give 4 stars to the current essay\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to see Sympathy for the Devil\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"She me movie times at Mann Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Looking for the saga called The Scofflaw\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": 
[\"SearchCreativeWork\"]}, {\"x\": \"Will there be a snowstorm in Pomona, New Mexico?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"What will the weather be in Deer River?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"What's the weather in South Punta Gorda Heights\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find movie times for Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Make me a reservation in Colorado at nine am at National Cash Register Building\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play Elitsa Todorova music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"add this artist to the playlist cool jazz\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"rate this album one out of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What will the weather be like at my current spot on january the 19th\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Please look up the television show, Noel Hill & Tony Linnane.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Can you put this tune onto Latin Dance Cardio?\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate current novel two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Play some sixties on netflix\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Find a soundtrack called The Dragon.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Play Trace Adkins' music from the thirties.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Can i see the Boat People?\", 
\"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add the song to my R&B Movement playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"When is Robotix playing?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Find me An Echo in the Darkness\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Where can I watch the television show called Fangs of the Arctic?\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"rate this book 5 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"give one out of 6 stars to Free Market Fairness\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add nina hagen to essential folk\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play The Edge by Deezer on Vans Warped Tour Compilation 2003\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I need a reservation for January 9 at a restaurant that serves souvlaki nearby Cypress Av for a party of 1\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find time for College Rock Stars at any movie theatre\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Elvis Presley and America in my playlist Electro Workout\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Give one 6 stars to this book\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add Michael Hayvoronsky to Lo Que Suena Los Angeles\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"How do I rate this book 4 stars?\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"want to eat somewhere windy in NM\", 
\"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Open Netflix and find a movie with the song heartful\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"What film is playing nearby\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Put any 1972 record on.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Rate the Beyond This Place chronicle three of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"will Dick Tracy e il gas misterioso start twenty one hours from now\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"play a sound track by Vegard Sverre Tveitan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play the song Victim Of Changes from Hawkshaw Hawkins on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I need to book a restaurant for eight nearby Limerick one year from now that serves jerky \", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}], \"epochs_done\": 7, \"batches_seen\": 1568, \"train_examples_seen\": 100065, \"loss\": 1.1379986719361372}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9622, \"f1_macro\": 0.9617, \"roc_auc\": 0.9982}, \"time_spent\": \"0:00:19\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song 
from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. 
at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 7, \"batches_seen\": 1568, \"train_examples_seen\": 100065, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:35.979 
INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 169: Did not improve on the sets_accuracy of 0.9622\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9844, \"f1_macro\": 0.9849, \"roc_auc\": 1.0}, \"time_spent\": \"0:00:22\", \"examples\": [{\"x\": \"can you get me the trailer of The Multiversity?\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find the films at ArcLight Hollywood.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Will the weather be temperate 22 minutes from now in Alba\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"I'm looking for a picture titled Rock Painting\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"What's the weather forecast for Haigler?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Plpay my Disco Fever playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Add artist to playlist Epic Gaming\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me Rapid City Muscle Car.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find the schedule for Evening Clothes in 1 second.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add pete shelley to Is It New Wave\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play some fifties tunes by Mike Mccready\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"play the new Feist on deezer\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Find a show called Ichibyōgoto ni Love for You.\", \"y_predicted\": 
[\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Play some Rockwell from around 1996\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Add Jarvis Cocker to my Chillin' on a Dirt Road playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please play With Echoes In The Movement Of Stone by Faith Evans.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Is there a blizzard in Tennessee Colony, KS\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"What is the local movie schedule\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I want to listen to Swing music on Iheart\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"how's the forecast for my current spot\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"my step aunt and I want to go cheese fries at the tavern\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play me something by Funtwo\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I need to add a tune by Amanda Stern to the playlist cloud rap.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please get me the Before Crisis: Final Fantasy VII television show.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Book a popular bar in Chowchilla\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the TV series I Build the Tower \", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find the schedule for Metallica Through the Never.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show creativity of Doomsday Comfort\", \"y_predicted\": 
[\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"is it going to be foggy in Jewell Cemetery State Historic Site 7 weeks from now\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"play Punk Essentials on Zvooq\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Which films are playing at the closest cinema?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I want to find the video game Masada Anniversary Edition Vol. 3: The Unknown Masada\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"rate this album one stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Is The Right to Strike playing at Star Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"play Latin Dinner\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Rate this current essay a 5.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to hear that tune from 2010\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play thirties concerto music on Google Music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"show movie times at sunrise\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please look up for the work titled We Own The Night.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Play a song from Helena Iren Michaelsen on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"rate the current book two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Play Elizeth Cardoso to my Nothing But A Party R&B playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": 
\"Play the most popular music by Ronald Isley on Google Music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I want to hear something eclectic\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Go to the saga The Quantum Thief\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I'd like to see weather conditions for Ennis.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play some songs from the fifties\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"play By The Sleepy Lagoon by Greg Kurstin\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I want to book a restaurant in the same area where I live in MA for ebony and yolanda.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"What is the weather forecast for Agate Fossil Beds National Monument\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"add this track to my global funk\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play Zvooq\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"I'd like to rate this textbook 4 out of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Book a reservation for one at a highly rated restaurant in Datil\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"play a symphony that is good from 2000\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"What is the movie schedule today at Neighborhood Cinema Group?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I'd like to eat salads at a restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"find Plitt Theatres movie schedules\", \"y_predicted\": 
[\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"I think this novel only deserves 2 points out of 6.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Is it rainy at the Edward L. Ryerson Conservation Area?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Will it be chillier on october 17 nearby East Glacier Park\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Give the current series a rating of three.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"book an oyster bar in AMerican Samoa for lunch\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}], \"epochs_done\": 8, \"batches_seen\": 1792, \"train_examples_seen\": 114360, \"loss\": 1.127019821533135}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9622, \"f1_macro\": 0.9617, \"roc_auc\": 0.9983}, \"time_spent\": \"0:00:22\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": 
[\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. 
at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 8, \"batches_seen\": 1792, \"train_examples_seen\": 114360, \"impatience\": 1, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:38.311 
INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 163: New best sets_accuracy of 0.9629\n", - "2019-02-12 12:16:38.312 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 165: Saving model\n", - "2019-02-12 12:16:38.312 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 386: [saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/cnn_model_v1_opt.json]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 0.9844, \"f1_macro\": 0.9837, \"roc_auc\": 0.9983}, \"time_spent\": \"0:00:24\", \"examples\": [{\"x\": \"Rate my current essay 1 out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What's the weather in FL?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"play me some Dom Pachino\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Is cloudy in Lyncourt?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Is temperature in Hanksville freezing ?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"play some Bertine Zetlitz record\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"play latest George Ducas music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"For The Curious Incident of the Dog in the Nightdress I rate it 2 of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"book verdure serving restaurant in Bloom City\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I want another song in my rock espaГ±ol playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Give the current essay five points / 6.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Play show of Cissy Houston\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add the artist Gwenno Pipette to the sexy as folk playlist. \", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I rate Egg Collecting and Bird Life of Australia a zero out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Book me a tibetan restaurant for my boss and I.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I give The Logic of Sense a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate The Ape-Man Within 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"play In The Disco by Danny Hutton\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"What is the weather like right now for Fort Adams?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add Grey Cloudy Lies to the hip hop playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I'd like to hear the song In a Reverie\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add por tu maldito amor to my orgullo gay\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Where can I buy The Lying Game\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"rate the current novel 0 of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Please find the movie, A Jingle with Jillian.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I want to find a restaurant that has a table for two at 5 AM\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"tell me how Bellwood weather is\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find a table for madge and tami at a faraway joint on Sterling St that serves chicken divan\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"What cinema has the closest movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give The Blue Equinox series 5 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add this album to my hot house playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I need a table booking for a highly rated sardinian pub.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Can you put some monifah on my disco fever playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I'd like to see movie schedules for animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is F.I.S.T. 
at Malco Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"What is the weather going to be like in Klondike Gold Rush National Historical Park on february the 28th, 2034?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"show creative picture of The Secret Doctrine\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Book a restaurant in CA for my parents and I on oct. the seventeenth\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Need to find the TV series called Administrative Behavior\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find the work I Looked Up\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"rate this album book zero out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to listen to Merrily We Roll Along by Marko Desantis.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Play the album entitled Se Potrei Avere Te.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Please help me find the video game John Michael Montgomery discography.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Can you find the album SimpleScreenRecorder\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Will it be warm in Kipp Rhode Island one hour and 9 seconds from now?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find the movie times at Bow Tie Cinemas.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Add Gary Valenciano to the power gaming playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to the Leche con Chocolate playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"show me the movie times in the neighbourhood\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give The Street five points.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want to book a restaurant in Ayer for 2 people.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the Endangered Species song\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"book a gibassier serving tavern in Vermont for nine\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Book me a restaurant reservation for a party of 8 ten hours from now\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Is it warm in Albania at noon\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Is it going to get any hotter in Kerrick?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"book a table for ten in Pollock PA\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"book Guenther House for 6 on Oct. 
24, 2035 in Waddy\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"play my melodious playlist\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Add Hallucinations of Despair to my this is trey songz playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I need to book a restaurant in Burkettsville in 2 years for rhoda adams, roxanne and I\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"give me the local movie times\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"open Itunes and play Kenny Cox most popular concerto\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}], \"epochs_done\": 9, \"batches_seen\": 2016, \"train_examples_seen\": 128655, \"loss\": 1.1169953814574651}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9629, \"f1_macro\": 0.9623, \"roc_auc\": 0.9983}, \"time_spent\": \"0:00:24\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": 
\"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. 
at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 9, \"batches_seen\": 2016, \"train_examples_seen\": 128655, \"impatience\": 0, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:16:40.661 
INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 169: Did not improve on the sets_accuracy of 0.9629\n", - "2019-02-12 12:16:40.693 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:16:40.693 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"train\": {\"eval_examples_count\": 64, \"metrics\": {\"sets_accuracy\": 1.0, \"f1_macro\": 1.0, \"roc_auc\": 0.9996}, \"time_spent\": \"0:00:26\", \"examples\": [{\"x\": \"book in town for 3 at a restaurant outdoor that is not far\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Need a table for the day after tomorrow in Clarenceville at the Black Rapids Roadhouse\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"What will the weather be like this tuesday in the area neighboring Rendezvous Mountain Educational State Forest?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate The CIA and the Cult of Intelligence a 5.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Is the forecast windy in Nigeria on Nov. 
the 6th\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Book the nearby Meriton Grand Hotel Tallinn in Missouri.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"please give me the movie schedule\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Book a reservation for 4 for Cherry Hut at Noon\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"I think Memorial Day should have a rating value of 3 and a best rating of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Play some G. V. Prakash Kumar\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"is The Clowns at the nearest cinema \", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"when is Letters from a Porcupine showing at Alamo Drafthouse Cinema\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Play some fun-punk\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book Guenther House for 6 on Oct. 24, 2035 in Waddy\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"what is the forecast in North Carolina\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"what's the movie schedules for in the neighborhood at the movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Book a highly rated food court for 2 people on jul. 
4th.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Can you put freddie freeloader on the playlist instrumental madness\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please get me the Just the Hits 2 TV show.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Book a reservation for seven people at Fraser Mansion in IL\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Use Spotify to play Who Was In My Room Last Night?\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"What is the forecast in Lono\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"I'd like to watch Sherlock Holmes à New York at KB Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Let's listen to the most popular Marty Friedman songs on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"weather in Tioga Colorado\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"I give The Monkey and the Tiger a rating of 2 points.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find WxHexEditor.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Book a table for my granddaughter and I at the highly rated restaurant that is close by in Tuvalu.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Add The Maid of Amsterdam to my 80s smash hits\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"find the picture Louder Than Bombs\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Will it rain in Barberville\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find a show called Ichibyōgoto 
ni Love for You.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Will it be warmer near here on jan. the fifteenth?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate The Travels of Lao Can five out of 6\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What time is Sontha Ooru playing\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Rate The Hindus: An Alternative History 3 of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"play latest George Ducas music\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"find movie schedules for Dickinson Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Rate this book a five \", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find me a restaurant in Pembine Montana\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"What is the movie schedule right now for movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is it going to be foggy at two am in Barberville\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"What will be the forecast for Belarus in the future around sep. 
the 22nd, 2020?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"I need to find the work Brotherly Love\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find me the Spartan: Total Warrior painting\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I'd like to see the television show Best-Of: Design of a Decade 2003–2013\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add this album to my spotify orchestra cello playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Give this textbook a rating of three.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Give The Irish Filmography saga a rating of 2 out of 6.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What is the weather forecast for Theodore Roosevelt Inaugural National Historic Site\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"play Isaac Hayes on Pandora from love, sweat and beer ep\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be colder in Oswego 16 weeks from now ?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"What is the movies playing at North American Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"What will the weather be a nine in Willow River State Park?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Find me the photograph The Late Music\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Get me a Johnny Cool photograph\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"add artist my laundry playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": 
[\"AddToPlaylist\"]}, {\"x\": \"Is the forecast colder in Idaho 1 second from now\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"book a spot for 3 at the pizza place\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find me a showing of The Vanquished that starts nine hours and 1 second from now.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please help me find the video game John Michael Montgomery discography.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Search for To Heart 2\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Add Nightmares That Surface from Shallow Sleep to michael's Rock Solid playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Find a show called The Inheritors\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}], \"epochs_done\": 10, \"batches_seen\": 2240, \"train_examples_seen\": 142950, \"loss\": 1.10787156862872}}\n", - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9629, \"f1_macro\": 0.9623, \"roc_auc\": 0.9983}, \"time_spent\": \"0:00:27\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, 
naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. 
at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}], \"epochs_done\": 10, \"batches_seen\": 2240, \"train_examples_seen\": 142950, \"impatience\": 1, \"patience_limit\": 5}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:17:00.634 
INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 302: [initializing `KerasClassificationModel` from saved]\n", - "2019-02-12 12:17:00.963 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 312: [loading weights from cnn_model_v1.h5]\n", - "2019-02-12 12:17:01.131 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 136: Model was successfully initialized!\n", - "Model summary:\n", - "__________________________________________________________________________________________________\n", - "Layer (type) Output Shape Param # Connected to \n", - "==================================================================================================\n", - "input_1 (InputLayer) (None, None, 100) 0 \n", - "__________________________________________________________________________________________________\n", - "conv1d_1 (Conv1D) (None, None, 256) 25856 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_2 (Conv1D) (None, None, 256) 51456 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_3 (Conv1D) (None, None, 256) 77056 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_1 (BatchNor (None, None, 256) 1024 conv1d_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_2 (BatchNor (None, None, 256) 1024 conv1d_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_3 (BatchNor (None, None, 256) 1024 conv1d_3[0][0] \n", - 
"__________________________________________________________________________________________________\n", - "activation_1 (Activation) (None, None, 256) 0 batch_normalization_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_2 (Activation) (None, None, 256) 0 batch_normalization_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_3 (Activation) (None, None, 256) 0 batch_normalization_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_1 (GlobalM (None, 256) 0 activation_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_2 (GlobalM (None, 256) 0 activation_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_3 (GlobalM (None, 256) 0 activation_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "concatenate_1 (Concatenate) (None, 768) 0 global_max_pooling1d_1[0][0] \n", - " global_max_pooling1d_2[0][0] \n", - " global_max_pooling1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_1 (Dropout) (None, 768) 0 concatenate_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_1 (Dense) (None, 100) 76900 dropout_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_4 (BatchNor (None, 100) 400 dense_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_4 
(Activation) (None, 100) 0 batch_normalization_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_2 (Dropout) (None, 100) 0 activation_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_2 (Dense) (None, 7) 707 dropout_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_5 (BatchNor (None, 7) 28 dense_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_5 (Activation) (None, 7) 0 batch_normalization_5[0][0] \n", - "==================================================================================================\n", - "Total params: 235,475\n", - "Trainable params: 233,725\n", - "Non-trainable params: 1,750\n", - "__________________________________________________________________________________________________\n", - "2019-02-12 12:17:01.431 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:17:01.431 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9629, \"f1_macro\": 0.9623, \"roc_auc\": 0.9983}, \"time_spent\": \"0:00:01\", \"examples\": [{\"x\": \"Book a table at Carter House Inn in Saint Bonaventure, Alaska.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Rate the current textbook one of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find a nearby movie 
schedule for movies\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"what is the Mississippi for the week\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Play me a song from 1968 on Spotify\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Book a table for me, naomi and elisabeth at a brasserie with wifi\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"The current album gets three out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"find Goodrich Quality Theaters films\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"show me the picture Unfinished Monkey Business\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"When is The Third Eye showing at Dickinson Theatres?\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Please get me the Welcome to the Rileys game.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Find a song called Bronco Billy.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"Rate this essay five stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"add tune to my relax & unwind playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"play 2007 tunes by Bunny Berigan\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"book a table for ten downtown at a close-by restaurant\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Find the schedule for for Corn at eleven A.M. 
at Loews Cineplex Entertainment.\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"1 minute from now, I will need reservations at a restaurant in Vanlue.\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Play hanging in the balance by Nik Kershaw on Zvooq.\", \"y_predicted\": [\"PlayMusic\"], \"y_true\": [\"PlayMusic\"]}, {\"x\": \"Will it be windy at 4 Pm in NY?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate my current textbook 1 out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"What are the weather conditions in Noel?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Add this artist to the laugh list\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I am rating Book of Challenges four stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"rate this textbook a 4\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Put an album by max richter into my this is Rosana playlist. 
\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"where can i watch animated movies around here\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Is A Man, a Woman, and a Bank showing in the nearest Neighborhood Cinema Group\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"book a popular food truck in Kentucky\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"Show me animated movies that are playig at Great Escape Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"add Sara Carter to my Nothing But A Party R&B\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I would like an outdoor cafeteria for 3\", \"y_predicted\": [\"BookRestaurant\"], \"y_true\": [\"BookRestaurant\"]}, {\"x\": \"rate the book Whit a zero\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Find a show called Time Is Just the Same.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchCreativeWork\"]}, {\"x\": \"I need the weather in Hubbardston, will it be chillier?\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"rate the previous essay four of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"Add wiktor coj to the Sleep playlist.\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Rate Dixie Lullaby: A Story of Music, Race and New Beginnings in a New South five out of 6 points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"please put live with me onto my playlist named CARГЃCTER LATINO\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Please add tobymac's song onto the indiespensables playlist.\", \"y_predicted\": [\"AddToPlaylist\"], 
\"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Show me the movie schedule for Caribbean Cinemas\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Can you put this song on the metal xplorer playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"Add this tune to my rage radio playlist\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"I want to go see A Troll in Central Park.\", \"y_predicted\": [\"SearchCreativeWork\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give the current series a one.\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like to watch animated movies at National Amusements\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"my jazz for loving couples needs more push the button\", \"y_predicted\": [\"AddToPlaylist\"], \"y_true\": [\"AddToPlaylist\"]}, {\"x\": \"What are the movie schedules for Kerasotes Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"rate the Dry series two out of 6 stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I want a list of showings of Days of Fire at Harkins Theatres\", \"y_predicted\": [\"SearchScreeningEvent\"], \"y_true\": [\"SearchScreeningEvent\"]}, {\"x\": \"Give White House Diary two points\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}, {\"x\": \"I'd like the weather forecast in Gang Mills four years from now.\", \"y_predicted\": [\"GetWeather\"], \"y_true\": [\"GetWeather\"]}, {\"x\": \"Rate Tropic of Capricorn two stars\", \"y_predicted\": [\"RateBook\"], \"y_true\": [\"RateBook\"]}]}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:17:21.399 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 302: 
[initializing `KerasClassificationModel` from saved]\n", - "2019-02-12 12:17:21.744 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 312: [loading weights from cnn_model_v1.h5]\n", - "2019-02-12 12:17:21.909 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 136: Model was successfully initialized!\n", - "Model summary:\n", - "__________________________________________________________________________________________________\n", - "Layer (type) Output Shape Param # Connected to \n", - "==================================================================================================\n", - "input_1 (InputLayer) (None, None, 100) 0 \n", - "__________________________________________________________________________________________________\n", - "conv1d_1 (Conv1D) (None, None, 256) 25856 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_2 (Conv1D) (None, None, 256) 51456 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_3 (Conv1D) (None, None, 256) 77056 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_1 (BatchNor (None, None, 256) 1024 conv1d_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_2 (BatchNor (None, None, 256) 1024 conv1d_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_3 (BatchNor (None, None, 256) 1024 conv1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_1 (Activation) (None, None, 256) 0 batch_normalization_1[0][0] 
\n", - "__________________________________________________________________________________________________\n", - "activation_2 (Activation) (None, None, 256) 0 batch_normalization_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_3 (Activation) (None, None, 256) 0 batch_normalization_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_1 (GlobalM (None, 256) 0 activation_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_2 (GlobalM (None, 256) 0 activation_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_3 (GlobalM (None, 256) 0 activation_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "concatenate_1 (Concatenate) (None, 768) 0 global_max_pooling1d_1[0][0] \n", - " global_max_pooling1d_2[0][0] \n", - " global_max_pooling1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_1 (Dropout) (None, 768) 0 concatenate_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_1 (Dense) (None, 100) 76900 dropout_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_4 (BatchNor (None, 100) 400 dense_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_4 (Activation) (None, 100) 0 batch_normalization_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_2 (Dropout) 
(None, 100) 0 activation_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_2 (Dense) (None, 7) 707 dropout_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_5 (BatchNor (None, 7) 28 dense_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_5 (Activation) (None, 7) 0 batch_normalization_5[0][0] \n", - "==================================================================================================\n", - "Total params: 235,475\n", - "Trainable params: 233,725\n", - "Non-trainable params: 1,750\n", - "__________________________________________________________________________________________________\n" - ] - } - ], - "source": [ - "# we can train and evaluate model from config\n", - "m = train_model(cnn_config)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:17:21.914 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:17:21.915 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n", - "2019-02-12 12:17:42.89 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 302: [initializing `KerasClassificationModel` from saved]\n", - "2019-02-12 12:17:42.406 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 312: [loading weights from cnn_model_v1.h5]\n", - "2019-02-12 12:17:42.569 INFO in 
'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 136: Model was successfully initialized!\n", - "Model summary:\n", - "__________________________________________________________________________________________________\n", - "Layer (type) Output Shape Param # Connected to \n", - "==================================================================================================\n", - "input_1 (InputLayer) (None, None, 100) 0 \n", - "__________________________________________________________________________________________________\n", - "conv1d_1 (Conv1D) (None, None, 256) 25856 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_2 (Conv1D) (None, None, 256) 51456 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_3 (Conv1D) (None, None, 256) 77056 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_1 (BatchNor (None, None, 256) 1024 conv1d_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_2 (BatchNor (None, None, 256) 1024 conv1d_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_3 (BatchNor (None, None, 256) 1024 conv1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_1 (Activation) (None, None, 256) 0 batch_normalization_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_2 (Activation) (None, None, 256) 0 batch_normalization_2[0][0] \n", - 
"__________________________________________________________________________________________________\n", - "activation_3 (Activation) (None, None, 256) 0 batch_normalization_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_1 (GlobalM (None, 256) 0 activation_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_2 (GlobalM (None, 256) 0 activation_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_3 (GlobalM (None, 256) 0 activation_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "concatenate_1 (Concatenate) (None, 768) 0 global_max_pooling1d_1[0][0] \n", - " global_max_pooling1d_2[0][0] \n", - " global_max_pooling1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_1 (Dropout) (None, 768) 0 concatenate_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_1 (Dense) (None, 100) 76900 dropout_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_4 (BatchNor (None, 100) 400 dense_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_4 (Activation) (None, 100) 0 batch_normalization_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_2 (Dropout) (None, 100) 0 activation_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_2 (Dense) (None, 7) 707 dropout_2[0][0] 
\n", - "__________________________________________________________________________________________________\n", - "batch_normalization_5 (BatchNor (None, 7) 28 dense_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_5 (Activation) (None, 7) 0 batch_normalization_5[0][0] \n", - "==================================================================================================\n", - "Total params: 235,475\n", - "Trainable params: 233,725\n", - "Non-trainable params: 1,750\n", - "__________________________________________________________________________________________________\n" - ] - } - ], - "source": [ - "# or we can just load pre-trained model (conicides with what we did above)\n", - "m = build_model(cnn_config)" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[['GetWeather']]" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m([\"Is it freezing in Offerman, California?\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### SklearnComponent classifier on GloVe weighted by TF-IDF embeddings from config" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [], - "source": [ - "logreg_config = {\n", - " \"dataset_reader\": {\n", - " \"class_name\": \"basic_classification_reader\",\n", - " \"x\": \"text\",\n", - " \"y\": \"intents\",\n", - " \"data_path\": \"snips\"\n", - " },\n", - " \"dataset_iterator\": {\n", - " \"class_name\": \"basic_classification_iterator\",\n", - " \"seed\": 42,\n", - " \"split_seed\": 23,\n", - " \"field_to_split\": \"train\",\n", - " \"split_fields\": [\n", - " \"train\",\n", - " \"valid\"\n", - " ],\n", - " \"split_proportions\": [\n", - " 0.9,\n", - " 0.1\n", - " ]\n", - " },\n", - " \"chainer\": {\n", - " \"in\": [\n", - " \"x\"\n", - " ],\n", - 
" \"in_y\": [\n", - " \"y\"\n", - " ],\n", - " \"pipe\": [\n", - " {\n", - " \"id\": \"classes_vocab\",\n", - " \"class_name\": \"simple_vocab\",\n", - " \"fit_on\": [\n", - " \"y\"\n", - " ],\n", - " \"save_path\": \"./snips/classes.dict\",\n", - " \"load_path\": \"./snips/classes.dict\",\n", - " \"in\": \"y\",\n", - " \"out\": \"y_ids\"\n", - " },\n", - " {\n", - " \"in\": [\n", - " \"x\"\n", - " ],\n", - " \"out\": [\n", - " \"x_vec\"\n", - " ],\n", - " \"fit_on\": [\n", - " \"x\",\n", - " \"y_ids\"\n", - " ],\n", - " \"id\": \"my_tfidf_vectorizer\",\n", - " \"class_name\": \"sklearn_component\",\n", - " \"save_path\": \"tfidf_v2.pkl\",\n", - " \"load_path\": \"tfidf_v2.pkl\",\n", - " \"model_class\": \"sklearn.feature_extraction.text:TfidfVectorizer\",\n", - " \"infer_method\": \"transform\"\n", - " },\n", - " {\n", - " \"in\": \"x\",\n", - " \"out\": \"x_tok\",\n", - " \"id\": \"my_tokenizer\",\n", - " \"class_name\": \"nltk_moses_tokenizer\"\n", - " },\n", - " {\n", - " \"in\": \"x_tok\",\n", - " \"out\": \"x_emb\",\n", - " \"id\": \"my_embedder\",\n", - " \"class_name\": \"glove\",\n", - " \"save_path\": \"./glove.6B.100d.txt\",\n", - " \"load_path\": \"./glove.6B.100d.txt\",\n", - " \"dim\": 100,\n", - " \"pad_zero\": True\n", - " },\n", - " {\n", - " \"class_name\": \"one_hotter\",\n", - " \"id\": \"my_onehotter\",\n", - " \"depth\": \"#classes_vocab.len\",\n", - " \"in\": \"y_ids\",\n", - " \"out\": \"y_onehot\",\n", - " \"single_vector\": True\n", - " },\n", - " {\n", - " \"in\": \"x_tok\",\n", - " \"out\": \"x_weighted_emb\",\n", - " \"class_name\": \"tfidf_weighted\",\n", - " \"id\": \"my_weighted_embedder\",\n", - " \"embedder\": \"#my_embedder\",\n", - " \"tokenizer\": \"#my_tokenizer\",\n", - " \"vectorizer\": \"#my_tfidf_vectorizer\",\n", - " \"mean\": True\n", - " },\n", - " {\n", - " \"in\": [\n", - " \"x_weighted_emb\"\n", - " ],\n", - " \"out\": [\n", - " \"y_pred\"\n", - " ],\n", - " \"fit_on\": [\n", - " \"x_weighted_emb\",\n", - " \"y\"\n", 
- " ],\n", - " \"class_name\": \"sklearn_component\",\n", - " \"main\": True,\n", - " \"save_path\": \"logreg_v3.pkl\",\n", - " \"load_path\": \"logreg_v3.pkl\",\n", - " \"model_class\": \"sklearn.linear_model:LogisticRegression\",\n", - " \"infer_method\": \"predict\",\n", - " \"ensure_list_output\": True\n", - " }\n", - " ],\n", - " \"out\": [\n", - " \"y_pred\"\n", - " ]\n", - " },\n", - " \"train\": {\n", - " \"epochs\": 10,\n", - " \"batch_size\": 64,\n", - " \"metrics\": [\n", - " \"sets_accuracy\"\n", - " ],\n", - " \"show_examples\": False,\n", - " \"validate_best\": True,\n", - " \"test_best\": False\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:32:01.417 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 96: Cannot find /home/vimary/ipavlov/Pilot/examples/tutorials/snips/valid.csv file\n", - "2019-02-12 12:32:01.417 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 96: Cannot find /home/vimary/ipavlov/Pilot/examples/tutorials/snips/test.csv file\n", - "2019-02-12 12:32:01.418 INFO in 'deeppavlov.dataset_iterators.basic_classification_iterator'['basic_classification_iterator'] at line 73: Splitting field <> to new fields <<['train', 'valid']>>\n", - "2019-02-12 12:32:01.420 WARNING in 'deeppavlov.core.commands.train'['train'] at line 108: \"validate_best\" and \"test_best\" parameters are deprecated. 
Please, use \"evaluation_targets\" list instead\n", - "2019-02-12 12:32:01.421 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:32:01.439 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 89: [saving vocabulary to /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:32:01.440 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 218: Cannot load model from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v2.pkl\n", - "2019-02-12 12:32:01.441 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 165: Initializing model sklearn.feature_extraction.text:TfidfVectorizer from scratch\n", - "2019-02-12 12:32:01.486 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 108: Fitting model sklearn.feature_extraction.text:TfidfVectorizer\n", - "2019-02-12 12:32:01.587 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v2.pkl\n", - "2019-02-12 12:32:01.603 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n", - "2019-02-12 12:32:21.226 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 218: Cannot load model from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v3.pkl\n", - "2019-02-12 12:32:21.227 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 165: Initializing model sklearn.linear_model:LogisticRegression from scratch\n", - "2019-02-12 12:32:43.431 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 108: Fitting model sklearn.linear_model:LogisticRegression\n", - 
"2019-02-12 12:32:45.621 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v3.pkl\n", - "2019-02-12 12:32:45.626 WARNING in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 295: Using NNTrainer for a pipeline without batched training\n", - "2019-02-12 12:32:45.626 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 240: Saving model to /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v3.pkl\n", - "2019-02-12 12:32:45.658 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:32:45.659 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.feature_extraction.text:TfidfVectorizer from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v2.pkl\n", - "2019-02-12 12:32:45.664 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.feature_extraction.textTfidfVectorizer loaded with parameters\n", - "2019-02-12 12:32:45.665 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. 
Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n", - "2019-02-12 12:32:45.666 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n", - "2019-02-12 12:33:05.258 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.linear_model:LogisticRegression from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v3.pkl\n", - "2019-02-12 12:33:05.259 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.linear_model.logisticLogisticRegression loaded with parameters\n", - "2019-02-12 12:33:05.259 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n", - "2019-02-12 12:33:07.749 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:33:07.750 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.feature_extraction.text:TfidfVectorizer from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v2.pkl\n", - "2019-02-12 12:33:07.755 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.feature_extraction.textTfidfVectorizer loaded with parameters\n", - "2019-02-12 12:33:07.755 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. 
Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n", - "2019-02-12 12:33:07.756 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9283}, \"time_spent\": \"0:00:03\"}}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:33:27.702 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.linear_model:LogisticRegression from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v3.pkl\n", - "2019-02-12 12:33:27.702 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.linear_model.logisticLogisticRegression loaded with parameters\n", - "2019-02-12 12:33:27.703 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. 
Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n" - ] - } - ], - "source": [ - "# we can train and evaluate model from config\n", - "m = train_model(logreg_config)" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-02-12 12:33:27.742 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 103: [loading vocabulary from /home/vimary/ipavlov/Pilot/examples/tutorials/snips/classes.dict]\n", - "2019-02-12 12:33:27.743 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.feature_extraction.text:TfidfVectorizer from /home/vimary/ipavlov/Pilot/examples/tutorials/tfidf_v2.pkl\n", - "2019-02-12 12:33:27.748 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.feature_extraction.textTfidfVectorizer loaded with parameters\n", - "2019-02-12 12:33:27.749 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. 
Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n", - "2019-02-12 12:33:27.750 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/home/vimary/ipavlov/Pilot/examples/tutorials/glove.6B.100d.txt`]\n", - "2019-02-12 12:33:47.483 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 202: Loading model sklearn.linear_model:LogisticRegression from /home/vimary/ipavlov/Pilot/examples/tutorials/logreg_v3.pkl\n", - "2019-02-12 12:33:47.484 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 209: Model sklearn.linear_model.logisticLogisticRegression loaded with parameters\n", - "2019-02-12 12:33:47.484 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 215: Fitting of loaded model can not be continued. Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter\n" - ] - } - ], - "source": [ - "# or we can just load pre-trained model (conicides with what we did above)\n", - "m = build_model(logreg_config)" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[['GetWeather']]" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m([\"Is it freezing in Offerman, California?\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [], - "source": [ - "# let's free memory\n", - "del m" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bonus: pre-trained CNN model in DeepPavlov" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Download model files (`wiki.en.bin` 8Gb embeddings):" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "! 
python -m deeppavlov download intents_snips_big" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Evaluate metrics on validation set (no test set provided):" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "! python -m deeppavlov evaluate intents_snips_big" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or one can use model from python code:" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "\n", - "import deeppavlov\n", - "from deeppavlov import build_model, evaluate_model\n", - "from deeppavlov.download import deep_download\n", - "\n", - "config_path = Path(deeppavlov.__file__).parent.joinpath('configs/classifiers/intents_snips_big.json')" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2018-12-13 18:44:55.284 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 205: Starting new HTTP connection (1): files.deeppavlov.ai:80\n", - "2018-12-13 18:44:55.341 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 393: http://files.deeppavlov.ai:80 \"GET /datasets/snips_intents/train.csv.md5 HTTP/1.1\" 200 44\n", - "2018-12-13 18:44:55.346 INFO in 'deeppavlov.download'['download'] at line 115: Skipped http://files.deeppavlov.ai/datasets/snips_intents/train.csv download because of matching hashes\n", - "2018-12-13 18:44:55.348 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 205: Starting new HTTP connection (1): files.deeppavlov.ai:80\n", - "2018-12-13 18:44:55.540 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 393: http://files.deeppavlov.ai:80 \"GET /deeppavlov_data/classifiers/intents_snips_v10.tar.gz.md5 HTTP/1.1\" 200 193\n", - "2018-12-13 18:44:55.589 INFO in 'deeppavlov.download'['download'] at line 
115: Skipped http://files.deeppavlov.ai/deeppavlov_data/classifiers/intents_snips_v10.tar.gz download because of matching hashes\n", - "2018-12-13 18:44:55.593 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 205: Starting new HTTP connection (1): files.deeppavlov.ai:80\n", - "2018-12-13 18:44:55.629 DEBUG in 'urllib3.connectionpool'['connectionpool'] at line 393: http://files.deeppavlov.ai:80 \"GET /deeppavlov_data/embeddings/wiki.en.bin.md5 HTTP/1.1\" 200 46\n", - "2018-12-13 18:45:11.617 INFO in 'deeppavlov.download'['download'] at line 115: Skipped http://files.deeppavlov.ai/deeppavlov_data/embeddings/wiki.en.bin download because of matching hashes\n" - ] - } - ], - "source": [ - "# let's download all the required data - model files, embeddings, vocabularies\n", - "deep_download(config_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2018-12-13 18:45:11.621 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 100: [loading vocabulary from /home/dilyara/.deeppavlov/models/classifiers/intents_snips_v10/classes.dict]\n", - "2018-12-13 18:45:11.632 INFO in 'deeppavlov.models.embedders.fasttext_embedder'['fasttext_embedder'] at line 52: [loading fastText embeddings from `/home/dilyara/.deeppavlov/downloads/embeddings/wiki.en.bin`]\n", - "2018-12-13 18:45:32.229 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 287: [initializing `KerasClassificationModel` from saved]\n", - "2018-12-13 18:45:32.554 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 297: [loading weights from model.h5]\n", - "2018-12-13 18:45:32.772 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 137: Model was successfully initialized!\n", - "Model summary:\n", - 
"__________________________________________________________________________________________________\n", - "Layer (type) Output Shape Param # Connected to \n", - "==================================================================================================\n", - "input_1 (InputLayer) (None, None, 300) 0 \n", - "__________________________________________________________________________________________________\n", - "conv1d_1 (Conv1D) (None, None, 256) 230656 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_2 (Conv1D) (None, None, 256) 384256 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_3 (Conv1D) (None, None, 256) 537856 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_1 (BatchNor (None, None, 256) 1024 conv1d_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_2 (BatchNor (None, None, 256) 1024 conv1d_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_3 (BatchNor (None, None, 256) 1024 conv1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_1 (Activation) (None, None, 256) 0 batch_normalization_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_2 (Activation) (None, None, 256) 0 batch_normalization_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_3 (Activation) (None, None, 256) 0 batch_normalization_3[0][0] \n", - 
"__________________________________________________________________________________________________\n", - "global_max_pooling1d_1 (GlobalM (None, 256) 0 activation_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_2 (GlobalM (None, 256) 0 activation_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_3 (GlobalM (None, 256) 0 activation_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "concatenate_1 (Concatenate) (None, 768) 0 global_max_pooling1d_1[0][0] \n", - " global_max_pooling1d_2[0][0] \n", - " global_max_pooling1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_1 (Dropout) (None, 768) 0 concatenate_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_1 (Dense) (None, 100) 76900 dropout_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_4 (BatchNor (None, 100) 400 dense_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_4 (Activation) (None, 100) 0 batch_normalization_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_2 (Dropout) (None, 100) 0 activation_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_2 (Dense) (None, 7) 707 dropout_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_5 (BatchNor (None, 7) 28 dense_2[0][0] \n", - 
"__________________________________________________________________________________________________\n", - "activation_5 (Activation) (None, 7) 0 batch_normalization_5[0][0] \n", - "==================================================================================================\n", - "Total params: 1,233,875\n", - "Trainable params: 1,232,125\n", - "Non-trainable params: 1,750\n", - "__________________________________________________________________________________________________\n" - ] - } - ], - "source": [ - "# now one can initialize model\n", - "m = build_model(config_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[['GetWeather']]" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m([\"Is it freezing in Offerman, California?\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [], - "source": [ - "# let's free memory\n", - "del m" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2018-12-13 18:45:33.675 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 97: Cannot find /home/dilyara/.deeppavlov/downloads/snips/valid.csv file\n", - "2018-12-13 18:45:33.675 WARNING in 'deeppavlov.dataset_readers.basic_classification_reader'['basic_classification_reader'] at line 97: Cannot find /home/dilyara/.deeppavlov/downloads/snips/test.csv file\n", - "2018-12-13 18:45:33.676 INFO in 'deeppavlov.dataset_iterators.basic_classification_iterator'['basic_classification_iterator'] at line 73: Splitting field <> to new fields <<['train', 'valid']>>\n", - "2018-12-13 18:45:33.679 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 100: [loading vocabulary from 
/home/dilyara/.deeppavlov/models/classifiers/intents_snips_v10/classes.dict]\n", - "2018-12-13 18:45:33.680 INFO in 'deeppavlov.models.embedders.fasttext_embedder'['fasttext_embedder'] at line 52: [loading fastText embeddings from `/home/dilyara/.deeppavlov/downloads/embeddings/wiki.en.bin`]\n", - "2018-12-13 18:45:54.568 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 287: [initializing `KerasClassificationModel` from saved]\n", - "2018-12-13 18:45:54.913 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 297: [loading weights from model.h5]\n", - "2018-12-13 18:45:55.112 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 137: Model was successfully initialized!\n", - "Model summary:\n", - "__________________________________________________________________________________________________\n", - "Layer (type) Output Shape Param # Connected to \n", - "==================================================================================================\n", - "input_1 (InputLayer) (None, None, 300) 0 \n", - "__________________________________________________________________________________________________\n", - "conv1d_1 (Conv1D) (None, None, 256) 230656 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_2 (Conv1D) (None, None, 256) 384256 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "conv1d_3 (Conv1D) (None, None, 256) 537856 input_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_1 (BatchNor (None, None, 256) 1024 conv1d_1[0][0] \n", - "__________________________________________________________________________________________________\n", - 
"batch_normalization_2 (BatchNor (None, None, 256) 1024 conv1d_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_3 (BatchNor (None, None, 256) 1024 conv1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_1 (Activation) (None, None, 256) 0 batch_normalization_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_2 (Activation) (None, None, 256) 0 batch_normalization_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_3 (Activation) (None, None, 256) 0 batch_normalization_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_1 (GlobalM (None, 256) 0 activation_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_2 (GlobalM (None, 256) 0 activation_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "global_max_pooling1d_3 (GlobalM (None, 256) 0 activation_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "concatenate_1 (Concatenate) (None, 768) 0 global_max_pooling1d_1[0][0] \n", - " global_max_pooling1d_2[0][0] \n", - " global_max_pooling1d_3[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_1 (Dropout) (None, 768) 0 concatenate_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_1 (Dense) (None, 100) 76900 dropout_1[0][0] \n", - 
"__________________________________________________________________________________________________\n", - "batch_normalization_4 (BatchNor (None, 100) 400 dense_1[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_4 (Activation) (None, 100) 0 batch_normalization_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dropout_2 (Dropout) (None, 100) 0 activation_4[0][0] \n", - "__________________________________________________________________________________________________\n", - "dense_2 (Dense) (None, 7) 707 dropout_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "batch_normalization_5 (BatchNor (None, 7) 28 dense_2[0][0] \n", - "__________________________________________________________________________________________________\n", - "activation_5 (Activation) (None, 7) 0 batch_normalization_5[0][0] \n", - "==================================================================================================\n", - "Total params: 1,233,875\n", - "Trainable params: 1,232,125\n", - "Non-trainable params: 1,750\n", - "__________________________________________________________________________________________________\n", - "2018-12-13 18:45:55.113 INFO in 'deeppavlov.core.commands.train'['train'] at line 207: Testing the best saved model\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"valid\": {\"eval_examples_count\": 1589, \"metrics\": {\"sets_accuracy\": 0.9824, \"f1_macro\": 0.982, \"roc_auc\": 0.9986}, \"time_spent\": \"0:00:01\"}}\n" - ] - }, - { - "data": { - "text/plain": [ - "{'valid': OrderedDict([('sets_accuracy', 0.9824),\n", - " ('f1_macro', 0.982),\n", - " ('roc_auc', 0.9986)])}" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# or one can evaluate 
model WITHOUT training\n", - "evaluate_model(config_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "accelerator": "GPU", - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/gobot_extended_tutorial.ipynb b/examples/gobot_extended_tutorial.ipynb deleted file mode 100644 index d3173c5ea0..0000000000 --- a/examples/gobot_extended_tutorial.ipynb +++ /dev/null @@ -1,1387 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "K7nBJnADTgUw" - }, - "source": [ - "### You can also run the notebook in [COLAB](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/gobot_extended_tutorial.ipynb)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "iPbAiv8KTgU4" - }, - "source": [ - "# Goal-oriented bot in DeepPavlov" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "us6IsTUETgU5" - }, - "source": [ - "This tutorial describes how to build a goal/task-oriented dialogue system with DeepPavlov framework. It covers the following steps:\n", - "\n", - "0. [Data preparation](#0.-Data-Preparation)\n", - "1. [Build Database of items](#1.-Build-Database-of-items)\n", - "2. [Build Slot Filler](#2.-Build-Slot-Filler)\n", - "3. [Build and Train a Bot](#3.-Build-and-Train-a-Bot)\n", - "4. 
[Interact with bot](#4.-Interact-with-Bot)\n", - "\n", - "An example of the final model served as a telegram bot:\n", - "\n", - "![gobot_example.png](https://github.com/deepmipt/DeepPavlov/blob/master/examples/img/gobot_example.png?raw=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 806 - }, - "colab_type": "code", - "id": "Vtu-7ns2TgUz", - "outputId": "8cdc252f-1a35-4ed3-bf0a-f54046d8c6a8" - }, - "outputs": [], - "source": [ - "!pip install deeppavlov\n", - "!python -m deeppavlov install gobot_simple_dstc2" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "4R066YWhTgU6" - }, - "source": [ - "## 0. Data Preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "gppbVe-HTgU7" - }, - "source": [ - "In this tutorial we build a chatbot for restaurant booking. To train our chatbot we use [Dialogue State Tracking Challenge 2 (DSTC-2)](http://camdial.org/~mh521/dstc/) dataset. DSTC-2 provides dialogues of a human talking to a booking system labelled with slots and dialogue actions. These labels will be used for training a dialogue policy network.\n", - "\n", - "First of all let's take a quick look at the data for the task. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 137 - }, - "colab_type": "code", - "id": "K9lF3QFJTgU8", - "outputId": "6ab259e2-3f88-4b25-9371-21d3f38fcef3" - }, - "outputs": [], - "source": [ - "from deeppavlov.dataset_readers.dstc2_reader import SimpleDSTC2DatasetReader\n", - "\n", - "data = SimpleDSTC2DatasetReader().read('my_data')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 50 - }, - "colab_type": "code", - "id": "uu56jAGJTgVD", - "outputId": "1536bb2c-6c1f-45a6-c0a7-a92106ed7dfe" - }, - "outputs": [], - "source": [ - "!ls my_data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HmNmE80MTgVG" - }, - "source": [ - "The training/validation/test data are stored in json files (`simple-dstc2-trn.json`, `simple-dstc2-val.json` and `simple-dstc2-tst.json`):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "colab_type": "code", - "id": "LIm9DQyzTgVH", - "outputId": "0a82c3f1-8afb-42d5-e3e3-0e9dd9178a20" - }, - "outputs": [], - "source": [ - "!head -n 101 my_data/simple-dstc2-trn.json" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "zO4CWg0XYNSw" - }, - "source": [ - "To iterate over batches of preprocessed DSTC-2 we need to import `DatasetIterator`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "piBBcw9ZTgVK", - "scrolled": true - }, - "outputs": [], - "source": [ - "from deeppavlov.dataset_iterators.dialog_iterator import DialogDatasetIterator\n", - "\n", - "iterator = DialogDatasetIterator(data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "jVU5JGnTTgVM" - }, - "source": [ - "You can now iterate over batches of preprocessed DSTC-2 dialogs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "colab_type": "code", - "id": "1RSwEH3CTgVN", - "outputId": "b2a0ecdb-89d1-4784-eeb9-749f7b754ff6" - }, - "outputs": [], - "source": [ - "from pprint import pprint\n", - "\n", - "for dialog in iterator.gen_batches(batch_size=1, data_type='train'):\n", - " turns_x, turns_y = dialog\n", - " \n", - " print(\"User utterances:\\n----------------\\n\")\n", - " pprint(turns_x[0], indent=4)\n", - " print(\"\\nSystem responses:\\n-----------------\\n\")\n", - " pprint(turns_y[0], indent=4)\n", - " \n", - " break" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "AKTZWtm8ZtPi" - }, - "source": [ - "In real-life annotation of data is expensive. To make our tutorial closer to production use-cases we take only 50 dialogues for training." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "UlappYTbTgVT" - }, - "outputs": [], - "source": [ - "!cp my_data/simple-dstc2-trn.json my_data/simple-dstc2-trn.full.json" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 33 - }, - "colab_type": "code", - "id": "tTU9yM-CTgVX", - "outputId": "1568aaed-7f8e-4f77-a637-cda5a9556740" - }, - "outputs": [], - "source": [ - "import json\n", - "\n", - "NUM_TRAIN = 50\n", - "\n", - "with open('my_data/simple-dstc2-trn.full.json', 'rt') as fin:\n", - " data = json.load(fin)\n", - "with open('my_data/simple-dstc2-trn.json', 'wt') as fout:\n", - " json.dump(data[:NUM_TRAIN], fout, indent=2)\n", - "print(f\"Train set is reduced to {NUM_TRAIN} dialogues (out of {len(data)}).\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "l5mjRphbTgVb" - }, - "source": [ - "## 1. Build Database of items" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "n597CLhqjqcd" - }, - "source": [ - "### Building database of restaurants" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "nJFkgfjTTgVf" - }, - "source": [ - "To assist with restaurant booking the chatbot should have access to a `database` of restaurants. 
The `database` contains task-specific information such as type of food, price range, location, etc.\n", - "\n", - " >> database([{'pricerange': 'cheap', 'area': 'south'}])\n", - " \n", - " Out[1]: \n", - " [[{'name': 'the lucky star',\n", - " 'food': 'chinese',\n", - " 'pricerange': 'cheap',\n", - " 'area': 'south',\n", - " 'addr': 'cambridge leisure park clifton way cherry hinton',\n", - " 'phone': '01223 244277',\n", - " 'postcode': 'c.b 1, 7 d.y'},\n", - " {'name': 'nandos',\n", - " 'food': 'portuguese',\n", - " 'pricerange': 'cheap',\n", - " 'area': 'south',\n", - " 'addr': 'cambridge leisure park clifton way',\n", - " 'phone': '01223 327908',\n", - " 'postcode': 'c.b 1, 7 d.y'}]]\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "rNpewHp-TgVd" - }, - "source": [ - " \n", - "![gobot_database.png](https://github.com/deepmipt/DeepPavlov/blob/master/examples/img/gobot_database.png?raw=1)\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "-TU-NLnNa9tk" - }, - "source": [ - "The chatbot should be trained to make api calls. For this, training dataset contains a `\"db_result\"` dictionary key. It annotates turns where system performs an api call to the database of items. Rusulting value is stored in `\"db_result\"`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "EVNRZmeiTgVh", - "outputId": "edba5e2b-235f-423f-8bfa-8d02506c4c7e" - }, - "outputs": [], - "source": [ - "!head -n 78 my_data/simple-dstc2-trn.json | tail +51" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "GT4YBHMnl0Xd" - }, - "source": [ - "Set `primary_keys` to a list of slot names that have unique values for different items (common SQL term). For the case of DSTC-2, the primary slot is a restaurant name." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "JjKbIAyaTgVk", - "outputId": "07620401-80f5-490a-cff2-5d5f013a365b" - }, - "outputs": [], - "source": [ - "from deeppavlov.core.data.sqlite_database import Sqlite3Database\n", - "\n", - "database = Sqlite3Database(primary_keys=[\"name\"],\n", - " save_path=\"my_bot/db.sqlite\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "a2e1u-z0TgVo" - }, - "source": [ - "\n", - "Let's find all `\"db_result\"` api call results and add them to our database of restaurants:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "RlKg5UtqTgVp", - "outputId": "a387df1f-4418-498b-a125-9e351a8e0cf9" - }, - "outputs": [], - "source": [ - "db_results = []\n", - "\n", - "for dialog in iterator.gen_batches(batch_size=1, data_type='all'):\n", - " turns_x, turns_y = dialog\n", - " db_results.extend(x['db_result'] for x in turns_x[0] if x.get('db_result'))\n", - "\n", - "print(f\"Adding {len(db_results)} items.\")\n", - "if db_results:\n", - " database.fit(db_results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "XeJMI9qaTgVt" - }, - "source": [ - "### Interacting with database" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2JLUF2b_TgVu" - }, - "source": [ - "We can now play with the database and make requests to it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "VRCU_MJnTgVv", - "outputId": "017803c4-36ab-49bc-ae40-7df87356f5c2" - }, - "outputs": [], - "source": [ - "database([{'pricerange': 'cheap', 'area': 'south'}])" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "U2wOAIlpTgV1", - "outputId": "e83e53b9-3431-4d1c-9bed-0e841d2b6fc4" - }, - "outputs": [], - "source": [ - "!ls my_bot" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "mBoO34NzTgV4" - }, - "source": [ - "## 2. Build Slot Filler" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "TGlJRwTCYkiQ" - }, - "source": [ - "`Slot Filler` is a component that finds slot values in user input:\n", - "\n", - " >> slot_filler(['I would like some chineese food'])\n", - " \n", - " Out[1]: [{'food': 'chinese'}]\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "5RqXeLdTTgV4" - }, - "source": [ - " \n", - "![gobot_slotfiller.png](https://github.com/deepmipt/DeepPavlov/blob/master/examples/img/gobot_slotfiller.png?raw=1)\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "TcJGPFq4TgV5" - }, - "source": [ - "To implement a `Slot Filler` you need to provide\n", - " \n", - " - **slot types**,\n", - " - all possible **slot values**,\n", - " - also, it is good to have examples of mentions for every value of each slot.\n", - " \n", - "In this tutorial, a schema for `slot types` and `slot values` should be defined in `slot_vals.json` with the following format:\n", - "\n", - " {\n", - " 'food': {\n", - " 'chinese': ['chinese', 'chineese', 'chines'],\n", - " 'french': ['french', 'freench'],\n", - " 'dontcare': ['any food', 'any type of food']\n", - " }\n", - " }\n", - " \n", - "\n", - "Let's use a simple non-trainable slot filler that relies on Levenshtein distance." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "zVi5XynnTgV6", - "outputId": "e9d68c8c-3bbb-4f80-98a5-92cbfe0eb5ac" - }, - "outputs": [], - "source": [ - "from deeppavlov.download import download_decompress\n", - "\n", - "download_decompress(url='http://files.deeppavlov.ai/deeppavlov_data/dstc_slot_vals.tar.gz',\n", - " download_path='my_bot/slotfill')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "NR1S3PXCTgV9", - "outputId": "013e9dba-427c-4255-aad5-0627477157e8" - }, - "outputs": [], - "source": [ - "!ls my_bot/slotfill" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "-OZ9TqDKZ6Fv" - }, - "source": [ - "Print some `slot types` and `slot values`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "KqgfYr4RTgWE", - "outputId": "a6830aa3-0bcc-4011-a4ab-5b5e48e6a20f" - }, - "outputs": [], - "source": [ - "!head -n 10 my_bot/slotfill/dstc_slot_vals.json" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "eIufDAvATgWN" - }, - "source": [ - "Check performance of our slot filler on DSTC-2 dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "XUSj5R3uTgWP" - }, - "outputs": [], - "source": [ - "from deeppavlov import configs\n", - "from deeppavlov.core.common.file import read_json\n", - "\n", - "slotfill_config = read_json(configs.ner.slotfill_simple_dstc2_raw)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "pFda6_LBTgWT" - }, - "source": [ - "We take [original DSTC2 slot-filling config](https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/configs/ner/slotfill_dstc2_raw.json) from DeepPavlov and change variables determining data paths:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yr8MbFLwTgWV" - }, - "outputs": [], - "source": [ - "slotfill_config['metadata']['variables']['DATA_PATH'] = 'my_data'\n", - "slotfill_config['metadata']['variables']['SLOT_VALS_PATH'] = 'my_bot/slotfill/dstc_slot_vals.json'" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ZxMTySrpaZVP" - }, - "source": [ - "Run evaluation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "CdrDW4bVTgWZ", - "outputId": "ac56ae74-b368-437e-c70f-01b418ba883f" - }, - "outputs": [], - "source": [ - "from deeppavlov import evaluate_model\n", - "\n", - "slotfill = evaluate_model(slotfill_config);" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "azulujiLTgWb" - }, - "source": [ - "We've got slot accuracy of **93% on valid** set and **95% on test** set." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "FkZvQ-yNig1u" - }, - "source": [ - "Building `Slot Filler` model from DeepPavlov config." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "uWeXTtVhTgWc" - }, - "outputs": [], - "source": [ - "from deeppavlov import build_model\n", - "\n", - "slotfill = build_model(slotfill_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ihi4lpXUi-_V" - }, - "source": [ - "Testing the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "bMRSU_bnTgWf", - "outputId": "d224e4be-1537-428d-ff67-55076224946d" - }, - "outputs": [], - "source": [ - "slotfill(['i want cheap chinee food'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "U2PUxB5fTgWl" - }, - "source": [ - "Saving slotfill config file to disk (we will require it's path later)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "5MyFaEM7TgWl" - }, - "outputs": [], - "source": [ - "import json\n", - "\n", - "json.dump(slotfill_config, open('my_bot/slotfill_config.json', 'wt'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "colab_type": "code", - "id": "_ZlRvicuTgWo", - "outputId": "4f1c3d46-d3b1-4923-823e-e2df1027fc6f" - }, - "outputs": [], - "source": [ - "!ls my_bot" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "E_InRKO6TgWt" - }, - "source": [ - "## 3. 
Build and Train a Bot" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ySe2m9-5m6iW" - }, - "source": [ - "### Dialogue policy and response templates" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "qjwbkeDl3TBg" - }, - "source": [ - "A policy module of the bot decides what action should be taken in the current dialogue state. The policy in our bot is implemented as a recurrent neural network (recurrency over user utterances) followed by a dense layer with softmax function on top. The network classifies user input into one of predefined system actions. Examples of possible actions are to say hello, to request user's location or to make api call to a database. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wLE1iebG3WJc" - }, - "source": [ - "![gobot_policy.png](https://github.com/deepmipt/DeepPavlov/blob/master/examples/img/gobot_policy.png?raw=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ghF-W56m3iW-" - }, - "source": [ - "All actions available for the system should be listed in a `simple-dstc2-templates.txt` file. Also, every action should be associated with a template string of the corresponding system response." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "TjDnGyiN3nIr" - }, - "source": [ - "![gobot_templates.png](https://github.com/deepmipt/DeepPavlov/blob/master/examples/img/gobot_templates.png?raw=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "-xqGKtXBTgWu" - }, - "source": [ - "Templates for responses should be in the format `TAB