diff --git a/python/mlc_llm/model/model_preset.py b/python/mlc_llm/model/model_preset.py
index c8c17d77ab..f2d244a0e3 100644
--- a/python/mlc_llm/model/model_preset.py
+++ b/python/mlc_llm/model/model_preset.py
@@ -970,9 +970,10 @@
     #     "use_cache": True,
     #     "vocab_size": 128256,
     # },
-    "bert": {
+    "snowflake-arctic-embed-m": {
         "architectures": ["BertModel"],
         "attention_probs_dropout_prob": 0.1,
+        "classifier_dropout": None,
         "gradient_checkpointing": False,
         "hidden_act": "gelu",
         "hidden_dropout_prob": 0.1,
@@ -986,10 +987,34 @@
         "num_hidden_layers": 12,
         "pad_token_id": 0,
         "position_embedding_type": "absolute",
-        "transformers_version": "4.6.0.dev0",
+        "torch_dtype": "float32",
+        "transformers_version": "4.36.1",
         "type_vocab_size": 2,
+        "use_cache": True,
         "vocab_size": 30522,
     },
+    # "snowflake-arctic-embed-s": {
+    #     "architectures": ["BertModel"],
+    #     "attention_probs_dropout_prob": 0.1,
+    #     "classifier_dropout": None,
+    #     "hidden_act": "gelu",
+    #     "hidden_dropout_prob": 0.1,
+    #     "hidden_size": 384,
+    #     "initializer_range": 0.02,
+    #     "intermediate_size": 1536,
+    #     "layer_norm_eps": 1e-12,
+    #     "max_position_embeddings": 512,
+    #     "model_type": "bert",
+    #     "num_attention_heads": 12,
+    #     "num_hidden_layers": 12,
+    #     "pad_token_id": 0,
+    #     "position_embedding_type": "absolute",
+    #     "torch_dtype": "float32",
+    #     "transformers_version": "4.36.1",
+    #     "type_vocab_size": 2,
+    #     "use_cache": True,
+    #     "vocab_size": 30522,
+    # },
     "stablelm-2-zephyr-1_6b": {
         "architectures": ["StableLmForCausalLM"],
         "bos_token_id": 100257,