This repository has been archived by the owner on Dec 16, 2024. It is now read-only.
forked from philschmid/terraform-aws-sagemaker-huggingface
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvariables.tf
136 lines (113 loc) · 5.33 KB
/
variables.tf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# ------------------------------------------------------------------------------
# Variables
# ------------------------------------------------------------------------------
# Required input (no default is declared): string prefix for resource names.
variable "name_prefix" {
  description = "A prefix used for naming resources."
  type        = string
}
# Required input: no default is declared, so callers must always set it.
# The description previously said "model training code" and "Defaults to None";
# both contradicted this declaration and the sibling *_version variables, which
# describe inference and link to the inference DLC overview.
variable "transformers_version" {
  description = "Transformers version you want to use for executing your inference code. Required. [List of supported versions](https://huggingface.co/docs/sagemaker/reference#inference-dlc-overview)"
  type        = string
}
# Optional framework version; null when unset. Per the description, either this
# or `tensorflow_version` is expected to be provided.
variable "pytorch_version" {
  description = "PyTorch version you want to use for executing your inference code. Defaults to `None`. Required unless `tensorflow_version` is provided. [List of supported versions](https://huggingface.co/docs/sagemaker/reference#inference-dlc-overview)"
  type        = string
  default     = null
}
# Optional framework version; null when unset. Per the description, either this
# or `pytorch_version` is expected to be provided.
variable "tensorflow_version" {
  description = "TensorFlow version you want to use for executing your inference code. Defaults to `None`. Required unless `pytorch_version` is provided. [List of supported versions](https://huggingface.co/docs/sagemaker/reference#inference-dlc-overview)"
  type        = string
  default     = null
}
# Optional explicit container image tag; null when unset. The description states
# the module otherwise derives the tag from the framework version and instance
# type (that derivation is not part of this file).
variable "image_tag" {
  description = "The image tag you want to use for the container you want to use. Defaults to `None`. The module tries to derive the `image_tag` from the `pytorch_version`, `tensorflow_version` & `instance_type`. If you want to override this, you can provide the `image_tag` as a variable."
  type        = string
  default     = null
}
# Optional instance type string; null when unset.
variable "instance_type" {
  description = "The EC2 instance type to deploy this Model to. For example, `ml.p2.xlarge`."
  type        = string
  default     = null
}
# Initial number of endpoint instances; a single instance when unset.
variable "instance_count" {
  description = "The initial number of instances to run in the Endpoint created from this Model. Defaults to 1."
  type        = number
  default     = 1
}
# Optional Hugging Face Hub model id; null when unset.
variable "hf_model_id" {
  description = "The HF_MODEL_ID environment variable defines the model id, which will be automatically loaded from [hf.co/models](https://huggingface.co/models) when creating or SageMaker Endpoint."
  type        = string
  default     = null
}
# Required input (no default is declared): the pipeline task name.
variable "hf_task" {
  description = "The HF_TASK environment variable defines the task for the used 🤗 Transformers pipeline. A full list of tasks can be find [here](https://huggingface.co/transformers/main_classes/pipelines.html)."
  type        = string
}
# Optional Hugging Face authorization token; null when unset.
# NOTE(review): this holds a credential but is not marked `sensitive = true`.
# Marking it sensitive may require the module's outputs to be marked sensitive
# as well; confirm against the module's outputs before changing.
variable "hf_api_token" {
  description = "The HF_API_TOKEN environment variable defines the your Hugging Face authorization token. The HF_API_TOKEN is used as a HTTP bearer authorization for remote files, like private models. You can find your token at your settings page."
  type        = string
  default     = null
}
# Optional model revision to pin alongside `hf_model_id`; null when unset.
variable "hf_model_revision" {
  description = "The HF_MODEL_REVISION is an extension to HF_MODEL_ID and allows you to define/pin a revision of the model to make sure you always load the same model on your SageMaker Endpoint."
  type        = string
  default     = null
}
# Optional S3 location of a model archive; null when unset. The description
# notes it is not needed when `hf_model_id` is used.
variable "model_data" {
  description = "The S3 location of a SageMaker model data .tar.gz file (default: None). Not needed when using `hf_model_id`."
  type        = string
  default     = null
}
# Optional name of an existing IAM role; null when unset. According to the
# description, the module creates a role itself when none is given (the creation
# logic is not part of this file).
variable "sagemaker_execution_role" {
  description = "An AWS IAM role Name to access training data and model artifacts. After the endpoint is created, the inference code might use the IAM role if it needs to access some AWS resources. If not specified, the role will created with with the `CreateModel` permissions from the [documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html#sagemaker-roles-createmodel-perms)"
  type        = string
  default     = null
}
# Autoscaling settings for the endpoint, passed as a single object.
# Only `max_capacity` is a mandatory attribute of the type; the rest are optional.
# NOTE(review): the description lists `scaling_target_invocations` as required,
# but the type constraint declares it optional - confirm which one is intended.
variable "autoscaling" {
  description = "A Object which defines the autoscaling target and policy for our SageMaker Endpoint. Required keys are `max_capacity` and `scaling_target_invocations` "

  type = object({
    min_capacity               = optional(number)
    max_capacity               = number
    scaling_target_invocations = optional(number)
    scale_in_cooldown          = optional(number)
    scale_out_cooldown         = optional(number)
  })

  # Value used when the caller does not set `autoscaling` at all.
  # NOTE(review): the optional attributes carry no per-attribute defaults, so a
  # caller-supplied object presumably gets null (not the values below) for any
  # key it omits - confirm how the module fills those in.
  default = {
    min_capacity               = 1
    max_capacity               = null
    scaling_target_invocations = null
    scale_in_cooldown          = 300
    # NOTE(review): 66 looks unusual next to 300 - confirm it is intentional.
    scale_out_cooldown = 66
  }
}
# Asynchronous-inference settings, passed as a single object.
# `s3_output_path` is the only mandatory attribute of the type; the KMS key and
# the two SNS topics are optional.
variable "async_config" {
  description = "(Optional) Specifies configuration for how an endpoint performs asynchronous inference. Required key is `s3_output_path`, which is the s3 bucket used for async inference."

  type = object({
    s3_output_path    = string
    kms_key_id        = optional(string)
    sns_error_topic   = optional(string)
    sns_success_topic = optional(string)
  })

  # Value used when the caller does not set `async_config`: every attribute null.
  default = {
    s3_output_path    = null
    kms_key_id        = null
    sns_error_topic   = null
    sns_success_topic = null
  }
}
# Serverless-inference settings, passed as a single object with two mandatory
# numeric attributes.
variable "serverless_config" {
  description = "(Optional) Specifies configuration for how an endpoint performs serverless inference. Required keys are `max_concurrency` and `memory_size_in_mb`"

  type = object({
    max_concurrency   = number
    memory_size_in_mb = number
  })

  # Value used when the caller does not set `serverless_config`: both attributes null.
  default = {
    max_concurrency   = null
    memory_size_in_mb = null
  }
}
# Optional string-to-string tag map; empty when unset.
variable "tags" {
  description = "A map of tags (key-value pairs) passed to resources."
  type        = map(string)
  default     = {}
}