Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[azure] support for existing subnet #4417

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions docs/source/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,21 @@ Available fields and semantics:
# Reference: https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview
storage_account: user-storage-account-name

# Specify subnet_id to use for instances (optional).
# SkyPilot created new vnet and subnet by default but it will reuse exisiting subnet if declared.
subnet_id: /subscriptions/<subscription-id>/resourceGroups/<resource-group-name>/providers/Microsoft.Network/virtualNetworks/<vnet-name>/subnets/<subnet-name>

# Should instances be assigned private IPs only? (optional)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: use declarative?

#
# Set to true to use private IPs to communicate between the local client and
# any SkyPilot nodes. This requires the networking stack be properly set up.
#
# When set to true, SkyPilot will only use private subnets to launch nodes and won't expose
# instances on public IP addresses.
# Reference: https://learn.microsoft.com/en-us/azure/virtual-network/virtual-network-manage-subnet?tabs=azure-portal
# Default: false.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe move this right after the (optional) line?

use_internal_ips: true

# Advanced Kubernetes configurations (optional).
kubernetes:
# The networking mode for accessing SSH jump pod (optional).
Expand Down
10 changes: 10 additions & 0 deletions sky/clouds/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,14 @@ def make_deploy_resources_variables(
if resource_group_name is None:
resource_group_name = f'{cluster_name.name_on_cloud}-{region_name}'

# Determine subnet_id if configured
subnet_id = skypilot_config.get_nested(('azure', 'subnet_id'), None)

# Determine if internal IPs should be used
use_internal_ips = skypilot_config.get_nested(
('azure', 'use_internal_ips'), False)


# Setup commands to eliminate the banner and restart sshd.
# This script will modify /etc/ssh/sshd_config and add a bash script
# into .bashrc. The bash script will restart sshd if it has not been
Expand Down Expand Up @@ -423,6 +431,8 @@ def _failover_disk_tier() -> Optional[resources_utils.DiskTier]:
'azure_subscription_id': self.get_project_id(dryrun),
'resource_group': resource_group_name,
'use_external_resource_group': use_external_resource_group,
'subnet_id': subnet_id,
'use_internal_ips': use_internal_ips,
}

# Setting disk performance tier for high disk tier.
Expand Down
8 changes: 8 additions & 0 deletions sky/provision/azure/azure-config-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
"metadata": {
"description": "Name of the Network Security Group associated with the SkyPilot cluster."
}
},
"existingSubnet": {
"type": "string",
"defaultValue": "",
"metadata": {
"description": "Existing subnet id to use."
}
}
},
"variables": {
Expand Down Expand Up @@ -86,6 +93,7 @@
"apiVersion": "2019-11-01",
"name": "[variables('vnetName')]",
"location": "[variables('location')]",
"condition": "[empty(parameters('existingSubnet'))]",
"properties": {
"addressSpace": {
"addressPrefixes": [
Expand Down
22 changes: 19 additions & 3 deletions sky/provision/azure/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ def bootstrap_instances(
'use_external_resource_group field')
use_external_resource_group = provider_config['use_external_resource_group']

# TODO: test this
subnet_id = provider_config.get('subnet_id', None)

if 'tags' in provider_config:
params['tags'] = provider_config['tags']

Expand Down Expand Up @@ -142,12 +145,13 @@ def bootstrap_instances(
cluster_id, nsg_name = get_cluster_id_and_nsg_name(
resource_group=provider_config['resource_group'],
cluster_name_on_cloud=cluster_name_on_cloud)

# subnet_mask is ignored if subnet_id (of existing subnet) is provided
subnet_mask = provider_config.get('subnet_mask')
if subnet_mask is None:
# choose a random subnet, skipping most common value of 0
random.seed(cluster_id)
subnet_mask = f'10.{random.randint(1, 254)}.0.0/16'
logger.info(f'Using subnet mask: {subnet_mask}')

parameters = {
'properties': {
Expand All @@ -165,11 +169,23 @@ def bootstrap_instances(
},
'location': {
'value': params['location']
}
},
'existingSubnet': {
'value': subnet_id
},
},
}
}

if subnet_id:
# add existingSubnet to paremeters if set
parameters['properties']['parameters']['existingSubnet'] = {
'value': subnet_id
}
else:
# informa about using new subnet mask
logger.info(f'Using mask for new subnet: {subnet_mask}')

# Skip creating or updating the deployment if the deployment already exists
# and the cluster name is the same.
get_deployment = get_azure_sdk_function(client=resource_client.deployments,
Expand Down Expand Up @@ -215,6 +231,6 @@ def bootstrap_instances(
# append output resource ids to be used with vm creation
provider_config['msi'] = outputs['msi']['value']
provider_config['nsg'] = outputs['nsg']['value']
provider_config['subnet'] = outputs['subnet']['value']
provider_config['subnet'] = outputs['subnet']['value'] if subnet_id == '' else subnet_id

return config
4 changes: 4 additions & 0 deletions sky/templates/azure-ray.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ provider:
# leakage.
disable_launch_config_check: true

{%- if subnet_id is not none %}
subnet_id: {{subnet_id}}
{%- endif %}
use_internal_ips: {{use_internal_ips}}

auth:
ssh_user: azureuser
Expand Down
6 changes: 6 additions & 0 deletions sky/utils/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,12 @@ def get_config_schema():
'resource_group_vm': {
'type': 'string',
},
'subnet_id': {
'type': 'string',
},
'use_internal_ips': {
'type': 'boolean',
},
}
},
'kubernetes': {
Expand Down
Loading