
Commit

fix: Fix anti-affinity rules not evaluating a new and different node correctly.

Fixes: #67
Fixes: #71
gyptazy committed Aug 31, 2024
1 parent 7ddb7ca commit 5ca172b
Showing 3 changed files with 66 additions and 31 deletions.
2 changes: 2 additions & 0 deletions .changelogs/1.0.3/67_fix_anti_affinity_rules.yml
@@ -0,0 +1,2 @@
fixed:
- Fix anti-affinity rules not evaluating a new and different node. [#67]
2 changes: 2 additions & 0 deletions .changelogs/1.0.3/71_fix_ignore_vm_node_handling_if_unset.yml
@@ -0,0 +1,2 @@
fixed:
- Fix handling of unset `ignore_nodes` and `ignore_vms` resulted in an attribute error. [#71]
93 changes: 62 additions & 31 deletions proxlb
@@ -246,8 +246,8 @@ def initialize_config_options(config_path):
proxlb_config['vm_balancing_type'] = config['vm_balancing'].get('type', 'vm')
proxlb_config['vm_balanciness'] = config['vm_balancing'].get('balanciness', 10)
proxlb_config['vm_parallel_migrations'] = config['vm_balancing'].get('parallel_migrations', 1)
proxlb_config['vm_ignore_nodes'] = config['vm_balancing'].get('ignore_nodes', None)
proxlb_config['vm_ignore_vms'] = config['vm_balancing'].get('ignore_vms', None)
proxlb_config['vm_ignore_nodes'] = config['vm_balancing'].get('ignore_nodes', '')
proxlb_config['vm_ignore_vms'] = config['vm_balancing'].get('ignore_vms', '')
# Storage Balancing
proxlb_config['storage_balancing_enable'] = config['storage_balancing'].get('enable', 0)
proxlb_config['storage_balancing_method'] = config['storage_balancing'].get('method', 'disk_space')
@@ -504,7 +504,7 @@ def get_vm_statistics(api_object, ignore_vms, balancing_type):
info_prefix = 'Info: [vm-statistics]:'
warn_prefix = 'Warn: [vm-statistics]:'
vm_statistics = {}
ignore_vms_list = ignore_vms.split(',')
group_include = None
group_exclude = None
vm_ignore = None
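
The changelog entry for #71 corresponds to the two hunks above: with `ignore_nodes` or `ignore_vms` left unset, the old default of `None` later reaches `ignore_vms.split(',')` and raises an AttributeError, while the new default of `''` splits cleanly. A minimal standalone sketch of the difference, using a plain dict as a stand-in for the parsed config section:

    # Stand-in for the parsed config; the user has not set ignore_vms.
    config = {'vm_balancing': {}}

    # Old default: None has no .split(), so building the ignore list fails (issue #71).
    ignore_vms = config['vm_balancing'].get('ignore_vms', None)
    # ignore_vms.split(',')  # AttributeError: 'NoneType' object has no attribute 'split'

    # New default: an empty string splits to [''], which simply never matches any VM name.
    ignore_vms = config['vm_balancing'].get('ignore_vms', '')
    print(ignore_vms.split(','))  # prints ['']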
@@ -751,7 +751,10 @@ def __get_vm_tags(api_object, node, vmid, balancing_type):
if balancing_type == 'ct':
vm_config = api_object.nodes(node['node']).lxc(vmid).config.get()

logging.info(f'{info_prefix} Got VM/CT tag from API.')
if vm_config.get("tags", None) is None:
logging.info(f'{info_prefix} Got no VM/CT tag for VM {vm_config.get("name", None)} from API.')
else:
logging.info(f'{info_prefix} Got VM/CT tag {vm_config.get("tags", None)} for VM {vm_config.get("name", None)} from API.')
return vm_config.get('tags', None)


@@ -769,8 +772,16 @@ def __get_proxlb_groups(vm_tags):
logging.info(f'{info_prefix} Got PLB include group.')
group_include = group

if group.startswith('plb_exclude_'):
if group.startswith('plb_affinity_'):
logging.info(f'{info_prefix} Got PLB include group.')
group_include = group

if group.startswith('plb_exclude_'):
logging.info(f'{info_prefix} Got PLB exclude group.')
group_exclude = group

if group.startswith('plb_antiaffinity_'):
logging.info(f'{info_prefix} Got PLB exclude group.')
group_exclude = group

if group.startswith('plb_ignore_vm'):
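
For context, these prefixes map a VM's Proxmox tags onto ProxLB groups: `plb_affinity_*` (like the older `plb_include_*`) yields an include group, `plb_antiaffinity_*` (like `plb_exclude_*`) an exclude group, and `plb_ignore_vm*` marks the VM as ignored. A hedged standalone sketch of that mapping, assuming the tags arrive as the semicolon-separated string Proxmox reports (the tag values are made-up examples):

    def classify_tags(tag_string):
        # Reduce a semicolon-separated tag string to (include_group, exclude_group, ignore_flag).
        group_include = group_exclude = vm_ignore = None
        for group in tag_string.split(';'):
            if group.startswith(('plb_include_', 'plb_affinity_')):
                group_include = group
            if group.startswith(('plb_exclude_', 'plb_antiaffinity_')):
                group_exclude = group
            if group.startswith('plb_ignore_vm'):
                vm_ignore = True
        return group_include, group_exclude, vm_ignore

    print(classify_tags('plb_antiaffinity_web;plb_affinity_db'))
    # ('plb_affinity_db', 'plb_antiaffinity_web', None)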
@@ -1005,43 +1016,63 @@ def __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_metho

def __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method, balancing_mode):
""" Get VMs tags for exclude groups. """
info_prefix = 'Info: [rebalancing-tags-group-exclude]:'
tags_exclude_vms = {}
processed_vm = []

# Create groups of tags with belongings hosts.
for vm_name, vm_values in vm_statistics.items():
if vm_values.get('group_include', None):
if not tags_exclude_vms.get(vm_values['group_include'], None):
tags_exclude_vms[vm_values['group_include']] = [vm_name]
if vm_values.get('group_exclude', None):
if not tags_exclude_vms.get(vm_values['group_exclude'], None):
tags_exclude_vms[vm_values['group_exclude']] = {}
tags_exclude_vms[vm_values['group_exclude']]['nodes_used'] = []
tags_exclude_vms[vm_values['group_exclude']]['nodes_used'].append(vm_statistics[vm_name]['node_rebalance'])
tags_exclude_vms[vm_values['group_exclude']]['vms'] = [vm_name]
else:
tags_exclude_vms[vm_values['group_include']] = tags_exclude_vms[vm_values['group_include']] + [vm_name]
tags_exclude_vms[vm_values['group_exclude']]['vms'] = tags_exclude_vms[vm_values['group_exclude']]['vms'] + [vm_name]
tags_exclude_vms[vm_values['group_exclude']]['nodes_used'].append(vm_statistics[vm_name]['node_rebalance'])

# Update the VMs to the corresponding node to their group assignments.
for group, vm_names in tags_exclude_vms.items():
# Do not take care of tags that have only a single host included.
if len(vm_names) < 2:
logging.info(f'{info_prefix} Only one host in group assignment.')
return node_statistics, vm_statistics
# Evaluate all VMs assigned for each exclude groups and validate that they will be moved to another random node.
# However, if there are still more VMs than exclude nodes we need to deal with it.
for exclude_group, group_values in tags_exclude_vms.items():
for vm in group_values['vms']:
if vm_statistics[vm]['node_rebalance'] in group_values['nodes_used']:

vm_node_rebalance = False
logging.info(f'{info_prefix} Create exclude groups of VM hosts.')
for vm_name in vm_names:
if vm_name not in processed_vm:
if not vm_node_rebalance:
random_node = vm_statistics[vm_name]['node_parent']
# Get a random node and make sure that it is not by accident the
# currently assigned one.
while random_node == vm_statistics[vm_name]['node_parent']:
random_node = random.choice(list(node_statistics.keys()))
else:
_mocked_vm_object = (vm_name, vm_statistics[vm_name])
node_statistics, vm_statistics = __update_vm_resource_statistics(_mocked_vm_object, [random_node], vm_statistics, node_statistics, balancing_method, balancing_mode)
processed_vm.append(vm_name)
logging.info(f'{info_prefix} Rebalancing of VM {vm} is needed due to anti-affinity group policy.')
counter = 0
proceed = True

while proceed:
random_node, counter = __get_random_node(counter, node_statistics)
if random_node not in group_values['nodes_used']:
group_values['nodes_used'].append(random_node)
proceed = False

if random_node:
logging.info(f'{info_prefix} Rebalancing VM {vm} to node {random_node} due to anti-affinity group policy.')
vm_statistics[vm_name]['node_rebalance'] = random_node

else:
logging.info(f'{info_prefix} No rebalancing for VM {vm} needed due to any anti-affinity group policies.')

return node_statistics, vm_statistics


def __get_random_node(counter, node_statistics):
""" Get a random node within the Proxmox cluster. """
warning_prefix = 'Warning: [random-node-getter]:'
info_prefix = 'Info: [random-node-getter]:'

counter = counter + 1
random_node = None
if counter < 30:
random_node = random.choice(list(node_statistics.keys()))
logging.info(f'{info_prefix} New random node {random_node} evaluated in run {counter}.')
return random_node, counter
else:
logging.warning(f'{warning_prefix} Reached limit for random node evaluation. Unable to find a suitable new node.')
return random_node, counter
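
The core of the #67 fix is the interplay of the retry loop in `__get_vm_tags_exclude_groups` with the new `__get_random_node` helper: keep drawing random cluster nodes, capped at 30 attempts, until one turns up that no other member of the anti-affinity group already occupies. A condensed standalone sketch of that interaction, with hypothetical node names and a simplified stand-in for the helper:

    import random

    def get_random_node(counter, node_statistics):
        # Simplified stand-in for __get_random_node: give up after 30 draws.
        counter += 1
        if counter < 30:
            return random.choice(list(node_statistics.keys())), counter
        return None, counter

    # Hypothetical cluster; the anti-affinity group already occupies node01 and node02.
    node_statistics = {'node01': {}, 'node02': {}, 'node03': {}}
    nodes_used = ['node01', 'node02']

    counter, random_node, proceed = 0, None, True
    while proceed:
        random_node, counter = get_random_node(counter, node_statistics)
        if random_node not in nodes_used:
            nodes_used.append(random_node)
            proceed = False

    if random_node:
        print(f'Rebalance to {random_node}')  # almost certainly node03
    else:
        print('No suitable new node found within the draw limit.')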


def __wait_job_finalized(api_object, node_name, job_id, counter):
""" Wait for a job to be finalized. """
error_prefix = 'Error: [job-status-getter]:'
