Skip to content

Commit

Permalink
Update spark app id naming convention and limit length (#125)
Browse files Browse the repository at this point in the history
* Update spark app id naming convention and limit length

* Double underscore and mod timestamp
  • Loading branch information
edingroot authored Aug 17, 2023
1 parent 457b5d1 commit ebaf3bf
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
9 changes: 7 additions & 2 deletions service_configuration_lib/spark_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1089,12 +1089,17 @@ def get_spark_conf(
# We want to make the app name more unique so that we can search it
# from history server.
app_name = f'{app_base_name}_{ui_port}_{int(time.time())}'
is_jupyter = _is_jupyterhub_job(app_name)

# Explicitly setting app id: replace special characters to '_' to make it consistent
# in all places for metric systems:
# - since in the Prometheus metrics endpoint those will be converted to '_'
# - while the 'spark-app-selector' executor pod label will keep the original app id
app_id = re.sub(r'[\.,-]', '_', app_name)
if is_jupyter:
raw_app_id = app_name
else:
raw_app_id = f'{paasta_service}__{paasta_instance}__{int(time.time()) % 10000}'
app_id = re.sub(r'[\.,-]', '_', _get_k8s_resource_name_limit_size_with_hash(raw_app_id))

spark_conf.update({
'spark.app.name': app_name,
Expand Down Expand Up @@ -1149,7 +1154,7 @@ def get_spark_conf(
self.update_spark_srv_configs(spark_conf)

# configure spark Console Progress
if _is_jupyterhub_job(spark_conf.get('spark.app.name', '')):
if is_jupyter:
spark_conf = _append_spark_config(spark_conf, 'spark.ui.showConsoleProgress', 'true')

spark_conf = _append_aws_credentials_conf(spark_conf, *aws_creds, aws_region)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

setup(
name='service-configuration-lib',
version='2.18.4',
version='2.18.5',
provides=['service_configuration_lib'],
description='Start, stop, and inspect Yelp SOA services',
url='https://github.com/Yelp/service_configuration_lib',
Expand Down
14 changes: 13 additions & 1 deletion tests/spark_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,7 +1143,19 @@ def verify(output):
# Test helper: builds and returns a `verify(output)` callback that checks the
# 'spark.app.id' entry of a generated Spark conf dict and returns the list of
# keys it covered.
def assert_app_id(self):
def verify(output):
key = 'spark.app.id'
# NOTE(review): the diff header for this file reports 13 additions and 1 deletion,
# so this strict-equality check looks like the removed (pre-change) line of the
# flattened diff rather than live code -- confirm against the repository.
assert output[key] == re.sub(r'[\.,-]', '_', output['spark.app.name'])
app_name = output['spark.app.name']
# A job is treated as a Jupyter job here when 'jupyterhub' occurs in its app name.
is_jupyter = 'jupyterhub' in app_name
paasta_service = output['spark.executorEnv.PAASTA_SERVICE']
paasta_instance = output['spark.executorEnv.PAASTA_INSTANCE']

# Pick the expected prefix of the app id before special characters are replaced.
if is_jupyter:
# Jupyter jobs: the prefix is the app name itself.
raw_app_id_prefix = app_name
else:
# Other jobs: the prefix is '<service>__<instance>__'; whatever follows it in
# the app id is not checked by this helper.
raw_app_id_prefix = f'{paasta_service}__{paasta_instance}__'
# Replace '.', ',' and '-' with '_' in the expected prefix before comparing.
app_id_prefix = re.sub(r'[\.,-]', '_', raw_app_id_prefix)
output_app_id = output[key]
assert output_app_id.startswith(app_id_prefix)
# Length cap on the app id; presumably the 63-character Kubernetes
# label/name limit -- TODO confirm.
assert len(output_app_id) <= 63
return [key]
return verify

Expand Down

0 comments on commit ebaf3bf

Please sign in to comment.