Skip to content

Commit

Permalink
new: usr: JUPY-567, SDK-394: Add SDK bindings for JupyterNotebookComm…
Browse files Browse the repository at this point in the history
…and (#304)

* changes

* changes

* changes

* add tests

* fix tests

* changes to make scheduler create from command line work

* Remove non ASCII characters

Co-Authored-By: Joy Lal Chattaraj <[email protected]>

* Remove redundant parenthesis

Co-Authored-By: Joy Lal Chattaraj <[email protected]>

* Update qds_sdk/commands.py

Co-Authored-By: Joy Lal Chattaraj <[email protected]>

* changes

* changes

* add support for macros

* add name option

* add support for tags

* add support for notify and timeout

* update tests

* add support for pool

* update tests

* add support for retry and retry_delay

* update tests

* add support for jupyter_notebook_id

* update tests

* replace notebook id with uuid

* remove support for uuid

* seperate validate json input method

* fix style

* some more style fixes

* changes

Co-authored-by: Gavara Tarun <[email protected]>
Co-authored-by: Joy Lal Chattaraj <[email protected]>
  • Loading branch information
3 people committed Mar 18, 2020
1 parent b44b2a8 commit 9495e28
Show file tree
Hide file tree
Showing 4 changed files with 340 additions and 3 deletions.
5 changes: 3 additions & 2 deletions bin/qds.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
"shellcmd": ShellCommand,
"dbexportcmd": DbExportCommand,
"dbimportcmd": DbImportCommand,
"prestocmd": PrestoCommand
"prestocmd": PrestoCommand,
"jupyternotebookcmd": JupyterNotebookCommand
}

SensorClasses = {
Expand All @@ -46,7 +47,7 @@
usage_str = (
"Usage: qds.py [options] <subcommand>\n"
"\nCommand subcommands:\n"
" <hivecmd|hadoopcmd|prestocmd|pigcmd|shellcmd|dbexportcmd|dbimportcmd|dbtapquerycmd|sparkcmd> <action>\n"
" <hivecmd|hadoopcmd|prestocmd|pigcmd|shellcmd|dbexportcmd|dbimportcmd|dbtapquerycmd|sparkcmd|jupyternotebookcmd> <action>\n"
" submit [cmd-specific-args .. ] : submit cmd & print id\n"
" run [cmd-specific-args .. ] : submit cmd & wait. print results\n"
" check <id> <include-query-properties> : id -> print the cmd object for this id\n"
Expand Down
86 changes: 86 additions & 0 deletions qds_sdk/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -1347,6 +1347,82 @@ def parse(cls, args):
v["command_type"] = "DbTapQueryCommand"
return v


class JupyterNotebookCommand(Command):
usage = "jupyternotebookcmd <submit|run> [options]"

optparser = GentleOptionParser(usage=usage)
optparser.add_option("--path", dest="path",
help="Path including name of the Jupyter notebook to \
be run with extension.")
optparser.add_option("--cluster-label", dest="label",
help="Label of the cluster on which the this command \
should be run. If this parameter is not specified \
then label = 'default' is used.")
optparser.add_option("--arguments", dest="arguments",
help="Valid JSON to be sent to the notebook. Specify \
the parameters in notebooks and pass the parameter value \
using the JSON format. key is the parameter's name and \
value is the parameter's value. Supported types in \
parameters are string, integer, float and boolean.")
optparser.add_option("--macros", dest="macros",
help="expressions to expand macros used in query")
optparser.add_option("--name", dest="name", help="Assign a name to this query")
optparser.add_option("--tags", dest="tags",
help="comma-separated list of tags to be associated with \
the query ( e.g. tag1 tag1,tag2 )")
optparser.add_option("--notify", action="store_true", dest="can_notify",
default=False, help="sends an email on command completion")
optparser.add_option("--timeout", dest="timeout", type="int",
help="Timeout for command execution in seconds")
optparser.add_option("--retry", dest="retry", choices=['1', '2', '3'],
help="Number of retries for a job")
optparser.add_option("--retry-delay", dest="retry_delay", type="int",
help="Time interval between the retries when a job fails.")
optparser.add_option("--pool", dest="pool",
help="Specify the Fairscheduler pool name for the \
command to use")
optparser.add_option("--print-logs", action="store_true", dest="print_logs",
default=False, help="Fetch logs and print them to stderr.")
optparser.add_option("--print-logs-live", action="store_true",
dest="print_logs_live", default=False, help="Fetch logs \
and print them to stderr while command is running.")

@classmethod
def parse(cls, args):
"""
Parse command line arguments to construct a dictionary of command
parameters that can be used to create a command
Args:
`args`: sequence of arguments
Returns:
Dictionary that can be used in create method
Raises:
ParseError: when the arguments are not correct
"""
try:
options, args = cls.optparser.parse_args(args)
if options.path is None:
raise ParseError("Notebook Path must be specified",
cls.optparser.format_help())
if options.arguments is not None:
validate_json_input(options.arguments, 'Arguments', cls)
if options.macros is not None:
options.macros = validate_json_input(options.macros, 'Macros', cls)
if options.retry is not None:
options.retry = int(options.retry)
except OptionParsingError as e:
raise ParseError(e.msg, cls.optparser.format_help())
except OptionParsingExit as e:
return None

params = vars(options)
params["command_type"] = "JupyterNotebookCommand"
return params

class SignalHandler:
"""
Catch terminate signals to allow graceful termination of run()
Expand All @@ -1367,6 +1443,16 @@ def handler(self, signum, frame):
if signum in self.term_signals:
self.received_term_signal = True


def validate_json_input(string, option_type, cls):
"""Converts String to JSON and throws ParseError if string is not valid JSON"""

try:
return json.loads(string)
except ValueError as e:
raise ParseError("Given %s is not valid JSON: %s" % (option_type, str(e)),
cls.optparser.format_help())

def _read_iteratively(key_instance, fp, delim):
key_instance.open_read()
while True:
Expand Down
2 changes: 1 addition & 1 deletion qds_sdk/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def filter_fields(schedule, fields):
def create(args):
with open(args.data) as f:
spec = json.load(f)
schedule = Scheduler(spec)
schedule = Scheduler.create(**spec)
return json.dumps(schedule.attributes, sort_keys=True, indent=4)

@staticmethod
Expand Down
250 changes: 250 additions & 0 deletions tests/test_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,13 @@ def test_dbtapquerycmd(self):
qds.main()
Connection._api_call.assert_called_with("GET", "commands/123", params={'include_query_properties': 'false'})

def test_jupyternotebookcmd(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'check', '123']
print_command()
Connection._api_call = Mock(return_value={})
qds.main()
Connection._api_call.assert_called_with("GET", "commands/123", params={'include_query_properties': 'false'})

def test_includequeryproperty(self):
sys.argv = ['qds.py', 'hivecmd', 'check', '123', 'true']
print_command()
Expand Down Expand Up @@ -224,6 +231,14 @@ def test_dbtapquerycmd(self):
Connection._api_call.assert_called_with("PUT", "commands/123",
{'status': 'kill'})

def test_jupyternotebookcmd(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'cancel', '123']
print_command()
Connection._api_call = Mock(return_value={'kill_succeeded': True})
qds.main()
Connection._api_call.assert_called_with("PUT", "commands/123",
{'status': 'kill'})


class TestCommandGetJobs(QdsCliTestCase):

Expand Down Expand Up @@ -2029,6 +2044,241 @@ def test_submit_with_valid_local_script_location(self):
'command_type': 'DbTapQueryCommand',
'can_notify': False})

class TestJupyterNotebookCommand(QdsCliTestCase):

def test_submit_none(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit']
print_command()
with self.assertRaises(qds_sdk.exception.ParseError):
qds.main()

def test_submit_no_path(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--cluster-label', 'demo-cluster']
print_command()
with self.assertRaises(qds_sdk.exception.ParseError):
qds.main()

def test_submit_improper_macros(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--macros', '{"key1"}']
print_command()
with self.assertRaises(qds_sdk.exception.ParseError):
qds.main()

def test_submit_improper_arguments(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--arguments', '{"key1"}']
print_command()
with self.assertRaises(qds_sdk.exception.ParseError):
qds.main()

def test_submit_retry_more_than_3(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--retry', '4']
print_command()
with self.assertRaises(qds_sdk.exception.ParseError):
qds.main()

def test_submit_cluster_label(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--cluster-label', 'demo-cluster']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': None,
'tags': None,
'label': 'demo-cluster',
'macros': None,
'arguments': None,
'timeout': None,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': None})

def test_submit_macros(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--macros', '[{"key1":"11","key2":"22"}, {"key3":"key1+key2"}]']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': None,
'tags': None,
'label': None,
'macros': [{"key1":"11","key2":"22"}, {"key3":"key1+key2"}],
'arguments': None,
'timeout': None,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': None})

def test_submit_arguments(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--arguments', '{"key1":"val1", "key2":"val2"}']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': None,
'tags': None,
'label': None,
'macros': None,
'arguments': '{"key1":"val1", "key2":"val2"}',
'timeout': None,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': None})

def test_submit_tags(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--tags', 'abc,def']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': None,
'tags': ['abc', 'def'],
'label': None,
'macros': None,
'arguments': None,
'timeout': None,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': None})

def test_submit_name(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--name', 'demo']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': 'demo',
'tags': None,
'label': None,
'macros': None,
'arguments': None,
'timeout': None,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': None})

def test_submit_notify(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--notify']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': None,
'tags': None,
'label': None,
'macros': None,
'arguments': None,
'timeout': None,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': True,
'pool': None})

def test_submit_timeout(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--timeout', '10']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': None,
'tags': None,
'label': None,
'macros': None,
'arguments': None,
'timeout': 10,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': None})

def test_submit_pool(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--pool', 'batch']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': None,
'tags': None,
'label': None,
'macros': None,
'arguments': None,
'timeout': None,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': 'batch'})

def test_submit_retry(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--retry', '1']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': 1,
'name': None,
'tags': None,
'label': None,
'macros': None,
'arguments': None,
'timeout': None,
'path': 'folder/file',
'retry_delay': None,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': None})

def test_submit_retry_delay(self):
sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
'--retry-delay', '2']
print_command()
Connection._api_call = Mock(return_value={'id': 1234})
qds.main()
Connection._api_call.assert_called_with('POST', 'commands',
{'retry': None,
'name': None,
'tags': None,
'label': None,
'macros': None,
'arguments': None,
'timeout': None,
'path': 'folder/file',
'retry_delay': 2,
'command_type': 'JupyterNotebookCommand',
'can_notify': False,
'pool': None})

class TestGetResultsCommand(QdsCliTestCase):

def test_result_with_enable_header_true(self):
Expand Down

0 comments on commit 9495e28

Please sign in to comment.